View Javadoc

1   /* Copyright (c) 2008 Sascha Kohlmann
2    *
3    * This program is free software: you can redistribute it and/or modify
4    * it under the terms of the GNU Affero General Public License as published by
5    * the Free Software Foundation, either version 3 of the License, or
6    * (at your option) any later version.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Affero General Public License for more details.
12   *
13   * You should have received a copy of the GNU Affero General Public License
14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15   */
16  package net.sf.eos.hadoop.mapred.decompose;
17  
18  import net.sf.eos.EosException;
19  import net.sf.eos.config.Configuration;
20  import net.sf.eos.document.EosDocument;
21  import net.sf.eos.hadoop.mapred.AbstractKeyGenerator;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.io.Text;
26  
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  
31  public class TextMetaKeyGenerator extends AbstractKeyGenerator<Text> {
32  
33      /** For logging. */
34      private static final Log LOG =
35          LogFactory.getLog(TextMetaKeyGenerator.class.getName());
36  
37      /** The meta field for separation.
38       * <p>Default value is {@link EosDocument#YEAR_META_KEY}.</p> */
39      @SuppressWarnings("nls")
40      public static final String META_FIELD_FOR_KEY_CONFIG_NAME =
41          "net.sf.eos.hadoop.mapred.sentencer.TextMetadataKeyGenerator.metaKey";
42  
43      @SuppressWarnings("nls")
44      public Map<Text, EosDocument> createKeysForDocument(final EosDocument doc)
45              throws EosException {
46  
47          final Configuration conf = getConfiguration();
48          assert conf != null;
49          final Map<Text, EosDocument> retval =
50              new HashMap<Text, EosDocument>();
51          final String metaKey = conf.get(META_FIELD_FOR_KEY_CONFIG_NAME);
52  
53          if (LOG.isDebugEnabled()) {
54              LOG.debug("metaKey: " + metaKey);
55          }
56  
57          if (metaKey == null || metaKey.length() == 0) {
58              final Text t = new Text("");
59              retval.put(t, doc);
60          } else {
61              final Map<String, List<String>> meta = doc.getMeta();
62              final List<String> metadata = meta.get(metaKey);
63              for (final String s : metadata) {
64                  final Text t = new Text(s);
65                  retval.put(t, doc);
66              }
67          }
68  
69          return retval;
70      }
71  
72      @Override
73      public void configure(
74              @SuppressWarnings("hiding") final Configuration config) {
75          LOG.info(config);
76          super.configure(config);
77      }
78  }