1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package net.sf.eos.hadoop.mapred.decompose;
17
18 import net.sf.eos.EosException;
19 import net.sf.eos.config.Configuration;
20 import net.sf.eos.document.EosDocument;
21 import net.sf.eos.hadoop.mapred.AbstractKeyGenerator;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.io.Text;
26
27 import java.util.HashMap;
28 import java.util.List;
29 import java.util.Map;
30
31 public class TextMetaKeyGenerator extends AbstractKeyGenerator<Text> {
32
33
34 private static final Log LOG =
35 LogFactory.getLog(TextMetaKeyGenerator.class.getName());
36
37
38
39 @SuppressWarnings("nls")
40 public static final String META_FIELD_FOR_KEY_CONFIG_NAME =
41 "net.sf.eos.hadoop.mapred.sentencer.TextMetadataKeyGenerator.metaKey";
42
43 @SuppressWarnings("nls")
44 public Map<Text, EosDocument> createKeysForDocument(final EosDocument doc)
45 throws EosException {
46
47 final Configuration conf = getConfiguration();
48 assert conf != null;
49 final Map<Text, EosDocument> retval =
50 new HashMap<Text, EosDocument>();
51 final String metaKey = conf.get(META_FIELD_FOR_KEY_CONFIG_NAME);
52
53 if (LOG.isDebugEnabled()) {
54 LOG.debug("metaKey: " + metaKey);
55 }
56
57 if (metaKey == null || metaKey.length() == 0) {
58 final Text t = new Text("");
59 retval.put(t, doc);
60 } else {
61 final Map<String, List<String>> meta = doc.getMeta();
62 final List<String> metadata = meta.get(metaKey);
63 for (final String s : metadata) {
64 final Text t = new Text(s);
65 retval.put(t, doc);
66 }
67 }
68
69 return retval;
70 }
71
72 @Override
73 public void configure(
74 @SuppressWarnings("hiding") final Configuration config) {
75 LOG.info(config);
76 super.configure(config);
77 }
78 }