1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package net.sf.eos.sentence;
17
18 import static net.sf.eos.config.ConfigurationKey.Type.CLASSNAME;
19
20 import org.apache.commons.logging.Log;
21 import org.apache.commons.logging.LogFactory;
22
23 import net.sf.eos.EosException;
24 import net.sf.eos.analyzer.ResettableTokenizer;
25 import net.sf.eos.analyzer.SentenceTokenizer;
26 import net.sf.eos.analyzer.TextBuilder;
27 import net.sf.eos.analyzer.TokenizerException;
28 import net.sf.eos.config.Configuration;
29 import net.sf.eos.config.ConfigurationKey;
30 import net.sf.eos.config.Configured;
31 import net.sf.eos.config.FactoryMethod;
32 import net.sf.eos.document.EosDocument;
33
34 import java.security.MessageDigest;
35 import java.security.NoSuchAlgorithmException;
36 import java.util.Map;
37
38
39
40
41
42
43
44
45 public abstract class Sentencer extends Configured {
46
47
48 private static final Log LOG =
49 LogFactory.getLog(Sentencer.class.getName());
50
51
52 @SuppressWarnings("nls")
53 public static final String DEFAULT_MESSAGE_DIGEST = "md5";
54
55
56 @SuppressWarnings("nls")
57 @ConfigurationKey(type=CLASSNAME,
58 defaultValue=DEFAULT_MESSAGE_DIGEST,
59 description="The message digest.")
60 public static final String MESSAGE_DIGEST_CONFIG_NAME =
61 "net.sf.eos.sentence.Sentencer.messageDigest";
62
63
64
65 @SuppressWarnings("nls")
66 @ConfigurationKey(type=CLASSNAME,
67 description="Configuration key of the sentencer.")
68 public final static String SENTENCER_IMPL_CONFIG_NAME =
69 "net.sf.eos.sentence.Sentencer.impl";
70
71
72
73
74
75
76
77
78
79
80
81
82 @SuppressWarnings("nls")
83 @FactoryMethod(key=SENTENCER_IMPL_CONFIG_NAME,
84 implementation=DefaultSentencer.class)
85 public final static Sentencer newInstance(final Configuration config)
86 throws EosException {
87
88 final Thread t = Thread.currentThread();
89 ClassLoader classLoader = t.getContextClassLoader();
90 if (classLoader == null) {
91 classLoader = Sentencer.class.getClassLoader();
92 }
93
94 final String clazzName = config.get(SENTENCER_IMPL_CONFIG_NAME,
95 DefaultSentencer.class.getName());
96
97 try {
98 final Class<? extends Sentencer> clazz =
99 (Class<? extends Sentencer>)
100 Class.forName(clazzName, true, classLoader);
101 try {
102
103 final Sentencer sentencer = clazz.newInstance();
104 sentencer.configure(config);
105 if (LOG.isDebugEnabled()) {
106 LOG.debug("Sentencer instance: "
107 + sentencer.getClass().getName());
108 }
109 return sentencer;
110
111 } catch (final InstantiationException e) {
112 throw new TokenizerException(e);
113 } catch (final IllegalAccessException e) {
114 throw new TokenizerException(e);
115 }
116 } catch (final ClassNotFoundException e) {
117 throw new TokenizerException(e);
118 }
119 }
120
121
122
123
124 protected Sentencer() {
125 super();
126 }
127
128
129
130
131
132
133
134
135
136
137 protected MessageDigest createDigester() throws EosException {
138 try {
139 final Configuration config = getConfiguration();
140 String algorithm = DEFAULT_MESSAGE_DIGEST;
141 if (config != null) {
142 algorithm = config.get(MESSAGE_DIGEST_CONFIG_NAME,
143 DEFAULT_MESSAGE_DIGEST);
144 }
145 MessageDigest md;
146 md = MessageDigest.getInstance(algorithm);
147 return md;
148 } catch (final NoSuchAlgorithmException e) {
149 throw new EosException(e);
150 }
151 }
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168 public abstract Map<String, EosDocument>
169 toSentenceDocuments(final EosDocument doc,
170 final SentenceTokenizer sentencer,
171 final ResettableTokenizer tokenizer,
172 final TextBuilder builder)
173 throws EosException;
174 }