1 /* Copyright (c) 2008 Sascha Kohlmann
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU Affero General Public License as published by
5 * the Free Software Foundation, either version 3 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Affero General Public License for more details.
12 *
13 * You should have received a copy of the GNU Affero General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16 package net.sf.eos.entity;
17
18 import net.sf.eos.analyzer.TextBuilder;
19
20 import java.util.Map;
21 import java.util.Set;
22
23 /**
24 * The {@code DictionaryBasedEntityRecognizer} uses a {@link Map} to
25 * recognize entities in a text. An entity is identified thru the
26 * {@link net.sf.eos.analyzer.Token#getType()} {@link #ENTITY_TYPE}. The ID
27 * coming in the map is stored in the meta data with the key
28 * {@link #ENTITY_ID_KEY}.
29 * @author Sascha Kohlmann
30 */
31 public interface DictionaryBasedEntityRecognizer extends EntityRecognizer {
32
33 /** ID meta key. */
34 @SuppressWarnings("nls")
35 static String ENTITY_ID_KEY =
36 DictionaryBasedEntityRecognizer.class.getName() + "." + ENTITY_TYPE;
37
38 // /** Configuration key for the maximum token. The value of the key must
39 // * follow the rules of an Java integer. */
40 // @SuppressWarnings("nls")
41 // static String MAX_TOKEN_CONFIG_NAME =
42 // "net.sf.eos.entity.DictionaryBasedEntityRecognizer.maxToken";
43
44 /**
45 * Set the entity map.
46 * @param entities the entity map
47 * @see net.sf.eos.trie.Trie
48 */
49 void setEntityMap(final Map<CharSequence, Set<CharSequence>> entities);
50
51 /**
52 * Return the entity map.
53 * @return the entity map. May be {@code null}
54 */
55 Map<CharSequence, Set<CharSequence>> getEntityMap();
56
57 /**
58 * Sets a builder. The implementation has default builder of instance
59 * {@link TextBuilder#SPACE_BUILDER} setted at construction time.
60 * @param builder a builder to set or {@code null}
61 */
62 void setTextBuilder(final TextBuilder builder);
63
64 /**
65 * Returns a setted builder.
66 * @return a setted builder or {@code null}.
67 */
68 TextBuilder getTextBuilder();
69
70 /**
71 * @return the maxToken
72 */
73 int getMaxToken();
74
75 /**
76 * @param maxToken the maxToken to set
77 * @throws IllegalArgumentException if and only if
78 * <em>{@literal token > 1}</em>
79 */
80 void setMaxToken(final int maxToken);
81 }