View Javadoc

1   /* Copyright (c) 2008 Sascha Kohlmann
2    *
3    * This program is free software: you can redistribute it and/or modify
4    * it under the terms of the GNU Affero General Public License as published by
5    * the Free Software Foundation, either version 3 of the License, or
6    * (at your option) any later version.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Affero General Public License for more details.
12   *
13   * You should have received a copy of the GNU Affero General Public License
14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15   */
16  package net.sf.eos.entity;
17  
18  import net.sf.eos.analyzer.TextBuilder;
19  
20  import java.util.Map;
21  import java.util.Set;
22  
23  /**
24   * The {@code DictionaryBasedEntityRecognizer} uses a {@link Map} to
25   * recognize entities in a text. An entity is identified through the
26   * {@link net.sf.eos.analyzer.Token#getType()} {@link #ENTITY_TYPE}. The ID
27   * taken from the map is stored in the meta data under the key
28   * {@link #ENTITY_ID_KEY}.
29   * @author Sascha Kohlmann
30   */
31  public interface DictionaryBasedEntityRecognizer extends EntityRecognizer {
32  
33      /** Meta data key for the entity ID: this interface's class name, a dot, then ENTITY_TYPE. */
34      @SuppressWarnings("nls")
35      static String ENTITY_ID_KEY = 
36          DictionaryBasedEntityRecognizer.class.getName() + "." + ENTITY_TYPE;
37  
38  //    /** Configuration key for the maximum token. The value of the key must
39  //     * follow the rules of a Java integer. */
40  //    @SuppressWarnings("nls")
41  //    static String MAX_TOKEN_CONFIG_NAME = 
42  //        "net.sf.eos.entity.DictionaryBasedEntityRecognizer.maxToken";
43  
44      /**
45       * Sets the entity map that is used to recognize entities.
46       * @param entities the entity map to use
47       * @see net.sf.eos.trie.Trie
48       */
49      void setEntityMap(final Map<CharSequence, Set<CharSequence>> entities);
50  
51      /**
52       * Returns the entity map.
53       * @return the entity map. May be {@code null}
54       */
55      Map<CharSequence, Set<CharSequence>> getEntityMap();
56  
57      /**
58       * Sets the text builder. An implementation starts with the default
59       * builder {@link TextBuilder#SPACE_BUILDER}, set at construction time.
60       * @param builder a builder to set or {@code null}
61       */
62      void setTextBuilder(final TextBuilder builder);
63  
64      /**
65       * Returns the builder that is currently set.
66       * @return the builder that is currently set, or {@code null}.
67       */
68      TextBuilder getTextBuilder();
69  
70      /**
71       * @return the current {@code maxToken} setting (see {@link #setMaxToken(int)})
72       */
73      int getMaxToken();
74  
75      /**
76       * @param maxToken the new {@code maxToken} setting
77       * @throws IllegalArgumentException if and only if
78       *         <em>{@literal token > 1}</em> (NOTE(review): likely meant {@literal maxToken < 1}; confirm)
79       */
80      void setMaxToken(final int maxToken);
81  }