View Javadoc

1   /* Copyright (c) 2008 Sascha Kohlmann
2    *
3    * This program is free software: you can redistribute it and/or modify
4    * it under the terms of the GNU Affero General Public License as published by
5    * the Free Software Foundation, either version 3 of the License, or
6    * (at your option) any later version.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Affero General Public License for more details.
12   *
13   * You should have received a copy of the GNU Affero General Public License
14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15   */
16  package net.sf.eos.analyzer;
17  
18  
19  import org.apache.commons.logging.Log;
20  import org.apache.commons.logging.LogFactory;
21  
22  import net.sf.eos.EosException;
23  import net.sf.eos.config.Configuration;
24  import net.sf.eos.config.ConfigurationKey;
25  import static net.sf.eos.config.ConfigurationKey.Type.CLASSNAME;
26  import net.sf.eos.config.Configured;
27  import net.sf.eos.config.FactoryMethod;
28  
29  import java.util.Arrays;
30  import java.util.List;
31  
32  /**
33   * Implementation creates new text sequences from {@link Token}- or
34   * {@link CharSequence}-lists. Use {@link #newInstance(Configuration)} to
35   * create a new instance.
36   * @author Sascha Kohlmann
37   */
38  public abstract class TextBuilder extends Configured {
39  
40      /** For logging. */
41      private static final Log LOG = LogFactory.getLog(TextBuilder.class);
42  
43      /** The configuration key name for the classname of the builder.
44       * @see #newInstance(Configuration) */
45      @SuppressWarnings("nls")
46      @ConfigurationKey(type=CLASSNAME,
47                              description="Instances are used to create a new text "
48                                          + "thru Token concationation.")
49      public final static String TEXT_BUILDER_IMPL_CONFIG_NAME =
50          "net.sf.eos.analyzer.TextBuilder.impl";
51  
52      /**
53       * Simple implementation concats all texts from the tokens delimited
54       * by space (ASCII <tt>0x20</tt>).
55       */
56      @SuppressWarnings("nls")
57      public static final TextBuilder SPACE_BUILDER = new SpaceBuilder();
58  
59      /**
60       * Simple implementation concats all texts from the tokens delimited
61       * by space (ASCII <tt>0x20</tt>).
62       */
63      public static final class SpaceBuilder extends TextBuilder {
64          @SuppressWarnings("nls")
65          public final static String SPACE = new String(new char[] {0x20});
66          @Override
67          public CharSequence buildText(final List<Token> tokens) {
68              final StringBuilder sb = new StringBuilder();
69              for (final Token token : tokens) {
70                  final CharSequence text = token.getTokenText();
71                  sb.append(text);
72                  sb.append(SPACE);
73              }
74              return sb.length() > 0 ? sb.subSequence(0, sb.length() - 1) : "";
75          }
76          @Override
77          public CharSequence buildText(final Token... tokens) {
78              return buildText(Arrays.asList(tokens));
79          }
80          @Override
81          public CharSequence buildText(final CharSequence... seq) {
82              final StringBuilder sb = new StringBuilder();
83              for (final CharSequence cs : seq) {
84                  sb.append(cs);
85                  sb.append(SPACE);
86              }
87              return sb.length() > 0 ? sb.subSequence(0, sb.length() - 1) : "";
88          }
89      };
90  
91      /**
92       * Creates a new instance of a of the builder. If the
93       * {@code Configuration} contains a key
94       * {@link #TEXT_BUILDER_IMPL_CONFIG_NAME} a new instance of the
95       * classname of the value will instantiate. The
96       * {@link #SPACE_BUILDER} will instantiate if there is no
97       * value setted.
98       * @param config the configuration
99       * @return a new instance
100      * @throws EosException if it is not possible to instantiate an instance
101      */
102     @SuppressWarnings("nls")
103     @FactoryMethod(key=TEXT_BUILDER_IMPL_CONFIG_NAME,
104                    implementation=SpaceBuilder.class)
105     public final static TextBuilder newInstance(final Configuration config)
106             throws EosException {
107 
108         final Thread t = Thread.currentThread();
109         ClassLoader classLoader = t.getContextClassLoader();
110         if (classLoader == null) {
111             classLoader = TextBuilder.class.getClassLoader();
112         }
113 
114         final String clazzName = config.get(TEXT_BUILDER_IMPL_CONFIG_NAME,
115                                             SPACE_BUILDER.getClass().getName());
116         if (clazzName.equals(SPACE_BUILDER.getClass().getName())) {
117             return SPACE_BUILDER;
118         }
119 
120         try {
121             final Class<? extends TextBuilder> clazz = 
122                 (Class<? extends TextBuilder>) 
123                     Class.forName(clazzName, true, classLoader);
124             try {
125 
126                 final TextBuilder textBuilder = clazz.newInstance();
127                 textBuilder.configure(config);
128                 if (LOG.isDebugEnabled()) {
129                     LOG.debug("TextBuilder instance: "
130                                + textBuilder.getClass().getName());
131                 }
132                 return textBuilder;
133 
134             } catch (final InstantiationException e) {
135                 throw new TokenizerException(e);
136             } catch (final IllegalAccessException e) {
137                 throw new TokenizerException(e);
138             }
139         } catch (final ClassNotFoundException e) {
140             throw new TokenizerException(e);
141         }
142     }
143 
144     /**
145      * Creates a new text from the given token.
146      * @param tokens a list of token. If <em>tokens</em> is {@code null}
147      *               an exception will raise.
148      * @return a new text, never {@code null}
149      */
150     public abstract CharSequence buildText(final List<Token> tokens);
151 
152     /**
153      * Creates a new text from the given token.
154      * @param tokens a list of token If <em>tokens</em> is {@code null}
155      *               an exception will raise.
156      * @return a new text, never {@code null}
157      */
158     public abstract CharSequence buildText(final Token... tokens);
159 
160     /**
161      * Creates a new text from the given {@code CharSequence}.
162      * @param seq a list of {@code CharSequence} If <em>tokens</em> is
163      *            {@code null} an exception will raise.
164      * @return a new text, never {@code null}
165      */
166     public abstract CharSequence buildText(final CharSequence... seq);
167 }