View Javadoc

1   /* Copyright (c) 2008 Sascha Kohlmann
2    *
3    * This program is free software: you can redistribute it and/or modify
4    * it under the terms of the GNU Affero General Public License as published by
5    * the Free Software Foundation, either version 3 of the License, or
6    * (at your option) any later version.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Affero General Public License for more details.
12   *
13   * You should have received a copy of the GNU Affero General Public License
14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15   */
16  package net.sf.eos.analyzer.lucene;
17  
18  import net.sf.eos.analyzer.AbstractToken;
19  import net.sf.eos.analyzer.Token;
20  import net.sf.eos.analyzer.TokenFilter;
21  import net.sf.eos.analyzer.Tokenizer;
22  import net.sf.eos.analyzer.TokenizerException;
23  
24  import java.io.IOException;
25  
26  /**
27   * Wraps the functionality of the &#949;&#959;s {@link Tokenizer} for the
28   * reuse of Lucene tokenizer.
29   * @author Sascha Kohlmann
30   */
31  public final class LuceneTokenizerWrapper extends TokenFilter {
32  
33      private final org.apache.lucene.analysis.Tokenizer delagate;
34      private org.apache.lucene.analysis.Token luceneToken;
35  
36      /** Creates a new wrapper.
37       * @param tokenizer a Lucene tokenizer to reuse in the &#949;&#959;s
38       *                  environment
39       */
40      public LuceneTokenizerWrapper(final org.apache.lucene.analysis.Tokenizer tokenizer) {
41          this(new NullTokenizer(), tokenizer);
42      }
43  
44      /**
45       * Creates a new wrapper.
46       * @param source a &#949;&#959;s tokenizer
47       * @param tokenizer a Lucene tokenizer to reuse in the &#949;&#959;s
48       *                  environment
49       */
50      public LuceneTokenizerWrapper(
51              final Tokenizer source,
52              final org.apache.lucene.analysis.Tokenizer tokenizer) {
53          super(source);
54          this.delagate = tokenizer;
55      }
56  
57      /*
58       * @see net.sf.eos.analyzer.TokenFilter#next()
59       */
60      @Override
61      public Token next() throws TokenizerException {
62          if (this.luceneToken == null) {
63              nextLuceneToken();
64              if (this.luceneToken != null) {
65                  final org.apache.lucene.analysis.Token retval =
66                      (org.apache.lucene.analysis.Token) this.luceneToken.clone();
67                   return new LuceneTokenWrapper(retval);
68                  
69              }
70          }
71  
72          if (this.luceneToken == null) {
73              return null;
74          }
75  
76          try {
77              this.luceneToken = this.delagate.next(this.luceneToken);
78              if (this.luceneToken == null) {
79                  nextLuceneToken();
80                  if (this.luceneToken != null) {
81                      final org.apache.lucene.analysis.Token retval =
82                          (org.apache.lucene.analysis.Token) this.luceneToken.clone();
83                      return new LuceneTokenWrapper(retval);
84                  }
85                  return null;
86              }
87          } catch (final IOException e) {
88              throw new TokenizerException(e);
89          }
90  
91          final org.apache.lucene.analysis.Token retval =
92             (org.apache.lucene.analysis.Token) this.luceneToken.clone();
93          return new LuceneTokenWrapper(retval);
94      }
95  
96      final void nextLuceneToken() throws TokenizerException {
97          final Tokenizer tokenizer = getSource();
98          if (tokenizer.getClass() == NullTokenizer.class) {
99              try {
100                 this.luceneToken = this.delagate.next();
101             } catch (final IOException e) {
102                 throw new TokenizerException(e);
103             }
104         } else {
105             final Token token = tokenizer.next();
106             if (token == null) {
107                 return;
108             }
109             final CharSequence seq = token.getTokenText();
110             this.luceneToken = new org.apache.lucene.analysis.Token("" + seq,
111                     0, seq.length());
112         }
113     }
114 
115     private final static class LuceneTokenWrapper extends AbstractToken {
116 
117         private final org.apache.lucene.analysis.Token token;
118         private final String type;
119 
120         public LuceneTokenWrapper(final org.apache.lucene.analysis.Token token)
121         {
122             super("");
123             this.token = token;
124             this.type = token.type();
125         }
126 
127         @Override
128         public CharSequence getTokenText() {
129             final char[] termBuffer = this.token.termBuffer();
130             final int termLength = this.token.termLength();
131             return new String(termBuffer, 0, termLength);
132         }
133 
134         @Override
135         public String getType() {
136             return this.type;
137         }
138     }
139 }