1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package net.sf.eos.search;
17
18 import java.util.ArrayList;
19 import java.util.List;
20 import java.util.StringTokenizer;
21
22 import net.sf.eos.Function;
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/**
 * Splits a raw user query string into a list of search terms.
 *
 * <p>Text outside double quotes is split into single terms at whitespace-like
 * characters. Text enclosed in double quotes is kept together as one phrase
 * entry, with inner runs of whitespace collapsed to a single space. Empty
 * phrases (<code>""</code> or quotes containing only whitespace) contribute
 * nothing to the result.</p>
 *
 * <p>Example: <code>foo "bar   baz" qux</code> yields
 * <code>[foo, bar baz, qux]</code>.</p>
 */
public class UserQueryParser implements Function<String, List<String> > {

    /** The character that opens and closes a phrase. */
    private static final String QUOTE = "\"";

    /** Line-break characters dropped from the query before parsing. */
    private static final String LINE_BREAKS = "\n\r\f";

    /**
     * Characters separating simple terms: space, tab, the information
     * separators U+001C to U+001F, no-break space (U+00A0), figure space
     * (U+2007) and narrow no-break space (U+202F).
     */
    private static final String TERM_DELIMITERS =
        " \t\u001c\u001d\u001e\u001f\u00a0\u2007\u202f";

    /**
     * Parses the query into simple terms and quoted phrases, in the order
     * in which they appear.
     *
     * <p>Line breaks are removed first (see
     * {@link #removeNewLines(String)}). An unterminated quote treats the
     * remainder of the query as a phrase.</p>
     *
     * @param query the raw query, may be <code>null</code>
     * @return a new mutable list of terms and phrases; empty if
     *         <code>query</code> is <code>null</code> or contains no terms
     */
    @SuppressWarnings("nls")
    final List<String> parseUserQuery(final String query) {

        if (query == null) {
            return new ArrayList<String>(0);
        }

        final String adjusted = removeNewLines(query);
        if (adjusted.indexOf('"') < 0) {
            return splitSimpleTerms(adjusted);
        }

        final List<String> retval = new ArrayList<String>();
        boolean inQuote = false;
        // The quote characters are returned as tokens of their own so that
        // the quoted/unquoted state flips on every real quote. Toggling per
        // text token instead would lose sync on empty phrases like "",
        // because StringTokenizer never returns empty tokens.
        final StringTokenizer st = new StringTokenizer(adjusted, QUOTE, true);
        while (st.hasMoreTokens()) {
            final String part = st.nextToken();
            if (QUOTE.equals(part)) {
                inQuote = !inQuote;
            } else if (inQuote) {
                final String phrase = adjustWhiteSpace(part);
                if (phrase.length() != 0) {
                    retval.add(phrase);
                }
            } else {
                retval.addAll(splitSimpleTerms(part));
            }
        }

        return retval;
    }

    /**
     * Collapses every run of whitespace into a single space and strips
     * leading and trailing whitespace.
     *
     * <p>Whitespace here means the default {@link StringTokenizer}
     * delimiters: space, tab, newline, carriage return and form feed.</p>
     *
     * @param s the text to normalize, must not be <code>null</code>
     * @return the normalized text, possibly empty
     */
    @SuppressWarnings("nls")
    static final String adjustWhiteSpace(final String s) {
        final StringBuilder sb = new StringBuilder();
        final StringTokenizer st = new StringTokenizer(s);
        while (st.hasMoreTokens()) {
            if (sb.length() != 0) {
                sb.append(' ');
            }
            sb.append(st.nextToken());
        }
        return sb.toString();
    }

    /**
     * Deletes all newline, carriage-return and form-feed characters.
     *
     * <p>NOTE(review): the characters are dropped without inserting a
     * replacement, so text on adjacent lines is joined
     * (<code>"a\nb"</code> becomes <code>"ab"</code>). Confirm that this is
     * intended for multi-line queries.</p>
     *
     * @param s the text to process, must not be <code>null</code>
     * @return the text without line-break characters
     */
    @SuppressWarnings("nls")
    static final String removeNewLines(final String s) {
        assert s != null;
        final StringBuilder sb = new StringBuilder(s.length());
        final StringTokenizer st = new StringTokenizer(s, LINE_BREAKS);
        while (st.hasMoreTokens()) {
            sb.append(st.nextToken());
        }
        return sb.toString();
    }

    /**
     * Splits unquoted text into single terms at space, tab, the
     * information separators U+001C to U+001F and the no-break space
     * variants U+00A0, U+2007 and U+202F.
     *
     * @param terms the text to split, may be <code>null</code>
     * @return a new mutable list of the non-empty terms; empty if
     *         <code>terms</code> is <code>null</code> or holds only
     *         delimiters
     */
    @SuppressWarnings("nls")
    static final List<String> splitSimpleTerms(final String terms) {
        final List<String> retval = new ArrayList<String>();
        if (terms == null) {
            return retval;
        }
        final StringTokenizer st = new StringTokenizer(terms, TERM_DELIMITERS);
        while (st.hasMoreTokens()) {
            retval.add(st.nextToken());
        }
        return retval;
    }

    /**
     * Parses the given user query; delegates to
     * {@link #parseUserQuery(String)}.
     *
     * @param query the raw query, may be <code>null</code>
     * @return the list of terms and phrases, never <code>null</code>
     */
    public List<String> apply(final String query) {
        return parseUserQuery(query);
    }
}