1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package de.smartics.tagcloud.collector;
17
18 import static java.io.StreamTokenizer.TT_EOF;
19 import static java.io.StreamTokenizer.TT_WORD;
20
21 import java.io.BufferedReader;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.InputStreamReader;
25 import java.io.Reader;
26 import java.io.StreamTokenizer;
27
28 import org.codehaus.plexus.util.IOUtil;
29
30 import de.smartics.tagcloud.TagCloud;
31
32
33
34
35
36
37
38 public final class TokenizerTagCollector implements TagCollector
39 {
40
41
42
43
44
45
46
47
48
49 private final String encoding;
50
51
52
53
54 private final TagCloud tagCloud;
55
56
57
58
59
60
61
62
63 public TokenizerTagCollector(final String encoding, final TagCloud tagCloud)
64 {
65 this.encoding = encoding;
66 this.tagCloud = tagCloud;
67 }
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84 @Override
85 public void collect(final InputStream input) throws IOException
86 {
87 final Reader reader =
88 new BufferedReader(new InputStreamReader(input, encoding));
89 try
90 {
91 final StreamTokenizer tokenizer = createTokenizer(reader);
92 tokenize(tokenizer);
93 }
94 finally
95 {
96 IOUtil.close(reader);
97 }
98 }
99
100 private static StreamTokenizer createTokenizer(final Reader reader)
101 {
102 final StreamTokenizer tokenizer = new StreamTokenizer(reader);
103 tokenizer.slashSlashComments(true);
104 tokenizer.slashStarComments(true);
105 return tokenizer;
106 }
107
108 private void tokenize(final StreamTokenizer tokenizer) throws IOException
109 {
110 boolean parsingStringLiteral = false;
111 boolean parsingCharacterLiteral = false;
112 boolean parsingAnnotationLiteral = false;
113 for (int token = tokenizer.nextToken(); token != TT_EOF; token =
114 tokenizer.nextToken())
115 {
116 switch (token)
117 {
118 case TT_WORD:
119 if (!(parsingStringLiteral || parsingCharacterLiteral))
120 {
121 if (!parsingAnnotationLiteral)
122 {
123 addToken(tokenizer.sval);
124 }
125 else
126 {
127 parsingAnnotationLiteral = false;
128 }
129 }
130 break;
131 case '"':
132 if (!parsingCharacterLiteral)
133 {
134 parsingStringLiteral = !parsingStringLiteral;
135 }
136 break;
137 case '\'':
138 if (!parsingStringLiteral)
139 {
140 parsingCharacterLiteral = !parsingCharacterLiteral;
141 }
142 break;
143 case '@':
144 parsingAnnotationLiteral = true;
145 break;
146 default:
147 break;
148 }
149 }
150 }
151
152 private void addToken(final String token)
153 {
154 if (hasMoreThanOneChar(token) && doesNotContainDot(token))
155 {
156 tagCloud.addTag(token);
157 }
158 }
159
160 private static boolean doesNotContainDot(final String token)
161 {
162 return token.indexOf('.') == -1;
163 }
164
165 private static boolean hasMoreThanOneChar(final String token)
166 {
167 return token.length() > 1;
168 }
169
170
171
172
173
174
175 @Override
176 public TagCloud getTagCloud()
177 {
178 return tagCloud;
179 }
180
181
182
183
184
185
186 @Override
187 public void clear()
188 {
189 tagCloud.clear();
190 }
191
192
193
194 }