View Javadoc

1   /*
2    * Copyright 2011-2013 smartics, Kronseder & Reiner GmbH
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package de.smartics.tagcloud.collector;
17  
18  import static java.io.StreamTokenizer.TT_EOF;
19  import static java.io.StreamTokenizer.TT_WORD;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.InputStreamReader;
25  import java.io.Reader;
26  import java.io.StreamTokenizer;
27  
28  import org.codehaus.plexus.util.IOUtil;
29  
30  import de.smartics.tagcloud.TagCloud;
31  
32  /**
33   * Collects words by using a tokenizer.
34   *
35   * @author <a href="mailto:robert.reiner@smartics.de">Robert Reiner</a>
36   * @version $Revision:591 $
37   */
38  public final class TokenizerTagCollector implements TagCollector // NOPMD
39  {
40    // ********************************* Fields *********************************
41  
42    // --- constants ------------------------------------------------------------
43  
44    // --- members --------------------------------------------------------------
45  
46    /**
47     * The encoding to use to read from the input stream.
48     */
49    private final String encoding;
50  
51    /**
52     * The tag cloud to collect for.
53     */
54    private final TagCloud tagCloud;
55  
56    // ****************************** Initializer *******************************
57  
58    // ****************************** Constructors ******************************
59  
60    /**
61     * Default constructor.
62     */
63    public TokenizerTagCollector(final String encoding, final TagCloud tagCloud)
64    {
65      this.encoding = encoding;
66      this.tagCloud = tagCloud;
67    }
68  
69    // ****************************** Inner Classes *****************************
70  
71    // ********************************* Methods ********************************
72  
73    // --- init -----------------------------------------------------------------
74  
75    // --- get&set --------------------------------------------------------------
76  
77    // --- business -------------------------------------------------------------
78  
79    /**
80     * {@inheritDoc}
81     *
82     * @see de.smartics.tagcloud.collector.TagCollector#collect(java.io.InputStream)
83     */
84    @Override
85    public void collect(final InputStream input) throws IOException
86    {
87      final Reader reader =
88          new BufferedReader(new InputStreamReader(input, encoding));
89      try
90      {
91        final StreamTokenizer tokenizer = createTokenizer(reader);
92        tokenize(tokenizer);
93      }
94      finally
95      {
96        IOUtil.close(reader);
97      }
98    }
99  
100   private static StreamTokenizer createTokenizer(final Reader reader)
101   {
102     final StreamTokenizer tokenizer = new StreamTokenizer(reader);
103     tokenizer.slashSlashComments(true);
104     tokenizer.slashStarComments(true);
105     return tokenizer;
106   }
107 
108   private void tokenize(final StreamTokenizer tokenizer) throws IOException // NOPMD
109   {
110     boolean parsingStringLiteral = false;
111     boolean parsingCharacterLiteral = false;
112     boolean parsingAnnotationLiteral = false;
113     for (int token = tokenizer.nextToken(); token != TT_EOF; token =
114         tokenizer.nextToken())
115     {
116       switch (token)
117       {
118         case TT_WORD:
119           if (!(parsingStringLiteral || parsingCharacterLiteral))
120           {
121             if (!parsingAnnotationLiteral)
122             {
123               addToken(tokenizer.sval);
124             }
125             else
126             {
127               parsingAnnotationLiteral = false;
128             }
129           }
130           break;
131         case '"':
132           if (!parsingCharacterLiteral)
133           {
134             parsingStringLiteral = !parsingStringLiteral;
135           }
136           break;
137         case '\'':
138           if (!parsingStringLiteral)
139           {
140             parsingCharacterLiteral = !parsingCharacterLiteral;
141           }
142           break;
143         case '@':
144           parsingAnnotationLiteral = true;
145           break;
146         default:
147           break;
148       }
149     }
150   }
151 
152   private void addToken(final String token)
153   {
154     if (hasMoreThanOneChar(token) && doesNotContainDot(token))
155     {
156       tagCloud.addTag(token);
157     }
158   }
159 
160   private static boolean doesNotContainDot(final String token)
161   {
162     return token.indexOf('.') == -1;
163   }
164 
165   private static boolean hasMoreThanOneChar(final String token)
166   {
167     return token.length() > 1;
168   }
169 
170   /**
171    * {@inheritDoc}
172    *
173    * @see de.smartics.tagcloud.collector.TagCollector#getTagCloud()
174    */
175   @Override
176   public TagCloud getTagCloud()
177   {
178     return tagCloud;
179   }
180 
181   /**
182    * {@inheritDoc}
183    *
184    * @see de.smartics.tagcloud.collector.TagCollector#clear()
185    */
186   @Override
187   public void clear()
188   {
189     tagCloud.clear();
190   }
191 
192   // --- object basics --------------------------------------------------------
193 
194 }