%line | %branch | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
org.apache.turbine.util.parser.DataStreamParser |
|
|
1 | package org.apache.turbine.util.parser; |
|
2 | ||
3 | /* |
|
4 | * Copyright 2001-2005 The Apache Software Foundation. |
|
5 | * |
|
6 | * Licensed under the Apache License, Version 2.0 (the "License") |
|
7 | * you may not use this file except in compliance with the License. |
|
8 | * You may obtain a copy of the License at |
|
9 | * |
|
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
11 | * |
|
12 | * Unless required by applicable law or agreed to in writing, software |
|
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
15 | * See the License for the specific language governing permissions and |
|
16 | * limitations under the License. |
|
17 | */ |
|
18 | ||
19 | import java.io.BufferedReader; |
|
20 | import java.io.IOException; |
|
21 | import java.io.InputStreamReader; |
|
22 | import java.io.Reader; |
|
23 | import java.io.StreamTokenizer; |
|
24 | ||
25 | import java.util.ArrayList; |
|
26 | import java.util.Collections; |
|
27 | import java.util.Iterator; |
|
28 | import java.util.List; |
|
29 | import java.util.NoSuchElementException; |
|
30 | ||
31 | import org.apache.commons.lang.exception.NestableRuntimeException; |
|
32 | ||
33 | /** |
|
34 | * DataStreamParser is used to parse a stream with a fixed format and |
|
35 | * generate ValueParser objects which can be used to extract the values |
|
36 | * in the desired type. |
|
37 | * |
|
38 | * <p>The class itself is abstract - a concrete subclass which implements |
|
39 | * the initTokenizer method such as CSVParser or TSVParser is required |
|
40 | * to use the functionality. |
|
41 | * |
|
42 | * <p>The class implements the java.util.Iterator interface for convenience. |
|
43 | * This allows simple use in a Velocity template for example: |
|
44 | * |
|
45 | * <pre> |
|
46 | * #foreach ($row in $datastream) |
|
47 | * Name: $row.Name |
|
48 | * Description: $row.Description |
|
49 | * #end |
|
50 | * </pre> |
|
51 | * |
|
52 | * @author <a href="mailto:sean@informage.net">Sean Legassick</a> |
|
53 | * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a> |
|
54 | * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a> |
|
55 | * @version $Id: DataStreamParser.java 280284 2005-09-12 07:57:42Z henning $ |
|
56 | */ |
|
57 | public abstract class DataStreamParser implements Iterator |
|
58 | { |
|
59 | /** |
|
60 | * The constant for empty fields |
|
61 | */ |
|
62 | protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD"; |
|
63 | ||
64 | /** |
|
65 | * The list of column names. |
|
66 | */ |
|
67 | 56 | private List columnNames = Collections.EMPTY_LIST; |
68 | ||
69 | /** |
|
70 | * The stream tokenizer for reading values from the input reader. |
|
71 | */ |
|
72 | private StreamTokenizer tokenizer; |
|
73 | ||
74 | /** |
|
75 | * The parameter parser holding the values of columns for the current line. |
|
76 | */ |
|
77 | private ValueParser lineValues; |
|
78 | ||
79 | /** |
|
80 | * Indicates whether or not the tokenizer has read anything yet. |
|
81 | */ |
|
82 | 56 | private boolean neverRead = true; |
83 | ||
84 | /** |
|
85 | * The character encoding of the input |
|
86 | */ |
|
87 | private String characterEncoding; |
|
88 | ||
89 | /** |
|
90 | * The field separator, which can be almost any char |
|
91 | */ |
|
92 | private char fieldSeparator; |
|
93 | ||
94 | /** |
|
95 | * Create a new DataStreamParser instance. Requires a Reader to read the |
|
96 | * comma-separated values from, a list of column names and a |
|
97 | * character encoding. |
|
98 | * |
|
99 | * @param in the input reader. |
|
100 | * @param columnNames a list of column names. |
|
101 | * @param characterEncoding the character encoding of the input. |
|
102 | */ |
|
103 | public DataStreamParser(Reader in, List columnNames, |
|
104 | String characterEncoding) |
|
105 | 56 | { |
106 | 56 | setColumnNames(columnNames); |
107 | ||
108 | 56 | this.characterEncoding = characterEncoding; |
109 | ||
110 | 56 | if (this.characterEncoding == null) |
111 | { |
|
112 | 6 | if (in instanceof InputStreamReader) |
113 | { |
|
114 | 2 | this.characterEncoding = ((InputStreamReader) in).getEncoding(); |
115 | } |
|
116 | ||
117 | 6 | if (this.characterEncoding == null) |
118 | { |
|
119 | // the reader did not supply a characterEncoding, so fall back to US-ASCII |
|
120 | 4 | this.characterEncoding = "US-ASCII"; |
121 | } |
|
122 | } |
|
123 | ||
124 | 56 | tokenizer = new StreamTokenizer(new BufferedReader(in)); |
125 | 56 | initTokenizer(tokenizer); |
126 | 56 | } |
127 | ||
128 | /** |
|
129 | * Initialize the StreamTokenizer instance used to read the lines |
|
130 | * from the input reader. This must be implemented in subclasses to |
|
131 | * set up other tokenizing properties. |
|
132 | * |
|
133 | * @param tokenizer the tokenizer to adjust |
|
134 | */ |
|
135 | protected void initTokenizer(StreamTokenizer tokenizer) |
|
136 | { |
|
137 | 56 | tokenizer.resetSyntax(); |
138 | ||
139 | // leave out the comma sign (,), we need it for empty fields |
|
140 | 56 | tokenizer.wordChars(' ', Character.MAX_VALUE); |
141 | ||
142 | // and set the quote mark as the quoting character |
|
143 | 56 | tokenizer.quoteChar('"'); |
144 | ||
145 | // and finally say that end of line is significant |
|
146 | 56 | tokenizer.eolIsSignificant(true); |
147 | 56 | } |
148 | ||
149 | /** |
|
150 | * This method must be called to set up the field separator |
|
151 | * @param fieldSeparator the char which separates the fields |
|
152 | */ |
|
153 | public void setFieldSeparator(char fieldSeparator) |
|
154 | { |
|
155 | 56 | this.fieldSeparator = fieldSeparator; |
156 | // make this field also an ordinary char by default. |
|
157 | 56 | tokenizer.ordinaryChar(fieldSeparator); |
158 | 56 | } |
159 | ||
160 | /** |
|
161 | * Set the list of column names explicitly. |
|
162 | * |
|
163 | * @param columnNames A list of column names. |
|
164 | */ |
|
165 | public void setColumnNames(List columnNames) |
|
166 | { |
|
167 | 76 | if (columnNames != null) |
168 | { |
|
169 | 56 | this.columnNames = columnNames; |
170 | } |
|
171 | 76 | } |
172 | ||
173 | /** |
|
174 | * Get the list of column names. |
|
175 | * |
|
176 | */ |
|
177 | public List getColumnNames() |
|
178 | { |
|
179 | 42 | return columnNames; |
180 | } |
|
181 | ||
182 | /** |
|
183 | * Read the list of column names from the input reader using the |
|
184 | * tokenizer. If fieldNames are empty, we use the current fieldNumber |
|
185 | * + the EMPTYFIELDNAME to make one up. |
|
186 | * |
|
187 | * @exception IOException an IOException occurred. |
|
188 | */ |
|
189 | public void readColumnNames() |
|
190 | throws IOException |
|
191 | { |
|
192 | 16 | List columnNames = new ArrayList(); |
193 | 16 | int fieldCounter = 0; |
194 | ||
195 | 16 | if (hasNextRow()) |
196 | { |
|
197 | 16 | String colName = null; |
198 | 16 | boolean foundEol = false; |
199 | ||
200 | 306 | while(!foundEol) |
201 | { |
|
202 | 282 | tokenizer.nextToken(); |
203 | ||
204 | 282 | if (tokenizer.ttype == '"' |
205 | || tokenizer.ttype == StreamTokenizer.TT_WORD) |
|
206 | { |
|
207 | // tokenizer.ttype is either '"' or TT_WORD |
|
208 | 134 | colName = tokenizer.sval; |
209 | } |
|
210 | else |
|
211 | { |
|
212 | // fieldSeparator, EOL or EOF |
|
213 | 148 | fieldCounter++; |
214 | ||
215 | 148 | if (colName == null) |
216 | { |
|
217 | 14 | colName = EMPTYFIELDNAME + fieldCounter; |
218 | } |
|
219 | ||
220 | 148 | columnNames.add(colName); |
221 | 148 | colName = null; |
222 | } |
|
223 | ||
224 | // EOL and EOF are checked independently from existing fields. |
|
225 | 282 | if (tokenizer.ttype == StreamTokenizer.TT_EOL) |
226 | { |
|
227 | 12 | foundEol = true; |
228 | } |
|
229 | 270 | else if (tokenizer.ttype == StreamTokenizer.TT_EOF) |
230 | { |
|
231 | // Keep this token in the tokenizer for hasNext() |
|
232 | 4 | tokenizer.pushBack(); |
233 | 4 | foundEol = true; |
234 | } |
|
235 | } |
|
236 | ||
237 | 16 | setColumnNames(columnNames); |
238 | } |
|
239 | 16 | } |
240 | ||
241 | /** |
|
242 | * Determine whether a further row of values exists in the input. |
|
243 | * |
|
244 | * @return true if the input has more rows. |
|
245 | * @exception IOException an IOException occurred. |
|
246 | */ |
|
247 | public boolean hasNextRow() |
|
248 | throws IOException |
|
249 | { |
|
250 | // check for end of line ensures that an empty last line doesn't |
|
251 | // give a false positive for hasNextRow |
|
252 | 232 | if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) |
253 | { |
|
254 | 134 | tokenizer.nextToken(); |
255 | 134 | tokenizer.pushBack(); |
256 | 134 | neverRead = false; |
257 | } |
|
258 | 232 | return tokenizer.ttype != StreamTokenizer.TT_EOF; |
259 | } |
|
260 | ||
261 | /** |
|
262 | * Returns a ValueParser object containing the next row of values. |
|
263 | * |
|
264 | * @return a ValueParser object. |
|
265 | * @exception IOException an IOException occurred. |
|
266 | * @exception NoSuchElementException there are no more rows in the input. |
|
267 | */ |
|
268 | public ValueParser nextRow() |
|
269 | throws IOException, NoSuchElementException |
|
270 | { |
|
271 | 74 | if (!hasNextRow()) |
272 | { |
|
273 | 2 | throw new NoSuchElementException(); |
274 | } |
|
275 | ||
276 | 72 | if (lineValues == null) |
277 | { |
|
278 | 42 | lineValues = new BaseValueParser(characterEncoding); |
279 | } |
|
280 | else |
|
281 | { |
|
282 | 30 | lineValues.clear(); |
283 | } |
|
284 | ||
285 | 72 | Iterator it = columnNames.iterator(); |
286 | ||
287 | 72 | String currVal = ""; |
288 | 72 | String colName = null; |
289 | ||
290 | 72 | boolean foundEol = false; |
291 | 1398 | while (!foundEol || it.hasNext()) |
292 | { |
|
293 | 1290 | if (!foundEol) |
294 | { |
|
295 | 1188 | tokenizer.nextToken(); |
296 | } |
|
297 | ||
298 | 1290 | if (colName == null && it.hasNext()) |
299 | { |
|
300 | 592 | colName = String.valueOf(it.next()); |
301 | } |
|
302 | ||
303 | 1290 | if (tokenizer.ttype == '"' |
304 | || tokenizer.ttype == StreamTokenizer.TT_WORD) |
|
305 | { |
|
306 | // tokenizer.ttype is either '"' or TT_WORD |
|
307 | 588 | currVal = tokenizer.sval; |
308 | } |
|
309 | else |
|
310 | { |
|
311 | // fieldSeparator, EOL or EOF |
|
312 | 702 | lineValues.add(colName, currVal); |
313 | 702 | colName = null; |
314 | 702 | currVal = ""; |
315 | } |
|
316 | ||
317 | // EOL and EOF are checked independently from existing fields. |
|
318 | 1290 | if (tokenizer.ttype == StreamTokenizer.TT_EOL) |
319 | { |
|
320 | 168 | foundEol = true; |
321 | } |
|
322 | 1122 | else if (tokenizer.ttype == StreamTokenizer.TT_EOF) |
323 | { |
|
324 | // Keep this token in the tokenizer for hasNext() |
|
325 | 6 | tokenizer.pushBack(); |
326 | 6 | foundEol = true; |
327 | } |
|
328 | } |
|
329 | ||
330 | 72 | return lineValues; |
331 | } |
|
332 | ||
333 | /** |
|
334 | * Determine whether a further row of values exists in the input. |
|
335 | * |
|
336 | * @return true if the input has more rows. |
|
337 | */ |
|
338 | public boolean hasNext() |
|
339 | { |
|
340 | 142 | boolean hasNext = false; |
341 | ||
342 | try |
|
343 | { |
|
344 | 142 | hasNext = hasNextRow(); |
345 | 71 | } |
346 | 0 | catch (IOException e) |
347 | { |
|
348 | 0 | throw new NestableRuntimeException(e); |
349 | 71 | } |
350 | ||
351 | 142 | return hasNext; |
352 | } |
|
353 | ||
354 | /** |
|
355 | * Returns a ValueParser object containing the next row of values. |
|
356 | * |
|
357 | * @return a ValueParser object as an Object. |
|
358 | * @exception NoSuchElementException there are no more rows in the input |
|
359 | * or an IOException occurred. |
|
360 | */ |
|
361 | public Object next() |
|
362 | throws NoSuchElementException |
|
363 | { |
|
364 | 74 | Object nextRow = null; |
365 | ||
366 | try |
|
367 | { |
|
368 | 74 | nextRow = nextRow(); |
369 | 36 | } |
370 | 0 | catch (IOException e) |
371 | { |
|
372 | 0 | throw new NestableRuntimeException(e); |
373 | 36 | } |
374 | ||
375 | 72 | return nextRow; |
376 | } |
|
377 | ||
378 | /** |
|
379 | * The optional Iterator.remove method is not supported. |
|
380 | * |
|
381 | * @exception UnsupportedOperationException the operation is not supported. |
|
382 | */ |
|
383 | public void remove() |
|
384 | throws UnsupportedOperationException |
|
385 | { |
|
386 | 2 | throw new UnsupportedOperationException(); |
387 | } |
|
388 | } |
This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |