1 package org.apache.turbine.util.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.InputStreamReader;
22 import java.io.Reader;
23 import java.io.StreamTokenizer;
24
25 import java.util.ArrayList;
26 import java.util.Collections;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.NoSuchElementException;
30
31 import org.apache.commons.lang.exception.NestableRuntimeException;
32
33 /***
34 * DataStreamParser is used to parse a stream with a fixed format and
35 * generate ValueParser objects which can be used to extract the values
36 * in the desired type.
37 *
38 * <p>The class itself is abstract - a concrete subclass which implements
39 * the initTokenizer method such as CSVParser or TSVParser is required
40 * to use the functionality.
41 *
42 * <p>The class implements the java.util.Iterator interface for convenience.
43 * This allows simple use in a Velocity template for example:
44 *
45 * <pre>
46 * #foreach ($row in $datastream)
47 * Name: $row.Name
48 * Description: $row.Description
49 * #end
50 * </pre>
51 *
52 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
53 * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a>
54 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
55 * @version $Id: DataStreamParser.java 280284 2005-09-12 07:57:42Z henning $
56 */
57 public abstract class DataStreamParser implements Iterator
58 {
59 /***
60 * The constant for empty fields
61 */
62 protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD";
63
64 /***
65 * The list of column names.
66 */
67 private List columnNames = Collections.EMPTY_LIST;
68
69 /***
70 * The stream tokenizer for reading values from the input reader.
71 */
72 private StreamTokenizer tokenizer;
73
74 /***
75 * The parameter parser holding the values of columns for the current line.
76 */
77 private ValueParser lineValues;
78
79 /***
80 * Indicates whether or not the tokenizer has read anything yet.
81 */
82 private boolean neverRead = true;
83
84 /***
85 * The character encoding of the input
86 */
87 private String characterEncoding;
88
89 /***
90 * The fieldseperator, which can be almost any char
91 */
92 private char fieldSeparator;
93
94 /***
95 * Create a new DataStreamParser instance. Requires a Reader to read the
96 * comma-separated values from, a list of column names and a
97 * character encoding.
98 *
99 * @param in the input reader.
100 * @param columnNames a list of column names.
101 * @param characterEncoding the character encoding of the input.
102 */
103 public DataStreamParser(Reader in, List columnNames,
104 String characterEncoding)
105 {
106 setColumnNames(columnNames);
107
108 this.characterEncoding = characterEncoding;
109
110 if (this.characterEncoding == null)
111 {
112 if (in instanceof InputStreamReader)
113 {
114 this.characterEncoding = ((InputStreamReader) in).getEncoding();
115 }
116
117 if (this.characterEncoding == null)
118 {
119
120 this.characterEncoding = "US-ASCII";
121 }
122 }
123
124 tokenizer = new StreamTokenizer(new BufferedReader(in));
125 initTokenizer(tokenizer);
126 }
127
128 /***
129 * Initialize the StreamTokenizer instance used to read the lines
130 * from the input reader. This must be implemented in subclasses to
131 * set up other tokenizing properties.
132 *
133 * @param tokenizer the tokenizer to adjust
134 */
135 protected void initTokenizer(StreamTokenizer tokenizer)
136 {
137 tokenizer.resetSyntax();
138
139
140 tokenizer.wordChars(' ', Character.MAX_VALUE);
141
142
143 tokenizer.quoteChar('"');
144
145
146 tokenizer.eolIsSignificant(true);
147 }
148
149 /***
150 * This method must be called to setup the field seperator
151 * @param fieldSeparator the char which separates the fields
152 */
153 public void setFieldSeparator(char fieldSeparator)
154 {
155 this.fieldSeparator = fieldSeparator;
156
157 tokenizer.ordinaryChar(fieldSeparator);
158 }
159
160 /***
161 * Set the list of column names explicitly.
162 *
163 * @param columnNames A list of column names.
164 */
165 public void setColumnNames(List columnNames)
166 {
167 if (columnNames != null)
168 {
169 this.columnNames = columnNames;
170 }
171 }
172
173 /***
174 * get the list of column names.
175 *
176 */
177 public List getColumnNames()
178 {
179 return columnNames;
180 }
181
182 /***
183 * Read the list of column names from the input reader using the
184 * tokenizer. If fieldNames are empty, we use the current fieldNumber
185 * + the EMPTYFIELDNAME to make one up.
186 *
187 * @exception IOException an IOException occurred.
188 */
189 public void readColumnNames()
190 throws IOException
191 {
192 List columnNames = new ArrayList();
193 int fieldCounter = 0;
194
195 if (hasNextRow())
196 {
197 String colName = null;
198 boolean foundEol = false;
199
200 while(!foundEol)
201 {
202 tokenizer.nextToken();
203
204 if (tokenizer.ttype == '"'
205 || tokenizer.ttype == StreamTokenizer.TT_WORD)
206 {
207
208 colName = tokenizer.sval;
209 }
210 else
211 {
212
213 fieldCounter++;
214
215 if (colName == null)
216 {
217 colName = EMPTYFIELDNAME + fieldCounter;
218 }
219
220 columnNames.add(colName);
221 colName = null;
222 }
223
224
225 if (tokenizer.ttype == StreamTokenizer.TT_EOL)
226 {
227 foundEol = true;
228 }
229 else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
230 {
231
232 tokenizer.pushBack();
233 foundEol = true;
234 }
235 }
236
237 setColumnNames(columnNames);
238 }
239 }
240
241 /***
242 * Determine whether a further row of values exists in the input.
243 *
244 * @return true if the input has more rows.
245 * @exception IOException an IOException occurred.
246 */
247 public boolean hasNextRow()
248 throws IOException
249 {
250
251
252 if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
253 {
254 tokenizer.nextToken();
255 tokenizer.pushBack();
256 neverRead = false;
257 }
258 return tokenizer.ttype != StreamTokenizer.TT_EOF;
259 }
260
261 /***
262 * Returns a ValueParser object containing the next row of values.
263 *
264 * @return a ValueParser object.
265 * @exception IOException an IOException occurred.
266 * @exception NoSuchElementException there are no more rows in the input.
267 */
268 public ValueParser nextRow()
269 throws IOException, NoSuchElementException
270 {
271 if (!hasNextRow())
272 {
273 throw new NoSuchElementException();
274 }
275
276 if (lineValues == null)
277 {
278 lineValues = new BaseValueParser(characterEncoding);
279 }
280 else
281 {
282 lineValues.clear();
283 }
284
285 Iterator it = columnNames.iterator();
286
287 String currVal = "";
288 String colName = null;
289
290 boolean foundEol = false;
291 while (!foundEol || it.hasNext())
292 {
293 if (!foundEol)
294 {
295 tokenizer.nextToken();
296 }
297
298 if (colName == null && it.hasNext())
299 {
300 colName = String.valueOf(it.next());
301 }
302
303 if (tokenizer.ttype == '"'
304 || tokenizer.ttype == StreamTokenizer.TT_WORD)
305 {
306
307 currVal = tokenizer.sval;
308 }
309 else
310 {
311
312 lineValues.add(colName, currVal);
313 colName = null;
314 currVal = "";
315 }
316
317
318 if (tokenizer.ttype == StreamTokenizer.TT_EOL)
319 {
320 foundEol = true;
321 }
322 else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
323 {
324
325 tokenizer.pushBack();
326 foundEol = true;
327 }
328 }
329
330 return lineValues;
331 }
332
333 /***
334 * Determine whether a further row of values exists in the input.
335 *
336 * @return true if the input has more rows.
337 */
338 public boolean hasNext()
339 {
340 boolean hasNext = false;
341
342 try
343 {
344 hasNext = hasNextRow();
345 }
346 catch (IOException e)
347 {
348 throw new NestableRuntimeException(e);
349 }
350
351 return hasNext;
352 }
353
354 /***
355 * Returns a ValueParser object containing the next row of values.
356 *
357 * @return a ValueParser object as an Object.
358 * @exception NoSuchElementException there are no more rows in the input
359 * or an IOException occurred.
360 */
361 public Object next()
362 throws NoSuchElementException
363 {
364 Object nextRow = null;
365
366 try
367 {
368 nextRow = nextRow();
369 }
370 catch (IOException e)
371 {
372 throw new NestableRuntimeException(e);
373 }
374
375 return nextRow;
376 }
377
378 /***
379 * The optional Iterator.remove method is not supported.
380 *
381 * @exception UnsupportedOperationException the operation is not supported.
382 */
383 public void remove()
384 throws UnsupportedOperationException
385 {
386 throw new UnsupportedOperationException();
387 }
388 }