package org.codehaus.groovy.syntax.lexer;


import org.codehaus.groovy.GroovyBugError;
import org.codehaus.groovy.syntax.ReadException;
import org.codehaus.groovy.syntax.Token;


9 /***
10 * A Lexer for processing standard strings.
11 *
12 * @author Chris Poirier
13 */
14
15 public class StringLexer extends TextLexerBase
16 {
17
18 protected String delimiter = null;
19 protected char watchFor;
20 protected boolean allowGStrings = false;
21 protected boolean emptyString = true;
22
23
24 /***
25 * If set true, the filter will allow // and \$ to pass through unchanged.
26 * You should set this appropriately BEFORE setting source!
27 */
28
29 public void allowGStrings( boolean allow )
30 {
31 allowGStrings = allow;
32 }
33
34
35
36 /***
37 * Returns a single STRING, then null. The STRING is all of the processed
38 * input. Backslashes are stripped, with the \r, \n, and \t converted
39 * appropriately.
40 */
41
42 public Token undelegatedNextToken( ) throws ReadException, LexerException
43 {
44 if( emptyString )
45 {
46 emptyString = false;
47 return Token.newString( "", getStartLine(), getStartColumn() );
48 }
49 else if( finished )
50 {
51 return null;
52 }
53 else
54 {
55 StringBuffer string = new StringBuffer();
56
57 while( la(1) != CharStream.EOS )
58 {
59 string.append( consume() );
60 }
61
62 if( la(1) == CharStream.EOS && string.length() == 0 )
63 {
64 finished = true;
65 }
66
67 return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68 }
69 }
70
71
72
73 /***
74 * Controls delimiter search. When turned on, the first thing we do
75 * is check for and eat our delimiter.
76 */
77
78 public void delimit( boolean delimit )
79 {
80 super.delimit( delimit );
81
82 if( delimit )
83 {
84 try
85 {
86 if( !finished && la(1) == CharStream.EOS )
87 {
88 finishUp();
89
90
91
92
93
94
95
96 if( !allowGStrings )
97 {
98 emptyString = true;
99 }
100 }
101 }
102 catch( Exception e )
103 {
104 finished = true;
105 }
106 }
107 }
108
109
110
111
112 /***
113 * Sets the source lexer and identifies and consumes the opening delimiter.
114 */
115
116 public void setSource( Lexer source )
117 {
118 super.setSource( source );
119
120 emptyString = false;
121
122 try
123 {
124 char c = source.la();
125 switch( c )
126 {
127 case '\'':
128 case '"':
129 mark();
130 source.consume();
131
132 if( source.la() == c && source.la(2) == c )
133 {
134 source.consume(); source.consume();
135 delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136 }
137 else
138 {
139 delimiter = new StringBuffer().append(c).toString();
140 }
141
142 watchFor = delimiter.charAt(0);
143 break;
144
145
146 default:
147 {
148 throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149 }
150 }
151
152 restart();
153 delimit( true );
154 }
155 catch( Exception e )
156 {
157
158
159
160
161 e.printStackTrace();
162 unsetSource( );
163 }
164 }
165
166
167
168 /***
169 * Unsets our source.
170 */
171
172 public void unsetSource()
173 {
174 super.unsetSource();
175 delimiter = null;
176 finished = true;
177 emptyString = false;
178 }
179
180
181
182
183
184
185
186 private int lookahead = 0;
187 private char[] characters = new char[3];
188 private int[] widths = new int[3];
189
190
191 public char la() throws LexerException, ReadException
192 {
193 return la(1);
194 }
195
196 /***
197 * Returns the next <code>k</code>th character, without consuming any.
198 */
199
200 public char la(int k) throws LexerException, ReadException
201 {
202
203 if( !finished && source != null )
204 {
205
206 if( delimited )
207 {
208
209 if( k > characters.length )
210 {
211 throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
212 }
213
214 if( lookahead >= k && k >= 1)
215 {
216 lookahead = 1;
217 return characters[k-1];
218 }
219
220 lookahead = 0;
221
222 char c = ' ', c1 = ' ', c2 = ' ';
223 int offset = 1, width = 0;
224 for( int i = 1; i <= k; i++ )
225 {
226 c1 = source.la(offset);
227 C1_SWITCH: switch( c1 )
228 {
229 case CharStream.EOS:
230 {
231 return c1;
232 }
233
234 case '//':
235 {
236 c2 = source.la( offset + 1 );
237
238 ESCAPE_SWITCH: switch( c2 )
239 {
240
241 case CharStream.EOS:
242 return c2;
243
244 case '//':
245 c = '//';
246 characters[0] = c;
247 widths[0] = 2;
248 lookahead = 1;
249 return c;
250
251 case 'n':
252 c = '\n';
253 width = 2;
254 break ESCAPE_SWITCH;
255
256 case 'r':
257 c = '\r';
258 width = 2;
259 break ESCAPE_SWITCH;
260
261 case 't':
262 c = '\t';
263 width = 2;
264 break ESCAPE_SWITCH;
265
266 case 'b':
267 c = '\b';
268 width = 2;
269 break ESCAPE_SWITCH;
270
271 case 'f':
272 c = '\f';
273 width = 2;
274 break ESCAPE_SWITCH;
275
276 case '$':
277 if ( allowGStrings )
278 {
279 c = c1;
280 width = 1;
281 }
282 else
283 {
284 c = c2;
285 width = 2;
286 }
287 break ESCAPE_SWITCH;
288
289
290 case '"':
291 case '\'':
292 c = c2;
293 characters[0] = c;
294 widths[0] = 2;
295 lookahead = 1;
296 return c;
297
298 default:
299 c = '//';
300 characters[0] = c;
301 widths[0] = 1;
302 lookahead = 1;
303 return c;
304 }
305 break C1_SWITCH;
306 }
307
308 default:
309 {
310 if( c1 == watchFor )
311 {
312 boolean atEnd = true;
313 if (delimiter.length() == 1)
314 {
315 if (source.la(offset) != watchFor)
316 {
317 atEnd = false;
318 c = c1;
319 break C1_SWITCH;
320 }
321 }
322 else {
323 for( int j = 1; j < delimiter.length(); j++ )
324 {
325 if( source.la(offset+j) != delimiter.charAt(j) )
326 {
327 atEnd = false;
328 break;
329 }
330 }
331 }
332
333 if( atEnd )
334 {
335 return CharStream.EOS;
336 }
337 }
338
339 c = c1;
340 width = 1;
341 if (c == '$' && allowGStrings)
342 {
343 lookahead = 0;
344 }
345 break C1_SWITCH;
346 }
347 }
348
349
350 characters[lookahead] = c;
351 widths[lookahead] = width;
352
353 offset += width;
354 lookahead += 1;
355 }
356
357 return c;
358 }
359
360 lookahead = 0;
361 return source.la(k);
362 }
363
364 return CharStream.EOS;
365
366 }
367
368
369
370 /***
371 * Eats a character from the input stream. Searches for the delimiter if
372 * delimited. Note that turning delimiting on also checks if we are at the
373 * delimiter, so if we aren't finished, there is something to consume.
374 */
375
376 public char consume() throws LexerException, ReadException
377 {
378 if( !finished && source != null )
379 {
380 char c = CharStream.EOS;
381
382 if( delimited )
383 {
384 if( lookahead < 1 )
385 {
386 la( 1 );
387 }
388
389 if( lookahead >= 1 )
390 {
391 c = characters[0];
392 for( int i = 0; i < widths[0]; i++ )
393 {
394 source.consume();
395 }
396
397 lookahead = 0;
398 }
399
400 if( la(1) == CharStream.EOS )
401 {
402 finishUp();
403 }
404 }
405 else
406 {
407 c = source.consume();
408 }
409
410 lookahead = 0;
411 return c;
412 }
413
414 return CharStream.EOS;
415 }
416
417
418
419 /***
420 * Eats our delimiter from the stream and marks us finished.
421 */
422
423 protected void finishUp() throws LexerException, ReadException
424 {
425 for( int i = 0; i < delimiter.length(); i++ )
426 {
427 char c = source.la(1);
428 if( c == CharStream.EOS )
429 {
430 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
431 }
432 else if( c == delimiter.charAt(i) )
433 {
434 source.consume();
435 }
436 else
437 {
438 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
439 }
440 }
441
442 finish();
443 }
444
445 }