1 package org.codehaus.groovy.syntax.lexer;
2
3
4 import org.codehaus.groovy.syntax.ReadException;
5 import org.codehaus.groovy.syntax.Token;
6 import org.codehaus.groovy.GroovyBugError;
7
8
9 /***
10 * A Lexer for processing standard strings.
11 *
12 * @author Chris Poirier
13 */
14
15 public class StringLexer extends TextLexerBase
16 {
17
18 protected String delimiter = null;
19 protected char watchFor;
20 protected boolean allowGStrings = false;
21 protected boolean emptyString = true;
22
23
24 /***
25 * If set true, the filter will allow // and \$ to pass through unchanged.
26 * You should set this appropriately BEFORE setting source!
27 */
28
29 public void allowGStrings( boolean allow )
30 {
31 allowGStrings = allow;
32 }
33
34
35
36 /***
37 * Returns a single STRING, then null. The STRING is all of the processed
38 * input. Backslashes are stripped, with the \r, \n, and \t converted
39 * appropriately.
40 */
41
42 public Token undelegatedNextToken( ) throws ReadException, LexerException
43 {
44 if( emptyString )
45 {
46 emptyString = false;
47 return Token.newString( "", getStartLine(), getStartColumn() );
48 }
49 else if( finished )
50 {
51 return null;
52 }
53 else
54 {
55 StringBuffer string = new StringBuffer();
56
57 while( la(1) != CharStream.EOS )
58 {
59 string.append( consume() );
60 }
61
62 return Token.newString( string.toString(), getStartLine(), getStartColumn() );
63 }
64 }
65
66
67
68 /***
69 * Controls delimiter search. When turned on, the first thing we do
70 * is check for and eat our delimiter.
71 */
72
73 public void delimit( boolean delimit )
74 {
75 super.delimit( delimit );
76
77 if( delimit )
78 {
79 try
80 {
81 if( !finished && la(1) == CharStream.EOS )
82 {
83 finishUp();
84
85
86
87
88
89
90
91 if( !allowGStrings )
92 {
93 emptyString = true;
94 }
95 }
96 }
97 catch( Exception e )
98 {
99 finished = true;
100 }
101 }
102 }
103
104
105
106
107 /***
108 * Sets the source lexer and identifies and consumes the opening delimiter.
109 */
110
111 public void setSource( Lexer source )
112 {
113 super.setSource( source );
114
115 emptyString = false;
116
117 try
118 {
119 char c = source.la();
120 switch( c )
121 {
122 case '\'':
123 case '"':
124 mark();
125 source.consume();
126
127 if( source.la() == c && source.la(2) == c )
128 {
129 source.consume(); source.consume();
130 delimiter = new StringBuffer().append(c).append(c).append(c).toString();
131 }
132 else
133 {
134 delimiter = new StringBuffer().append(c).toString();
135 }
136
137 watchFor = delimiter.charAt(0);
138 break;
139
140
141 default:
142 {
143 throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
144 }
145 }
146
147 restart();
148 delimit( true );
149 }
150 catch( Exception e )
151 {
152
153
154
155
156 e.printStackTrace();
157 unsetSource( );
158 }
159 }
160
161
162
163 /***
164 * Unsets our source.
165 */
166
167 public void unsetSource()
168 {
169 super.unsetSource();
170 delimiter = null;
171 finished = true;
172 emptyString = false;
173 }
174
175
176
177
178
179
180
181 private int lookahead = 0;
182 private char[] characters = new char[3];
183 private int[] widths = new int[3];
184
185
186
187 /***
188 * Returns the next <code>k</code>th character, without consuming any.
189 */
190
191 public char la(int k) throws LexerException, ReadException
192 {
193
194 if( !finished && source != null )
195 {
196
197 if( delimited )
198 {
199
200 if( k > characters.length )
201 {
202 throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
203 }
204
205 if( lookahead >= k )
206 {
207 return characters[k-1];
208 }
209
210 lookahead = 0;
211
212 char c = ' ', c1 = ' ', c2 = ' ';
213 int offset = 1, width = 0;
214 for( int i = 1; i <= k; i++ )
215 {
216 c1 = source.la(offset);
217 C1_SWITCH: switch( c1 )
218 {
219 case CharStream.EOS:
220 {
221 return c1;
222 }
223
224 case '//':
225 {
226 c2 = source.la( offset + 1 );
227
228 ESCAPE_SWITCH: switch( c2 )
229 {
230
231 case CharStream.EOS:
232 return c2;
233
234 case '//':
235 case '$':
236 {
237 if( allowGStrings )
238 {
239 c = c1;
240 width = 1;
241 }
242 else
243 {
244 c = c2;
245 width = 2;
246 }
247 break ESCAPE_SWITCH;
248 }
249
250 case 'r':
251 c = '\r';
252 width = 2;
253 break ESCAPE_SWITCH;
254
255 case 't':
256 c = '\t';
257 width = 2;
258 break ESCAPE_SWITCH;
259
260 case 'n':
261 c = '\n';
262 width = 2;
263 break ESCAPE_SWITCH;
264
265
266 default:
267 c = c2;
268 width = 2;
269 break ESCAPE_SWITCH;
270 }
271 break C1_SWITCH;
272 }
273
274 default:
275 {
276 if( c1 == watchFor )
277 {
278 boolean atEnd = true;
279 for( int j = 1; j < delimiter.length(); j++ )
280 {
281 if( source.la(offset+j) != delimiter.charAt(j) )
282 {
283 atEnd = false;
284 break;
285 }
286 }
287
288 if( atEnd )
289 {
290 return CharStream.EOS;
291 }
292 }
293
294 c = c1;
295 width = 1;
296 break C1_SWITCH;
297 }
298 }
299
300
301 characters[lookahead] = c;
302 widths[lookahead] = width;
303
304 offset += width;
305 lookahead += 1;
306 }
307
308 return c;
309 }
310
311 lookahead = 0;
312 return source.la(k);
313 }
314
315 return CharStream.EOS;
316
317 }
318
319
320
321 /***
322 * Eats a character from the input stream. Searches for the delimiter if
323 * delimited. Note that turning delimiting on also checks if we are at the
324 * delimiter, so if we aren't finished, there is something to consume.
325 */
326
327 public char consume() throws LexerException, ReadException
328 {
329 if( !finished && source != null )
330 {
331 char c = CharStream.EOS;
332
333 if( delimited )
334 {
335 if( lookahead < 1 )
336 {
337 la( 1 );
338 }
339
340 if( lookahead >= 1 )
341 {
342 c = characters[0];
343 for( int i = 0; i < widths[0]; i++ )
344 {
345 source.consume();
346 }
347
348 lookahead = 0;
349 }
350
351 if( la(1) == CharStream.EOS )
352 {
353 finishUp();
354 }
355 }
356 else
357 {
358 c = source.consume();
359 }
360
361 lookahead = 0;
362 return c;
363 }
364
365 return CharStream.EOS;
366 }
367
368
369
370 /***
371 * Eats our delimiter from the stream and marks us finished.
372 */
373
374 protected void finishUp() throws LexerException, ReadException
375 {
376 for( int i = 0; i < delimiter.length(); i++ )
377 {
378 char c = source.la(1);
379 if( c == CharStream.EOS )
380 {
381 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
382 }
383 else if( c == delimiter.charAt(i) )
384 {
385 source.consume();
386 }
387 else
388 {
389 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
390 }
391 }
392
393 finish();
394 }
395
396 }