1 package org.codehaus.groovy.syntax.lexer;
2
3
4 import org.codehaus.groovy.syntax.ReadException;
5 import org.codehaus.groovy.syntax.Token;
6 import org.codehaus.groovy.GroovyBugError;
7
8
/**
 * A Lexer for processing standard (quoted) string literals.
 *
 * @author Chris Poirier
 */
14
15 public class StringLexer extends TextLexerBase
16 {
17
18 protected String delimiter = null;
19 protected char watchFor;
20 protected boolean allowGStrings = false;
21 protected boolean emptyString = true;
22
23
24 /***
25 * If set true, the filter will allow // and \$ to pass through unchanged.
26 * You should set this appropriately BEFORE setting source!
27 */
28
29 public void allowGStrings( boolean allow )
30 {
31 allowGStrings = allow;
32 }
33
34
35
36 /***
37 * Returns a single STRING, then null. The STRING is all of the processed
38 * input. Backslashes are stripped, with the \r, \n, and \t converted
39 * appropriately.
40 */
41
42 public Token undelegatedNextToken( ) throws ReadException, LexerException
43 {
44 if( emptyString )
45 {
46 emptyString = false;
47 return Token.newString( "", getStartLine(), getStartColumn() );
48 }
49 else if( finished )
50 {
51 return null;
52 }
53 else
54 {
55 StringBuffer string = new StringBuffer();
56
57 while( la(1) != CharStream.EOS )
58 {
59 string.append( consume() );
60 }
61
62 if( la(1) == CharStream.EOS && string.length() == 0 )
63 {
64 finished = true;
65 }
66
67 return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68 }
69 }
70
71
72
73 /***
74 * Controls delimiter search. When turned on, the first thing we do
75 * is check for and eat our delimiter.
76 */
77
78 public void delimit( boolean delimit )
79 {
80 super.delimit( delimit );
81
82 if( delimit )
83 {
84 try
85 {
86 if( !finished && la(1) == CharStream.EOS )
87 {
88 finishUp();
89
90
91
92
93
94
95
96 if( !allowGStrings )
97 {
98 emptyString = true;
99 }
100 }
101 }
102 catch( Exception e )
103 {
104 finished = true;
105 }
106 }
107 }
108
109
110
111
112 /***
113 * Sets the source lexer and identifies and consumes the opening delimiter.
114 */
115
116 public void setSource( Lexer source )
117 {
118 super.setSource( source );
119
120 emptyString = false;
121
122 try
123 {
124 char c = source.la();
125 switch( c )
126 {
127 case '\'':
128 case '"':
129 mark();
130 source.consume();
131
132 if( source.la() == c && source.la(2) == c )
133 {
134 source.consume(); source.consume();
135 delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136 }
137 else
138 {
139 delimiter = new StringBuffer().append(c).toString();
140 }
141
142 watchFor = delimiter.charAt(0);
143 break;
144
145
146 default:
147 {
148 throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149 }
150 }
151
152 restart();
153 delimit( true );
154 }
155 catch( Exception e )
156 {
157
158
159
160
161 e.printStackTrace();
162 unsetSource( );
163 }
164 }
165
166
167
168 /***
169 * Unsets our source.
170 */
171
172 public void unsetSource()
173 {
174 super.unsetSource();
175 delimiter = null;
176 finished = true;
177 emptyString = false;
178 }
179
180
181
182
183
184
185
186 private int lookahead = 0;
187 private char[] characters = new char[3];
188 private int[] widths = new int[3];
189
190
191
192 /***
193 * Returns the next <code>k</code>th character, without consuming any.
194 */
195
196 public char la(int k) throws LexerException, ReadException
197 {
198
199 if( !finished && source != null )
200 {
201
202 if( delimited )
203 {
204
205 if( k > characters.length )
206 {
207 throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
208 }
209
210 if( lookahead >= k )
211 {
212 return characters[k-1];
213 }
214
215 lookahead = 0;
216
217 char c = ' ', c1 = ' ', c2 = ' ';
218 int offset = 1, width = 0;
219 for( int i = 1; i <= k; i++ )
220 {
221 c1 = source.la(offset);
222 C1_SWITCH: switch( c1 )
223 {
224 case CharStream.EOS:
225 {
226 return c1;
227 }
228
229 case '//':
230 {
231 c2 = source.la( offset + 1 );
232
233 ESCAPE_SWITCH: switch( c2 )
234 {
235
236 case CharStream.EOS:
237 return c2;
238
239 case '//':
240 case '$':
241 {
242 if( allowGStrings )
243 {
244 c = c1;
245 width = 1;
246 }
247 else
248 {
249 c = c2;
250 width = 2;
251 }
252 break ESCAPE_SWITCH;
253 }
254
255 case 'r':
256 c = '\r';
257 width = 2;
258 break ESCAPE_SWITCH;
259
260 case 't':
261 c = '\t';
262 width = 2;
263 break ESCAPE_SWITCH;
264
265 case 'n':
266 c = '\n';
267 width = 2;
268 break ESCAPE_SWITCH;
269
270
271 default:
272 c = c2;
273 width = 2;
274 break ESCAPE_SWITCH;
275 }
276 break C1_SWITCH;
277 }
278
279 default:
280 {
281 if( c1 == watchFor )
282 {
283 boolean atEnd = true;
284 for( int j = 1; j < delimiter.length(); j++ )
285 {
286 if( source.la(offset+j) != delimiter.charAt(j) )
287 {
288 atEnd = false;
289 break;
290 }
291 }
292
293 if( atEnd )
294 {
295 return CharStream.EOS;
296 }
297 }
298
299 c = c1;
300 width = 1;
301 break C1_SWITCH;
302 }
303 }
304
305
306 characters[lookahead] = c;
307 widths[lookahead] = width;
308
309 offset += width;
310 lookahead += 1;
311 }
312
313 return c;
314 }
315
316 lookahead = 0;
317 return source.la(k);
318 }
319
320 return CharStream.EOS;
321
322 }
323
324
325
326 /***
327 * Eats a character from the input stream. Searches for the delimiter if
328 * delimited. Note that turning delimiting on also checks if we are at the
329 * delimiter, so if we aren't finished, there is something to consume.
330 */
331
332 public char consume() throws LexerException, ReadException
333 {
334 if( !finished && source != null )
335 {
336 char c = CharStream.EOS;
337
338 if( delimited )
339 {
340 if( lookahead < 1 )
341 {
342 la( 1 );
343 }
344
345 if( lookahead >= 1 )
346 {
347 c = characters[0];
348 for( int i = 0; i < widths[0]; i++ )
349 {
350 source.consume();
351 }
352
353 lookahead = 0;
354 }
355
356 if( la(1) == CharStream.EOS )
357 {
358 finishUp();
359 }
360 }
361 else
362 {
363 c = source.consume();
364 }
365
366 lookahead = 0;
367 return c;
368 }
369
370 return CharStream.EOS;
371 }
372
373
374
375 /***
376 * Eats our delimiter from the stream and marks us finished.
377 */
378
379 protected void finishUp() throws LexerException, ReadException
380 {
381 for( int i = 0; i < delimiter.length(); i++ )
382 {
383 char c = source.la(1);
384 if( c == CharStream.EOS )
385 {
386 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
387 }
388 else if( c == delimiter.charAt(i) )
389 {
390 source.consume();
391 }
392 else
393 {
394 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
395 }
396 }
397
398 finish();
399 }
400
401 }