View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   //{{{ imports
4   import org.codehaus.groovy.syntax.ReadException;
5   import org.codehaus.groovy.syntax.Token;
6   import org.codehaus.groovy.GroovyBugError;
7   //}}}
8   
9   /***
10   *  A Lexer for processing standard strings.
11   *
12   *  @author Chris Poirier
13   */
14  
15  public class StringLexer extends TextLexerBase
16  {
17  
18      protected String  delimiter = null;
19      protected char    watchFor;
20      protected boolean allowGStrings = false;
21      protected boolean emptyString   = true;   // If set, we need to send an empty string
22  
23  
24     /***
25      *  If set true, the filter will allow // and \$ to pass through unchanged.
26      *  You should set this appropriately BEFORE setting source!
27      */
28  
29      public void allowGStrings( boolean allow )
30      {
31          allowGStrings = allow;
32      }
33  
34  
35  
36     /***
37      *  Returns a single STRING, then null.   The STRING is all of the processed
38      *  input.  Backslashes are stripped, with the \r, \n, and \t converted
39      *  appropriately.
40      */
41  
42      public Token undelegatedNextToken( ) throws ReadException, LexerException
43      {
44          if( emptyString )
45          {
46              emptyString = false;
47              return Token.newString( "", getStartLine(), getStartColumn() );
48          }
49          else if( finished )
50          {
51              return null;
52          }
53          else
54          {
55              StringBuffer string = new StringBuffer();
56  
57              while( la(1) != CharStream.EOS )
58              {
59                  string.append( consume() );
60              }
61              
62              if( la(1) == CharStream.EOS && string.length() == 0 )
63              {
64                  finished = true;
65              }
66  
67              return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68          }
69      }
70  
71  
72  
73     /***
74      *  Controls delimiter search.  When turned on, the first thing we do
75      *  is check for and eat our delimiter.
76      */
77  
78      public void delimit( boolean delimit )
79      {
80          super.delimit( delimit );
81  
82          if( delimit )
83          {
84              try
85              {
86                  if( !finished && la(1) == CharStream.EOS )
87                  {
88                      finishUp();
89  
90                      //
91                      // The GStringLexer will correctly handle the empty string.
92                      // We don't.  In order to ensure that an empty string is
93                      // supplied, we set a flag that is checked during
94                      // undelegatedNextToken().
95  
96                      if( !allowGStrings )
97                      {
98                          emptyString = true;
99                      }
100                 }
101             }
102             catch( Exception e )
103             {
104                 finished = true;
105             }
106         }
107     }
108 
109 
110 
111 
112    /***
113     *  Sets the source lexer and identifies and consumes the opening delimiter.
114     */
115 
116     public void setSource( Lexer source )
117     {
118         super.setSource( source );
119 
120         emptyString = false;
121 
122         try
123         {
124             char c = source.la();
125             switch( c )
126             {
127                 case '\'':
128                 case '"':
129                     mark();
130                     source.consume();
131 
132                     if( source.la() == c && source.la(2) == c )
133                     {
134                         source.consume(); source.consume();
135                         delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136                     }
137                     else
138                     {
139                         delimiter = new StringBuffer().append(c).toString();
140                     }
141 
142                     watchFor = delimiter.charAt(0);
143                     break;
144 
145 
146                 default:
147                 {
148                     throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149                 }
150             }
151 
152             restart();
153             delimit( true );
154         }
155         catch( Exception e )
156         {
157             //
158             // If we couldn't read our delimiter, we'll just
159             // cancel our source.  nextToken() will return null.
160 
161             e.printStackTrace();
162             unsetSource( );
163         }
164     }
165 
166 
167 
168    /***
169     *  Unsets our source.
170     */
171 
172     public void unsetSource()
173     {
174         super.unsetSource();
175         delimiter   = null;
176         finished    = true;
177         emptyString = false;
178     }
179 
180 
181 
182 
183   //---------------------------------------------------------------------------
184   // STREAM ROUTINES
185 
186     private int    lookahead  = 0;             // the number of characters identified
187     private char[] characters = new char[3];   // the next characters identified by la()
188     private int[]  widths     = new int[3];    // the source widths of the next characters
189 
190 
191     public char la() throws LexerException, ReadException
192     {
193 		return la(1);
194     }
195 
196    /***
197     *  Returns the next <code>k</code>th character, without consuming any.
198     */
199 
200     public char la(int k) throws LexerException, ReadException
201     {
202 
203         if( !finished && source != null )
204         {
205 
206             if( delimited )
207             {
208 
209                 if( k > characters.length )
210                 {
211                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
212                 }
213 
214                 if( lookahead >= k && k >= 1)
215                 {
216                     lookahead = 1;
217                     return characters[k-1];
218                 }
219 
220                 lookahead = 0;
221 
222                 char c = ' ', c1 = ' ', c2 = ' ';
223                 int offset = 1, width = 0;
224                 for( int i = 1; i <= k; i++ )
225                 {
226                     c1 = source.la(offset);
227                     C1_SWITCH: switch( c1 )
228                     {
229                         case CharStream.EOS:
230                         {
231                             return c1;
232                         }
233 
234                         case '//':
235                         {
236                             c2 = source.la( offset + 1 );
237 
238                             ESCAPE_SWITCH: switch( c2 )
239                             {
240 
241                                 case CharStream.EOS:
242                                     return c2;
243 
244                                 case '//':
245                                     c = '//';
246                                     characters[0] = c;
247                                     widths[0] = 2;
248                                     lookahead = 1;
249                                     return c;
250 
251                                 case 'n':
252                                     c = '\n';
253                                     width = 2;
254                                     break ESCAPE_SWITCH;
255 
256                                 case 'r':
257                                     c = '\r';
258                                     width = 2;
259                                     break ESCAPE_SWITCH;
260 
261                                 case 't':
262                                     c = '\t';
263                                     width = 2;
264                                     break ESCAPE_SWITCH;
265 
266                                 case 'b':
267                                     c = '\b';
268                                     width = 2;
269                                     break ESCAPE_SWITCH;
270 
271                                 case 'f':
272                                     c = '\f';
273                                     width = 2;
274                                     break ESCAPE_SWITCH;
275 
276                                 case '$':
277                                     if ( allowGStrings )
278                                     {
279                                         c = c1;
280                                         width = 1;
281                                     }
282                                     else
283                                     {
284                                         c = c2;
285                                         width = 2;
286                                     }
287                                     break ESCAPE_SWITCH;
288 
289 
290                                 case '"':
291                                 case '\'':
292                                     c = c2;
293                                     characters[0] = c;
294                                     widths[0] = 2;
295                                     lookahead = 1;
296                                     return c;
297 
298                                 default:
299                                     c = '//';
300                                     characters[0] = c;
301                                     widths[0] = 1;
302                                     lookahead = 1;
303                                     return c;
304                             }
305                             break C1_SWITCH;
306                         }
307 
308                         default:
309                         {
310                             if( c1 == watchFor )
311                             {
312                                 boolean atEnd = true;
313                                 if (delimiter.length() == 1)
314                                 {
315                                     if (source.la(offset) != watchFor)
316                                     {
317                                         atEnd = false;
318                                         c = c1;
319                                         break C1_SWITCH;
320                                     }
321                                 }
322                                 else {
323                                     for( int j = 1; j < delimiter.length(); j++ )
324                                     {
325                                         if( source.la(offset+j) != delimiter.charAt(j) )
326                                         {
327                                             atEnd = false;
328                                             break;
329                                         }
330                                     }
331                                 }
332 
333                                 if( atEnd )
334                                 {
335                                     return CharStream.EOS;
336                                 }
337                             }
338 
339                             c = c1;
340                             width = 1;
341                             if (c == '$' && allowGStrings)
342                             {
343                                 lookahead = 0;
344                             }
345                             break C1_SWITCH;
346                         }
347                     }
348 
349 
350                     characters[lookahead] = c;
351                     widths[lookahead]     = width;
352 
353                     offset += width;
354                     lookahead += 1;
355                 }
356 
357                 return c;                                         // <<< FLOW CONTROL <<<<<<<<<
358             }
359 
360             lookahead = 0;
361             return source.la(k);
362         }
363 
364         return CharStream.EOS;
365 
366     }
367 
368 
369 
370    /***
371     *  Eats a character from the input stream.  Searches for the delimiter if
372     *  delimited.  Note that turning delimiting on also checks if we are at the
373     *  delimiter, so if we aren't finished, there is something to consume.
374     */
375 
376     public char consume() throws LexerException, ReadException
377     {
378         if( !finished && source != null )
379         {
380             char c = CharStream.EOS;
381 
382             if( delimited )
383             {
384                 if( lookahead < 1 )
385                 {
386                     la( 1 );
387                 }
388 
389                 if( lookahead >= 1 )
390                 {
391                     c = characters[0];
392                     for( int i = 0; i < widths[0]; i++ )
393                     {
394                         source.consume();
395                     }
396 
397                     lookahead = 0;
398                 }
399 
400                 if( la(1) == CharStream.EOS )
401                 {
402                     finishUp();
403                 }
404             }
405             else
406             {
407                 c = source.consume();
408             }
409 
410             lookahead = 0;
411             return c;
412         }
413 
414         return CharStream.EOS;
415     }
416 
417 
418 
419    /***
420     *  Eats our delimiter from the stream and marks us finished.
421     */
422 
423     protected void finishUp() throws LexerException, ReadException
424     {
425         for( int i = 0; i < delimiter.length(); i++ )
426         {
427             char c = source.la(1);
428             if( c == CharStream.EOS )
429             {
430                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
431             }
432             else if( c == delimiter.charAt(i) )
433             {
434                 source.consume();
435             }
436             else
437             {
438                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
439             }
440         }
441 
442         finish();
443     }
444 
445 }