View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   //{{{ imports
4   import org.codehaus.groovy.syntax.ReadException;
5   import org.codehaus.groovy.syntax.Token;
6   import org.codehaus.groovy.GroovyBugError;
7   //}}}
8   
9   /***
10   *  A Lexer for processing standard strings.
11   *
12   *  @author Chris Poirier
13   */
14  
15  public class StringLexer extends TextLexerBase
16  {
17  
18      protected String  delimiter = null;       // Closing delimiter: the opening quote, single or tripled; set by setSource(), nulled by unsetSource()
19      protected char    watchFor;               // First character of delimiter; la() only tests for the full delimiter when it sees this character
20      protected boolean allowGStrings = false;  // When true, la() leaves the backslash of a backslash or dollar escape in place, and delimit() does not request an empty string
21      protected boolean emptyString   = true;   // If set, undelegatedNextToken() sends one empty STRING token and clears the flag
22  
23  
24     /***
25      *  If set true, the filter will allow // and \$ to pass through unchanged.
26      *  You should set this appropriately BEFORE setting source!
27      */
28  
29      public void allowGStrings( boolean allow )
30      {
31          allowGStrings = allow;
32      }
33  
34  
35  
36     /***
37      *  Returns a single STRING, then null.   The STRING is all of the processed
38      *  input.  Backslashes are stripped, with the \r, \n, and \t converted
39      *  appropriately.
40      */
41  
42      public Token undelegatedNextToken( ) throws ReadException, LexerException
43      {
44          if( emptyString )
45          {
46              emptyString = false;
47              return Token.newString( "", getStartLine(), getStartColumn() );
48          }
49          else if( finished )
50          {
51              return null;
52          }
53          else
54          {
55              StringBuffer string = new StringBuffer();
56  
57              while( la(1) != CharStream.EOS )
58              {
59                  string.append( consume() );
60              }
61              
62              if( la(1) == CharStream.EOS && string.length() == 0 )
63              {
64                  finished = true;
65              }
66  
67              return Token.newString( string.toString(), getStartLine(), getStartColumn() );
68          }
69      }
70  
71  
72  
73     /***
74      *  Controls delimiter search.  When turned on, the first thing we do
75      *  is check for and eat our delimiter.
76      */
77  
78      public void delimit( boolean delimit )
79      {
80          super.delimit( delimit );
81  
82          if( delimit )
83          {
84              try
85              {
86                  if( !finished && la(1) == CharStream.EOS )
87                  {
88                      finishUp();
89  
90                      //
91                      // The GStringLexer will correctly handle the empty string.
92                      // We don't.  In order to ensure that an empty string is
93                      // supplied, we set a flag that is checked during
94                      // undelegatedNextToken().
95  
96                      if( !allowGStrings )
97                      {
98                          emptyString = true;
99                      }
100                 }
101             }
102             catch( Exception e )
103             {
104                 finished = true;
105             }
106         }
107     }
108 
109 
110 
111 
112    /***
113     *  Sets the source lexer and identifies and consumes the opening delimiter.
114     */
115 
116     public void setSource( Lexer source )
117     {
118         super.setSource( source );
119 
120         emptyString = false;
121 
122         try
123         {
124             char c = source.la();
125             switch( c )
126             {
127                 case '\'':
128                 case '"':
129                     mark();
130                     source.consume();
131 
132                     if( source.la() == c && source.la(2) == c )
133                     {
134                         source.consume(); source.consume();
135                         delimiter = new StringBuffer().append(c).append(c).append(c).toString();
136                     }
137                     else
138                     {
139                         delimiter = new StringBuffer().append(c).toString();
140                     }
141 
142                     watchFor = delimiter.charAt(0);
143                     break;
144 
145 
146                 default:
147                 {
148                     throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
149                 }
150             }
151 
152             restart();
153             delimit( true );
154         }
155         catch( Exception e )
156         {
157             //
158             // If we couldn't read our delimiter, we'll just
159             // cancel our source.  nextToken() will return null.
160 
161             e.printStackTrace();
162             unsetSource( );
163         }
164     }
165 
166 
167 
168    /***
169     *  Unsets our source.
170     */
171 
172     public void unsetSource()
173     {
174         super.unsetSource();
175         delimiter   = null;
176         finished    = true;
177         emptyString = false;
178     }
179 
180 
181 
182 
183   //---------------------------------------------------------------------------
184   // STREAM ROUTINES
185 
186     private int    lookahead  = 0;             // the number of entries in characters/widths that are currently valid; 0 means the cache is empty
187     private char[] characters = new char[3];   // the next characters identified by la(), after escape processing
188     private int[]  widths     = new int[3];    // the source widths of the next characters (1 for a plain character, 2 for a stripped escape)
189 
190 
191 
192    /***
193     *  Returns the next <code>k</code>th character, without consuming any.
194     */
195 
196     public char la(int k) throws LexerException, ReadException
197     {
198 
199         if( !finished && source != null )
200         {
201 
202             if( delimited )
203             {
204 
205                 if( k > characters.length )
206                 {
207                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
208                 }
209 
210                 if( lookahead >= k )
211                 {
212                     return characters[k-1];
213                 }
214 
215                 lookahead = 0;
216 
217                 char c = ' ', c1 = ' ', c2 = ' ';
218                 int offset = 1, width = 0;
219                 for( int i = 1; i <= k; i++ )
220                 {
221                     c1 = source.la(offset);
222                     C1_SWITCH: switch( c1 )
223                     {
224                         case CharStream.EOS:
225                         {
226                             return c1;
227                         }
228 
229                         case '//':
230                         {
231                             c2 = source.la( offset + 1 );
232 
233                             ESCAPE_SWITCH: switch( c2 )
234                             {
235 
236                                 case CharStream.EOS:
237                                     return c2;
238 
239                                 case '//':
240                                 case '$':
241                                 {
242                                     if( allowGStrings )
243                                     {
244                                         c = c1;
245                                         width = 1;
246                                     }
247                                     else
248                                     {
249                                         c = c2;
250                                         width = 2;
251                                     }
252                                     break ESCAPE_SWITCH;
253                                 }
254 
255                                 case 'r':
256                                     c = '\r';
257                                     width = 2;
258                                     break ESCAPE_SWITCH;
259 
260                                 case 't':
261                                     c = '\t';
262                                     width = 2;
263                                     break ESCAPE_SWITCH;
264 
265                                 case 'n':
266                                     c = '\n';
267                                     width = 2;
268                                     break ESCAPE_SWITCH;
269 
270 
271                                 default:
272                                     c = c2;
273                                     width = 2;
274                                     break ESCAPE_SWITCH;
275                             }
276                             break C1_SWITCH;
277                         }
278 
279                         default:
280                         {
281                             if( c1 == watchFor )
282                             {
283                                 boolean atEnd = true;
284                                 for( int j = 1; j < delimiter.length(); j++ )
285                                 {
286                                     if( source.la(offset+j) != delimiter.charAt(j) )
287                                     {
288                                         atEnd = false;
289                                         break;
290                                     }
291                                 }
292 
293                                 if( atEnd )
294                                 {
295                                     return CharStream.EOS;
296                                 }
297                             }
298 
299                             c = c1;
300                             width = 1;
301                             break C1_SWITCH;
302                         }
303                     }
304 
305 
306                     characters[lookahead] = c;
307                     widths[lookahead]     = width;
308 
309                     offset += width;
310                     lookahead += 1;
311                 }
312 
313                 return c;                                         // <<< FLOW CONTROL <<<<<<<<<
314             }
315 
316             lookahead = 0;
317             return source.la(k);
318         }
319 
320         return CharStream.EOS;
321 
322     }
323 
324 
325 
326    /***
327     *  Eats a character from the input stream.  Searches for the delimiter if
328     *  delimited.  Note that turning delimiting on also checks if we are at the
329     *  delimiter, so if we aren't finished, there is something to consume.
330     */
331 
332     public char consume() throws LexerException, ReadException
333     {
334         if( !finished && source != null )
335         {
336             char c = CharStream.EOS;
337 
338             if( delimited )
339             {
340                 if( lookahead < 1 )
341                 {
342                     la( 1 );
343                 }
344 
345                 if( lookahead >= 1 )
346                 {
347                     c = characters[0];
348                     for( int i = 0; i < widths[0]; i++ )
349                     {
350                         source.consume();
351                     }
352 
353                     lookahead = 0;
354                 }
355 
356                 if( la(1) == CharStream.EOS )
357                 {
358                     finishUp();
359                 }
360             }
361             else
362             {
363                 c = source.consume();
364             }
365 
366             lookahead = 0;
367             return c;
368         }
369 
370         return CharStream.EOS;
371     }
372 
373 
374 
375    /***
376     *  Eats our delimiter from the stream and marks us finished.
377     */
378 
379     protected void finishUp() throws LexerException, ReadException
380     {
381         for( int i = 0; i < delimiter.length(); i++ )
382         {
383             char c = source.la(1);
384             if( c == CharStream.EOS )
385             {
386                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
387             }
388             else if( c == delimiter.charAt(i) )
389             {
390                 source.consume();
391             }
392             else
393             {
394                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
395             }
396         }
397 
398         finish();
399     }
400 
401 }