View Javadoc

1   package org.codehaus.groovy.syntax.lexer;
2   
3   //{{{ imports
4   import org.codehaus.groovy.syntax.ReadException;
5   import org.codehaus.groovy.syntax.Token;
6   import org.codehaus.groovy.GroovyBugError;
7   //}}}
8   
9   /***
10   *  A Lexer for processing standard strings.
11   *
12   *  @author Chris Poirier
13   */
14  
15  public class StringLexer extends TextLexerBase
16  {
17  
18      protected String  delimiter = null;
19      protected char    watchFor;
20      protected boolean allowGStrings = false;
21      protected boolean emptyString   = true;   // If set, we need to send an empty string
22  
23  
24     /***
25      *  If set true, the filter will allow // and \$ to pass through unchanged.
26      *  You should set this appropriately BEFORE setting source!
27      */
28  
29      public void allowGStrings( boolean allow )
30      {
31          allowGStrings = allow;
32      }
33  
34  
35  
36     /***
37      *  Returns a single STRING, then null.   The STRING is all of the processed
38      *  input.  Backslashes are stripped, with the \r, \n, and \t converted
39      *  appropriately.
40      */
41  
42      public Token undelegatedNextToken( ) throws ReadException, LexerException
43      {
44          if( emptyString )
45          {
46              emptyString = false;
47              return Token.newString( "", getStartLine(), getStartColumn() );
48          }
49          else if( finished )
50          {
51              return null;
52          }
53          else
54          {
55              StringBuffer string = new StringBuffer();
56  
57              while( la(1) != CharStream.EOS )
58              {
59                  string.append( consume() );
60              }
61  
62              return Token.newString( string.toString(), getStartLine(), getStartColumn() );
63          }
64      }
65  
66  
67  
68     /***
69      *  Controls delimiter search.  When turned on, the first thing we do
70      *  is check for and eat our delimiter.
71      */
72  
73      public void delimit( boolean delimit )
74      {
75          super.delimit( delimit );
76  
77          if( delimit )
78          {
79              try
80              {
81                  if( !finished && la(1) == CharStream.EOS )
82                  {
83                      finishUp();
84  
85                      //
86                      // The GStringLexer will correctly handle the empty string.
87                      // We don't.  In order to ensure that an empty string is
88                      // supplied, we set a flag that is checked during
89                      // undelegatedNextToken().
90  
91                      if( !allowGStrings )
92                      {
93                          emptyString = true;
94                      }
95                  }
96              }
97              catch( Exception e )
98              {
99                  finished = true;
100             }
101         }
102     }
103 
104 
105 
106 
107    /***
108     *  Sets the source lexer and identifies and consumes the opening delimiter.
109     */
110 
111     public void setSource( Lexer source )
112     {
113         super.setSource( source );
114 
115         emptyString = false;
116 
117         try
118         {
119             char c = source.la();
120             switch( c )
121             {
122                 case '\'':
123                 case '"':
124                     mark();
125                     source.consume();
126 
127                     if( source.la() == c && source.la(2) == c )
128                     {
129                         source.consume(); source.consume();
130                         delimiter = new StringBuffer().append(c).append(c).append(c).toString();
131                     }
132                     else
133                     {
134                         delimiter = new StringBuffer().append(c).toString();
135                     }
136 
137                     watchFor = delimiter.charAt(0);
138                     break;
139 
140 
141                 default:
142                 {
143                     throw new GroovyBugError( "at the time of StringLexer.setSource(), the source must be on a single or double quote" );
144                 }
145             }
146 
147             restart();
148             delimit( true );
149         }
150         catch( Exception e )
151         {
152             //
153             // If we couldn't read our delimiter, we'll just
154             // cancel our source.  nextToken() will return null.
155 
156             e.printStackTrace();
157             unsetSource( );
158         }
159     }
160 
161 
162 
163    /***
164     *  Unsets our source.
165     */
166 
167     public void unsetSource()
168     {
169         super.unsetSource();
170         delimiter   = null;
171         finished    = true;
172         emptyString = false;
173     }
174 
175 
176 
177 
178   //---------------------------------------------------------------------------
179   // STREAM ROUTINES
180 
181     private int    lookahead  = 0;             // the number of characters identified
182     private char[] characters = new char[3];   // the next characters identified by la()
183     private int[]  widths     = new int[3];    // the source widths of the next characters
184 
185 
186 
187    /***
188     *  Returns the next <code>k</code>th character, without consuming any.
189     */
190 
191     public char la(int k) throws LexerException, ReadException
192     {
193 
194         if( !finished && source != null )
195         {
196 
197             if( delimited )
198             {
199 
200                 if( k > characters.length )
201                 {
202                     throw new GroovyBugError( "StringLexer lookahead tolerance exceeded" );
203                 }
204 
205                 if( lookahead >= k )
206                 {
207                     return characters[k-1];
208                 }
209 
210                 lookahead = 0;
211 
212                 char c = ' ', c1 = ' ', c2 = ' ';
213                 int offset = 1, width = 0;
214                 for( int i = 1; i <= k; i++ )
215                 {
216                     c1 = source.la(offset);
217                     C1_SWITCH: switch( c1 )
218                     {
219                         case CharStream.EOS:
220                         {
221                             return c1;
222                         }
223 
224                         case '//':
225                         {
226                             c2 = source.la( offset + 1 );
227 
228                             ESCAPE_SWITCH: switch( c2 )
229                             {
230 
231                                 case CharStream.EOS:
232                                     return c2;
233 
234                                 case '//':
235                                 case '$':
236                                 {
237                                     if( allowGStrings )
238                                     {
239                                         c = c1;
240                                         width = 1;
241                                     }
242                                     else
243                                     {
244                                         c = c2;
245                                         width = 2;
246                                     }
247                                     break ESCAPE_SWITCH;
248                                 }
249 
250                                 case 'r':
251                                     c = '\r';
252                                     width = 2;
253                                     break ESCAPE_SWITCH;
254 
255                                 case 't':
256                                     c = '\t';
257                                     width = 2;
258                                     break ESCAPE_SWITCH;
259 
260                                 case 'n':
261                                     c = '\n';
262                                     width = 2;
263                                     break ESCAPE_SWITCH;
264 
265 
266                                 default:
267                                     c = c2;
268                                     width = 2;
269                                     break ESCAPE_SWITCH;
270                             }
271                             break C1_SWITCH;
272                         }
273 
274                         default:
275                         {
276                             if( c1 == watchFor )
277                             {
278                                 boolean atEnd = true;
279                                 for( int j = 1; j < delimiter.length(); j++ )
280                                 {
281                                     if( source.la(offset+j) != delimiter.charAt(j) )
282                                     {
283                                         atEnd = false;
284                                         break;
285                                     }
286                                 }
287 
288                                 if( atEnd )
289                                 {
290                                     return CharStream.EOS;
291                                 }
292                             }
293 
294                             c = c1;
295                             width = 1;
296                             break C1_SWITCH;
297                         }
298                     }
299 
300 
301                     characters[lookahead] = c;
302                     widths[lookahead]     = width;
303 
304                     offset += width;
305                     lookahead += 1;
306                 }
307 
308                 return c;                                         // <<< FLOW CONTROL <<<<<<<<<
309             }
310 
311             lookahead = 0;
312             return source.la(k);
313         }
314 
315         return CharStream.EOS;
316 
317     }
318 
319 
320 
321    /***
322     *  Eats a character from the input stream.  Searches for the delimiter if
323     *  delimited.  Note that turning delimiting on also checks if we are at the
324     *  delimiter, so if we aren't finished, there is something to consume.
325     */
326 
327     public char consume() throws LexerException, ReadException
328     {
329         if( !finished && source != null )
330         {
331             char c = CharStream.EOS;
332 
333             if( delimited )
334             {
335                 if( lookahead < 1 )
336                 {
337                     la( 1 );
338                 }
339 
340                 if( lookahead >= 1 )
341                 {
342                     c = characters[0];
343                     for( int i = 0; i < widths[0]; i++ )
344                     {
345                         source.consume();
346                     }
347 
348                     lookahead = 0;
349                 }
350 
351                 if( la(1) == CharStream.EOS )
352                 {
353                     finishUp();
354                 }
355             }
356             else
357             {
358                 c = source.consume();
359             }
360 
361             lookahead = 0;
362             return c;
363         }
364 
365         return CharStream.EOS;
366     }
367 
368 
369 
370    /***
371     *  Eats our delimiter from the stream and marks us finished.
372     */
373 
374     protected void finishUp() throws LexerException, ReadException
375     {
376         for( int i = 0; i < delimiter.length(); i++ )
377         {
378             char c = source.la(1);
379             if( c == CharStream.EOS )
380             {
381                 throw new UnterminatedStringLiteralException(getStartLine(), getStartColumn());
382             }
383             else if( c == delimiter.charAt(i) )
384             {
385                 source.consume();
386             }
387             else
388             {
389                 throw new GroovyBugError( "la() said delimiter [" + delimiter + "], finishUp() found [" + c + "]" );
390             }
391         }
392 
393         finish();
394     }
395 
396 }