View Javadoc

1   package org.codehaus.groovy.sandbox.util;
2   import groovy.lang.GroovyObjectSupport;
3   
4   import java.io.File;
5   import java.io.FileInputStream;
6   import java.io.IOException;
7   import java.io.InputStream;
8   import java.io.Reader;
9   import java.io.StringReader;
10  import java.security.AccessController;
11  import java.security.PrivilegedActionException;
12  import java.security.PrivilegedExceptionAction;
13  import java.util.HashMap;
14  import java.util.Iterator;
15  import java.util.LinkedList;
16  import java.util.List;
17  import java.util.Map;
18  
19  import javax.xml.parsers.ParserConfigurationException;
20  import javax.xml.parsers.SAXParser;
21  import javax.xml.parsers.SAXParserFactory;
22  
23  import org.xml.sax.Attributes;
24  import org.xml.sax.InputSource;
25  import org.xml.sax.SAXException;
26  import org.xml.sax.XMLReader;
27  import org.xml.sax.helpers.DefaultHandler;
28  
29  
30  public class XmlSlurper extends DefaultHandler {
31      private final XMLReader reader;
32  	private List result = null;
33  	private List body = null;
34  	private final StringBuffer charBuffer = new StringBuffer();
35  
36      public XmlSlurper() throws ParserConfigurationException, SAXException {
37          this(false, true);
38      }
39  
40      public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
41          SAXParserFactory factory = null;
42          
43  	    	try {
44  				factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
45  					public Object run() throws ParserConfigurationException {
46  						return SAXParserFactory.newInstance();
47  					}
48  				});
49  	    	} catch (final PrivilegedActionException pae) {
50  	    	final Exception e = pae.getException();
51  	    		
52  	    		if (e instanceof ParserConfigurationException) {
53  	    			throw (ParserConfigurationException) e;
54  	    		} else {
55  	    			throw new RuntimeException(e);
56  	    		}
57  	    	}
58          factory.setNamespaceAware(namespaceAware);
59          factory.setValidating(validating);
60  
61          final SAXParser parser = factory.newSAXParser();
62          this.reader = parser.getXMLReader();
63      }
64  
65      public XmlSlurper(final XMLReader reader) {
66          this.reader = reader;
67      }
68  
69      public XmlSlurper(final SAXParser parser) throws SAXException {
70          this(parser.getXMLReader());
71      }
72  
73      /***
74       * Parse the content of the specified input source into a List
75       */
76      public XmlList parse(final InputSource input) throws IOException, SAXException {
77      		this.reader.setContentHandler(this);
78      		this.reader.parse(input);
79          
80          return (XmlList)this.result.get(0);
81      }
82      
83      /***
84       * Parses the content of the given file as XML turning it into a List
85       */
86      public XmlList parse(final File file) throws IOException, SAXException {
87      final InputSource input = new InputSource(new FileInputStream(file));
88      
89          input.setSystemId("file://" + file.getAbsolutePath());
90          
91          return parse(input);
92  
93      }
94  
95      /***
96       * Parse the content of the specified input stream into a List.
97       * Note that using this method will not provide the parser with any URI
98       * for which to find DTDs etc
99       */
100     public XmlList parse(final InputStream input) throws IOException, SAXException {
101         return parse(new InputSource(input));
102     }
103 
104     /***
105      * Parse the content of the specified reader into a List.
106      * Note that using this method will not provide the parser with any URI
107      * for which to find DTDs etc
108      */
109     public XmlList parse(final Reader in) throws IOException, SAXException {
110         return parse(new InputSource(in));
111     }
112 
113     /***
114      * Parse the content of the specified URI into a List
115      */
116     public XmlList parse(final String uri) throws IOException, SAXException {
117         return parse(new InputSource(uri));
118     }
119 
120     /***
121      * A helper method to parse the given text as XML
122      * 
123      * @param text
124      * @return
125      */
126     public XmlList parseText(final String text) throws IOException, SAXException {
127         return parse(new StringReader(text));
128     }
129     
130 
131     // ContentHandler interface
132     //-------------------------------------------------------------------------                    
133 	
134 	/* (non-Javadoc)
135 	 * @see org.xml.sax.ContentHandler#startDocument()
136 	 */
137 	public void startDocument() throws SAXException {
138 		this.result = null;
139 		this.body = new LinkedList();
140 		this.charBuffer.setLength(0);
141 	}
142 	
143 	/* (non-Javadoc)
144 	 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
145 	 */
146 	public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
147 		addNonWhitespaceCdata();
148 		
149 		final Map attributes = new HashMap();
150 		
151 		for (int i = atts.getLength() - 1; i != -1; i--) {
152 			if (atts.getURI(i).length() == 0) {
153 				attributes.put(atts.getQName(i), atts.getValue(i));
154 			} else {
155 				//
156 				// Note this is strictly incorrect the name is really localname + URI
157 				// We need to figure out what to do with paramenters in namespaces
158 				//
159 				attributes.put(atts.getLocalName(i), atts.getValue(i));
160 			}
161 			
162 		}
163 		
164 		final List newBody = new LinkedList();
165 
166 		newBody.add(attributes);
167 		
168 		newBody.add(this.body);
169 
170 		this.body = newBody;
171 	}
172 
173 	/* (non-Javadoc)
174 	 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
175 	 */
176 	public void characters(final char[] ch, final int start, final int length) throws SAXException {
177 		this.charBuffer.append(ch, start, length);
178 	}
179 	
180 	/* (non-Javadoc)
181 	 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
182 	 */
183 	public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
184 		addNonWhitespaceCdata();
185 		
186 		final List children = this.body;
187 		
188 		final Map attributes = (Map)this.body.remove(0);
189 		
190 		this.body = (List)this.body.remove(0);
191 		
192 		if (namespaceURI.length() == 0) {
193 			this.body.add(new XmlList(qName, attributes, children, namespaceURI));
194 		} else {
195 			this.body.add(new XmlList(localName, attributes, children, namespaceURI));
196 		}
197 	}
198 	
199 	/* (non-Javadoc)
200 	 * @see org.xml.sax.ContentHandler#endDocument()
201 	 */
202 	public void endDocument() throws SAXException {
203 		this.result = this.body;
204 		this.body = null;
205 	}
206 
207     // Implementation methods
208     //-------------------------------------------------------------------------           
209 
210 	/***
211 	 * 
212 	 */
213 	private void addNonWhitespaceCdata() {
214 		if (this.charBuffer.length() != 0) {
215 			//
216 			// This element is preceeded by CDATA if it's not whitespace add it to the body
217 			// Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
218 			// but for the sort of work I'm doing ignoring the whitespace is preferable
219 			//
220 			final String cdata = this.charBuffer.toString();
221 			
222 			this.charBuffer.setLength(0);
223 			if (cdata.trim().length() != 0) {
224 				this.body.add(cdata);
225 			}
226 		}		
227 	}
228 }
229 
230 class XmlList extends GroovyObjectSupport {
231 	final String name;
232 	final Map attributes;
233 	final Object[] children;
234 	final String namespaceURI;
235 	
236     public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) {
237         super();
238         
239         this.name = name;
240         this.attributes = attributes;
241         this.children = body.toArray();
242         this.namespaceURI = namespaceURI;
243     }
244     
245     public Object getProperty(final String elementName) {
246 	    	if (elementName.startsWith("@")) {
247 	    		return this.attributes.get(elementName.substring(1));
248 	    	} else {
249 	    	final int indexOfFirst = getNextXmlElement(elementName, -1);
250 	    	
251 	    		if (indexOfFirst == -1) { // no elements match the element name
252     				return new ElementCollection() {
253         				protected ElementCollection getResult(final String property) {
254         					return this;
255         				}
256 
257 	    	    	    		/***
258 	    	    	    		 * 
259 	    	    	    		 * Used by the Invoker when it wants to iterate over this object
260 	    	    	    		 * 
261 	    	    	    		 * @return
262 	    	    	    		 */
263 	    	    	    		public ElementIterator iterator() {
264 	    	    	    			return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
265 	    	    	    				{
266 	    	    	    					findNextChild();		// set up the element indexes
267 	    	    	    				}
268 	    	    	    				
269 	    	        				protected void findNextChild() {
270 	    	        					this.nextParentElements[0] = -1;
271 	    	        				}
272 	    	    	    			};
273 	    	    	    		}
274     				};
275 	    		}
276 	    		
277     			if (getNextXmlElement(elementName, indexOfFirst) == -1) {	// one element matches the element name
278     				return this.children[indexOfFirst];
279     			} else {		// > 1 element matches the element name
280 	    	    		return new ElementCollection() {
281 	        				protected ElementCollection getResult(final String property) {
282 	        					return new ComplexElementCollection(new XmlList[]{XmlList.this},
283 	    							     						new int[] {indexOfFirst},
284 	    														new String[] {elementName},
285 	    														property);
286 	        				}
287 	
288 	    	    	    		/***
289 	    	    	    		 * 
290 	    	    	    		 * Used by the Invoker when it wants to iterate over this object
291 	    	    	    		 * 
292 	    	    	    		 * @return
293 	    	    	    		 */
294 	    	    	    		public ElementIterator iterator() {
295 	    	    	    			return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
296 	    	        				protected void findNextChild() {
297 	    	        					this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
298 	    	        				}
299 	    	    	    			};
300 	    	    	    		}
301 	    	    	    };
302     			}
303 	    	}
304     }
305     
306     public Object getAt(final int index) {
307     		if (index == 0) {
308     			return this;
309     		} else {
310     			throw new ArrayIndexOutOfBoundsException(index);
311     		}
312     	}
313     
314     public int size() {
315     		return 1;
316     }
317 
318     public Object invokeMethod(final String name, final Object args) {
319 		if ("attributes".equals(name)) {
320 			return this.attributes;
321 		} else if ("name".equals(name)) {
322 			return this.name;
323 		} else if ("children".equals(name)) {
324 			return this.children;
325 		} else if ("text".equals(name)) {
326 		final StringBuffer buff = new StringBuffer();
327 		
328 			for (int i = 0; i != this.children.length; i++) {
329 			final Object child = this.children[i];
330 			
331 				if (child instanceof String) {
332 					buff.append(child);
333 				}
334 			}	
335 		
336 			return buff.toString();
337 		} else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
338 			return getProperty((String)((Object[])args)[0]);
339 		} else if ("depthFirst".equals(name)) {
340 			//
341 			// TODO: replace this with an iterator
342 			//
343 			
344 			return new GroovyObjectSupport() {
345 				public Object invokeMethod(final String name, final Object args) {
346 					if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
347 						return getProperty((String)((Object[])args)[0]);
348 					} else {
349 						return XmlList.this.invokeMethod(name, args);
350 					}
351 				}
352 				
353 				public Object getProperty(final String property) {
354 					if (property.startsWith("@")) {
355 						return XmlList.this.getProperty(property);
356 					} else {
357 					final List result = new LinkedList();
358 
359 						depthFirstGetProperty(property, XmlList.this.children, result);
360 						
361 						return result;
362 					}
363 				}
364 				
365 				private void depthFirstGetProperty(final String property, final Object[] contents, final List result) {
366 			    		for (int i = 0; i != contents.length; i++) {
367 			    		final Object item = contents[i];
368 			    		
369 			    			if (item instanceof XmlList) {
370 			    				if (((XmlList)item).name.equals(property)) {
371 			    					result.add(item);
372 			    				}
373 			    				
374 			    				depthFirstGetProperty(property, ((XmlList)item).children, result);
375 			    			}
376 					}
377 				}
378 			};
379     		} else {
380     			return getMetaClass().invokeMethod(this, name, args);
381     		}
382     }
383 
384     	protected int getNextXmlElement(final String name, final int lastFound) {
385     		for (int i = lastFound + 1; i < this.children.length; i++) {
386 	    	final Object item = this.children[i];
387 	    		
388 	    		if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
389 	    			return i;
390 	    		}
391 	    	}
392     		
393     		return -1;
394     	}
395 }
396 
397 abstract class ElementIterator implements Iterator {
398 	protected final XmlList[] parents;
399 	protected final int[] nextParentElements;
400 	
401 	protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
402 		this.parents = new XmlList[parents.length];
403 		System.arraycopy(parents, 0, this.parents, 0, parents.length);
404 		
405 		this.nextParentElements = new int[nextParentElements.length];
406 		System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
407 	}
408 	
409 	/* (non-Javadoc)
410 	 * @see java.util.Enumeration#hasMoreElements()
411 	*/
412 	public boolean hasNext() {
413 		return this.nextParentElements[0] != -1;
414 	}
415 	
416 	/* (non-Javadoc)
417 	 * @see java.util.Enumeration#nextElement()
418 	 */
419 	public Object next() {
420 	final Object result = this.parents[0].children[this.nextParentElements[0]];
421 			
422 		findNextChild();
423 	
424 		return result;
425 	}
426 	
427 	/* (non-Javadoc)
428 	 * @see java.util.Iterator#remove()
429 	 */
430 	public void remove() {
431 		throw new UnsupportedOperationException();
432 	}
433 	
434 	protected abstract void findNextChild();
435 }
436 
437 abstract class ElementCollection extends GroovyObjectSupport {
438 	private int count = -1;
439 	
440 	public abstract ElementIterator iterator();
441 	
442 	/* (non-Javadoc)
443 	 * @see groovy.lang.GroovyObject#getProperty(java.lang.String)
444 	 */
445 	public Object getProperty(final String property) {
446 	final ElementCollection result = getResult(property);
447 	final Iterator iterator = result.iterator();
448 
449 		if (iterator.hasNext()) {				
450 			//
451 			// See if there's only one available
452 			//
453 			final Object first = iterator.next();
454 			
455 			if (!iterator.hasNext()) {
456 				return first;
457 			}
458 		}
459 		
460 		return result;
461 	}
462 	
463 	protected abstract ElementCollection getResult(String property);
464 	
465 	public synchronized int size() {
466 		if (this.count == -1) {
467 		final Iterator iter = iterator();
468 		
469 			this.count = 0;
470 			
471 			while (iter.hasNext()) {
472 				this.count++;
473 				iter.next();
474 			}
475 		}
476 		return this.count;
477 	}
478 }
479 
480 class ComplexElementCollection extends ElementCollection {
481 	private final XmlList[] parents;
482 	private final int[] nextParentElements;
483 	private final String[] parentElementNames;
484 	
485 	public ComplexElementCollection(final XmlList[] parents,
486               				  	  final int[] nextParentElements,
487 								  final String[] parentElementNames,
488 								  final String childElementName)
489 	{
490 		this.parents = new XmlList[parents.length + 1];
491 		this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
492 		System.arraycopy(parents, 0, this.parents, 1, parents.length);
493 		
494 		this.nextParentElements = new int[nextParentElements.length + 1];
495 		this.nextParentElements[0] = -1;	
496 		System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
497 		
498 		this.parentElementNames = new String[parentElementNames.length + 1];
499 		this.parentElementNames[0] = childElementName;
500 		System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
501 		
502 		//
503 		// Use the iterator to get the index of the first elemeny
504 		//
505 		
506 		final ElementIterator iter = this.iterator();
507 		
508 		iter.findNextChild();
509 		
510 		this.nextParentElements[0] = iter.nextParentElements[0];
511 	}
512 	
513 	protected ElementCollection getResult(final String property) {
514 		return new ComplexElementCollection(this.parents,
515 				   							this.nextParentElements,
516 											this.parentElementNames,
517 											property);
518 	}
519 	
520 	/***
521 	 * 
522 	 * Used by the Invoker when it wants to iterate over this object
523 	 * 
524 	 * @return
525 	 */
526 	public ElementIterator iterator() {
527 		return new ElementIterator(this.parents, this.nextParentElements) {
528 						protected void findNextChild() {	
529 							this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
530 							
531 							while (this.nextParentElements[0] == -1) {
532 								this.parents[0] = findNextParent(1);
533 								
534 								if (this.parents[0] == null) {
535 									return;
536 								} else {
537 									this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
538 								}
539 							}
540 						}
541 						
542 						private XmlList findNextParent(final int i) {
543 							if (i == this.nextParentElements.length) return null;
544 							
545 							this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
546 							
547 							while (this.nextParentElements[i] == -1) {
548 								this.parents[i] = findNextParent(i + 1);
549 								
550 								if (this.parents[i] == null) {
551 									return null;
552 								} else {
553 									this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
554 								}
555 							}
556 						
557 							return (XmlList)this.parents[i].children[this.nextParentElements[i]];
558 						}
559 		};
560 	}
561 }