1 package org.codehaus.groovy.sandbox.util;
2 import groovy.lang.GroovyObjectSupport;
3
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.io.Reader;
9 import java.io.StringReader;
10 import java.security.AccessController;
11 import java.security.PrivilegedActionException;
12 import java.security.PrivilegedExceptionAction;
13 import java.util.HashMap;
14 import java.util.Iterator;
15 import java.util.LinkedList;
16 import java.util.List;
17 import java.util.Map;
18
19 import javax.xml.parsers.ParserConfigurationException;
20 import javax.xml.parsers.SAXParser;
21 import javax.xml.parsers.SAXParserFactory;
22
23 import org.xml.sax.Attributes;
24 import org.xml.sax.InputSource;
25 import org.xml.sax.SAXException;
26 import org.xml.sax.XMLReader;
27 import org.xml.sax.helpers.DefaultHandler;
28
29
30 public class XmlSlurper extends DefaultHandler {
31 private final XMLReader reader;
32 private List result = null;
33 private List body = null;
34 private final StringBuffer charBuffer = new StringBuffer();
35
36 public XmlSlurper() throws ParserConfigurationException, SAXException {
37 this(false, true);
38 }
39
40 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
41 SAXParserFactory factory = null;
42
43 try {
44 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
45 public Object run() throws ParserConfigurationException {
46 return SAXParserFactory.newInstance();
47 }
48 });
49 } catch (final PrivilegedActionException pae) {
50 final Exception e = pae.getException();
51
52 if (e instanceof ParserConfigurationException) {
53 throw (ParserConfigurationException) e;
54 } else {
55 throw new RuntimeException(e);
56 }
57 }
58 factory.setNamespaceAware(namespaceAware);
59 factory.setValidating(validating);
60
61 final SAXParser parser = factory.newSAXParser();
62 this.reader = parser.getXMLReader();
63 }
64
65 public XmlSlurper(final XMLReader reader) {
66 this.reader = reader;
67 }
68
69 public XmlSlurper(final SAXParser parser) throws SAXException {
70 this(parser.getXMLReader());
71 }
72
73 /***
74 * Parse the content of the specified input source into a List
75 */
76 public XmlList parse(final InputSource input) throws IOException, SAXException {
77 this.reader.setContentHandler(this);
78 this.reader.parse(input);
79
80 return (XmlList)this.result.get(0);
81 }
82
83 /***
84 * Parses the content of the given file as XML turning it into a List
85 */
86 public XmlList parse(final File file) throws IOException, SAXException {
87 final InputSource input = new InputSource(new FileInputStream(file));
88
89 input.setSystemId("file://" + file.getAbsolutePath());
90
91 return parse(input);
92
93 }
94
95 /***
96 * Parse the content of the specified input stream into a List.
97 * Note that using this method will not provide the parser with any URI
98 * for which to find DTDs etc
99 */
100 public XmlList parse(final InputStream input) throws IOException, SAXException {
101 return parse(new InputSource(input));
102 }
103
104 /***
105 * Parse the content of the specified reader into a List.
106 * Note that using this method will not provide the parser with any URI
107 * for which to find DTDs etc
108 */
109 public XmlList parse(final Reader in) throws IOException, SAXException {
110 return parse(new InputSource(in));
111 }
112
113 /***
114 * Parse the content of the specified URI into a List
115 */
116 public XmlList parse(final String uri) throws IOException, SAXException {
117 return parse(new InputSource(uri));
118 }
119
120 /***
121 * A helper method to parse the given text as XML
122 *
123 * @param text
124 * @return
125 */
126 public XmlList parseText(final String text) throws IOException, SAXException {
127 return parse(new StringReader(text));
128 }
129
130
131
132
133
134
135
136
137 public void startDocument() throws SAXException {
138 this.result = null;
139 this.body = new LinkedList();
140 this.charBuffer.setLength(0);
141 }
142
143
144
145
146 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
147 addNonWhitespaceCdata();
148
149 final Map attributes = new HashMap();
150
151 for (int i = atts.getLength() - 1; i != -1; i--) {
152 if (atts.getURI(i).length() == 0) {
153 attributes.put(atts.getQName(i), atts.getValue(i));
154 } else {
155
156
157
158
159 attributes.put(atts.getLocalName(i), atts.getValue(i));
160 }
161
162 }
163
164 final List newBody = new LinkedList();
165
166 newBody.add(attributes);
167
168 newBody.add(this.body);
169
170 this.body = newBody;
171 }
172
173
174
175
176 public void characters(final char[] ch, final int start, final int length) throws SAXException {
177 this.charBuffer.append(ch, start, length);
178 }
179
180
181
182
183 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
184 addNonWhitespaceCdata();
185
186 final List children = this.body;
187
188 final Map attributes = (Map)this.body.remove(0);
189
190 this.body = (List)this.body.remove(0);
191
192 if (namespaceURI.length() == 0) {
193 this.body.add(new XmlList(qName, attributes, children, namespaceURI));
194 } else {
195 this.body.add(new XmlList(localName, attributes, children, namespaceURI));
196 }
197 }
198
199
200
201
202 public void endDocument() throws SAXException {
203 this.result = this.body;
204 this.body = null;
205 }
206
207
208
209
210 /***
211 *
212 */
213 private void addNonWhitespaceCdata() {
214 if (this.charBuffer.length() != 0) {
215
216
217
218
219
220 final String cdata = this.charBuffer.toString();
221
222 this.charBuffer.setLength(0);
223 if (cdata.trim().length() != 0) {
224 this.body.add(cdata);
225 }
226 }
227 }
228 }
229
230 class XmlList extends GroovyObjectSupport {
231 final String name;
232 final Map attributes;
233 final Object[] children;
234 final String namespaceURI;
235
236 public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) {
237 super();
238
239 this.name = name;
240 this.attributes = attributes;
241 this.children = body.toArray();
242 this.namespaceURI = namespaceURI;
243 }
244
245 public Object getProperty(final String elementName) {
246 if (elementName.startsWith("@")) {
247 return this.attributes.get(elementName.substring(1));
248 } else {
249 final int indexOfFirst = getNextXmlElement(elementName, -1);
250
251 if (indexOfFirst == -1) {
252 return new ElementCollection() {
253 protected ElementCollection getResult(final String property) {
254 return this;
255 }
256
257 /***
258 *
259 * Used by the Invoker when it wants to iterate over this object
260 *
261 * @return
262 */
263 public ElementIterator iterator() {
264 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
265 {
266 findNextChild();
267 }
268
269 protected void findNextChild() {
270 this.nextParentElements[0] = -1;
271 }
272 };
273 }
274 };
275 }
276
277 if (getNextXmlElement(elementName, indexOfFirst) == -1) {
278 return this.children[indexOfFirst];
279 } else {
280 return new ElementCollection() {
281 protected ElementCollection getResult(final String property) {
282 return new ComplexElementCollection(new XmlList[]{XmlList.this},
283 new int[] {indexOfFirst},
284 new String[] {elementName},
285 property);
286 }
287
288 /***
289 *
290 * Used by the Invoker when it wants to iterate over this object
291 *
292 * @return
293 */
294 public ElementIterator iterator() {
295 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
296 protected void findNextChild() {
297 this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
298 }
299 };
300 }
301 };
302 }
303 }
304 }
305
306 public Object getAt(final int index) {
307 if (index == 0) {
308 return this;
309 } else {
310 throw new ArrayIndexOutOfBoundsException(index);
311 }
312 }
313
314 public int size() {
315 return 1;
316 }
317
318 public Object invokeMethod(final String name, final Object args) {
319 if ("attributes".equals(name)) {
320 return this.attributes;
321 } else if ("name".equals(name)) {
322 return this.name;
323 } else if ("children".equals(name)) {
324 return this.children;
325 } else if ("text".equals(name)) {
326 final StringBuffer buff = new StringBuffer();
327
328 for (int i = 0; i != this.children.length; i++) {
329 final Object child = this.children[i];
330
331 if (child instanceof String) {
332 buff.append(child);
333 }
334 }
335
336 return buff.toString();
337 } else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
338 return getProperty((String)((Object[])args)[0]);
339 } else if ("depthFirst".equals(name)) {
340
341
342
343
344 return new GroovyObjectSupport() {
345 public Object invokeMethod(final String name, final Object args) {
346 if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
347 return getProperty((String)((Object[])args)[0]);
348 } else {
349 return XmlList.this.invokeMethod(name, args);
350 }
351 }
352
353 public Object getProperty(final String property) {
354 if (property.startsWith("@")) {
355 return XmlList.this.getProperty(property);
356 } else {
357 final List result = new LinkedList();
358
359 depthFirstGetProperty(property, XmlList.this.children, result);
360
361 return result;
362 }
363 }
364
365 private void depthFirstGetProperty(final String property, final Object[] contents, final List result) {
366 for (int i = 0; i != contents.length; i++) {
367 final Object item = contents[i];
368
369 if (item instanceof XmlList) {
370 if (((XmlList)item).name.equals(property)) {
371 result.add(item);
372 }
373
374 depthFirstGetProperty(property, ((XmlList)item).children, result);
375 }
376 }
377 }
378 };
379 } else {
380 return getMetaClass().invokeMethod(this, name, args);
381 }
382 }
383
384 protected int getNextXmlElement(final String name, final int lastFound) {
385 for (int i = lastFound + 1; i < this.children.length; i++) {
386 final Object item = this.children[i];
387
388 if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
389 return i;
390 }
391 }
392
393 return -1;
394 }
395 }
396
397 abstract class ElementIterator implements Iterator {
398 protected final XmlList[] parents;
399 protected final int[] nextParentElements;
400
401 protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
402 this.parents = new XmlList[parents.length];
403 System.arraycopy(parents, 0, this.parents, 0, parents.length);
404
405 this.nextParentElements = new int[nextParentElements.length];
406 System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
407 }
408
409
410
411
412 public boolean hasNext() {
413 return this.nextParentElements[0] != -1;
414 }
415
416
417
418
419 public Object next() {
420 final Object result = this.parents[0].children[this.nextParentElements[0]];
421
422 findNextChild();
423
424 return result;
425 }
426
427
428
429
430 public void remove() {
431 throw new UnsupportedOperationException();
432 }
433
434 protected abstract void findNextChild();
435 }
436
437 abstract class ElementCollection extends GroovyObjectSupport {
438 private int count = -1;
439
440 public abstract ElementIterator iterator();
441
442
443
444
445 public Object getProperty(final String property) {
446 final ElementCollection result = getResult(property);
447 final Iterator iterator = result.iterator();
448
449 if (iterator.hasNext()) {
450
451
452
453 final Object first = iterator.next();
454
455 if (!iterator.hasNext()) {
456 return first;
457 }
458 }
459
460 return result;
461 }
462
463 protected abstract ElementCollection getResult(String property);
464
465 public synchronized int size() {
466 if (this.count == -1) {
467 final Iterator iter = iterator();
468
469 this.count = 0;
470
471 while (iter.hasNext()) {
472 this.count++;
473 iter.next();
474 }
475 }
476 return this.count;
477 }
478 }
479
480 class ComplexElementCollection extends ElementCollection {
481 private final XmlList[] parents;
482 private final int[] nextParentElements;
483 private final String[] parentElementNames;
484
485 public ComplexElementCollection(final XmlList[] parents,
486 final int[] nextParentElements,
487 final String[] parentElementNames,
488 final String childElementName)
489 {
490 this.parents = new XmlList[parents.length + 1];
491 this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
492 System.arraycopy(parents, 0, this.parents, 1, parents.length);
493
494 this.nextParentElements = new int[nextParentElements.length + 1];
495 this.nextParentElements[0] = -1;
496 System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
497
498 this.parentElementNames = new String[parentElementNames.length + 1];
499 this.parentElementNames[0] = childElementName;
500 System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
501
502
503
504
505
506 final ElementIterator iter = this.iterator();
507
508 iter.findNextChild();
509
510 this.nextParentElements[0] = iter.nextParentElements[0];
511 }
512
513 protected ElementCollection getResult(final String property) {
514 return new ComplexElementCollection(this.parents,
515 this.nextParentElements,
516 this.parentElementNames,
517 property);
518 }
519
520 /***
521 *
522 * Used by the Invoker when it wants to iterate over this object
523 *
524 * @return
525 */
526 public ElementIterator iterator() {
527 return new ElementIterator(this.parents, this.nextParentElements) {
528 protected void findNextChild() {
529 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
530
531 while (this.nextParentElements[0] == -1) {
532 this.parents[0] = findNextParent(1);
533
534 if (this.parents[0] == null) {
535 return;
536 } else {
537 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
538 }
539 }
540 }
541
542 private XmlList findNextParent(final int i) {
543 if (i == this.nextParentElements.length) return null;
544
545 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
546
547 while (this.nextParentElements[i] == -1) {
548 this.parents[i] = findNextParent(i + 1);
549
550 if (this.parents[i] == null) {
551 return null;
552 } else {
553 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
554 }
555 }
556
557 return (XmlList)this.parents[i].children[this.nextParentElements[i]];
558 }
559 };
560 }
561 }