package com.digitalpebble.storm.crawler.parse.filter;

import com.digitalpebble.storm.crawler.Metadata;
import com.digitalpebble.storm.crawler.parse.ParseFilter;
import com.digitalpebble.storm.crawler.parse.ParseResult;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang.StringUtils;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:com/digitalpebble/storm/crawler/parse/filter/XPathFilter.class */
/**
 * Parse filter that evaluates configured XPath expressions against the DOM of a
 * parsed document and stores the extracted strings in the document's metadata.
 *
 * <p>Expressions are registered through {@link #configure(Map, JsonNode)}: each
 * field of the JSON configuration is a metadata key whose value is either a
 * single expression or an array of alternative expressions. When filtering, the
 * alternatives of a key are tried in the order they were registered and the
 * first one that yields at least one value is used.
 */
public class XPathFilter extends ParseFilter {
    private static final Logger LOG = LoggerFactory.getLogger(XPathFilter.class);

    /** Prefix marking an expression that must be evaluated as a string. */
    private static final String STRING_PREFIX = "string(";

    /** Prefix marking an expression whose resulting nodes must be serialized as markup. */
    private static final String SERIALIZE_PREFIX = "serialize(";

    private final XPathFactory factory = XPathFactory.newInstance();
    private final XPath xpath = this.factory.newXPath();

    /** Compiled expressions, grouped by the metadata key they populate. */
    private final Map<String, List<LabelledExpression>> expressions =
            new HashMap<String, List<LabelledExpression>>();

    /** How the result of an XPath expression is turned into strings. */
    public enum EvalFunction {
        /** Evaluate to a node set and take the text content of each node. */
        NONE,
        /** Evaluate directly to a single string. */
        STRING,
        /** Evaluate to a node set and serialize each node as markup. */
        SERIALIZE;

        /**
         * Returns the XPath return type to request when evaluating an
         * expression with this function.
         *
         * @return {@link XPathConstants#STRING} for {@link #STRING},
         *         {@link XPathConstants#NODESET} for every other function
         */
        public QName getReturnType() {
            if (this == STRING) {
                return XPathConstants.STRING;
            }
            return XPathConstants.NODESET;
        }
    }

    /** A compiled XPath expression together with the metadata key it feeds. */
    public class LabelledExpression {
        private final String key;
        private final EvalFunction evalFunction;
        private final XPathExpression expression;

        /**
         * Compiles {@code expressionText} and selects the evaluation function
         * from its prefix.
         *
         * @param key metadata key the results are stored under
         * @param expressionText XPath expression, optionally wrapped in
         *        {@code string(...)} or {@code serialize(...)}
         * @throws XPathExpressionException if the expression cannot be compiled
         */
        private LabelledExpression(String key, String expressionText) throws XPathExpressionException {
            this.key = key;
            String toCompile = expressionText;
            if (expressionText.startsWith(STRING_PREFIX)) {
                // string(...) is valid XPath by itself, so it is compiled unchanged.
                this.evalFunction = EvalFunction.STRING;
            } else if (expressionText.startsWith(SERIALIZE_PREFIX)) {
                // serialize(...) is not an XPath function: drop the prefix and the
                // last character (not checked to actually be the closing ')').
                toCompile = expressionText.substring(SERIALIZE_PREFIX.length(), expressionText.length() - 1);
                this.evalFunction = EvalFunction.SERIALIZE;
            } else {
                this.evalFunction = EvalFunction.NONE;
            }
            this.expression = XPathFilter.this.xpath.compile(toCompile);
        }

        /**
         * Evaluates the expression against the given DOM fragment.
         *
         * @param documentFragment DOM to evaluate the expression on
         * @return the extracted values, possibly empty, never {@code null}
         * @throws XPathExpressionException if the evaluation fails
         * @throws IOException if serializing a matched node fails
         */
        public List<String> evaluate(DocumentFragment documentFragment) throws XPathExpressionException, IOException {
            Object result = this.expression.evaluate(documentFragment, this.evalFunction.getReturnType());
            List<String> values = new ArrayList<String>();
            switch (this.evalFunction) {
                case STRING:
                    if (result != null) {
                        values.add(StringUtils.strip((String) result));
                    }
                    break;
                case SERIALIZE:
                    serializeNodes((NodeList) result, values);
                    break;
                default:
                    NodeList nodes = (NodeList) result;
                    for (int i = 0; i < nodes.getLength(); i++) {
                        values.add(StringUtils.strip(nodes.item(i).getTextContent()));
                    }
                    break;
            }
            return values;
        }

        /**
         * Appends one string per matched node to {@code values}: the raw text
         * of non-empty text nodes, and the serialized markup of elements,
         * documents and document fragments. Other node types add nothing.
         */
        private void serializeNodes(NodeList nodes, List<String> values) throws IOException {
            StringWriter buffer = new StringWriter();
            OutputFormat format = new OutputFormat("xhtml", (String) null, false);
            format.setOmitXMLDeclaration(true);
            XMLSerializer serializer = new XMLSerializer(buffer, format);
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);
                switch (node.getNodeType()) {
                    case Node.ELEMENT_NODE:
                        serializer.serialize((Element) node);
                        break;
                    case Node.TEXT_NODE:
                        String text = node.getTextContent();
                        if (text.length() > 0) {
                            values.add(text);
                        }
                        break;
                    case Node.DOCUMENT_NODE:
                        serializer.serialize((Document) node);
                        break;
                    case Node.DOCUMENT_FRAGMENT_NODE:
                        serializer.serialize((DocumentFragment) node);
                        break;
                    default:
                        break;
                }
                String serialized = buffer.toString();
                if (serialized.length() > 0) {
                    values.add(serialized);
                }
                // The writer is shared by all nodes, so empty it before the next one.
                buffer.getBuffer().setLength(0);
            }
        }
    }

    /**
     * Evaluates every configured expression on {@code documentFragment} and
     * adds the results to the metadata of the {@code parseResult} entry looked
     * up with {@code str} (presumably the document URL).
     *
     * <p>For each key, the first expression producing a non-empty result wins
     * and the remaining alternatives are skipped. Evaluation errors are logged
     * and the next alternative is tried.
     *
     * @param str key used to look up the entry in {@code parseResult}
     * @param bArr not used by this filter
     * @param documentFragment DOM the expressions are evaluated against
     * @param parseResult holder of the metadata to enrich
     */
    @Override // com.digitalpebble.storm.crawler.parse.ParseFilter
    public void filter(String str, byte[] bArr, DocumentFragment documentFragment, ParseResult parseResult) {
        Metadata metadata = parseResult.get(str).getMetadata();
        for (List<LabelledExpression> alternatives : this.expressions.values()) {
            // A bounded for-each guarantees termination when no alternative
            // produces a value (or all of them fail).
            for (LabelledExpression candidate : alternatives) {
                try {
                    List<String> values = candidate.evaluate(documentFragment);
                    if (values != null && !values.isEmpty()) {
                        metadata.addValues(candidate.key, values);
                        break;
                    }
                } catch (IOException e) {
                    LOG.error("Error evaluating {}: {}", candidate.key, e);
                } catch (XPathExpressionException e) {
                    LOG.error("Error evaluating {}: {}", candidate.key, e);
                }
            }
        }
    }

    /**
     * Registers the expressions described by {@code jsonNode}: every field name
     * is a metadata key and its value is one expression or an array of them.
     *
     * @param map not used by this filter
     * @param jsonNode JSON object mapping keys to expression(s)
     * @throws RuntimeException if an expression cannot be compiled
     */
    @Override // com.digitalpebble.storm.crawler.parse.ParseFilter
    public void configure(Map map, JsonNode jsonNode) {
        Iterator<Map.Entry<String, JsonNode>> fields = jsonNode.fields();
        while (fields.hasNext()) {
            Map.Entry<String, JsonNode> field = fields.next();
            String key = field.getKey();
            JsonNode value = field.getValue();
            if (value.isArray()) {
                for (JsonNode element : value) {
                    addExpression(key, element);
                }
            } else {
                addExpression(key, value);
            }
        }
    }

    /**
     * Compiles the text of {@code jsonNode} and appends it to the alternatives
     * registered for {@code str}.
     *
     * @throws RuntimeException wrapping the {@link XPathExpressionException}
     *         if the expression cannot be compiled
     */
    private void addExpression(String str, JsonNode jsonNode) {
        String asText = jsonNode.asText();
        LabelledExpression compiled;
        try {
            // Compile first so a bad expression never leaves an empty list behind.
            compiled = new LabelledExpression(str, asText);
        } catch (XPathExpressionException e) {
            throw new RuntimeException("Can't compile expression : " + asText, e);
        }
        List<LabelledExpression> alternatives = this.expressions.get(str);
        if (alternatives == null) {
            alternatives = new ArrayList<LabelledExpression>();
            this.expressions.put(str, alternatives);
        }
        alternatives.add(compiled);
    }

    /**
     * Reports that this filter needs a DOM.
     *
     * @return always {@code true}
     */
    @Override // com.digitalpebble.storm.crawler.parse.ParseFilter
    public boolean needsDOM() {
        return true;
    }
}
