package com.digitalpebble.stormcrawler.parse.filter;

import com.digitalpebble.stormcrawler.parse.ParseData;
import com.digitalpebble.stormcrawler.parse.ParseFilter;
import com.digitalpebble.stormcrawler.parse.ParseResult;
import java.io.ByteArrayInputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:com/digitalpebble/stormcrawler/parse/filter/SubDocumentsParseFilter.class */
public class SubDocumentsParseFilter extends ParseFilter {
    private static final Logger LOG = LoggerFactory.getLogger(SubDocumentsParseFilter.class);

    public void filter(String str, byte[] bArr, DocumentFragment documentFragment, ParseResult parseResult) {
        try {
            Element documentElement = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new ByteArrayInputStream(bArr)).getDocumentElement();
            XPath newXPath = XPathFactory.newInstance().newXPath();
            NodeList nodeList = (NodeList) newXPath.compile("//url").evaluate(documentElement, XPathConstants.NODESET);
            for (int i = 0; i < nodeList.getLength(); i++) {
                Node item = nodeList.item(i);
                ParseData parseData = parseResult.get(((Node) newXPath.compile("loc").evaluate(item, XPathConstants.NODE)).getTextContent());
                NodeList childNodes = item.getChildNodes();
                for (int i2 = 0; i2 < childNodes.getLength(); i2++) {
                    Node item2 = childNodes.item(i2);
                    parseData.put(item2.getNodeName(), item2.getTextContent());
                }
            }
        } catch (Exception e) {
            LOG.error("Error processing sitemap from {}: {}", str, e);
        }
    }

    public boolean needsDOM() {
        return true;
    }
}
