package com.digitalpebble.stormcrawler.parse.filter;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.parse.ParseFilter;
import com.digitalpebble.stormcrawler.parse.ParseResult;
import com.fasterxml.jackson.core.JsonPointer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;

/* loaded from: input_file:com/digitalpebble/stormcrawler/parse/filter/LDJsonParseFilter.class */
/**
 * Parse filter that reads the first {@code SCRIPT} element of type
 * {@code application/ld+json} found in the DOM of a page and copies selected values from
 * it into the metadata of the document.
 *
 * <p>The values to extract are configured as a JSON object whose field names are the
 * metadata keys and whose field values are JSON pointer expressions, see
 * {@link #configure(Map, JsonNode)}.
 */
public class LDJsonParseFilter extends ParseFilter {
    public static final Logger LOG = LoggerFactory.getLogger(LDJsonParseFilter.class);

    /** XPath expression selecting the JSON-LD script element. */
    private static final String LD_JSON_XPATH = "//SCRIPT[@type=\"application/ld+json\"]";

    /**
     * Compiled form of {@link #LD_JSON_XPATH}, one instance per thread. The JDK documents
     * XPathFactory, XPath and XPathExpression as neither thread-safe nor reentrant, so
     * they must not be shared through a plain static field; keeping one compiled
     * expression per thread also avoids recompiling the constant on every call.
     */
    private static final ThreadLocal<XPathExpression> LD_JSON_EXPRESSION =
            new ThreadLocal<XPathExpression>() {
                @Override
                protected XPathExpression initialValue() {
                    try {
                        return XPathFactory.newInstance().newXPath().compile(LD_JSON_XPATH);
                    } catch (XPathExpressionException e) {
                        // The expression is a constant, so this indicates a broken XPath setup.
                        throw new IllegalStateException(
                                "Invalid XPath expression: " + LD_JSON_XPATH, e);
                    }
                }
            };

    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Pointers to evaluate against the JSON-LD content, in configuration order. */
    private final List<LabelledJsonPointer> expressions = new LinkedList<>();

    /** Associates a metadata key with the JSON pointer used to find its value. */
    static class LabelledJsonPointer {
        final String label;
        final JsonPointer pointer;

        /**
         * @param str metadata key under which the extracted value is stored
         * @param jsonPointer pointer evaluated against the JSON-LD content
         */
        public LabelledJsonPointer(String str, JsonPointer jsonPointer) {
            this.label = str;
            this.pointer = jsonPointer;
        }

        @Override
        public String toString() {
            return this.label + " => " + this.pointer.toString();
        }
    }

    /**
     * Evaluates every configured pointer against the JSON-LD content of the page and adds
     * the text of each node found to the metadata stored in {@code parseResult} for
     * {@code str}. Does nothing when there is no DOM or no JSON-LD script element. Any
     * exception raised during the extraction is logged and not propagated.
     *
     * @param str key used to look up the document in {@code parseResult}
     * @param bArr raw content of the document, not used by this filter
     * @param documentFragment DOM of the document, may be {@code null}
     * @param parseResult parse result whose metadata receives the extracted values
     */
    @Override
    public void filter(String str, byte[] bArr, DocumentFragment documentFragment, ParseResult parseResult) {
        if (documentFragment == null) {
            return;
        }
        try {
            JsonNode json = filterJson(documentFragment);
            if (json == null) {
                return;
            }
            Metadata metadata = parseResult.get(str).getMetadata();
            for (LabelledJsonPointer expression : this.expressions) {
                JsonNode match = json.at(expression.pointer);
                if (!match.isMissingNode()) {
                    metadata.addValue(expression.label, match.asText());
                }
            }
        } catch (Exception e) {
            LOG.error("Exception caught when extracting json", e);
        }
    }

    /**
     * Parses the text content of the first {@code SCRIPT} element of type
     * {@code application/ld+json} found in the given DOM.
     *
     * @param documentFragment DOM to search
     * @return the parsed JSON, or {@code null} when no such element is found
     * @throws Exception if the XPath evaluation fails or the text content cannot be
     *     parsed as JSON
     */
    public static JsonNode filterJson(DocumentFragment documentFragment) throws Exception {
        Node node = (Node) LD_JSON_EXPRESSION.get().evaluate(documentFragment, XPathConstants.NODE);
        if (node == null) {
            return null;
        }
        return MAPPER.readValue(node.getTextContent(), JsonNode.class);
    }

    /**
     * Registers one labelled pointer per field of {@code jsonNode}: the field name is the
     * metadata key and the field value, read as text, is the JSON pointer expression.
     *
     * @param map configuration map, not used by this filter
     * @param jsonNode filter parameters mapping metadata keys to JSON pointer expressions
     */
    @Override
    public void configure(@NotNull Map<String, Object> map, @NotNull JsonNode jsonNode) {
        Iterator<Map.Entry<String, JsonNode>> fields = jsonNode.fields();
        while (fields.hasNext()) {
            Map.Entry<String, JsonNode> entry = fields.next();
            JsonPointer pointer = JsonPointer.valueOf(entry.getValue().asText());
            this.expressions.add(new LabelledJsonPointer(entry.getKey(), pointer));
        }
    }

    /**
     * @return {@code true}, this filter reads the JSON-LD content from the DOM
     */
    @Override
    public boolean needsDOM() {
        return true;
    }
}
