package com.digitalpebble.stormcrawler.parse.filter;

import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.filtering.URLFilters;
import com.digitalpebble.stormcrawler.parse.Outlink;
import com.digitalpebble.stormcrawler.parse.ParseResult;
import com.digitalpebble.stormcrawler.parse.filter.XPathFilter;
import com.digitalpebble.stormcrawler.util.MetadataTransfer;
import com.digitalpebble.stormcrawler.util.URLUtil;
import com.fasterxml.jackson.databind.JsonNode;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentFragment;

/* loaded from: input_file:com/digitalpebble/stormcrawler/parse/filter/LinkParseFilter.class */
/**
 * Parse filter that turns the values produced by the expressions inherited from
 * {@link XPathFilter} into additional outlinks of the parsed document.
 *
 * <p>Each value is resolved against the URL of the document, passed through the
 * configured {@link URLFilters} and, when it survives filtering and is not
 * already known, added to the outlinks of the {@link ParseResult} together with
 * the metadata computed by {@link MetadataTransfer}.
 */
public class LinkParseFilter extends XPathFilter {
    private static final Logger LOG = LoggerFactory.getLogger(LinkParseFilter.class);

    /** Computes the metadata attached to every newly created outlink; set in {@link #configure}. */
    private MetadataTransfer metadataTransfer;

    /** Filters (and possibly rewrites) candidate outlink URLs; set in {@link #configure}. */
    private URLFilters urlFilters;

    /**
     * Evaluates every inherited expression against the document and merges the
     * resulting links into the outlinks of {@code parseResult}.
     *
     * <p>Outlinks are de-duplicated on their target URL; outlinks already present
     * in {@code parseResult} take precedence over newly discovered ones. If
     * {@code str} is not a valid URL the outlinks are left untouched.
     *
     * @param str URL of the document being parsed
     * @param bArr raw content of the document (not used by this filter)
     * @param documentFragment DOM of the document the expressions are evaluated against
     * @param parseResult parse result whose outlinks are read and then replaced
     */
    @Override
    public void filter(String str, byte[] bArr, DocumentFragment documentFragment, ParseResult parseResult) {
        Metadata metadata = parseResult.get(str).getMetadata();

        // Index the existing outlinks by target URL so that new links can be de-duplicated.
        Map<String, Outlink> outlinksByTarget = new HashMap<String, Outlink>();
        for (Outlink existing : parseResult.getOutlinks()) {
            outlinksByTarget.put(existing.getTargetURL(), existing);
        }

        URL sourceUrl;
        try {
            sourceUrl = new URL(str);
        } catch (MalformedURLException e) {
            // Nothing can be resolved without a valid base URL: keep the outlinks as they are.
            LOG.error("MalformedURLException on {}", str, e);
            return;
        }

        for (List<XPathFilter.LabelledExpression> expressionGroup : this.expressions.values()) {
            for (XPathFilter.LabelledExpression expression : expressionGroup) {
                try {
                    List<String> links = expression.evaluate(documentFragment);
                    if (links == null || links.isEmpty()) {
                        continue;
                    }
                    for (String link : links) {
                        addOutlink(outlinksByTarget, sourceUrl, str, metadata, link);
                    }
                } catch (Exception e) {
                    // A failing expression must not prevent the remaining ones from running.
                    LOG.error("Error evaluating {}: {}", expression.key, e);
                }
            }
        }

        parseResult.setOutlinks(new ArrayList<Outlink>(outlinksByTarget.values()));
    }

    /**
     * Resolves, filters and registers a single candidate link.
     *
     * <p>A link that cannot be resolved against {@code sourceUrl} is logged and
     * skipped so that the other links produced by the same expression are still
     * processed.
     */
    private void addOutlink(Map<String, Outlink> outlinksByTarget, URL sourceUrl, String sourceUrlString,
            Metadata metadata, String link) {
        String target;
        try {
            target = URLUtil.resolveURL(sourceUrl, link).toExternalForm();
        } catch (MalformedURLException e) {
            LOG.warn("Skipping link {} found on {}: it cannot be resolved", link, sourceUrlString, e);
            return;
        }

        // The filters may rewrite the URL, or return null to discard it.
        target = this.urlFilters.filter(sourceUrl, metadata, target);
        if (target == null || outlinksByTarget.containsKey(target)) {
            return;
        }

        Outlink outlink = new Outlink(target);
        outlink.setMetadata(this.metadataTransfer.getMetaForOutlink(target, sourceUrlString, metadata));
        outlinksByTarget.put(outlink.getTargetURL(), outlink);
    }

    /**
     * Configures the inherited expressions, then obtains the
     * {@link MetadataTransfer} and {@link URLFilters} instances from the same
     * configuration map.
     *
     * @param map configuration map (raw type kept to match the overridden signature)
     * @param jsonNode filter parameters, handed to the parent class
     */
    @Override
    public void configure(Map map, JsonNode jsonNode) {
        super.configure(map, jsonNode);
        this.metadataTransfer = MetadataTransfer.getInstance(map);
        this.urlFilters = URLFilters.fromConf(map);
    }
}
