package im.nll.data.extractor.impl;

import im.nll.data.extractor.ListableExtractor;
import im.nll.data.extractor.annotation.Name;
import im.nll.data.extractor.exception.ExtractException;
import im.nll.data.extractor.utils.Logs;
import im.nll.data.extractor.utils.TypeUtils;
import java.util.ArrayList;
import java.util.List;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.PrettyHtmlSerializer;
import org.htmlcleaner.TagNode;
import org.slf4j.Logger;

/**
 * Extractor that evaluates an XPath expression against HTML parsed by HtmlCleaner.
 *
 * <p>Each call builds a fresh {@link HtmlCleaner}, cleans the input into a
 * {@link TagNode} tree and evaluates the configured XPath on it. Matched
 * {@link TagNode}s are serialized back to pretty-printed HTML; any other match
 * is converted with {@link TypeUtils#castToString}.
 */
@Name({"htmlcleaner"})
/* loaded from: input_file:im/nll/data/extractor/impl/HtmlCleanerExtractor.class */
public class HtmlCleanerExtractor implements ListableExtractor {
    private static final Logger logger = Logs.get();

    /** XPath expression evaluated against the cleaned document; set once at construction. */
    private final String xpath;

    /**
     * Creates an extractor bound to one XPath expression.
     *
     * @param str the XPath expression to evaluate on every extraction
     */
    public HtmlCleanerExtractor(String str) {
        this.xpath = str;
    }

    /**
     * Extracts the first XPath match from the given HTML.
     *
     * @param str the HTML content to search
     * @return the first match rendered as a string, or an empty string when nothing matches
     * @throws ExtractException wrapping any exception raised while cleaning,
     *         evaluating the XPath or serializing the result
     */
    @Override // im.nll.data.extractor.Extractor
    public String extract(String str) {
        try {
            HtmlCleaner htmlCleaner = getHtmlCleaner();
            Object[] matches = htmlCleaner.clean(str).evaluateXPath(this.xpath);
            if (isEmpty(matches)) {
                logNotFound(str);
                return "";
            }
            return wrap(matches[0], htmlCleaner);
        } catch (Exception e) {
            throw new ExtractException(e);
        }
    }

    /**
     * Extracts every XPath match from the given HTML, in the order the evaluation returns them.
     *
     * @param str the HTML content to search
     * @return a mutable list with one rendered string per match; empty when nothing matches
     * @throws ExtractException wrapping any exception raised while cleaning,
     *         evaluating the XPath or serializing a result
     */
    @Override // im.nll.data.extractor.ListableExtractor
    public List<String> extractList(String str) {
        List<String> results = new ArrayList<String>();
        try {
            HtmlCleaner htmlCleaner = getHtmlCleaner();
            Object[] matches = htmlCleaner.clean(str).evaluateXPath(this.xpath);
            if (isEmpty(matches)) {
                logNotFound(str);
                return results;
            }
            for (Object match : matches) {
                results.add(wrap(match, htmlCleaner));
            }
            return results;
        } catch (Exception e) {
            throw new ExtractException(e);
        }
    }

    /** Returns true when the XPath evaluation produced no matches (null or zero-length array). */
    private static boolean isEmpty(Object[] matches) {
        return matches == null || matches.length == 0;
    }

    /** Logs the XPath that matched nothing at WARN and the searched content at DEBUG. */
    private void logNotFound(String content) {
        logger.warn("not found content,xpath:{}", this.xpath);
        logger.debug("content:{}", content);
    }

    /**
     * Builds a new cleaner with the properties this extractor relies on:
     * no CDATA wrapping for script/style, {@code script} and {@code style} pruned,
     * unknown tags treated as content and omitted.
     */
    private HtmlCleaner getHtmlCleaner() {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        CleanerProperties properties = htmlCleaner.getProperties();
        properties.setUseCdataForScriptAndStyle(false);
        properties.setPruneTags("script,style");
        properties.setTreatUnknownTagsAsContent(true);
        properties.setOmitUnknownTags(true);
        return htmlCleaner;
    }

    /**
     * Renders a single XPath match as a string.
     *
     * @param obj the match; may be null, a {@link TagNode} or any other value
     * @param htmlCleaner the cleaner whose properties drive serialization
     * @return an empty string for null, pretty-printed HTML for a {@link TagNode},
     *         otherwise the result of {@link TypeUtils#castToString}
     */
    private String wrap(Object obj, HtmlCleaner htmlCleaner) {
        if (obj == null) {
            return "";
        }
        if (!(obj instanceof TagNode)) {
            return TypeUtils.castToString(obj);
        }
        CleanerProperties properties = htmlCleaner.getProperties();
        // Serialized output is a fragment, so the XML declaration is switched off here.
        properties.setOmitXmlDeclaration(true);
        return new PrettyHtmlSerializer(properties).getAsString((TagNode) obj);
    }
}
