package net.oschina.htmlsucker;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:net/oschina/htmlsucker/MaxTextContentExtractor.class */
public class MaxTextContentExtractor implements ContentExtractor {
    private static final Collection<String> TEXT_TAGS = Arrays.asList("p", "b", "i", "u", "strong", "em", "span", "a", "pre", "code", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "img", "hr", "br", "ul", "ol", "li", "embed", "table");

    @Override // net.oschina.htmlsucker.ContentExtractor
    public String content(Element element) {
        element.select("script").remove();
        element.select("style").remove();
        return findTextNode(element).stream().max(Comparator.comparingInt(elements -> {
            return elements.text().length();
        })).get().outerHtml();
    }

    private static List<Elements> findTextNode(Element element) {
        ArrayList arrayList = new ArrayList();
        Elements elements = new Elements();
        Iterator it = element.children().iterator();
        while (it.hasNext()) {
            Element element2 = (Element) it.next();
            if (TEXT_TAGS.contains(element2.nodeName().toLowerCase())) {
                elements.add(element2);
            } else {
                arrayList.addAll(findTextNode(element2));
            }
        }
        arrayList.add(elements);
        return arrayList;
    }
}
