package co.mailtarget.durian.extractor;

import co.mailtarget.durian.content.AddSiblings;
import co.mailtarget.durian.content.ScoreInfo;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.math.MathKt;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* compiled from: ContentExtractor.kt */
@Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��H\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\b\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0006\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0010\u001e\n\u0002\b\u0004\bÆ\u0002\u0018��2\u00020\u0001:\u0001#B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0018\u0010\b\u001a\u00020\t2\u0006\u0010\n\u001a\u00020\u000b2\u0006\u0010\f\u001a\u00020\rH\u0002J\u0012\u0010\u000e\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u0012\u0010\u0011\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u0012\u0010\u0012\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u0010\u0010\u0013\u001a\u00020\u00142\u0006\u0010\u0015\u001a\u00020\u000bH\u0002J\u0012\u0010\u0016\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u000e\u0010\u0017\u001a\u00020\u000b2\u0006\u0010\u000f\u001a\u00020\u0010J\u0018\u0010\u0018\u001a\u00020\u00192\u0006\u0010\u001a\u001a\u00020\u000b2\u0006\u0010\u000f\u001a\u00020\u0010H\u0002J\u0010\u0010\u001b\u001a\u00020\u00142\u0006\u0010\u001c\u001a\u00020\u000bH\u0002J\u0012\u0010\u001d\u001a\u0004\u0018\u00010\u000b2\u0006\u0010\u001e\u001a\u00020\u0019H\u0002J\u0016\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020\u000b0 2\u0006\u0010!\u001a\u00020\u0010H\u0002J\u0010\u0010\"\u001a\u00020\t2\u0006\u0010\u0015\u001a\u00020\u000bH\u0002R\u0016\u0010\u0003\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010\u0006\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010\u0007\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004¢\u0006\u0002\n��¨\u0006$"}, d2 = {"Lco/mailtarget/durian/extractor/ContentExtractor;", "", "()V", "NEGATIVE", "Ljava/util/regex/Pattern;", 
"kotlin.jvm.PlatformType", "POSITIVE", "UNLIKELY", "calcWeightForChild", "", "child", "Lorg/jsoup/nodes/Element;", "ownText", "", "fetchArticleContent", "document", "Lorg/jsoup/nodes/Document;", "getArticleById", "getArticleByTag", "getClassWeight", "", "e", "getContentByTitle", "getContentElement", "getContentTargets", "Lorg/jsoup/select/Elements;", "topElement", "getElementScore", "element", "getMatchesContentByHrOrP", "targets", "getNodesToCheck", "", "doc", "weightChildNodes", "ScoreTags", "durian"})
/* loaded from: input_file:co/mailtarget/durian/extractor/ContentExtractor.class */
public final class ContentExtractor {
    /** The single shared instance; the class constructor is private, so this is the only instance. */
    public static final ContentExtractor INSTANCE = new ContentExtractor();
    /**
     * Matched against an element's class name and id in {@code getClassWeight}; each hit subtracts 20.
     * The whole alternation is anchored with {@code ^}, so only values that START with one of the
     * listed words match.
     */
    private static final Pattern UNLIKELY = Pattern.compile("^(com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsora(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|login|si(debar|gn|ngle))");
    /**
     * Matched against an element's class name (+35) and id (+40) in {@code getClassWeight}.
     * The first group is start-anchored; {@code arti(cle|kel)} and {@code instapaper_body} match anywhere.
     */
    private static final Pattern POSITIVE = Pattern.compile("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))|arti(cle|kel)|instapaper_body");
    /**
     * Matched against an element's class name and id in {@code getClassWeight}; each hit subtracts 50.
     * Most alternatives match anywhere in the value; {@code ^com-} is start-anchored and
     * {@code ^(me(dia|ta))$} must match the entire value.
     */
    private static final Pattern NEGATIVE = Pattern.compile("nav($|igation)|user|com(ment|bx)|(^com-)|contact|foot|masthead|^(me(dia|ta))$|outbrain|promo|related|scroll|(sho(utbox|pping))|sidebar|sponsor|tags|tool|widget");

    /* compiled from: ContentExtractor.kt */
    @Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��\f\n\u0002\u0018\u0002\n\u0002\u0010\u0010\n\u0002\b\u0017\b\u0086\u0001\u0018�� \u00172\b\u0012\u0004\u0012\u00020��0\u0001:\u0001\u0017B\u0007\b\u0002¢\u0006\u0002\u0010\u0002j\u0002\b\u0003j\u0002\b\u0004j\u0002\b\u0005j\u0002\b\u0006j\u0002\b\u0007j\u0002\b\bj\u0002\b\tj\u0002\b\nj\u0002\b\u000bj\u0002\b\fj\u0002\b\rj\u0002\b\u000ej\u0002\b\u000fj\u0002\b\u0010j\u0002\b\u0011j\u0002\b\u0012j\u0002\b\u0013j\u0002\b\u0014j\u0002\b\u0015j\u0002\b\u0016¨\u0006\u0018"}, d2 = {"Lco/mailtarget/durian/extractor/ContentExtractor$ScoreTags;", "", "(Ljava/lang/String;I)V", "div", "pre", "td", "blockquote", "address", "ol", "ul", "dl", "dd", "dt", "li", "form", "h1", "h2", "h3", "h4", "h5", "h6", "th", "UNKNOWN", "Companion", "durian"})
    /* loaded from: input_file:co/mailtarget/durian/extractor/ContentExtractor$ScoreTags.class */
    public enum ScoreTags {
        div,
        pre,
        td,
        blockquote,
        address,
        ol,
        ul,
        dl,
        dd,
        dt,
        li,
        form,
        h1,
        h2,
        h3,
        h4,
        h5,
        h6,
        th,
        UNKNOWN;

        public static final Companion Companion = new Companion(null);

        /* compiled from: ContentExtractor.kt */
        @Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��\u0018\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u000e\u0010\u0003\u001a\u00020\u00042\u0006\u0010\u0005\u001a\u00020\u0006¨\u0006\u0007"}, d2 = {"Lco/mailtarget/durian/extractor/ContentExtractor$ScoreTags$Companion;", "", "()V", "getTagName", "Lco/mailtarget/durian/extractor/ContentExtractor$ScoreTags;", "tag", "", "durian"})
        /* loaded from: input_file:co/mailtarget/durian/extractor/ContentExtractor$ScoreTags$Companion.class */
        public static final class Companion {
            /**
             * Maps an HTML tag name onto the matching {@link ScoreTags} constant.
             *
             * <p>The comparison is exact and case-sensitive against the constant names. Tags that are
             * not listed in the enum are the common case, so the lookup is done with a plain scan
             * instead of {@code valueOf} + exception handling, which would build an exception for
             * every unlisted tag.
             *
             * @param str the tag name to look up; must not be null
             * @return the constant whose name equals {@code str}, or {@link ScoreTags#UNKNOWN} when none does
             */
            @NotNull
            public final ScoreTags getTagName(@NotNull String str) {
                Intrinsics.checkParameterIsNotNull(str, "tag");
                for (ScoreTags candidate : ScoreTags.values()) {
                    if (candidate.name().equals(str)) {
                        return candidate;
                    }
                }
                return ScoreTags.UNKNOWN;
            }

            /** Private no-arg constructor; only reachable through the synthetic constructor below. */
            private Companion() {
            }

            /**
             * Compiler-generated constructor used by {@code ScoreTags} to create its {@code Companion}
             * field ({@code new Companion(null)}). The marker argument is ignored.
             */
            public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
                this();
            }
        }
    }

    /**
     * Finds the element that most likely holds the main content of {@code document}.
     *
     * <p>Strategies are tried in order and the first non-null result wins: content near the first
     * heading, the first {@code <article>} element, the element with id
     * {@code articleDescription}, and finally the score-based search. When every strategy
     * returns null the document body is returned.
     *
     * @param document the parsed document; must not be null
     * @return the chosen content element, never null
     */
    @NotNull
    public final Element getContentElement(@NotNull Document document) {
        Intrinsics.checkParameterIsNotNull(document, "document");

        Element byTitle = getContentByTitle(document);
        if (byTitle != null) {
            return byTitle;
        }

        Element byTag = getArticleByTag(document);
        if (byTag != null) {
            return byTag;
        }

        Element byId = getArticleById(document);
        if (byId != null) {
            return byId;
        }

        Element byScore = fetchArticleContent(document);
        if (byScore != null) {
            return byScore;
        }

        // Last resort: hand back the whole body.
        Element fallback = document.body();
        Intrinsics.checkExpressionValueIsNotNull(fallback, "document.body()");
        return fallback;
    }

    /**
     * Looks for the content container starting from the document's first heading.
     *
     * <p>Heading levels are probed in the order h1, h2, h3; the first level that has any element
     * supplies the starting heading. Its paragraph-like targets are collected with
     * {@code getContentTargets} and handed to {@code getMatchesContentByHrOrP}.
     *
     * @param document the parsed document
     * @return the matching container, or null when there is no h1/h2/h3 or no container qualifies
     */
    private final Element getContentByTitle(Document document) {
        for (String headingTag : new String[] {"h1", "h2", "h3"}) {
            Elements headings = document.getElementsByTag(headingTag);
            if (!headings.isEmpty()) {
                Element firstHeading = headings.first();
                Intrinsics.checkExpressionValueIsNotNull(firstHeading, "titleCandicates.first()");
                Elements targets = getContentTargets(firstHeading, document);
                return getMatchesContentByHrOrP(targets);
            }
        }
        return null;
    }

    /**
     * Returns the first {@code <article>} element of the document.
     *
     * @param document the parsed document
     * @return whatever {@code first()} yields for the {@code article} elements, or null when the lookup itself is null
     */
    private final Element getArticleByTag(Document document) {
        Elements articles = document.getElementsByTag("article");
        return articles == null ? null : articles.first();
    }

    /**
     * Looks up the element whose id is {@code articleDescription}.
     *
     * @param document the parsed document
     * @return the result of the id lookup (null when {@code getElementById} returns null)
     */
    private final Element getArticleById(Document document) {
        Element articleDescription = document.getElementById("articleDescription");
        return articleDescription;
    }

    /**
     * Collects the {@code p} / {@code br + br} matches among the siblings of {@code element},
     * climbing towards the root until more than three are found.
     *
     * <p>At each level the siblings of the current element are queried. The walk stops as soon as
     * the selection holds more than three elements or the current element has no parent; the
     * selection from that level is returned.
     *
     * @param element  the element to start from
     * @param document not used by this method; kept for signature compatibility
     * @return the selection from the level where the walk stopped (may contain three or fewer elements)
     */
    private final Elements getContentTargets(Element element, Document document) {
        Element current = element;
        while (true) {
            Elements targets = current.siblingElements().select("p, br + br");
            Element above = current.parent();
            if (targets.size() > 3 || above == null) {
                Intrinsics.checkExpressionValueIsNotNull(targets, "targets");
                return targets;
            }
            Intrinsics.checkExpressionValueIsNotNull(above, "topElement.parent()");
            current = above;
        }
    }

    /**
     * Returns the parent of the first target whose parent contains more than three
     * {@code p} / {@code br+br} matches.
     *
     * @param elements candidate targets, checked in iteration order
     * @return the first qualifying parent, or null when no target's parent qualifies
     */
    private final Element getMatchesContentByHrOrP(Elements elements) {
        for (Element target : elements) {
            Element container = target.parent();
            int paragraphLikeCount = container.select("p, br+br").size();
            if (paragraphLikeCount > 3) {
                return container;
            }
        }
        return null;
    }

    /**
     * Score-based search for the main content element.
     *
     * <p>Every node from {@code getNodesToCheck} whose text is at least 25 characters long is
     * scored with {@code getElementScore}. The full score is added to the node's parent and half
     * of it to the grandparent (via {@code ScoreInfo}); both ancestors become candidates. The
     * candidate with the highest recorded content score is then passed through
     * {@code AddSiblings.addSiblings} and returned.
     *
     * <p>Candidates are kept in insertion order. Iteration order of a plain {@code HashSet} is
     * unspecified, which made the choice arbitrary when scores tie or when no candidate scores
     * above zero (in which case the first candidate visited is used).
     *
     * @param document the parsed document
     * @return the processed best candidate, or null when no node was long enough to be scored
     * @throws IllegalStateException if a scored node lacks a parent or grandparent (raised by the
     *         Kotlin null checks)
     */
    private final Element fetchArticleContent(Document document) {
        Set<Element> candidates = new LinkedHashSet<>();
        for (Element node : getNodesToCheck(document)) {
            // text() is computed once per node; nodes with little text are ignored.
            if (node.text().length() < 25) {
                continue;
            }
            double nodeScore = getElementScore(node);

            Element parent = node.parent();
            Intrinsics.checkExpressionValueIsNotNull(parent, "element.parent()");
            ScoreInfo.INSTANCE.updateContentScore(parent, nodeScore);

            Element grandParent = parent.parent();
            Intrinsics.checkExpressionValueIsNotNull(grandParent, "element.parent().parent()");
            ScoreInfo.INSTANCE.updateContentScore(grandParent, nodeScore / 2);

            // Set.add already ignores duplicates, no contains() pre-check needed.
            candidates.add(parent);
            candidates.add(grandParent);
        }

        Element best = null;
        double bestScore = 0.0d;
        for (Element candidate : candidates) {
            double score = ScoreInfo.INSTANCE.getContentScore(candidate);
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
            if (best == null) {
                // Fallback so that a result exists even if no score exceeds zero.
                best = candidate;
            }
        }

        if (best == null) {
            return null;
        }
        return AddSiblings.INSTANCE.addSiblings(best);
    }

    /**
     * Scores an element from its tag name plus its class/id/text weight.
     *
     * <p>Tag contribution: {@code div} +5; {@code pre}, {@code td}, {@code blockquote} +3;
     * {@code address}, {@code ol}, {@code ul}, {@code dl}, {@code dd}, {@code dt}, {@code li},
     * {@code form} -3; {@code h1}-{@code h6} and {@code th} -5; any other tag 0. The result of
     * {@code getClassWeight} is added on top.
     *
     * @param element the element to score
     * @return tag contribution plus {@code getClassWeight(element)}
     */
    private final double getElementScore(Element element) {
        // The decompiler dropped this local and left an undeclared register name in the switch.
        String tagName = element.tagName();
        Intrinsics.checkExpressionValueIsNotNull(tagName, "element.tagName()");

        double tagScore;
        switch (ScoreTags.Companion.getTagName(tagName)) {
            case div:
                tagScore = 5.0d;
                break;
            case pre:
            case td:
            case blockquote:
                tagScore = 3.0d;
                break;
            case address:
            case ol:
            case ul:
            case dl:
            case dd:
            case dt:
            case li:
            case form:
                tagScore = -3.0d;
                break;
            case h1:
            case h2:
            case h3:
            case h4:
            case h5:
            case h6:
            case th:
                tagScore = -5.0d;
                break;
            default:
                tagScore = 0.0d;
                break;
        }
        return tagScore + getClassWeight(element);
    }

    /**
     * Computes a weight from the element's class name, id, own text and children.
     *
     * <p>Pattern contributions: POSITIVE class +35 / id +40, UNLIKELY class or id -20 each,
     * NEGATIVE class or id -50 each. Added to that are the element's own-text length divided by
     * ten (rounded to an int) and the result of {@code weightChildNodes}.
     *
     * @param element the element to weigh
     * @return the combined weight
     */
    private final double getClassWeight(Element element) {
        String cssClass = element.className();
        String elementId = element.id();

        double weight = 0.0d;
        if (POSITIVE.matcher(cssClass).find()) {
            weight += 35.0d;
        }
        if (POSITIVE.matcher(elementId).find()) {
            weight += 40.0d;
        }
        if (UNLIKELY.matcher(cssClass).find()) {
            weight -= 20.0d;
        }
        if (UNLIKELY.matcher(elementId).find()) {
            weight -= 20.0d;
        }
        if (NEGATIVE.matcher(cssClass).find()) {
            weight -= 50.0d;
        }
        if (NEGATIVE.matcher(elementId).find()) {
            weight -= 50.0d;
        }

        int ownTextBonus = MathKt.roundToInt((element.ownText().length() / 100.0d) * 10);
        return weight + ownTextBonus + weightChildNodes(element);
    }

    /* JADX WARN: Code restructure failed: missing block: B:13:0x009e, code lost:
    
        if (kotlin.text.StringsKt.contains$default(r0, "caption", false, 2, (java.lang.Object) null) != false) goto L15;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder left by the decompiler: the original body could not be recovered, so this
     * method unconditionally throws. {@code getClassWeight} calls it, which means that call path
     * fails at runtime until the real implementation is restored from the Kotlin source or the
     * bytecode dump.
     *
     * NOTE(review): presumably returns an integer weight derived from the child elements (the
     * decompiler warning mentions a "caption" check, and {@code calcWeightForChild} looks like
     * its helper) — confirm against the original source.
     *
     * @param r8 the element whose children would be weighed
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    private final int weightChildNodes(org.jsoup.nodes.Element r8) {
        /*
            Method dump skipped, instructions count: 491
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: co.mailtarget.durian.extractor.ContentExtractor.weightChildNodes(org.jsoup.nodes.Element):int");
    }

    /**
     * Derives a weight from the length of {@code str} (one point per 25 characters, rounded),
     * records it on {@code element} through {@code ScoreInfo} and returns it.
     *
     * @param element the child element whose content score is updated
     * @param str     the text whose length determines the weight
     * @return the rounded weight that was recorded
     */
    private final int calcWeightForChild(Element element, String str) {
        double charsPerPoint = 25.0d;
        int childWeight = MathKt.roundToInt(str.length() / charsPerPoint);
        ScoreInfo scores = ScoreInfo.INSTANCE;
        scores.updateContentScore(element, childWeight);
        return childWeight;
    }

    /**
     * Collects the elements under {@code body} that are candidates for scoring.
     *
     * <p>An element qualifies when its tag name is exactly one of {@code p}, {@code td},
     * {@code h1}, {@code h2} or {@code pre}. The tag is compared for equality: a substring test
     * against the joined list {@code "p;td;h1;h2;pre"} would also accept names such as
     * {@code d}, {@code h} or {@code ;}.
     *
     * @param document the parsed document
     * @return the qualifying elements, without duplicates, in selection order
     */
    private final Collection<Element> getNodesToCheck(Document document) {
        Set<Element> nodesToCheck = new LinkedHashSet<>(64);
        for (Element element : document.select("body").select("*")) {
            String tagName = element.tagName();
            Intrinsics.checkExpressionValueIsNotNull(tagName, "element.tagName()");
            if (isCandidateTag(tagName)) {
                nodesToCheck.add(element);
            }
        }
        return nodesToCheck;
    }

    /** Returns true when {@code tagName} is exactly one of the tags that are scored. */
    private static boolean isCandidateTag(String tagName) {
        switch (tagName) {
            case "p":
            case "td":
            case "h1":
            case "h2":
            case "pre":
                return true;
            default:
                return false;
        }
    }

    /** Private so that the class cannot be instantiated from outside; use {@code INSTANCE}. */
    private ContentExtractor() {
    }
}
