package co.mailtarget.durian;

import co.mailtarget.durian.DocumentCleaner;
import co.mailtarget.durian.extractor.ContentExtractor;
import co.mailtarget.durian.extractor.DateExtractor;
import co.mailtarget.durian.extractor.FaviconExtractor;
import co.mailtarget.durian.extractor.ImageExtractor;
import co.mailtarget.durian.extractor.KeywordExtractor;
import co.mailtarget.durian.extractor.SnippetExtractor;
import co.mailtarget.durian.extractor.TitleExtractor;
import java.net.URL;
import java.util.ArrayList;
import kotlin.Metadata;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* compiled from: WebExtractor.kt */
@Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��@\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u000b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0005\u0018��2\u00020\u0001:\u0002!\"B\u0005¢\u0006\u0002\u0010\u0002J\u0016\u0010\u0015\u001a\u00020\u00162\u0006\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u0019\u001a\u00020\u0018J\u0018\u0010\u001a\u001a\u00020\u00162\u0006\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u001b\u001a\u00020\u001cH\u0002J\u0010\u0010\u001d\u001a\u00020\u001e2\u0006\u0010\u001b\u001a\u00020\u001cH\u0002J\u0018\u0010\u001f\u001a\u00020\u00162\u0006\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u001b\u001a\u00020\u001cH\u0002J\u0018\u0010 \u001a\u00020\u00162\u0006\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u001b\u001a\u00020\u001cH\u0002R\u001a\u0010\u0003\u001a\u00020\u0004X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0005\u0010\u0006\"\u0004\b\u0007\u0010\bR\u001a\u0010\t\u001a\u00020\nX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u000b\u0010\f\"\u0004\b\r\u0010\u000eR\u001a\u0010\u000f\u001a\u00020\u0010X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0011\u0010\u0012\"\u0004\b\u0013\u0010\u0014¨\u0006#"}, d2 = {"Lco/mailtarget/durian/WebExtractor;", "Lco/mailtarget/durian/Connection;", "()V", "cleaner", "Lco/mailtarget/durian/DocumentCleaner;", "getCleaner", "()Lco/mailtarget/durian/DocumentCleaner;", "setCleaner", "(Lco/mailtarget/durian/DocumentCleaner;)V", "logging", "", "getLogging", "()Z", "setLogging", "(Z)V", "strategy", "Lco/mailtarget/durian/WebExtractor$Strategy;", "getStrategy", "()Lco/mailtarget/durian/WebExtractor$Strategy;", "setStrategy", "(Lco/mailtarget/durian/WebExtractor$Strategy;)V", "extract", "Lco/mailtarget/durian/WebPage;", "url", "", "html", "extractContent", "document", "Lorg/jsoup/nodes/Document;", "extractContentElement", 
"Lorg/jsoup/nodes/Element;", "extractHybrid", "extractMeta", "Builder", "Strategy", "durian"})
/* loaded from: input_file:co/mailtarget/durian/WebExtractor.class */
public final class WebExtractor extends Connection {

    /**
     * Cleaner run over the parsed document by the CONTENT and HYBRID extraction paths.
     * Defaults to a freshly constructed {@link DocumentCleaner}.
     */
    @NotNull
    private DocumentCleaner cleaner = new DocumentCleaner();

    /** Selects which extraction path {@code extract} dispatches to; defaults to {@link Strategy#META}. */
    @NotNull
    private Strategy strategy = Strategy.META;
    /**
     * Logging flag, enabled by default. This class only stores and exposes it through
     * its accessors; none of the extraction methods in this class read it.
     */
    private boolean logging = true;

    /* compiled from: WebExtractor.kt */
    @Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��.\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000b\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\bÆ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0006\u0010\u000b\u001a\u00020\fJ\u001e\u0010\u0003\u001a\u00020��2\u0016\u0010\r\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006J\u000e\u0010\u0007\u001a\u00020��2\u0006\u0010\u0007\u001a\u00020\bJ\u000e\u0010\t\u001a\u00020��2\u0006\u0010\t\u001a\u00020\nR\u001e\u0010\u0003\u001a\u0012\u0012\u0004\u0012\u00020\u00050\u0004j\b\u0012\u0004\u0012\u00020\u0005`\u0006X\u0082\u000e¢\u0006\u0002\n��R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u000e¢\u0006\u0002\n��R\u000e\u0010\t\u001a\u00020\nX\u0082\u000e¢\u0006\u0002\n��¨\u0006\u000e"}, d2 = {"Lco/mailtarget/durian/WebExtractor$Builder;", "", "()V", "cleanerOptions", "Ljava/util/ArrayList;", "Lco/mailtarget/durian/DocumentCleaner$Options;", "Lkotlin/collections/ArrayList;", "logging", "", "strategy", "Lco/mailtarget/durian/WebExtractor$Strategy;", "build", "Lco/mailtarget/durian/WebExtractor;", "options", "durian"})
    /* loaded from: input_file:co/mailtarget/durian/WebExtractor$Builder.class */
    /**
     * Fluent configurator that produces {@link WebExtractor} instances.
     *
     * <p>This is a singleton: the constructor is private and {@link #INSTANCE} is the only
     * instance. All configuration lives in static fields, so a value set through one of
     * the fluent methods stays in effect for every later {@link #build()} call until it is
     * overwritten. The fields are accessed without any synchronization in this class.
     */
    public static final class Builder {
        /** The single shared builder instance. */
        public static final Builder INSTANCE = new Builder();

        /** Strategy handed to the next built extractor; starts as {@link Strategy#META}. */
        private static Strategy strategy = Strategy.META;

        /** Cleaner options appended to the next built extractor's cleaner; starts empty. */
        private static ArrayList<DocumentCleaner.Options> cleanerOptions = new ArrayList<>();

        /** Logging flag handed to the next built extractor; starts as {@code true}. */
        private static boolean logging = true;

        private Builder() {
        }

        /**
         * Sets the extraction strategy used by extractors built afterwards.
         *
         * @param strategy2 the strategy to apply; must not be null
         * @return this builder, for chaining
         */
        @NotNull
        public final Builder strategy(@NotNull Strategy strategy2) {
            Intrinsics.checkParameterIsNotNull(strategy2, "strategy");
            strategy = strategy2;
            return this;
        }

        /**
         * Sets the cleaner options that {@link #build()} adds to each new extractor's cleaner.
         *
         * <p>The list is copied, so changes the caller makes to {@code arrayList} after this
         * call do not leak into extractors built later.
         *
         * @param arrayList the options to use; must not be null
         * @return this builder, for chaining
         */
        @NotNull
        public final Builder cleanerOptions(@NotNull ArrayList<DocumentCleaner.Options> arrayList) {
            Intrinsics.checkParameterIsNotNull(arrayList, "options");
            // Snapshot the caller's list: keeping the reference in a static field would let
            // later caller-side mutations silently alter every subsequent build().
            cleanerOptions = new ArrayList<>(arrayList);
            return this;
        }

        /**
         * Sets the logging flag copied onto extractors built afterwards.
         *
         * @param z the logging flag value
         * @return this builder, for chaining
         */
        @NotNull
        public final Builder logging(boolean z) {
            logging = z;
            return this;
        }

        /**
         * Creates a new {@link WebExtractor} and applies the currently stored strategy,
         * cleaner options and logging flag to it. The stored configuration is not reset.
         *
         * @return a newly configured extractor
         */
        @NotNull
        public final WebExtractor build() {
            WebExtractor webExtractor = new WebExtractor();
            webExtractor.setStrategy(strategy);
            webExtractor.getCleaner().getOptions().addAll(cleanerOptions);
            webExtractor.setLogging(logging);
            return webExtractor;
        }
    }

    /* compiled from: WebExtractor.kt */
    @Metadata(mv = {1, 1, 16}, bv = {1, 0, 3}, k = 1, d1 = {"��\f\n\u0002\u0018\u0002\n\u0002\u0010\u0010\n\u0002\b\u0005\b\u0086\u0001\u0018��2\b\u0012\u0004\u0012\u00020��0\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002j\u0002\b\u0003j\u0002\b\u0004j\u0002\b\u0005¨\u0006\u0006"}, d2 = {"Lco/mailtarget/durian/WebExtractor$Strategy;", "", "(Ljava/lang/String;I)V", "META", "CONTENT", "HYBRID", "durian"})
    /* loaded from: input_file:co/mailtarget/durian/WebExtractor$Strategy.class */
    /** Extraction modes; {@code extract} picks its extraction path based on this value. */
    public enum Strategy {
        /** Handled by {@code extractMeta}; also the path taken for any value not matched explicitly. */
        META,
        /** Handled by {@code extractContent}. */
        CONTENT,
        /** Handled by {@code extractHybrid}. */
        HYBRID
    }

    /**
     * Returns the document cleaner currently held by this extractor.
     *
     * @return the cleaner instance stored in this extractor
     */
    @NotNull
    public final DocumentCleaner getCleaner() {
        return cleaner;
    }

    /**
     * Replaces the document cleaner held by this extractor.
     *
     * @param documentCleaner the new cleaner; a null value is rejected by the Kotlin
     *                        parameter check before the field is touched
     */
    public final void setCleaner(@NotNull DocumentCleaner documentCleaner) {
        Intrinsics.checkParameterIsNotNull(documentCleaner, "<set-?>");
        cleaner = documentCleaner;
    }

    /**
     * Returns the extraction strategy currently configured on this extractor.
     *
     * @return the stored strategy
     */
    @NotNull
    public final Strategy getStrategy() {
        return strategy;
    }

    /**
     * Replaces the extraction strategy used by {@code extract}.
     *
     * @param strategy the new strategy; a null value is rejected by the Kotlin
     *                 parameter check before the field is touched
     */
    public final void setStrategy(@NotNull Strategy strategy) {
        Intrinsics.checkParameterIsNotNull(strategy, "<set-?>");
        this.strategy = strategy;
    }

    /**
     * Returns the logging flag stored on this extractor.
     *
     * @return the current value of the logging flag
     */
    public final boolean getLogging() {
        return logging;
    }

    /**
     * Stores a new value for the logging flag.
     *
     * @param z the new logging flag value
     */
    public final void setLogging(boolean z) {
        logging = z;
    }

    /**
     * Builds a {@link WebPage} for the given URL using the configured strategy.
     *
     * <p>When {@code str2} is non-empty the document is obtained from
     * {@code getDocument(url, html)}; otherwise the single-URL {@code getDocument}
     * overload is invoked with its second argument left at its Kotlin default
     * (presumably fetching the page over the network; confirm in {@code Connection}).
     *
     * @param str  the page URL
     * @param str2 the page HTML, or an empty string to have the document obtained from the URL
     * @return the page produced by the extraction path matching the current strategy
     */
    @NotNull
    public final WebPage extract(@NotNull String str, @NotNull String str2) {
        Intrinsics.checkParameterIsNotNull(str, "url");
        Intrinsics.checkParameterIsNotNull(str2, "html");

        final Document page;
        if (str2.isEmpty()) {
            page = Connection.getDocument$default(this, str, 0, 2, null);
        } else {
            page = getDocument(str, str2);
        }

        final WebPage result;
        switch (this.strategy) {
            case CONTENT:
                result = extractContent(str, page);
                break;
            case HYBRID:
                result = extractHybrid(str, page);
                break;
            default:
                // META and any other value share the metadata-only path.
                result = extractMeta(str, page);
                break;
        }
        return result;
    }

    /**
     * CONTENT strategy: populates the page using the {@code ...FromContent} extractor variants.
     *
     * <p>The document is first passed through the cleaner, and the main content element is
     * located in the cleaned result. The title extractor receives the cleaned document plus
     * the body of the document passed in; the remaining extractors receive the document
     * passed in.
     *
     * @param str      the page URL; it is also parsed with {@link URL} for image extraction
     * @param document the parsed page
     * @return the populated page, with the located content element as its content
     */
    private final WebPage extractContent(String str, Document document) {
        final Document cleaned = this.cleaner.clean(document);
        final Element mainContent = extractContentElement(cleaned);

        final TitleExtractor titles = TitleExtractor.INSTANCE;
        final Element pageBody = document.body();
        Intrinsics.checkExpressionValueIsNotNull(pageBody, "document.body()");

        final WebPage page = new WebPage(str, titles.getTitleFromContent(cleaned, pageBody));
        page.setFavicon(FaviconExtractor.INSTANCE.getFavicon(document));

        final URL pageUrl = new URL(str);
        page.setImage(
                ImageExtractor.INSTANCE.getImageFromContent(document, pageUrl, page.getTitle(), mainContent));
        page.setDescription(
                SnippetExtractor.INSTANCE.getDescriptionFromContent(document, mainContent));
        // Synthetic Kotlin default-argument call: getDate's second argument is left at its default.
        page.setPublishedDate(
                DateExtractor.getDate$default(DateExtractor.INSTANCE, document, null, 2, null));
        page.setKeywords(KeywordExtractor.INSTANCE.getKeywords(document));
        page.setContent(mainContent);
        return page;
    }

    /**
     * HYBRID strategy: populates the page using the general-purpose extractor entry points
     * ({@code getTitle}, {@code getImage}, {@code getDescription}, {@code getDate}).
     *
     * <p>The document is first passed through the cleaner, and the main content element is
     * located in the cleaned result. That element is handed to the image, description and
     * date extractors alongside the document passed in.
     *
     * @param str      the page URL; it is also parsed with {@link URL} for image extraction
     * @param document the parsed page
     * @return the populated page, with the located content element as its content
     */
    private final WebPage extractHybrid(String str, Document document) {
        final Document cleaned = this.cleaner.clean(document);
        final Element mainContent = extractContentElement(cleaned);

        final TitleExtractor titles = TitleExtractor.INSTANCE;
        final Element pageBody = document.body();
        Intrinsics.checkExpressionValueIsNotNull(pageBody, "document.body()");

        final WebPage page = new WebPage(str, titles.getTitle(cleaned, pageBody));
        page.setFavicon(FaviconExtractor.INSTANCE.getFavicon(document));

        final URL pageUrl = new URL(str);
        page.setImage(
                ImageExtractor.INSTANCE.getImage(document, pageUrl, page.getTitle(), mainContent));
        page.setDescription(SnippetExtractor.INSTANCE.getDescription(document, mainContent));
        page.setPublishedDate(DateExtractor.INSTANCE.getDate(document, mainContent));
        page.setKeywords(KeywordExtractor.INSTANCE.getKeywords(document));
        page.setContent(mainContent);
        return page;
    }

    /**
     * META strategy: populates the page using the {@code ...FromMeta} extractor variants.
     * The cleaner is not run on this path, and the document body is used as the page content.
     *
     * @param str      the page URL
     * @param document the parsed page
     * @return the populated page
     */
    private final WebPage extractMeta(String str, Document document) {
        final WebPage page = new WebPage(str, TitleExtractor.INSTANCE.getTitleFromMeta(document));

        page.setFavicon(FaviconExtractor.INSTANCE.getFavicon(document));
        page.setImage(ImageExtractor.INSTANCE.getImageFromMeta(document));
        page.setDescription(SnippetExtractor.INSTANCE.getDescriptionFromMeta(document));
        page.setPublishedDate(DateExtractor.INSTANCE.getDateFromMeta(document));
        page.setKeywords(KeywordExtractor.INSTANCE.getKeywordsFromMeta(document));

        final Element wholeBody = document.body();
        page.setContent(wholeBody);
        return page;
    }

    /**
     * Delegates to {@link ContentExtractor} to obtain the content element of the document.
     *
     * @param document the document to search
     * @return the element returned by the content extractor
     */
    private final Element extractContentElement(Document document) {
        final ContentExtractor extractor = ContentExtractor.INSTANCE;
        return extractor.getContentElement(document);
    }
}
