package com.gengoai.hermes.extraction.keyword;

import com.gengoai.Tag;
import com.gengoai.Validation;
import com.gengoai.collection.counter.Counter;
import com.gengoai.collection.counter.Counters;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.corpus.DocumentCollection;
import com.gengoai.hermes.extraction.Extraction;
import com.gengoai.hermes.extraction.lyre.LyreDSL;
import com.gengoai.hermes.extraction.lyre.LyreExpression;
import com.gengoai.hermes.extraction.lyre.LyreExpressionType;
import com.gengoai.hermes.morphology.StopWords;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/extraction/keyword/RakeKeywordExtractor.class */
/**
 * Keyword extractor that builds candidate phrases from runs of consecutive non-stop-word tokens
 * and ranks them by the summed scores of their words.
 *
 * <p>Each word is scored as {@code (degree + frequency) / frequency}, where {@code frequency} is
 * the number of candidate-phrase tokens mapping to the word and {@code degree} accumulates, for
 * every such token, the number of other tokens in the same phrase. A phrase's score is the sum of
 * its words' scores. Words and phrases are keyed by the string produced by the configured
 * {@link LyreExpression}.
 */
public class RakeKeywordExtractor implements KeywordExtractor {
    private static final long serialVersionUID = 1;
    /** Expression used to turn tokens and phrases into the strings that are counted. */
    private final LyreExpression toStringExpression;

    /**
     * Creates an extractor that keys words and phrases with {@link LyreDSL#lower}.
     */
    public RakeKeywordExtractor() {
        this(LyreDSL.lower);
    }

    /**
     * Creates an extractor that keys words and phrases with the given expression.
     *
     * @param toStringExpression expression converting an {@link HString} into its counted string
     *                           form; must be of type {@link LyreExpressionType#STRING}
     * @throws NullPointerException if {@code toStringExpression} is null
     */
    public RakeKeywordExtractor(@NonNull LyreExpression toStringExpression) {
        if (toStringExpression == null) {
            throw new NullPointerException("toStringExpression is marked non-null but is null");
        }
        // Validation.checkArgument rejects the expression when it is not a STRING expression.
        Validation.checkArgument(toStringExpression.isInstance(new Tag[]{LyreExpressionType.STRING}), "Must give a STRING expression");
        this.toStringExpression = toStringExpression;
    }

    /**
     * Extracts scored keyword phrases from the given text.
     *
     * @param source the text to extract keywords from
     * @return an {@link Extraction} built from the phrase-to-score counter
     * @throws NullPointerException if {@code source} is null
     */
    @Override // com.gengoai.hermes.extraction.Extractor
    public Extraction extract(@NonNull HString source) {
        if (source == null) {
            throw new NullPointerException("source is marked non-null but is null");
        }
        // Sentence annotations are requested up front because phrases are collected per sentence.
        source.document().annotate(Types.SENTENCE);
        StopWords stopWords = StopWords.getStopWords(source.getLanguage());

        List<HString> phrases = collectCandidatePhrases(source, stopWords);
        Counter<String> wordScores = scoreWords(phrases);
        return Extraction.fromCounter(scorePhrases(phrases, wordScores));
    }

    /**
     * Intentionally a no-op: this implementation does nothing with the supplied collection.
     *
     * @param documentCollection ignored
     */
    @Override // com.gengoai.hermes.extraction.keyword.KeywordExtractor
    public void fit(DocumentCollection documentCollection) {
    }

    /** Splits every sentence of {@code source} into runs of consecutive non-stop-word tokens. */
    private static List<HString> collectCandidatePhrases(HString source, StopWords stopWords) {
        List<HString> phrases = new ArrayList<>();
        source.sentenceStream().forEach(sentence -> {
            List<HString> run = new ArrayList<>();
            sentence.tokenStream().forEach(token -> {
                if (stopWords.isStopWord(token)) {
                    // A stop word terminates the current run; leading/repeated stop words are skipped.
                    if (!run.isEmpty()) {
                        phrases.add(HString.union(run));
                        run.clear();
                    }
                } else {
                    run.add(token);
                }
            });
            // Flush the run that reaches the end of the sentence; runs never span sentences.
            if (!run.isEmpty()) {
                phrases.add(HString.union(run));
            }
        });
        return phrases;
    }

    /** Computes {@code (degree + frequency) / frequency} for every word found in the phrases. */
    private Counter<String> scoreWords(List<HString> phrases) {
        Counter<String> wordFrequency = Counters.newCounter();
        Counter<String> wordDegree = Counters.newCounter();
        for (HString phrase : phrases) {
            phrase.tokenStream().forEach(token -> {
                String word = toStringExpression.apply(token);
                wordFrequency.increment(word);
                // Degree grows by the number of other tokens sharing the phrase with this token.
                wordDegree.increment(word, phrase.tokenLength() - 1);
            });
        }

        Counter<String> wordScores = Counters.newCounter();
        wordFrequency.forEach((word, frequency) ->
                wordScores.increment(word, (wordDegree.get(word) + frequency) / frequency));
        return wordScores;
    }

    /** Scores each phrase as the sum of its tokens' word scores, keyed by the phrase's string form. */
    private Counter<String> scorePhrases(List<HString> phrases, Counter<String> wordScores) {
        Counter<String> phraseScores = Counters.newCounter();
        for (HString phrase : phrases) {
            double score = 0.0d;
            for (Annotation token : phrase.tokens()) {
                score += wordScores.get(toStringExpression.apply(token));
            }
            // increment (not set) so repeated occurrences of the same phrase accumulate.
            phraseScores.increment(toStringExpression.apply(phrase), score);
        }
        return phraseScores;
    }
}
