package com.gengoai.hermes.extraction.keyword;

import com.gengoai.collection.counter.Counter;
import com.gengoai.collection.counter.Counters;
import com.gengoai.collection.multimap.HashSetMultimap;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.corpus.DocumentCollection;
import com.gengoai.hermes.extraction.Extraction;
import com.gengoai.hermes.extraction.NGramExtractor;
import com.gengoai.hermes.morphology.PartOfSpeech;
import com.gengoai.hermes.morphology.StopWords;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.NonNull;

/**
 * Keyword extractor that scores noun-phrase chunks and groups them into clusters keyed by the
 * lemmas of single-token phrases.
 *
 * <p>The extractor is stateless: {@link #fit(DocumentCollection)} does nothing and all work happens
 * in {@link #extract(HString)}.
 */
public class NPClusteringKeywordExtractor implements KeywordExtractor {
    private static final long serialVersionUID = 1;

    /**
     * Extracts scored keywords from the given text.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Annotate the owning document with phrase chunks and lemmas.</li>
     *   <li>Count the lemma of every token in {@code hString}.</li>
     *   <li>Collect noun phrase chunks, split them on punctuation tokens, trim stop words from the
     *       pieces and drop pieces that end up empty.</li>
     *   <li>Score each phrase as {@code log(length + averageTokenFrequency * phraseFrequency)} and
     *       keep the best score seen per phrase lemma.</li>
     *   <li>Seed one cluster per single-token phrase lemma, then add every multi-token phrase to
     *       the clusters of the tokens it contains.</li>
     *   <li>Score each cluster as the mean phrase score of its members, choose one member as the
     *       representative and emit it with {@code clusterScore + phraseScore}.</li>
     * </ol>
     *
     * @param hString the text to extract keywords from; must not be {@code null}
     * @return an extraction built from the counter of representative lemmas and their scores
     * @throws NullPointerException if {@code hString} is {@code null}
     */
    @Override // com.gengoai.hermes.extraction.Extractor
    public Extraction extract(@NonNull HString hString) {
        // Explicit check kept so the exception message stays exactly as callers currently see it.
        if (hString == null) {
            throw new NullPointerException("source is marked non-null but is null");
        }
        hString.document().annotate(Types.PHRASE_CHUNK, Types.LEMMA);

        // Frequency of every token lemma in the input.
        Counter<String> termFreqs = Counters.newCounter(
                hString.tokenStream().map(HString::getLemma).collect(Collectors.toList()));

        // Candidate phrases: noun chunks, split at punctuation, stop words trimmed, empties removed.
        List<HString> nounPhrases = hString.annotationStream(Types.PHRASE_CHUNK)
                .filter(chunk -> chunk.pos().isInstance(PartOfSpeech.NOUN))
                .flatMap(chunk -> chunk
                        .split(token -> token.pos().isInstance(PartOfSpeech.PUNCTUATION))
                        .stream())
                .map(piece -> piece.trim(StopWords.isStopWord()))
                .filter(piece -> !piece.isEmpty())
                .collect(Collectors.toList());

        // Frequency of every candidate phrase lemma.
        Counter<String> phraseFreqs = Counters.newCounter(
                nounPhrases.stream().map(HString::getLemma).collect(Collectors.toList()));

        // Best score observed for each phrase lemma.
        Counter<String> phraseScores = Counters.newCounter();
        for (HString phrase : nounPhrases) {
            String phraseLemma = phrase.getLemma();
            double averageTermFreq = phrase.tokenStream()
                    .mapToDouble(token -> termFreqs.get(token.getLemma()))
                    .sum() / phrase.tokenLength();
            double score = Math.log(
                    phrase.tokenLength() + (averageTermFreq * phraseFreqs.get(phraseLemma)));
            // Only ever raises the stored value, so a score that is not above the counter's
            // current value for the lemma is never written.
            if (score > phraseScores.get(phraseLemma)) {
                phraseScores.set(phraseLemma, score);
            }
        }

        // Every single-token phrase seeds a cluster that contains its own lemma.
        HashSetMultimap<String, String> clusters = new HashSetMultimap<>();
        for (HString phrase : nounPhrases) {
            if (phrase.tokenLength() == 1) {
                clusters.put(phrase.getLemma(), phrase.getLemma());
            }
        }

        // Multi-token phrases join the cluster of every token that is a cluster key; phrases that
        // match no cluster are set aside, keyed by lemma (a later duplicate replaces an earlier one).
        HashMap<String, HString> unclustered = new HashMap<>();
        for (HString phrase : nounPhrases) {
            if (phrase.tokenLength() <= 1) {
                continue;
            }
            boolean clustered = false;
            for (Annotation token : phrase.tokens()) {
                if (clusters.containsKey(token.getLemma())) {
                    clustered = true;
                    clusters.put(token.getLemma(), phrase.getLemma());
                }
            }
            if (!clustered) {
                unclustered.put(phrase.getLemma(), phrase);
            }
        }

        // Second chance for set-aside phrases: look for a 2- to 4-gram that is already a cluster
        // member and stop examining the phrase at the first n-gram that matches.
        NGramExtractor ngramExtractor = NGramExtractor.builder(2, 4).build();
        for (HString phrase : unclustered.values()) {
            for (HString ngram : ngramExtractor.extract(phrase)) {
                String ngramLemma = ngram.getLemma();
                boolean matched = false;
                for (String clusterKey : clusters.keySet()) {
                    if (clusters.get(clusterKey).contains(ngramLemma)) {
                        matched = true;
                        // NOTE(review): this re-adds a value the cluster was just found to contain,
                        // so membership presumably does not change. Possibly the enclosing
                        // phrase's lemma was meant to be added instead - confirm against the
                        // upstream source before changing.
                        clusters.put(clusterKey, ngramLemma);
                    }
                }
                if (matched) {
                    break;
                }
            }
        }

        // Cluster score = mean phrase score over the cluster's members.
        Counter<String> clusterScores = Counters.newCounter();
        for (String clusterKey : clusters.keySet()) {
            double memberScoreSum = clusters.get(clusterKey).stream()
                    .mapToDouble(phraseScores::get)
                    .sum();
            clusterScores.set(clusterKey, memberScoreSum / clusters.get(clusterKey).size());
        }

        // One representative per cluster, scored by cluster score plus its own phrase score.
        Counter<String> keywordScores = Counters.newCounter();
        clusterScores.forEach((clusterKey, ignored) -> {
            // NOTE(review): the representative is the member with the LOWEST phrase score. This is
            // what the previous max-with-negated-comparator selected (including its tie-breaking);
            // confirm that the lowest rather than the highest scorer is really intended.
            String representative = clusters.get(clusterKey).stream()
                    .min((left, right) ->
                            Double.compare(phraseScores.get(left), phraseScores.get(right)))
                    .orElse(clusterKey);
            // NOTE(review): the cluster score is looked up by cluster key and the phrase score by
            // representative, matching what each counter is keyed on - verify against upstream.
            keywordScores.set(
                    representative,
                    clusterScores.get(clusterKey) + phraseScores.get(representative));
        });
        return Extraction.fromCounter(keywordScores);
    }

    /**
     * Does nothing: this extractor performs no fitting and ignores the collection.
     *
     * @param documentCollection the collection offered for fitting; not read
     */
    @Override // com.gengoai.hermes.extraction.keyword.KeywordExtractor
    public void fit(DocumentCollection documentCollection) {
        // Intentionally empty.
    }
}
