package com.gengoai.hermes.annotator;

import com.gengoai.Language;
import com.gengoai.Validation;
import com.gengoai.collection.Sets;
import com.gengoai.collection.counter.Counter;
import com.gengoai.collection.counter.Counters;
import com.gengoai.collection.multimap.HashSetMultimap;
import com.gengoai.collection.multimap.SetMultimap;
import com.gengoai.conversion.Cast;
import com.gengoai.hermes.AnnotatableType;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.AnnotationType;
import com.gengoai.hermes.AttributeType;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.Fragments;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.lexicon.Lexicon;
import com.gengoai.hermes.lexicon.LexiconEntry;
import com.gengoai.hermes.lexicon.LexiconManager;
import com.gengoai.hermes.lexicon.LexiconMatch;
import com.gengoai.string.Strings;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/annotator/FuzzyLexiconAnnotator.class */
/**
 * Annotator that matches spans against a {@link Lexicon}, falling back to a fuzzy, token-level
 * match when the lexicon has no exact match for the span.
 *
 * <p>The fuzzy fallback only considers multi-token lexicon entries whose first and last tokens
 * equal the first and last tokens (surface form or lemma) of the span and which are shorter than
 * the span, i.e. the span may contain extra tokens between the two anchors. A candidate is
 * accepted when its token-level edit distance to the span is at most {@code maxDistance}.
 */
public class FuzzyLexiconAnnotator extends ViterbiAnnotator {
    private static final long serialVersionUID = 1L;
    private final Lexicon lexicon;
    private final int maxDistance;
    // Both indexes store the SAME String[] instance per lexicon entry. Arrays compare by identity,
    // so the intersection in getCandidates relies on that shared instance.
    private final SetMultimap<String, String[]> prefix = new HashSetMultimap<>();
    private final SetMultimap<String, String[]> suffix = new HashSetMultimap<>();
    private final AnnotationType type;
    private final AttributeType<?> attributeType;

    /**
     * Creates an annotator backed by the given lexicon.
     *
     * @param annotationType the type of annotation created for each match
     * @param attributeType  the attribute that receives the decoded tag of a match
     * @param lexicon        the lexicon to match against
     * @param language       the language of the lexicon entries; decides whether entries are
     *                       tokenized on whitespace or split into single characters
     * @param i              the maximum fuzzy distance allowed for a match, must be {@code >= 0}
     * @throws NullPointerException if any of the reference arguments is {@code null}
     */
    public FuzzyLexiconAnnotator(@NonNull AnnotationType annotationType, @NonNull AttributeType<?> attributeType, @NonNull Lexicon lexicon, @NonNull Language language, int i) {
        // The lexicon is dereferenced by the super call, so its null check has to happen inside the
        // argument expression to produce the descriptive message.
        super(Objects.requireNonNull(lexicon, "lexicon is marked non-null but is null").getMaxTokenLength() + i);
        Objects.requireNonNull(annotationType, "annotationType is marked non-null but is null");
        Objects.requireNonNull(attributeType, "attributeType is marked non-null but is null");
        Objects.requireNonNull(language, "lexiconLanguage is marked non-null but is null");
        Validation.checkArgument(i >= 0, "Maximum fuzzy distance must be >= 0");
        this.attributeType = attributeType;
        this.type = annotationType;
        this.lexicon = lexicon;
        this.maxDistance = i;

        // Index every multi-token entry by its first and last token for fast candidate lookup.
        final boolean whitespaceDelimited = language.usesWhitespace();
        for (String entry : this.lexicon) {
            String[] parts = whitespaceDelimited ? entry.split("\\s+") : entry.split("");
            if (parts.length > 1) {
                this.prefix.put(parts[0], parts);
                this.suffix.put(parts[parts.length - 1], parts);
            }
        }
    }

    /**
     * Creates an annotator backed by the lexicon registered under the given name in the
     * {@link LexiconManager}.
     *
     * @param annotationType the type of annotation created for each match
     * @param attributeType  the attribute that receives the decoded tag of a match
     * @param str            the name of the lexicon to look up
     * @param language       the language of the lexicon entries
     * @param i              the maximum fuzzy distance allowed for a match, must be {@code >= 0}
     * @throws NullPointerException if any of the reference arguments is {@code null}
     */
    public FuzzyLexiconAnnotator(@NonNull AnnotationType annotationType, @NonNull AttributeType<?> attributeType, @NonNull String str, @NonNull Language language, int i) {
        // The name is checked before the lookup; the remaining arguments are validated by the
        // delegated constructor.
        this(annotationType, attributeType, LexiconManager.getLexicon(Objects.requireNonNull(str, "lexiconName is marked non-null but is null")), language, i);
    }

    /**
     * Attaches an annotation of the configured type over the span of the match, recording the
     * decoded tag (when the match has one), the match score as confidence and the matched string.
     * Matches with a null or blank matched string are ignored.
     *
     * @param document     the document to attach the annotation to
     * @param lexiconMatch the match to convert into an annotation
     */
    @Override // com.gengoai.hermes.annotator.ViterbiAnnotator
    protected void createAndAttachAnnotation(Document document, LexiconMatch lexiconMatch) {
        if (Strings.isNullOrBlank(lexiconMatch.getMatchedString())) {
            return;
        }
        Annotation annotation = document.annotationBuilder(this.type)
                .bounds(lexiconMatch.getSpan())
                .createAttached();
        if (Strings.isNotNullOrBlank(lexiconMatch.getTag())) {
            annotation.put(this.attributeType, Cast.as(this.attributeType.decode(lexiconMatch.getTag())));
        }
        annotation.put(Types.CONFIDENCE, lexiconMatch.getScore());
        annotation.put(Types.MATCHED_STRING, lexiconMatch.getMatchedString());
    }

    /**
     * Computes a token-level edit distance between the tokens of a span and a lexicon candidate.
     *
     * @param span      the tokens of the span being scored
     * @param candidate the tokens of the lexicon entry
     * @return the distance, or {@link Double#POSITIVE_INFINITY} when the span does not contain
     * every candidate token or the distance exceeds {@code maxDistance}
     */
    private double distance(List<Annotation> span, String[] candidate) {
        final boolean caseSensitive = this.lexicon.isCaseSensitive();

        // Cheap pre-filter: every candidate token must be accounted for by some span token.
        Counter<String> unmatched = Counters.newCounter(Arrays.asList(candidate));
        for (Annotation token : span) {
            String surface = token.toString();
            if (unmatched.contains(surface)) {
                unmatched.decrement(surface);
            } else if (!caseSensitive && unmatched.contains(surface.toLowerCase())) {
                unmatched.decrement(surface.toLowerCase());
            } else if (unmatched.contains(token.getLemma())) {
                unmatched.decrement(token.getLemma());
            } else if (!caseSensitive && unmatched.contains(token.getLemma().toLowerCase())) {
                unmatched.decrement(token.getLemma().toLowerCase());
            }
        }
        if (unmatched.sum() > 0.0d) {
            return Double.POSITIVE_INFINITY;
        }

        // Two-row dynamic program over (span token, candidate token).
        double[] previous = new double[candidate.length + 1];
        double[] current = new double[candidate.length + 1];
        for (int j = 0; j < previous.length; j++) {
            previous[j] = j;
        }
        for (int i = 0; i < span.size(); i++) {
            Annotation token = span.get(i);
            String surface = token.toString();
            String lemma = token.getLemma();
            // The penalty applies to the span token currently being compared (index i); the
            // previous code looked the token up with the candidate index instead.
            boolean punctuation = Strings.isPunctuation(surface);

            current[0] = i + 1;
            for (int j = 0; j < candidate.length; j++) {
                boolean same = Strings.safeEquals(candidate[j], surface, caseSensitive)
                        || Strings.safeEquals(candidate[j], lemma, caseSensitive);
                double cost;
                if (same) {
                    cost = 0.0d;
                } else if (punctuation) {
                    // A mismatching punctuation token costs more than any candidate length.
                    cost = previous.length;
                } else {
                    cost = 1.0d;
                }
                current[j + 1] = Math.min(current[j] + cost, Math.min(previous[j + 1] + cost, previous[j] + cost));
            }
            if (current[candidate.length] > this.maxDistance) {
                return Double.POSITIVE_INFINITY;
            }
            System.arraycopy(current, 0, previous, 0, previous.length);
        }
        return previous[candidate.length];
    }

    /**
     * Looks up the indexed lexicon entries that start with the first token and end with the
     * second.
     *
     * @param str  the required first token
     * @param str2 the required last token
     * @return the entries present in both the prefix and the suffix index
     */
    private Set<String[]> getCandidates(String str, String str2) {
        return Sets.intersection(this.prefix.get(str), this.suffix.get(str2));
    }

    /**
     * Gathers the candidates anchored by the surface forms and by the lemmas of the first and
     * last token, lower-casing the keys when the lexicon is not case sensitive.
     */
    private Set<String[]> candidatesFor(Annotation first, Annotation last) {
        return Sets.union(
                getCandidates(normalizeCase(first.toString()), normalizeCase(last.toString())),
                getCandidates(normalizeCase(first.getLemma()), normalizeCase(last.getLemma())));
    }

    /** Returns the token unchanged for a case-sensitive lexicon, otherwise lower-cased. */
    private String normalizeCase(String token) {
        return this.lexicon.isCaseSensitive() ? token : token.toLowerCase();
    }

    /**
     * Describes this annotator by the name of its lexicon and its maximum distance.
     *
     * @param language unused
     * @return the provider string
     */
    @Override // com.gengoai.hermes.annotator.Annotator
    public String getProvider(Language language) {
        return "FuzzyLexicon(lexicon='" + this.lexicon.getName() + "', maxDistance=" + this.maxDistance + ")";
    }

    /**
     * @return a singleton set holding the annotation type this annotator produces
     */
    @Override // com.gengoai.hermes.annotator.Annotator
    public Set<AnnotatableType> satisfies() {
        return Collections.singleton(this.type);
    }

    /**
     * Scores a span: an exact lexicon match wins; otherwise, for spans longer than two tokens,
     * the closest fuzzy candidate within {@code maxDistance} is returned with a score of the
     * entry's probability divided by {@code 0.1 + distance}.
     *
     * @param hString the span to score
     * @return the matching entry, or {@link LexiconEntry#empty()} when nothing matches
     */
    @Override // com.gengoai.hermes.annotator.ViterbiAnnotator
    protected LexiconEntry scoreSpan(HString hString) {
        LexiconEntry exact = this.lexicon.match(hString).stream().findFirst().orElse(null);
        if (exact != null) {
            return exact;
        }
        if (hString.tokenLength() <= 2) {
            return LexiconEntry.empty();
        }

        List<Annotation> tokens = hString.tokens();
        String[] best = null;
        double bestDistance = Double.POSITIVE_INFINITY;
        for (String[] candidate : candidatesFor(tokens.get(0), tokens.get(tokens.size() - 1))) {
            // Only entries shorter than the span qualify for a fuzzy match.
            if (candidate.length < tokens.size()) {
                double candidateDistance = distance(tokens, candidate);
                if (candidateDistance < bestDistance) {
                    bestDistance = candidateDistance;
                    best = candidate;
                }
            }
        }
        if (best == null || bestDistance > this.maxDistance) {
            return LexiconEntry.empty();
        }

        String matched = Strings.join(best, hString.getLanguage().usesWhitespace() ? " " : "");
        double score = this.lexicon.getProbability(Fragments.stringWrapper(matched)) / (0.1d + bestDistance);
        return LexiconEntry.of(matched, score, this.lexicon.getTag(matched).orElse(null), best.length);
    }
}
