package com.gengoai.hermes.en;

import com.gengoai.Tag;
import com.gengoai.collection.Maps;
import com.gengoai.collection.multimap.ArrayListMultimap;
import com.gengoai.collection.multimap.Multimap;
import com.gengoai.collection.tree.Trie;
import com.gengoai.hermes.format.CoNLLFormat;
import com.gengoai.hermes.morphology.Lemmatizer;
import com.gengoai.hermes.morphology.PartOfSpeech;
import com.gengoai.io.CSV;
import com.gengoai.io.CSVReader;
import com.gengoai.io.Resources;
import com.gengoai.stream.Streams;
import com.gengoai.string.Re;
import com.gengoai.string.Strings;
import com.gengoai.tuple.Tuple2;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/en/ENLemmatizer.class */
public class ENLemmatizer implements Lemmatizer, Serializable {
    private static final long serialVersionUID = -6093027604295026727L;
    /** Parts of speech tried when a caller passes {@code ANY} (or no part of speech at all). */
    private static final PartOfSpeech[] ALL_PartOfSpeech = {PartOfSpeech.NOUN, PartOfSpeech.VERB, PartOfSpeech.ADJECTIVE, PartOfSpeech.ADVERB};
    /** Lazily created shared instance; volatile because getInstance() reads it outside the lock. */
    private static volatile ENLemmatizer INSTANCE = null;
    /** Compiled once and never reassigned; used to detect multi-word input in doLemmatization. */
    private static final Pattern WHITESPACE = Pattern.compile(Re.MULTIPLE_WHITESPACE);
    /** Suffix-detachment rules keyed by part of speech, in registration order. */
    private final Multimap<PartOfSpeech, DetachmentRule> rules = new ArrayListMultimap<>();
    /** Irregular forms: (part of speech, surface form) mapped to the lemma(s) read from the ".exc" resources. */
    private final Multimap<Tuple2<PartOfSpeech, String>, String> exceptions = new ArrayListMultimap<>();
    /** Known lemmas (lower-cased, underscores replaced by spaces) mapped to the parts of speech they occur with. */
    private final Trie<Set<PartOfSpeech>> lemmas;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/gengoai/hermes/en/ENLemmatizer$DetachmentRule.class */
    /**
     * A single suffix rewrite: a word ending in {@link #ending} has that ending swapped for
     * {@link #replacement}. {@link #unapply(String)} performs the swap in the other direction.
     */
    public static class DetachmentRule implements Serializable, Function<String, String> {
        private static final long serialVersionUID = 2748362312310767937L;
        /** Suffix that {@link #apply(String)} strips. */
        public final String ending;
        /** Text that {@link #apply(String)} appends in place of the stripped suffix. */
        public final String replacement;

        private DetachmentRule(String ending, String replacement) {
            this.ending = ending;
            this.replacement = replacement;
        }

        /**
         * Replaces a trailing {@code ending} with {@code replacement}.
         *
         * @param str the word to rewrite; may be {@code null}
         * @return {@code null} for {@code null} input, the input unchanged when it does not end
         *         with {@code ending}, otherwise the rewritten word
         */
        @Override // java.util.function.Function
        public String apply(String str) {
            if (str == null || !str.endsWith(this.ending)) {
                return str;
            }
            String stem = str.substring(0, str.length() - this.ending.length());
            // When the whole word is the ending, the result is the replacement itself.
            return stem.isEmpty() ? this.replacement : stem + this.replacement;
        }

        /**
         * Replaces a trailing {@code replacement} with {@code ending}.
         *
         * @param str the word to rewrite; may be {@code null}
         * @return {@code null} for {@code null} input, the input unchanged when it does not end
         *         with {@code replacement}, otherwise the rewritten word
         */
        public String unapply(String str) {
            if (str == null || !str.endsWith(this.replacement)) {
                return str;
            }
            String stem = str.substring(0, str.length() - this.replacement.length());
            return stem.isEmpty() ? this.ending : stem + this.ending;
        }
    }

    /**
     * Builds the lemmatizer: registers the suffix-detachment rules for nouns, verbs and
     * adjectives, loads the per-part-of-speech exception lists, and fills the lemma trie from
     * the tab-separated classpath resource {@code com/gengoai/hermes/en/lemmas.dict.gz}.
     *
     * <p>Rule registration order is kept as-is because rules are applied in that order and
     * results are collected in an insertion-ordered set.
     *
     * @throws RuntimeException if an exception list or the lemma dictionary cannot be read
     */
    private ENLemmatizer() {
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("s", ""));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("ses", "s"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("xes", "x"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("zes", "z"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("ies", "y"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("shes", "sh"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("ches", "ch"));
        this.rules.put(PartOfSpeech.NOUN, new DetachmentRule("men", "man"));
        loadException(PartOfSpeech.NOUN);
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("s", ""));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("ies", "y"));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("es", "s"));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("es", ""));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("ed", "e"));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("ed", ""));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("ing", "e"));
        this.rules.put(PartOfSpeech.VERB, new DetachmentRule("ing", ""));
        loadException(PartOfSpeech.VERB);
        this.rules.put(PartOfSpeech.ADJECTIVE, new DetachmentRule("er", ""));
        this.rules.put(PartOfSpeech.ADJECTIVE, new DetachmentRule("est", ""));
        this.rules.put(PartOfSpeech.ADJECTIVE, new DetachmentRule("er", "e"));
        this.rules.put(PartOfSpeech.ADJECTIVE, new DetachmentRule("est", "e"));
        loadException(PartOfSpeech.ADJECTIVE);
        // Adverbs have no detachment rules, only an exception list.
        loadException(PartOfSpeech.ADVERB);
        this.lemmas = new Trie<>();
        // try-with-resources guarantees the reader is closed even when a row fails to parse.
        try (CSVReader reader = CSV.builder().delimiter('\t').reader(Resources.fromClasspath("com/gengoai/hermes/en/lemmas.dict.gz"))) {
            reader.forEach(row -> {
                // Rows with fewer than two columns carry no part of speech and are skipped.
                if (row.size() >= 2) {
                    String lemma = row.get(0).replace('_', ' ').toLowerCase();
                    PartOfSpeech pos = PartOfSpeech.valueOf(row.get(1).toUpperCase());
                    if (!this.lemmas.containsKey(lemma)) {
                        this.lemmas.put(lemma, new HashSet<>());
                    }
                    this.lemmas.get(lemma).add(pos);
                }
            });
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the shared lemmatizer, creating it on first use.
     *
     * <p>Uses double-checked locking on the volatile {@code INSTANCE} field so that the
     * constructor (which loads classpath resources) runs at most once.
     *
     * @return the singleton instance
     */
    public static ENLemmatizer getInstance() {
        ENLemmatizer current = INSTANCE;
        if (current == null) {
            synchronized (ENLemmatizer.class) {
                // Re-read under the lock: another thread may have initialised it meanwhile.
                current = INSTANCE;
                if (current == null) {
                    current = new ENLemmatizer();
                    INSTANCE = current;
                }
            }
        }
        return current;
    }

    /**
     * Collects every lemma found for {@code str} under any part of speech, plus {@code str}
     * itself exactly as given.
     *
     * @param str the token to expand
     * @return a new mutable set containing the lemmas and the original token
     */
    private Set<String> allAndSelf(String str) {
        Set<String> forms = new HashSet<>(doLemmatization(str, true, PartOfSpeech.ANY));
        forms.add(str);
        return forms;
    }

    /**
     * Returns every candidate lemma for a word under the given part of speech.
     *
     * <p>{@code ANY} tries nouns, verbs, adjectives and adverbs; a part of speech that is an
     * instance of one of those four is tried on its own; for anything else the lower-cased
     * word is returned as the only candidate.
     *
     * @param str          the word to lemmatize
     * @param partOfSpeech the part of speech to restrict the lookup to
     * @return a non-empty list of candidate lemmas
     * @throws NullPointerException if either argument is {@code null}
     */
    @Override // com.gengoai.hermes.morphology.Lemmatizer
    public List<String> allPossibleLemmas(@NonNull String str, @NonNull PartOfSpeech partOfSpeech) {
        if (str == null) {
            throw new NullPointerException("word is marked non-null but is null");
        }
        if (partOfSpeech == null) {
            throw new NullPointerException("partOfSpeech is marked non-null but is null");
        }
        // Declared as List (not ArrayList) so the singletonList fallback below is assignable.
        List<String> candidates = null;
        if (partOfSpeech == PartOfSpeech.ANY) {
            candidates = new ArrayList<>(doLemmatization(str, true, ALL_PartOfSpeech));
        } else if (partOfSpeech.isInstance(ALL_PartOfSpeech)) {
            candidates = new ArrayList<>(doLemmatization(str, true, partOfSpeech));
        }
        if (candidates == null || candidates.isEmpty()) {
            candidates = Collections.singletonList(str.toLowerCase());
        }
        return candidates;
    }

    /**
     * Builds a trie of dictionary entries reachable from the lemmas of {@code str}: each lemma
     * that is itself a dictionary key, and every dictionary key that starts with the lemma
     * followed by a space. Every entry maps to itself.
     *
     * @param str          the word to lemmatize
     * @param partOfSpeech the part of speech to restrict lemmatization to
     * @return a new trie of matching dictionary keys
     * @throws NullPointerException if either argument is {@code null}
     */
    @Override // com.gengoai.hermes.morphology.Lemmatizer
    public Trie<String> allPossibleLemmasAndPrefixes(@NonNull String str, @NonNull PartOfSpeech partOfSpeech) {
        if (str == null) {
            throw new NullPointerException("string is marked non-null but is null");
        }
        if (partOfSpeech == null) {
            throw new NullPointerException("partOfSpeech is marked non-null but is null");
        }
        Trie<String> result = new Trie<>();
        for (String lemma : doLemmatization(str, true, partOfSpeech)) {
            // Dictionary keys that continue this lemma as the first word of a phrase.
            Set<String> continuations = this.lemmas.prefix(lemma + " ").keySet();
            result.putAll(Maps.asHashMap(continuations, key -> key));
            if (this.lemmas.containsKey(lemma)) {
                result.put(lemma, lemma);
            }
        }
        return result;
    }

    /**
     * Tells whether at least one lemma can be found for the word under the given part of speech.
     *
     * @param str          the word to check
     * @param partOfSpeech the part of speech; must be an instance of noun, verb, adjective or
     *                     adverb for the lookup to be attempted
     * @return {@code true} if lemmatization (without the lower-cased-word fallback) yields a result
     */
    @Override // com.gengoai.hermes.morphology.Lemmatizer
    public boolean canLemmatize(String str, PartOfSpeech partOfSpeech) {
        if (!partOfSpeech.isInstance(ALL_PartOfSpeech)) {
            return false;
        }
        return !doLemmatization(str, false, partOfSpeech).isEmpty();
    }

    /**
     * Checks whether {@code str} is a dictionary lemma for the given part of speech.
     *
     * @param str          the candidate lemma, looked up exactly as given
     * @param partOfSpeech the part of speech; {@code ANY} accepts any dictionary entry,
     *                     otherwise its universal tag must be recorded for the entry
     * @return {@code true} if the dictionary has a matching entry
     */
    private boolean contains(String str, PartOfSpeech partOfSpeech) {
        if (!this.lemmas.containsKey(str)) {
            return false;
        }
        if (partOfSpeech == PartOfSpeech.ANY) {
            return true;
        }
        return this.lemmas.get(str).contains(partOfSpeech.getUniversalTag());
    }

    /**
     * Core lookup shared by the public lemmatization methods.
     *
     * <p>Tries, in order and stopping at the first stage that produces a result: the
     * lower-cased word as-is; the word with hyphens turned into spaces; the word with spaces
     * turned into hyphens; and finally a word-by-word phrase lookup when the input contains
     * whitespace.
     *
     * @param str             the word or phrase to lemmatize
     * @param z               when {@code true} and nothing was found, the lower-cased input is
     *                        returned as the sole result instead of an empty set
     * @param partOfSpeechArr parts of speech to try; {@code null}, empty, or a leading
     *                        {@code ANY} means noun, verb, adjective and adverb
     * @return the lemmas found, in discovery order
     */
    private Set<String> doLemmatization(String str, boolean z, PartOfSpeech... partOfSpeechArr) {
        PartOfSpeech[] tags = partOfSpeechArr;
        if (tags == null || tags.length == 0 || tags[0] == PartOfSpeech.ANY) {
            tags = ALL_PartOfSpeech;
        }
        final String lowered = str.toLowerCase();
        final Set<String> found = new LinkedHashSet<>();

        for (PartOfSpeech tag : tags) {
            fill(lowered, tag, found);
        }

        if (found.isEmpty() && lowered.contains("-")) {
            final String spaced = lowered.replace('-', ' ');
            for (PartOfSpeech tag : tags) {
                fill(spaced, tag, found);
            }
        }

        if (found.isEmpty() && lowered.contains(" ")) {
            final String hyphenated = lowered.replace(' ', '-');
            for (PartOfSpeech tag : tags) {
                fill(hyphenated, tag, found);
            }
        }

        if (found.isEmpty() && WHITESPACE.matcher(lowered).find()) {
            found.addAll(phraseLemmas(lowered, tags));
        }

        if (z && found.isEmpty()) {
            return Collections.singleton(lowered);
        }
        return found;
    }

    /**
     * Adds to {@code set} every lemma of {@code str} that can be derived for one part of speech.
     *
     * <p>Resolution order: the word itself if it is already a dictionary lemma; hard-coded
     * contractions ({@code 's}/{@code 're} to "be", {@code 'll} to "will", {@code 've} to
     * "have", {@code n't} to "not", {@code 'd} to "would"); then the exception list; then each
     * detachment rule whose output is a dictionary lemma. The first two stages return
     * immediately on a match.
     *
     * @param str          the word to lemmatize (lower-cased internally)
     * @param partOfSpeech the part of speech to look the word up under
     * @param set          receives the lemmas found
     */
    private void fill(String str, PartOfSpeech partOfSpeech, Set<String> set) {
        String word = str.toLowerCase();
        PartOfSpeech universalTag = partOfSpeech.getUniversalTag();
        if (contains(word, universalTag)) {
            set.add(word);
            return;
        }
        if (partOfSpeech.isVerb()) {
            if (word.equalsIgnoreCase("'s") || word.equalsIgnoreCase("'re")) {
                set.add("be");
                return;
            } else if (word.equals("'ll")) {
                set.add("will");
                return;
            } else if (word.equals("'ve")) {
                // "'ve" contracts "have"; it was previously mapped to "will" by mistake.
                set.add("have");
                return;
            }
        } else if (partOfSpeech.isAdverb()) {
            if (word.equalsIgnoreCase("n't")) {
                set.add("not");
                return;
            }
        } else if (word.equalsIgnoreCase("'d")) {
            // NOTE(review): this branch is only reached when the tag is neither a verb nor an
            // adverb, so a verb-tagged "'d" is not mapped here; confirm that is intended.
            set.add("would");
            return;
        }
        Tuple2<PartOfSpeech, String> exceptionKey = Tuple2.of(universalTag, word);
        if (this.exceptions.containsKey(exceptionKey)) {
            set.addAll(this.exceptions.get(exceptionKey));
        }
        for (DetachmentRule rule : this.rules.get(universalTag)) {
            String candidate = rule.apply(word);
            // A rule that does not match returns the word unchanged; it is kept only if it
            // happens to be a dictionary lemma (already ruled out above for the word itself).
            if (contains(candidate, universalTag)) {
                set.add(candidate);
            }
        }
    }

    /**
     * Checks whether a dictionary entry exists for {@code str} with one of the given parts of
     * speech.
     *
     * @param str             the dictionary key to look up
     * @param partOfSpeechArr accepted parts of speech; {@code null}, empty, or a leading
     *                        {@code ANY} accepts any entry
     * @return {@code true} if the entry exists and at least one of its recorded parts of speech
     *         is an instance of the accepted ones
     */
    private boolean hasPartOfSpeech(String str, PartOfSpeech... partOfSpeechArr) {
        boolean unrestricted = partOfSpeechArr == null
                || partOfSpeechArr.length == 0
                || partOfSpeechArr[0] == PartOfSpeech.ANY;
        if (unrestricted) {
            return this.lemmas.containsKey(str);
        }
        Set<PartOfSpeech> recorded = this.lemmas.getOrDefault(str, Collections.emptySet());
        for (PartOfSpeech tag : recorded) {
            if (tag.isInstance(partOfSpeechArr)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the first lemma found for a word, lower-cased.
     *
     * <p>{@code ANY} searches nouns, verbs, adjectives and adverbs; a part of speech that is an
     * instance of one of those is searched on its own; any other part of speech yields the
     * lower-cased input unchanged.
     *
     * @param str          the word to lemmatize
     * @param partOfSpeech the part of speech of the word
     * @return the lemma, or the lower-cased word when none is found
     * @throws NullPointerException if either argument is {@code null}
     */
    @Override // com.gengoai.hermes.morphology.Lemmatizer
    public String lemmatize(@NonNull String str, @NonNull PartOfSpeech partOfSpeech) {
        if (str == null) {
            throw new NullPointerException("string is marked non-null but is null");
        }
        if (partOfSpeech == null) {
            throw new NullPointerException("partOfSpeech is marked non-null but is null");
        }
        final Set<String> candidates;
        if (partOfSpeech == PartOfSpeech.ANY) {
            candidates = doLemmatization(str, true, ALL_PartOfSpeech);
        } else if (partOfSpeech.isInstance(ALL_PartOfSpeech)) {
            candidates = doLemmatization(str, true, partOfSpeech);
        } else {
            // Parts of speech outside noun/verb/adjective/adverb are not lemmatized.
            return str.toLowerCase();
        }
        return Streams.asStream(candidates).findFirst().orElse(str).toLowerCase();
    }

    /**
     * Reads the exception list for one part of speech from the classpath resource
     * {@code com/gengoai/hermes/en/<tag>.exc} (tag lower-cased) into {@code exceptions}.
     *
     * <p>Each non-blank line is split on whitespace: the first field is the surface form (with
     * {@code CoNLLFormat.EMPTY_FIELD} matches replaced by spaces) and every following field is
     * a lemma registered for it.
     *
     * @param partOfSpeech the part of speech whose exception file is loaded
     * @throws RuntimeException wrapping the {@link IOException} if the resource cannot be read
     */
    private void loadException(PartOfSpeech partOfSpeech) {
        String resourceName = partOfSpeech.tag().toLowerCase() + ".exc";
        try {
            for (String line : Resources.fromClasspath("com/gengoai/hermes/en").getChild(resourceName).readLines()) {
                if (Strings.isNullOrBlank(line)) {
                    continue;
                }
                String[] fields = line.split("\\s+");
                String surfaceForm = fields[0].replaceAll(CoNLLFormat.EMPTY_FIELD, " ");
                Tuple2<PartOfSpeech, String> key = Tuple2.of(partOfSpeech.getUniversalTag(), surfaceForm);
                int column = 1;
                while (column < fields.length) {
                    this.exceptions.put(key, fields[column]);
                    column++;
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Lemmatizes a multi-word phrase by lemmatizing its words individually and keeping only the
     * combinations that exist in the dictionary.
     *
     * <p>Starting from the forms of the first word, each following word's forms are appended
     * (space-separated); a combination survives only if it is, or is a prefix of, a dictionary
     * entry that begins with a lemma of the first word. If no combination survives at some
     * word, the result is empty. Surviving phrases are finally filtered by part of speech.
     *
     * @param str             the phrase, split on whitespace
     * @param partOfSpeechArr accepted parts of speech; {@code null}, empty, or a leading
     *                        {@code ANY} means noun, verb, adjective and adverb
     * @return the matching phrase lemmas, possibly empty
     */
    private Set<String> phraseLemmas(String str, PartOfSpeech... partOfSpeechArr) {
        String[] tokens = str.split("\\s+");
        if (tokens.length == 0) {
            return Collections.emptySet();
        }
        final PartOfSpeech[] tags;
        if (partOfSpeechArr == null || partOfSpeechArr.length == 0 || partOfSpeechArr[0] == PartOfSpeech.ANY) {
            tags = ALL_PartOfSpeech;
        } else {
            tags = partOfSpeechArr;
        }
        Trie<String> knownPhrases = allPossibleLemmasAndPrefixes(tokens[0], PartOfSpeech.ANY);
        Set<String> partials = allAndSelf(tokens[0]);
        for (int i = 1; i < tokens.length; i++) {
            // The forms of the current word do not depend on the partial phrase, so compute
            // them once per word rather than once per partial phrase.
            Set<String> nextForms = allAndSelf(tokens[i]);
            Set<String> extended = new HashSet<>();
            for (String partial : partials) {
                for (String next : nextForms) {
                    String candidate = partial + " " + next;
                    if (knownPhrases.containsKey(candidate) || knownPhrases.prefix(candidate).size() > 0) {
                        extended.add(candidate);
                    }
                }
            }
            if (extended.isEmpty()) {
                return Collections.emptySet();
            }
            partials = extended;
        }
        return partials.stream()
                .filter(phrase -> hasPartOfSpeech(phrase, tags))
                .collect(Collectors.toSet());
    }
}
