package de.julielab.umlsfilter.delemmatizer;

import de.julielab.provider.ProvidedTerm;
import de.julielab.umlsfilter.config.ResourceProvider;
import de.julielab.umlsfilter.rules.Rule;
import de.julielab.umlsfilter.rules.TermContainer;
import de.julielab.umlsfilter.rules.TermWithSource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/julielab/umlsfilter/delemmatizer/Delemmatizer.class */
/**
 * Applies per-language {@link Rule}s to terms and prints the outcome to standard output, either
 * as MRCONSO lines or as gazetteer entries, depending on the requested {@link FilterMode}.
 *
 * <p>The rule list of a language is instantiated on first use and cached in a plain
 * {@link HashMap} for the lifetime of the instance; the cache is not synchronized.
 */
public class Delemmatizer {
    // Only the compiled patterns are shared. Matcher instances are mutable and not thread-safe,
    // so a fresh one is created for every call of regularizeTerm.
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /**
     * Misspelled name kept so that existing callers continue to compile; prefer
     * {@link #LANGUAGE_ENGLISH}.
     */
    public static final String LANGUAGE_ENLGLISH = "ENG";
    /** Correctly spelled alias of {@link #LANGUAGE_ENLGLISH}. */
    public static final String LANGUAGE_ENGLISH = "ENG";
    public static final String LANGUAGE_GERMAN = "GER";
    public static final String LANGUAGE_FRENCH = "FRE";
    public static final String LANGUAGE_SPANISH = "SPA";
    public static final String LANGUAGE_DUTCH = "DUT";

    /** Rule lists keyed by the language string they were loaded for. */
    private final Map<String, List<Rule>> ruleMap = new HashMap<>();

    /**
     * Processes every term of the iterator and prints the result for the given mode.
     *
     * <ul>
     *   <li>{@code FilterMode.BASELINE_GAZETTEER_FILE}: no rules are applied; each term is printed
     *       as a gazetteer entry with an empty third field.</li>
     *   <li>{@code FilterMode.MRCONSO}: rules are applied; for every raw term that is not
     *       suppressed, the original MRCONSO line is printed if no rule modified it, otherwise
     *       the updated MRCONSO line.</li>
     *   <li>{@code FilterMode.PRODUCE_GAZETTEER_FILE}: rules are applied; suppressed terms are
     *       reported on standard error, all others are printed as gazetteer entries.</li>
     * </ul>
     *
     * <p>Gazetteer entries are de-duplicated across the whole call on the CUI plus the
     * {@linkplain #regularizeTerm(String) regularized} term.
     *
     * @param it         the terms to process
     * @param filterMode the output mode, one of the three modes listed above
     * @param set        handed unchanged to every {@code Rule.apply} call; its meaning is defined
     *                   by the rules
     * @param str        if not {@code null}, passed together with {@code str2} to
     *                   {@code ResourceProvider.setLanguageRule(str2, str)} before any term is
     *                   processed (presumably a rule override for one language; confirm against
     *                   {@code ResourceProvider})
     * @param str2       first argument of {@code ResourceProvider.setLanguageRule}; only used when
     *                   {@code str} is not {@code null}
     * @throws IOException              if loading or applying rules fails with an I/O error
     * @throws IllegalArgumentException if a term is encountered while {@code filterMode} is not
     *                                  one of the supported modes; it is thrown before any rule
     *                                  is applied to that term
     */
    public static void delemmatize(Iterator<ProvidedTerm> it, FilterMode filterMode, Set<String> set, String str, String str2) throws IOException {
        Delemmatizer delemmatizer = new Delemmatizer();
        if (str != null) {
            ResourceProvider.setLanguageRule(str2, str);
        }
        // Keys of the gazetteer entries already printed during this call.
        Set<String> printedKeys = new HashSet<>();
        while (it.hasNext()) {
            ProvidedTerm next = it.next();
            if (FilterMode.BASELINE_GAZETTEER_FILE == filterMode) {
                // Baseline output bypasses the rules entirely.
                printGazetteerString(next.getTerm(), next.getCui(), "", printedKeys);
            } else if (FilterMode.MRCONSO == filterMode) {
                printMrconsoLines(next, delemmatizer.delemmatizeTerm(next.getTerm(), next.getLanguageLong(), next.isChemicalOrDrug(), set));
            } else if (FilterMode.PRODUCE_GAZETTEER_FILE == filterMode) {
                printGazetteerEntries(next, delemmatizer.delemmatizeTerm(next.getTerm(), next.getLanguageLong(), next.isChemicalOrDrug(), set), printedKeys);
            } else {
                throw new IllegalArgumentException("Unsupported filter mode: " + filterMode);
            }
        }
    }

    /** Prints one MRCONSO line per raw term of the container that is not suppressed. */
    private static void printMrconsoLines(ProvidedTerm source, TermContainer container) {
        for (TermWithSource candidate : container.getRawTerms()) {
            if (candidate.getIsSupressed()) {
                continue;
            }
            if (candidate.getModifiedByRulesString().equals("")) {
                // No rule touched this term, so the source line is emitted as it was.
                System.out.println(source.getOriginalMRCONSO());
            } else {
                System.out.println(source.getUpdatedMRCONSO(candidate.getTerm(), candidate.getModifiedByRulesString()));
            }
        }
    }

    /** Prints the container's raw terms as gazetteer entries; suppressed ones go to stderr. */
    private static void printGazetteerEntries(ProvidedTerm source, TermContainer container, Set<String> printedKeys) {
        String cui = source.getCui();
        for (TermWithSource candidate : container.getRawTerms()) {
            if (candidate.getIsSupressed()) {
                System.err.printf("Deleted:\t%s\t%s\t%s\n", candidate.getTerm(), cui, candidate.getModifiedByRulesString());
            } else if (candidate.getModifiedByRulesString().equals("")) {
                printGazetteerString(candidate.getTerm(), cui, "", printedKeys);
            } else {
                // Modified terms carry the source term and the modifying rules in the third field.
                printGazetteerString(candidate.getTerm(), cui, source.getTerm() + "---" + candidate.getModifiedByRulesString(), printedKeys);
            }
        }
    }

    /**
     * Creates a {@link TermContainer} for the term and applies the rules to it in list order.
     *
     * @return the container after all rules have been applied
     */
    private static TermContainer delemmatizeTermForRules(String term, String language, boolean isChemicalOrDrug, List<Rule> rules, Set<String> ruleArgument) throws IOException {
        TermContainer container = new TermContainer(term, language, isChemicalOrDrug);
        for (Rule rule : rules) {
            rule.apply(container, ruleArgument);
        }
        return container;
    }

    /**
     * Prints one gazetteer line {@code term<TAB>UMLS@@cui@@info@@ANY} unless an entry with the same
     * CUI and regularized term was already recorded in {@code printedKeys}.
     */
    private static void printGazetteerString(String term, String cui, String info, Set<String> printedKeys) {
        // Set.add returns false when the key is already present, i.e. the entry is a duplicate.
        if (!printedKeys.add(cui + regularizeTerm(term))) {
            return;
        }
        System.out.println(String.format("%s\tUMLS@@%s@@%s@@ANY", term, cui, info));
    }

    /**
     * Normalizes a term for duplicate detection: removes all {@code \p{Punct}} characters,
     * lower-cases the rest and then removes all {@code \s} characters.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that the result does not depend on the default
     * locale of the JVM. The method keeps no shared mutable state.
     *
     * @param str the term to normalize; must not be {@code null}
     * @return the normalized term
     */
    public static String regularizeTerm(String str) {
        String withoutPunctuation = PUNCTUATION.matcher(str).replaceAll("");
        Matcher whitespace = WHITESPACE.matcher(withoutPunctuation.toLowerCase(Locale.ROOT));
        return whitespace.replaceAll("");
    }

    /** Applies the rules of the given language to the term, loading them on first use. */
    private TermContainer delemmatizeTerm(String term, String language, boolean isChemicalOrDrug, Set<String> ruleArgument) throws IOException {
        if (!this.ruleMap.containsKey(language)) {
            prepareRules(language);
        }
        return delemmatizeTermForRules(term, language, isChemicalOrDrug, this.ruleMap.get(language), ruleArgument);
    }

    /**
     * Applies the rules of language {@code str2} to term {@code str}, passing {@code null} as the
     * set argument of the rules, and returns the container's unsuppressed term strings.
     *
     * @deprecated marked deprecated in the original source; no replacement is named there
     */
    @Deprecated
    ArrayList<String> delemmatizeTermProducingUnsuppressedStrings(String str, String str2, boolean z) throws IOException {
        return delemmatizeTerm(str, str2, z, null).getUnsuppressedTermStrings();
    }

    /**
     * Instantiates the rules that {@link ResourceProvider} lists for language {@code str} and
     * caches them under that language.
     *
     * <p>A rule name containing a dot is used as a fully qualified class name; any other name is
     * resolved inside the package {@code de.julielab.umlsfilter.rules}. Each rule class is
     * created through its constructor taking a single {@link Map}, which receives the parameters
     * from {@code ResourceProvider.getRuleParameters}.
     *
     * <p>Loading is best-effort: a rule that cannot be instantiated is reported on standard error
     * and skipped, and the remaining rules are still loaded.
     *
     * @param str the language whose rules are to be loaded
     * @throws IOException if obtaining the rule names for the language fails with an I/O error
     */
    void prepareRules(String str) throws IOException {
        List<Rule> rules = new ArrayList<>();
        for (String ruleName : ResourceProvider.getRulesForLanguage(str)) {
            try {
                Class<?> ruleClass = ruleName.contains(".") ? Class.forName(ruleName) : Class.forName("de.julielab.umlsfilter.rules." + ruleName);
                rules.add((Rule) ruleClass.getDeclaredConstructor(Map.class).newInstance(ResourceProvider.getRuleParameters(str, ruleName)));
            } catch (Exception e) {
                // Say which rule was dropped; the stack trace alone does not name it.
                System.err.println("Could not instantiate rule '" + ruleName + "' for language '" + str + "'; the rule is skipped.");
                e.printStackTrace();
            }
        }
        this.ruleMap.put(str, rules);
    }
}
