package lv.semti.morphology.lexicon;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import lv.semti.morphology.analyzer.AllEndings;
import lv.semti.morphology.analyzer.Mijas;
import lv.semti.morphology.analyzer.Variants;
import lv.semti.morphology.attributes.AttributeNames;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:lv/semti/morphology/lexicon/Lexicon.class */
/**
 * In-memory lexicon: a list of {@link Paradigm}s read from a main XML document
 * (root element {@code Morphology}) plus the lexemes of every sub-lexicon file
 * that the main document lists in its {@code Corpus} elements.
 *
 * <p>Sub-lexicons may be XML (root element {@code Lexicon}) or line-delimited
 * JSON (one lexeme object per line). No synchronization is performed by this
 * class.
 */
public class Lexicon {
    /** Classpath resource name of the lexicon used by the no-file constructors. */
    public static final String DEFAULT_LEXICON_FILE = "Lexicon_v2.xml";
    /** Name the lexicon was loaded from; stays {@code null} when loaded from a stream. */
    protected String filename;
    /** Value of the root element's {@code revision} attribute, or {@code null} if absent. */
    private String revision;
    /** Value of the root element's {@code licence} attribute, or {@code null} if absent. */
    private String licence;
    /** All paradigms of the lexicon, in document order. */
    public ArrayList<Paradigm> paradigms;
    /** Hard-coded list of word prefixes, filled at the end of loading. */
    protected ArrayList<String> prefixes;
    public static int proper_name_frequency_floor = 200;
    /** Matches a word whose first character is an upper-case letter. */
    protected static Pattern p_firstcap = Pattern.compile("\\p{Lu}.*");
    /** Matches a word consisting only of upper-case letters (also matches the empty string). */
    protected static Pattern p_allcaps = Pattern.compile("(\\p{Lu})*");
    /** Matches two capitalised parts joined by a hyphen. */
    protected static Pattern p_doublesurname = Pattern.compile("\\p{Lu}.+-\\p{Lu}.+");
    /** Lazily built index over the endings of all paradigms; {@code null} means "rebuild on next use". */
    private AllEndings allEndings = null;
    /** Sub-lexicon file names collected from the {@code Corpus} elements of the main document. */
    private ArrayList<String> corpusFileNames = new ArrayList<>();
    protected Multimap<String, Lexeme> hardcodedForms = ArrayListMultimap.create();
    /** Last lexeme ID handed out by {@link #newLexemeID()}. */
    int lexeme_id_counter = 1100000;

    /**
     * Loads the default lexicon from the classpath together with every
     * sub-lexicon it lists.
     *
     * @throws Exception if the resource is missing or cannot be parsed
     */
    public Lexicon() throws Exception {
        this(true);
    }

    /**
     * Loads the default lexicon from the classpath.
     *
     * @param z {@code true} to load every listed sub-lexicon, {@code false} to
     *          load only those whose {@code Type} attribute is {@code core}
     * @throws Exception if the resource is missing or cannot be parsed
     */
    public Lexicon(boolean z) throws Exception {
        // try-with-resources: the parser is not guaranteed to close the stream for us.
        try (InputStream defaultLexicon = getClass().getClassLoader().getResourceAsStream(DEFAULT_LEXICON_FILE)) {
            if (defaultLexicon == null) {
                throw new IOException("Can't find 'Lexicon_v2.xml'.");
            }
            init(defaultLexicon, z);
        }
    }

    /**
     * Loads a lexicon by name together with every sub-lexicon it lists.
     *
     * @param str classpath resource name or, if no such resource exists, a file path
     * @throws Exception if the lexicon cannot be read or parsed
     */
    public Lexicon(String str) throws Exception {
        init(str, true);
    }

    /**
     * Loads a lexicon by name.
     *
     * @param str classpath resource name or, if no such resource exists, a file path
     * @param z   {@code true} to load every listed sub-lexicon, {@code false} to
     *            load only those whose {@code Type} attribute is {@code core}
     * @throws Exception if the lexicon cannot be read or parsed
     */
    public Lexicon(String str, boolean z) throws Exception {
        init(str, z);
    }

    /**
     * Loads a lexicon by name, skipping selected sub-lexicons.
     *
     * @param str       classpath resource name or, if no such resource exists, a file path
     * @param arrayList sub-lexicon file names (as written in the main document)
     *                  that must not be loaded; may be {@code null}
     * @throws Exception if the lexicon cannot be read or parsed
     */
    public Lexicon(String str, ArrayList<String> arrayList) throws Exception {
        init(str, arrayList);
    }

    /** @return the name this lexicon was loaded from, or {@code null} if it came from a stream */
    public String getFilename() {
        return this.filename;
    }

    /**
     * Returns the index over the endings of all paradigms, building it on
     * first use or after {@link #invalidateAllEndings()}.
     *
     * @return the cached {@link AllEndings} instance
     */
    public AllEndings getAllEndings() {
        if (this.allEndings == null) {
            ArrayList<Ending> collected = new ArrayList<>();
            for (Paradigm paradigm : this.paradigms) {
                collected.addAll(paradigm.endings);
            }
            this.allEndings = new AllEndings(collected);
        }
        return this.allEndings;
    }

    /** Drops the cached endings index so that the next {@link #getAllEndings()} call rebuilds it. */
    public void invalidateAllEndings() {
        this.allEndings = null;
    }

    /** Loads the named lexicon; {@code loadAllSublexicons == false} restricts loading to "core" sub-lexicons. */
    private void init(String lexiconName, boolean loadAllSublexicons) throws Exception {
        init_main(parseMainLexicon(lexiconName), new File(lexiconName).getParent(), loadAllSublexicons);
    }

    /** Loads the named lexicon with all sub-lexicons except those named in {@code excluded}. */
    private void init(String lexiconName, ArrayList<String> excluded) throws Exception {
        init_main(parseMainLexicon(lexiconName), new File(lexiconName).getParent(), excluded);
    }

    /** Loads the lexicon from an already opened stream; sub-lexicons are then looked up on the classpath. */
    private void init(InputStream inputStream, boolean loadAllSublexicons) throws Exception {
        System.err.println("Loading the lexicon from an input stream...");
        init_main(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream), (String) null, loadAllSublexicons);
    }

    /**
     * Records {@code lexiconName} as the file name and parses it, preferring a
     * classpath resource of that name over a file of that path.
     */
    private Document parseMainLexicon(String lexiconName) throws Exception {
        System.err.println("Loading " + lexiconName);
        this.filename = lexiconName;
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // A null resource is legal in try-with-resources; it is simply not closed.
        try (InputStream bundled = getClass().getClassLoader().getResourceAsStream(lexiconName)) {
            return bundled != null ? builder.parse(bundled) : builder.parse(new File(lexiconName));
        }
    }

    private void init_main(Document document, String str, boolean z) throws Exception {
        init_main(document, str, z, true, null);
    }

    private void init_main(Document document, String str, ArrayList<String> arrayList) throws Exception {
        init_main(document, str, true, true, arrayList);
    }

    /**
     * Builds the lexicon from a parsed main document.
     *
     * @param document  parsed main lexicon; its root element must be {@code Morphology}
     * @param directory directory holding the sub-lexicon files, or {@code null}
     *                  to look them up on the classpath
     * @param loadAll   load every {@code Corpus} entry
     * @param loadCore  when {@code loadAll} is false, still load entries of type {@code core}
     * @param excluded  sub-lexicon names to skip; may be {@code null}
     */
    private void init_main(Document document, String directory, boolean loadAll, boolean loadCore, ArrayList<String> excluded) throws Exception {
        Element root = document.getDocumentElement();
        if (!root.getNodeName().equalsIgnoreCase("Morphology")) {
            throw new Error("Node '" + root.getNodeName() + "' but Morphology expected!");
        }
        Node revisionAttr = root.getAttributes().getNamedItem("revision");
        if (revisionAttr != null) {
            this.revision = revisionAttr.getTextContent();
        }
        Node licenceAttr = root.getAttributes().getNamedItem("licence");
        if (licenceAttr != null) {
            this.licence = licenceAttr.getTextContent();
        }

        NodeList children = root.getChildNodes();
        this.paradigms = new ArrayList<>();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeName().equals("Paradigm")) {
                addParadigm(new Paradigm(this, child));
            }
            if (child.getNodeName().equals("Corpus")) {
                Node fileNameAttr = child.getAttributes().getNamedItem("FileName");
                Node typeAttr = child.getAttributes().getNamedItem("Type");
                boolean isCore = typeAttr != null && typeAttr.getTextContent().equalsIgnoreCase("core");
                if (fileNameAttr != null && (loadAll || (isCore && loadCore))) {
                    this.corpusFileNames.add(fileNameAttr.getTextContent());
                }
            }
        }

        // Sub-lexicons are loaded only after all paradigms exist, because they refer to paradigms by ID.
        for (String sublexiconName : this.corpusFileNames) {
            if (excluded == null || !excluded.contains(sublexiconName)) {
                loadSublexicon(directory, sublexiconName);
            }
        }

        this.prefixes = new ArrayList<>(Arrays.asList(
                "aiz", "ap", "at", "ie", "iz", "ne", "no", "pa", "pār", "pie", "sa", "uz"));
        System.err.println("Lexicon " + (this.revision != null ? this.revision : "") + " loaded");
    }

    /**
     * Locates one sub-lexicon and dispatches it to the XML or JSON loader.
     *
     * <p>With a non-null {@code directory} the file is read from disk. Otherwise
     * it is read from the classpath; if it is not there, a file of that name
     * relative to the working directory is tried, which covers a main lexicon
     * that was opened as a bare file name and therefore has no parent directory.
     *
     * @throws IOException if the sub-lexicon cannot be found in either place
     */
    private void loadSublexicon(String directory, String name) throws Exception {
        boolean isXml = name.endsWith(".xml");
        if (!isXml && !name.endsWith(".json")) {
            throw new Error(String.format("Unsupported file format for sublexicon '%s'", name));
        }

        File onDisk = directory != null ? new File(directory + File.separatorChar + name) : null;
        InputStream bundled = null;
        if (onDisk == null) {
            bundled = getClass().getClassLoader().getResourceAsStream(name);
            if (bundled == null) {
                onDisk = new File(name);
                if (!onDisk.isFile()) {
                    throw new IOException(String.format("Can't find sublexicon '%s' on the classpath or as a file", name));
                }
            }
        }

        if (bundled != null) {
            try (InputStream in = bundled) {
                if (isXml) {
                    load_sublexicon_xml(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in));
                } else {
                    load_sublexicon_json(in);
                }
            }
        } else if (isXml) {
            load_sublexicon_xml(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(onDisk));
        } else {
            try (InputStream in = new FileInputStream(onDisk)) {
                load_sublexicon_json(in);
            }
        }
    }

    /**
     * Reads a line-delimited JSON sub-lexicon: every line is parsed into a
     * {@link Lexeme} attached to this lexicon. Entries named {@code irt:1} or
     * {@code art:1} are marked rare, and lexemes rejected by
     * {@link #isUnwantedJsonLexeme(Lexeme)} are removed from their paradigm again.
     *
     * <p>The stream is decoded as UTF-8 and is not closed here; the caller owns it.
     * An {@link IOException} while reading is reported and ends loading of this
     * sub-lexicon; lexemes read so far are kept.
     *
     * @throws ParseException if a line is not valid JSON
     */
    private void load_sublexicon_json(InputStream inputStream) throws ParseException {
        // Explicit charset: the platform default would garble non-ASCII letters on non-UTF-8 systems.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
        JSONParser parser = new JSONParser();
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Lexeme lexeme = new Lexeme((JSONObject) parser.parse(line), this);
                if (lexeme.isMatchingStrong(AttributeNames.i_EntryName, "irt:1")
                        || lexeme.isMatchingStrong(AttributeNames.i_EntryName, "art:1")) {
                    lexeme.addAttribute(AttributeNames.i_Frequency, AttributeNames.v_Rare);
                }
                if (isUnwantedJsonLexeme(lexeme)) {
                    lexeme.getParadigm().removeLexeme(lexeme);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Decides whether a lexeme read from a JSON sub-lexicon must be dropped:
     * regional usage, paradigm 29 (hard-coded ID; its meaning is not visible
     * in this class), or a pronoun, numeral, conjunction or particle.
     */
    private static boolean isUnwantedJsonLexeme(Lexeme lexeme) {
        return lexeme.isMatchingStrong(AttributeNames.i_Usage, AttributeNames.v_Regional)
                || lexeme.getParadigm().getID() == 29
                || hasPartOfSpeech(lexeme, AttributeNames.v_Pronoun)
                || hasPartOfSpeech(lexeme, AttributeNames.v_Numeral)
                || hasPartOfSpeech(lexeme, AttributeNames.v_Conjunction)
                || hasPartOfSpeech(lexeme, AttributeNames.v_Particle);
    }

    /** True if the lexeme's part of speech, its Tezaurs category, or its paradigm's part of speech equals {@code partOfSpeech}. */
    private static boolean hasPartOfSpeech(Lexeme lexeme, String partOfSpeech) {
        return lexeme.isMatchingStrong(AttributeNames.i_PartOfSpeech, partOfSpeech)
                || lexeme.isMatchingStrong(AttributeNames.i_TezaursCategory, partOfSpeech)
                || lexeme.getParadigm().isMatchingStrong(AttributeNames.i_PartOfSpeech, partOfSpeech);
    }

    /**
     * Adds the lexemes of an XML sub-lexicon to the already loaded paradigms.
     * {@code Paradigm} elements without an {@code ID} attribute are ignored.
     *
     * @throws Exception if a referenced paradigm ID does not exist in this lexicon
     */
    private void load_sublexicon_xml(Document document) throws Exception {
        Element root = document.getDocumentElement();
        if (!root.getNodeName().equalsIgnoreCase("Lexicon")) {
            throw new Error("Node '" + root.getNodeName() + "' but Lexicon expected!");
        }
        NodeList children = root.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (!child.getNodeName().equals("Paradigm")) {
                continue;
            }
            Node idAttr = child.getAttributes().getNamedItem("ID");
            if (idAttr == null) {
                continue;
            }
            int paradigmId = Integer.parseInt(idAttr.getTextContent());
            Paradigm target = paradigmByID(paradigmId);
            if (target == null) {
                throw new Exception(String.format("When loading subcorpus, cannot find paradigm %d in main morphology", Integer.valueOf(paradigmId)));
            }
            target.addLexemesFromXML(child);
        }
    }

    /**
     * Saves all paradigms as XML to {@code str}. The data is first written to
     * {@code str + ".new"}; a previous file is kept as {@code str + ".bak"}.
     *
     * @param str path of the file to write
     * @throws IOException if writing fails or the new file cannot be moved into place
     */
    public void toXML(String str) throws IOException {
        System.out.println("Warning! XML saving possibly obsolete after multuple-lexicon changes");
        File fresh = new File(str + ".new");
        try (BufferedWriter writer = utf8Writer(new FileOutputStream(fresh))) {
            writeXmlHeader(writer);
            for (Paradigm paradigm : this.paradigms) {
                paradigm.toXML(writer);
            }
            writer.write("</Lexicon>");
        }
        moveIntoPlace(fresh, new File(str), new File(str + ".bak"));
    }

    /**
     * Saves one sub-lexicon as XML to {@code str}, using the same
     * ".new" / ".bak" scheme as {@link #toXML(String)}.
     *
     * @param str  path of the file to write
     * @param str2 passed unchanged to {@code Paradigm.toXML_sub} for every paradigm
     * @throws IOException if writing fails or the new file cannot be moved into place
     */
    public void toXML_sub(String str, String str2) throws IOException {
        File fresh = new File(str + ".new");
        try (BufferedWriter writer = utf8Writer(new FileOutputStream(fresh))) {
            writeXmlHeader(writer);
            for (Paradigm paradigm : this.paradigms) {
                paradigm.toXML_sub(writer, str2);
            }
            writer.write("</Lexicon>");
        }
        moveIntoPlace(fresh, new File(str), new File(str + ".bak"));
    }

    /**
     * Writes all paradigms as UTF-8 XML to the given stream and closes it.
     *
     * @param outputStream destination; closed by this method, also on failure
     * @throws IOException if writing fails
     */
    public void toXML(OutputStream outputStream) throws IOException {
        System.out.println("Warning! XML saving possibly obsolete after multuple-lexicon changes");
        try (BufferedWriter writer = utf8Writer(outputStream)) {
            writeXmlHeader(writer);
            for (Paradigm paradigm : this.paradigms) {
                paradigm.toXML(writer);
            }
            writer.write("</Lexicon>");
        }
    }

    /** Wraps a byte stream in a buffered UTF-8 writer. */
    private static BufferedWriter utf8Writer(OutputStream out) {
        return new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
    }

    /** Writes the XML declaration and the opening {@code Lexicon} tag with revision and licence attributes. */
    private void writeXmlHeader(BufferedWriter writer) throws IOException {
        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        writer.write("<Lexicon revision=\"" + escapeXmlAttribute(this.revision)
                + "\" licence=\"" + escapeXmlAttribute(this.licence) + "\">\n");
    }

    /** Escapes a value for use inside a double-quoted XML attribute; {@code null} becomes the empty string. */
    private static String escapeXmlAttribute(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length());
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Replaces {@code target} with {@code fresh}, keeping the previous
     * {@code target} as {@code backup}. Failing to create the backup is
     * tolerated; failing to install the new file is not, since the saved
     * data would otherwise be left behind unnoticed in the temporary file.
     */
    private static void moveIntoPlace(File fresh, File target, File backup) throws IOException {
        if (backup.exists()) {
            backup.delete();
        }
        if (target.exists()) {
            target.renameTo(backup);
        }
        if (!fresh.renameTo(target)) {
            throw new IOException("Could not move '" + fresh + "' to '" + target + "'");
        }
    }

    /**
     * Finds a paradigm by ID.
     *
     * @param i paradigm ID
     * @return the matching paradigm (the last one, should several share the ID), or {@code null}
     */
    public Paradigm paradigmByID(int i) {
        Paradigm match = null;
        for (Paradigm candidate : this.paradigms) {
            if (candidate.getID() == i) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Finds an ending by ID through the endings index.
     *
     * @param i ending ID
     * @return whatever {@code AllEndings.endingByID} returns for this ID
     */
    public Ending endingByID(int i) {
        return getAllEndings().endingByID(i);
    }

    /**
     * Finds a lexeme by ID across all paradigms.
     *
     * @param i lexeme ID
     * @return the matching lexeme (from the last paradigm containing the ID), or {@code null}
     */
    public Lexeme lexemeByID(int i) {
        Lexeme match = null;
        for (Paradigm paradigm : this.paradigms) {
            Lexeme candidate = paradigm.lexemesByID.get(Integer.valueOf(i));
            if (candidate != null) {
                match = candidate;
            }
        }
        return match;
    }

    /**
     * Advances the lexeme ID counter to the next ID not used by any lexeme.
     *
     * @return the new, unused lexeme ID
     */
    public int newLexemeID() {
        do {
            this.lexeme_id_counter++;
        } while (lexemeByID(this.lexeme_id_counter) != null);
        return this.lexeme_id_counter;
    }

    /**
     * Creates a lexeme from a word form and the ending that form carries, and
     * adds it to the ending's paradigm.
     *
     * @param str  the word form; its capitalisation is carried over to the lemma
     * @param i    ID of the ending that {@code str} ends with
     * @param str2 value stored as the lexeme's source attribute
     * @return the new lexeme, or {@code null} if the ending is unknown, no stem
     *         variant can be derived, or any exception occurs (which is reported
     *         on {@code System.err})
     */
    public Lexeme createLexeme(String str, int i, String str2) {
        Ending ending = endingByID(i);
        if (ending == null) {
            System.err.println("createLexeme - no ending with id " + i + " for word '" + str + "' (source " + str2 + ")");
            return null;
        }
        try {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale (e.g. Turkish dotless i).
            ArrayList<Variants> stemVariants = Mijas.mijuVarianti(
                    ending.stem(str.toLowerCase(Locale.ROOT)), ending.getMija(), p_firstcap.matcher(str).matches());
            if (stemVariants.isEmpty()) {
                return null;
            }
            String stem = stemVariants.get(0).celms;
            Lexeme lexeme = new Lexeme();
            lexeme.setStemCount(ending.getParadigm().getStems());
            lexeme.setStem(ending.stemID - 1, stem);
            ending.getParadigm().addLexeme(lexeme);
            lexeme.addAttribute(AttributeNames.i_Lemma, recapitalize(lexeme.getValue(AttributeNames.i_Lemma), str));
            lexeme.addAttribute(AttributeNames.i_Source, str2);
            clearCache();
            return lexeme;
        } catch (Exception e) {
            System.err.println("createLexeme failed for word '" + str + "', ending " + i + ", source " + str2);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a lexeme for a word in a given paradigm. The paradigm's lemma
     * ending is tried first, then every non-vocative ending of the paradigm in
     * order; the first ending the word ends with is used.
     *
     * @param str  the word
     * @param i    paradigm ID
     * @param str2 value stored as the lexeme's source attribute
     * @return the result of {@link #createLexeme(String, int, String)} for the chosen ending
     * @throws Exception if the paradigm is unknown, {@code str} is {@code null},
     *                   the paradigm has no lemma ending, or no ending fits
     */
    public Lexeme createLexemeFromParadigm(String str, int i, String str2) throws Exception {
        Paradigm paradigm = paradigmByID(i);
        if (paradigm == null) {
            throw new Exception(String.format("createLexemeFromParadigm - invalid paradigm id %d passed for lexeme %s", Integer.valueOf(i), str));
        }
        if (str == null) {
            throw new Exception("createLexemeFromParadigm - null lexeme string passed");
        }
        Ending lemmaEnding = paradigm.getLemmaEnding();
        if (lemmaEnding == null) {
            throw new Exception(String.format("createLexemeFromParadigm - null lemma ending at paradigm id %d for lexeme %s", Integer.valueOf(i), str));
        }
        if (str.endsWith(lemmaEnding.getEnding())) {
            return createLexeme(str, lemmaEnding.getID(), str2);
        }
        for (Ending candidate : paradigm.endings) {
            boolean vocative = candidate.isMatchingStrong(AttributeNames.i_Case, AttributeNames.v_Vocative);
            if (!vocative && str.endsWith(candidate.getEnding())) {
                return createLexeme(str, candidate.getID(), str2);
            }
        }
        throw new Exception(String.format("createLexemeFromParadigm - couldn't create lexeme %s with paradigm %d", str, Integer.valueOf(i)));
    }

    /**
     * Appends a paradigm. A paradigm whose ID is 0 is first given the ID
     * "highest existing ID + 1".
     *
     * @param paradigm the paradigm to add
     */
    public void addParadigm(Paradigm paradigm) {
        if (paradigm.getID() == 0) {
            int highestId = 0;
            for (Paradigm existing : this.paradigms) {
                highestId = Math.max(highestId, existing.getID());
            }
            paradigm.setID(highestId + 1);
        }
        this.paradigms.add(paradigm);
    }

    /**
     * Removes a paradigm from this lexicon.
     *
     * @param paradigm the paradigm to remove
     */
    public void removeParadigm(Paradigm paradigm) {
        this.paradigms.remove(paradigm);
    }

    /** @return the highest ending ID over all paradigms, or 0 if there are no endings with a positive ID */
    public int maxEndingID() {
        int highestId = 0;
        for (Paradigm paradigm : this.paradigms) {
            for (Ending ending : paradigm.endings) {
                highestId = Math.max(highestId, ending.getID());
            }
        }
        return highestId;
    }

    /** Does nothing in this class; called after a lexeme has been created. */
    public void clearCache() {
    }

    /**
     * Transfers the capitalisation pattern of {@code str2} onto {@code str}:
     * a capitalised template capitalises the first letter, an all-upper-case
     * template upper-cases everything, and a hyphenated template with two
     * capitalised parts also capitalises the letter after the first hyphen.
     *
     * <p>A {@code null} or empty {@code str}, or a {@code null} or empty
     * {@code str2}, leaves {@code str} unchanged.
     *
     * @param str  the text to re-capitalise (e.g. a lemma)
     * @param str2 the word whose capitalisation is copied
     * @return the re-capitalised text
     */
    public static String recapitalize(String str, String str2) {
        // An empty template has no capitalisation to copy, although p_allcaps would match it.
        if (str == null || str.isEmpty() || str2 == null || str2.isEmpty()) {
            return str;
        }
        if (p_firstcap.matcher(str2).matches()) {
            str = str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
        }
        if (p_allcaps.matcher(str2).matches()) {
            str = str.toUpperCase(Locale.ROOT);
        }
        if (p_doublesurname.matcher(str2).matches()) {
            int afterHyphen = str.indexOf('-') + 1;
            // afterHyphen == 0 means "no hyphen"; == length means the hyphen is the last character.
            if (afterHyphen > 0 && afterHyphen < str.length()) {
                str = str.substring(0, afterHyphen)
                        + str.substring(afterHyphen, afterHyphen + 1).toUpperCase(Locale.ROOT)
                        + str.substring(afterHyphen + 1);
            }
        }
        return str;
    }
}
