package lv.semti.morphology.corpus;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Locale;
import lv.semti.morphology.analyzer.Analyzer;
import lv.semti.morphology.analyzer.MarkupConverter;
import lv.semti.morphology.analyzer.Wordform;
import lv.semti.morphology.attributes.AttributeNames;
import lv.semti.morphology.attributes.AttributeValues;

/* JADX WARN: Classes with same name are omitted:
  input_file:doc/demo/lib/morphology.jar:lv/semti/morphology/corpus/CorpusProcessing.class
 */
/* loaded from: input_file:lv/semti/morphology/corpus/CorpusProcessing.class */
/**
 * Command-line helpers for working with annotated corpus files: a converter for a
 * legacy line format and an evaluation run that compares analyzer output against the
 * tags stored in a corpus.
 */
public class CorpusProcessing {
    /** Classpath resource evaluated by {@link #main(String[])} when no arguments are given. */
    private static final String DEFAULT_CORPUS_RESOURCE = "train.txt";

    /**
     * Rewrites a legacy corpus file into the bracketed tag format.
     *
     * <p>Blank lines are dropped. A line containing {@code <} is read as
     * {@code form <TAG> lemma}: the form is everything up to one character before
     * {@code <}, the tag is the lower-cased text between {@code <} and the first
     * {@code >}, and the lemma starts two characters after that {@code >}. Any other
     * line is treated as a single punctuation token that is used both as form and as
     * lemma. Each kept line is written as {@code form <[t,a,g],'lemma',''>}, i.e. with
     * the tag characters separated by commas.
     *
     * <p>Both files are read and written as UTF-8 and are closed even when processing fails.
     *
     * @param str  path of the legacy input file
     * @param str2 path of the output file (created or overwritten)
     * @throws IOException if either file cannot be opened, read or written
     * @throws StringIndexOutOfBoundsException if a tagged line does not have the layout
     *         described above or its tag is shorter than the positions the rewrite rules touch
     */
    public static void legacyTransform(String str, String str2) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(str), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(str2), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String form;
                String lemma;
                String tag;
                int tagStart = line.indexOf('<');
                if (tagStart > -1) {
                    int tagEnd = line.indexOf('>');
                    // The format has one separator character between form and '<' and
                    // between '>' and the lemma, hence the -1 / +2 offsets.
                    form = line.substring(0, tagStart - 1);
                    // Locale.ROOT keeps the lower-casing independent of the default locale.
                    String legacyTag = line.substring(tagStart + 1, tagEnd).toLowerCase(Locale.ROOT);
                    lemma = line.substring(tagEnd + 2);
                    tag = convertLegacyTag(legacyTag, lemma);
                } else {
                    form = line.trim();
                    lemma = form;
                    tag = punctuationTag(form);
                }
                writer.write(formatEntry(form, tag, lemma));
            }
        }
    }

    /** Applies the positional verb rules and the lemma-specific overrides to a lower-cased legacy tag. */
    private static String convertLegacyTag(String legacyTag, String lemma) {
        String tag = legacyTag;
        // None of the rules below modifies positions 0 or 3, so both can be read once.
        boolean verb = tag.startsWith("v");
        char pos3 = verb ? tag.charAt(3) : '\0';

        if (verb && pos3 != 'p') {
            // Drop the character at position 6, then blank out positions 1 and 5.
            tag = tag.substring(0, 6) + tag.substring(7);
            tag = replaceCharAt(tag, 1, "_");
            tag = replaceCharAt(tag, 5, "_");
            if (tag.charAt(7) == '3') {
                tag = replaceCharAt(tag, 8, "0");
            }
        }
        if (verb && pos3 == 'c') {
            tag = replaceCharAt(tag, 4, "0");
        }
        if (verb && pos3 == 'd') {
            tag = tag.substring(0, 4) + "0" + tag.substring(5, 7) + "00" + tag.substring(9);
        }

        // Lemma-specific overrides; these are applied regardless of the tag's first character.
        if (lemma.equalsIgnoreCase("nebūt") || lemma.equalsIgnoreCase("būt") || lemma.equalsIgnoreCase("gribēt")) {
            tag = replaceCharAt(tag, 1, "_");
        }
        if (lemma.equalsIgnoreCase("varēt") || lemma.equalsIgnoreCase("nespēt")) {
            tag = replaceCharAt(tag, 1, "o");
        }
        if (lemma.equalsIgnoreCase("tikt")) {
            tag = replaceCharAt(tag, 1, "t");
        }
        if (lemma.equalsIgnoreCase("varēt") || lemma.equalsIgnoreCase("palīdzēt")
                || lemma.equalsIgnoreCase("censties") || lemma.equalsIgnoreCase("gribēt")) {
            tag = replaceCharAt(tag, 5, "t");
        }

        // Must run after the lemma overrides, as in the original rule order.
        if (verb && pos3 == 'n') {
            tag = tag.substring(0, 4) + "0" + tag.substring(5, 7) + "000" + tag.substring(10);
        }
        return tag;
    }

    /** Returns {@code text} with the single character at {@code index} replaced by {@code replacement}. */
    private static String replaceCharAt(String text, int index, String replacement) {
        return text.substring(0, index) + replacement + text.substring(index + 1);
    }

    /** Maps a punctuation token to its two-letter tag; unknown tokens are echoed to stdout and tagged "z". */
    private static String punctuationTag(String token) {
        switch (token) {
            case ",":
            case ";":
                return "zc";
            case "\"":
            case "\\":
                return "zq";
            case ".":
            case "?":
            case "...":
            case "!":
                return "zs";
            case "(":
            case ")":
                return "zb";
            case "-":
                return "zd";
            case ":":
                return "zo";
            default:
                System.out.println(token);
                return "z";
        }
    }

    /** Builds one output line: {@code form <[t,a,g],'lemma',''>} followed by a newline. */
    private static String formatEntry(String form, String tag, String lemma) {
        StringBuilder entry = new StringBuilder();
        entry.append(form).append(" <[");
        for (int i = 0; i < tag.length(); i++) {
            if (i > 0) {
                entry.append(",");
            }
            entry.append(tag.charAt(i));
        }
        entry.append("],'").append(lemma).append("',''>\n");
        return entry.toString();
    }

    /**
     * Runs the analyzer over a tab-separated corpus loaded from the classpath and reports
     * how often its output agrees with the corpus tags.
     *
     * <p>Blank lines and lines containing {@code <s>} or {@code </s>} are ignored. On every
     * other line the first column is analyzed and the second column is the expected tag;
     * further columns are not used. Lines with fewer than two columns are skipped with a
     * warning on {@code System.err}. A token counts as an exact match when some analysis
     * has the expected tag (ignoring case) and as a weak match when some analysis satisfies
     * {@code isMatchingWeak} for the converted tag. Lexeme and ending IDs of exact matches
     * are accumulated in a {@link Statistics} object.
     *
     * <p>The summary is printed to {@code System.out} as UTF-8 and the statistics are written
     * to {@link Statistics#DEFAULT_STATISTICS_FILE}. I/O failures, including a missing
     * resource, are reported via a stack trace and end the run without throwing.
     *
     * @param analyzer analyzer used to produce candidate analyses
     * @param str      classpath resource name of the corpus
     */
    public static void processCorpus(Analyzer analyzer, String str) {
        Statistics statistics = new Statistics();
        try (BufferedReader reader = openCorpusResource(str)) {
            // System.out is only flushed, never closed, so later output keeps working.
            PrintWriter report = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
            int total = 0;
            int exactMatches = 0;
            int weakMatches = 0;
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0 || line.contains("<s>") || line.contains("</s>")) {
                    continue;
                }
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    System.err.println("Skipping malformed line " + lineNumber + " of " + str + ": " + line);
                    continue;
                }
                String form = columns[0];
                String expectedTag = columns[1];
                AttributeValues expected = MarkupConverter.fromKamolsMarkup(expectedTag);
                boolean exact = false;
                boolean weak = false;
                Iterator<Wordform> analyses = analyzer.analyze(form).wordforms.iterator();
                while (analyses.hasNext()) {
                    Wordform candidate = analyses.next();
                    if (candidate.getTag().equalsIgnoreCase(expectedTag)) {
                        exact = true;
                        String lexemeId = candidate.getValue(AttributeNames.i_LexemeID);
                        if (lexemeId != null) {
                            statistics.addLexeme(Integer.parseInt(lexemeId));
                        }
                        String endingId = candidate.getValue(AttributeNames.i_EndingID);
                        if (endingId != null) {
                            statistics.addEnding(Integer.parseInt(endingId));
                        }
                    }
                    if (candidate.isMatchingWeak(expected)) {
                        weak = true;
                    }
                }
                total++;
                if (exact) {
                    exactMatches++;
                }
                if (weak) {
                    weakMatches++;
                }
            }
            report.printf("Sakrīt %d/%d : %.1f%%\n", exactMatches, total, percentage(exactMatches, total));
            report.printf("Der    %d/%d : %.1f%%\n", weakMatches, total, percentage(weakMatches, total));
            report.flush();
            // Closing the writer here guarantees buffered statistics reach the file.
            try (BufferedWriter statisticsWriter = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(Statistics.DEFAULT_STATISTICS_FILE), StandardCharsets.UTF_8))) {
                statistics.toXML(statisticsWriter);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Opens a classpath resource as a UTF-8 reader, failing with an IOException when it is absent. */
    private static BufferedReader openCorpusResource(String resourceName) throws IOException {
        InputStream stream = CorpusProcessing.class.getClassLoader().getResourceAsStream(resourceName);
        if (stream == null) {
            throw new IOException("Corpus resource not found on classpath: " + resourceName);
        }
        return new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    }

    /** Returns {@code count} as a percentage of {@code total}, or 0 when {@code total} is 0. */
    private static double percentage(int count, int total) {
        return total == 0 ? 0.0d : (100.0d * count) / total;
    }

    /**
     * Evaluates each classpath corpus named on the command line, or {@code train.txt}
     * when no arguments are given, using a single default-constructed analyzer.
     *
     * @param strArr classpath resource names of the corpora to evaluate
     * @throws Exception if the analyzer cannot be constructed
     */
    public static void main(String[] strArr) throws Exception {
        Analyzer analyzer = new Analyzer();
        if (strArr.length == 0) {
            processCorpus(analyzer, DEFAULT_CORPUS_RESOURCE);
            return;
        }
        for (String corpusResource : strArr) {
            processCorpus(analyzer, corpusResource);
        }
    }
}
