package de.julielab.jcore.ae.opennlp.chunk.convert;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import java.io.BufferedInputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:de/julielab/jcore/ae/opennlp/chunk/convert/ToIOBConverter.class */
public class ToIOBConverter {
    // Token boundary detector engine, created from its descriptor name; run first in tokenize().
    // NOTE(review): the uimaFIT factory calls used by these three initializers presumably declare
    // checked exceptions, so the original source most likely assigned the fields in a constructor
    // with a throws clause - confirm against the original source before relying on this form.
    private AnalysisEngine jtbd = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.jtbd.desc.jcore-jtbd-ae-biomedical-english", new Object[0]);
    // Part-of-speech tagger engine, created from its descriptor name; run after jtbd in tokenize().
    private AnalysisEngine pennbioIEPosTagger = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.opennlp.postag.desc.jcore-opennlp-postag-ae-biomedical-english", new Object[0]);
    // Single JCas instance that tokenize() resets and refills on every call.
    private JCas jCas = JCasFactory.createJCas(new String[]{"de.julielab.jcore.types.jcore-morpho-syntax-types"});

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/julielab/jcore/ae/opennlp/chunk/convert/ToIOBConverter$ChunkRecord.class */
    /**
     * One line of IOB output: a token, its tag, and the chunk label that is
     * assembled from an IOB prefix and a constituent tag.
     *
     * <p>The fields are mutable because the enclosing converter rewrites the
     * prefix and the constituent tag of records after they have been created.
     */
    public class ChunkRecord {
        /** Text of the token this record describes. */
        private String tokenPart;
        /** Tag of the token, printed as the second column. */
        private String tokTag;
        /** IOB prefix; the enclosing class assigns "B-", "I-" or the empty string. */
        private String iobState;
        /** Constituent tag that follows the prefix; "O" is used for tokens outside of chunks. */
        private String consTag;

        /**
         * Creates a record from its four column parts.
         *
         * @param tokenPart the token text
         * @param tokTag    the tag of the token
         * @param iobState  the IOB prefix
         * @param consTag   the constituent tag
         */
        public ChunkRecord(String tokenPart, String tokTag, String iobState, String consTag) {
            this.tokenPart = tokenPart;
            this.tokTag = tokTag;
            this.iobState = iobState;
            this.consTag = consTag;
        }

        /**
         * Renders the record as one output line.
         *
         * @return token, tag and chunk label separated by single spaces, where the
         *         chunk label is the prefix directly followed by the constituent tag,
         *         terminated by a line feed
         */
        public String getRecordLine() {
            StringBuilder line = new StringBuilder();
            line.append(tokenPart).append(' ');
            line.append(tokTag).append(' ');
            line.append(iobState).append(consTag);
            line.append('\n');
            return line.toString();
        }
    }

    /**
     * Command line entry point that converts one input file, or every
     * {@code .xml} / {@code .xml.gz} file of a directory, to IOB output.
     *
     * <p>Arguments:
     * <ol>
     *   <li>the input file or directory,</li>
     *   <li>the output file or directory,</li>
     *   <li>optional: {@code true} to write all output into the single file given as
     *       second argument instead of one output file per input file.</li>
     * </ol>
     *
     * @param strArr the command line arguments
     * @throws IllegalArgumentException if one path is a file and the other an existing
     *                                  directory (or vice versa) and single-file output
     *                                  was not requested
     * @throws IllegalStateException    if the input directory cannot be listed
     * @throws Exception                if reading, converting or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 2) {
            System.err.println("Usage: " + ToIOBConverter.class.getCanonicalName() + " <from file or dir> <to file or dir> [true: create single output file]");
            System.exit(1);
        }
        File source = new File(strArr[0]);
        File target = new File(strArr[1]);
        // Arguments beyond the third one are ignored; the flag itself is optional.
        boolean singleOutputFile = strArr.length >= 3 && Boolean.parseBoolean(strArr[2]);
        boolean fileToDirectory = source.isFile() && target.exists() && target.isDirectory();
        boolean directoryToFile = source.isDirectory() && target.exists() && target.isFile();
        if (!singleOutputFile && (fileToDirectory || directoryToFile)) {
            throw new IllegalArgumentException("Both paths must be directories or both must be files.");
        }
        System.out.println("Input: " + source.getAbsolutePath());
        System.out.println("Output: " + target.getAbsolutePath());
        System.out.println("Output is written as a single file: " + singleOutputFile);

        File[] inputFiles;
        if (source.isDirectory()) {
            inputFiles = source.listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz"));
            // listFiles returns null when the directory cannot be read; fail with a clear message
            // instead of a NullPointerException in the loops below.
            if (inputFiles == null) {
                throw new IllegalStateException("Could not list the files of directory " + source.getAbsolutePath());
            }
            if (!target.exists() && !singleOutputFile) {
                System.out.println("Creating target directory " + target.getAbsolutePath());
                target.mkdirs();
            }
        } else {
            inputFiles = new File[]{source};
        }

        System.out.println("Converting");
        ToIOBConverter converter = new ToIOBConverter();
        if (singleOutputFile) {
            try (BufferedWriter writer = FileUtilities.getWriterToFile(target)) {
                for (File inputFile : inputFiles) {
                    // Each input stream is closed as soon as its file has been converted.
                    try (BufferedInputStream input = FileUtilities.getInputStreamFromFile(inputFile)) {
                        converter.convert(input, writer);
                    }
                }
            }
        } else {
            for (File inputFile : inputFiles) {
                // NOTE(review): the output is always placed below the target path, also when the
                // input is a single file; writing file-to-file therefore looks unsupported although
                // the argument check above permits it - confirm the intended behavior.
                File outputFile = new File(target.getAbsolutePath() + File.separator + inputFile.getName().replace(".xml", ".iob"));
                try (BufferedInputStream input = FileUtilities.getInputStreamFromFile(inputFile);
                     BufferedWriter writer = FileUtilities.getWriterToFile(outputFile)) {
                    converter.convert(input, writer);
                }
            }
        }
        System.out.println("Done.");
    }

    /**
     * Converts the contents of one input file and writes the result to one output file.
     *
     * <p>Both the input stream and the writer are closed when the conversion ends,
     * whether it completes normally or with an exception.
     *
     * @param file  the file to read from
     * @param file2 the file to write the converted output to
     * @param z     not read by this method; kept so existing callers continue to compile
     * @throws Exception if opening either file or the conversion itself fails
     */
    public void convert(File file, File file2, boolean z) throws Exception {
        try (BufferedInputStream input = FileUtilities.getInputStreamFromFile(file);
             BufferedWriter writer = FileUtilities.getWriterToFile(file2)) {
            convert(input, writer);
        }
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Code restructure failed: missing block: B:100:0x0358, code lost:
    
        if (r0.hasNext() == false) goto L130;
     */
    /* JADX WARN: Code restructure failed: missing block: B:101:0x035b, code lost:
    
        r0 = (de.julielab.jcore.ae.opennlp.chunk.convert.ToIOBConverter.ChunkRecord) r0.next();
     */
    /* JADX WARN: Code restructure failed: missing block: B:102:0x0371, code lost:
    
        if (r0.consTag.equals("S") == false) goto L134;
     */
    /* JADX WARN: Code restructure failed: missing block: B:105:0x0380, code lost:
    
        if (mapPosTagToPhraseType(r0.tokTag).isEmpty() != false) goto L131;
     */
    /* JADX WARN: Code restructure failed: missing block: B:107:0x0389, code lost:
    
        r0.consTag = "O";
        r0.iobState = "";
     */
    /* JADX WARN: Code restructure failed: missing block: B:111:0x0383, code lost:
    
        r22 = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:116:0x039c, code lost:
    
        if (r22 == false) goto L118;
     */
    /* JADX WARN: Code restructure failed: missing block: B:118:0x03a5, code lost:
    
        r0 = r0.iterator();
     */
    /* JADX WARN: Code restructure failed: missing block: B:120:0x03b5, code lost:
    
        if (r0.hasNext() == false) goto L137;
     */
    /* JADX WARN: Code restructure failed: missing block: B:121:0x03b8, code lost:
    
        r10.write(((de.julielab.jcore.ae.opennlp.chunk.convert.ToIOBConverter.ChunkRecord) r0.next()).getRecordLine());
     */
    /* JADX WARN: Code restructure failed: missing block: B:124:0x03d9, code lost:
    
        if (isPunctuation(r16.tokTag) == false) goto L111;
     */
    /* JADX WARN: Code restructure failed: missing block: B:125:0x03dc, code lost:
    
        r16.consTag = "O";
        r16.iobState = "";
     */
    /* JADX WARN: Code restructure failed: missing block: B:126:0x03ea, code lost:
    
        r10.write(r16.getRecordLine());
        r16 = null;
        r10.write("\n");
     */
    /* JADX WARN: Code restructure failed: missing block: B:129:0x039f, code lost:
    
        r16 = null;
     */
    /* JADX WARN: Code restructure failed: missing block: B:18:0x00a7, code lost:
    
        switch(r21) {
            case 0: goto L20;
            case 1: goto L21;
            case 2: goto L22;
            default: goto L78;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:19:0x00c0, code lost:
    
        r0.clear();
     */
    /* JADX WARN: Code restructure failed: missing block: B:20:0x00ca, code lost:
    
        r13 = "";
        r12 = r0.getAttributeValue("", "cat");
        r0.add(r12);
     */
    /* JADX WARN: Code restructure failed: missing block: B:21:0x00e9, code lost:
    
        r0 = r0.getAttributeValue("", "cat");
        r0 = r0.getElementText();
        r25 = new java.util.ArrayList();
        r25.add(new java.lang.String[]{r0, r0});
     */
    /* JADX WARN: Code restructure failed: missing block: B:22:0x0123, code lost:
    
        if (r0.contains(" ") == false) goto L25;
     */
    /* JADX WARN: Code restructure failed: missing block: B:23:0x0126, code lost:
    
        r25 = tokenize(r0);
     */
    /* JADX WARN: Code restructure failed: missing block: B:25:0x0135, code lost:
    
        if (r0.contains("(") == false) goto L28;
     */
    /* JADX WARN: Code restructure failed: missing block: B:26:0x0138, code lost:
    
        r25 = balanceParenthesis(r25);
     */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x0140, code lost:
    
        r0 = r25.iterator();
     */
    /* JADX WARN: Code restructure failed: missing block: B:29:0x0150, code lost:
    
        if (r0.hasNext() == false) goto L126;
     */
    /* JADX WARN: Code restructure failed: missing block: B:30:0x0153, code lost:
    
        r0 = r0.next();
        r0 = r0[0];
        r0 = r0[1];
     */
    /* JADX WARN: Code restructure failed: missing block: B:31:0x0170, code lost:
    
        if (r13.isEmpty() == false) goto L36;
     */
    /* JADX WARN: Code restructure failed: missing block: B:33:0x017a, code lost:
    
        if (r12.equals("O") != false) goto L36;
     */
    /* JADX WARN: Code restructure failed: missing block: B:34:0x017d, code lost:
    
        r13 = "B-";
     */
    /* JADX WARN: Code restructure failed: missing block: B:35:0x01bb, code lost:
    
        r29 = -1;
     */
    /* JADX WARN: Code restructure failed: missing block: B:36:0x01c7, code lost:
    
        switch(r0.hashCode()) {
            case 75644: goto L46;
            case 81410: goto L49;
            case 64305845: goto L52;
            default: goto L55;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:38:0x01ef, code lost:
    
        if (r0.equals("LRB") == false) goto L55;
     */
    /* JADX WARN: Code restructure failed: missing block: B:39:0x01f2, code lost:
    
        r29 = false;
     */
    /* JADX WARN: Code restructure failed: missing block: B:41:0x01ff, code lost:
    
        if (r0.equals("RRB") == false) goto L55;
     */
    /* JADX WARN: Code restructure failed: missing block: B:42:0x0202, code lost:
    
        r29 = true;
     */
    /* JADX WARN: Code restructure failed: missing block: B:44:0x020f, code lost:
    
        if (r0.equals("COMMA") == false) goto L55;
     */
    /* JADX WARN: Code restructure failed: missing block: B:45:0x0212, code lost:
    
        r29 = 2;
     */
    /* JADX WARN: Code restructure failed: missing block: B:47:0x0217, code lost:
    
        switch(r29) {
            case 0: goto L57;
            case 1: goto L57;
            case 2: goto L58;
            default: goto L61;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:48:0x0230, code lost:
    
        r12 = "O";
        r13 = "";
     */
    /* JADX WARN: Code restructure failed: missing block: B:50:0x0242, code lost:
    
        if (r13.equals("B-") == false) goto L61;
     */
    /* JADX WARN: Code restructure failed: missing block: B:51:0x0245, code lost:
    
        r12 = "O";
        r13 = "";
     */
    /* JADX WARN: Code restructure failed: missing block: B:53:0x0254, code lost:
    
        if (r14.equals(r12) == false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:55:0x025e, code lost:
    
        if (r12.equalsIgnoreCase("O") != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:56:0x0261, code lost:
    
        r13 = "I-";
     */
    /* JADX WARN: Code restructure failed: missing block: B:58:0x0267, code lost:
    
        if (r16 == null) goto L73;
     */
    /* JADX WARN: Code restructure failed: missing block: B:60:0x0271, code lost:
    
        if (r13.equals("I-") != false) goto L73;
     */
    /* JADX WARN: Code restructure failed: missing block: B:62:0x027d, code lost:
    
        if (isPunctuation(r16.tokTag) == false) goto L73;
     */
    /* JADX WARN: Code restructure failed: missing block: B:63:0x0280, code lost:
    
        r16.consTag = "O";
        r16.iobState = "";
     */
    /* JADX WARN: Code restructure failed: missing block: B:64:0x028e, code lost:
    
        r0 = mapTokenToPennBioIETag(r0, mapPennTreebankToPennBioIETag(r0));
     */
    /* JADX WARN: Code restructure failed: missing block: B:65:0x02a3, code lost:
    
        if (null == r16) goto L128;
     */
    /* JADX WARN: Code restructure failed: missing block: B:66:0x02a6, code lost:
    
        r0.add(r16);
     */
    /* JADX WARN: Code restructure failed: missing block: B:68:0x02b0, code lost:
    
        r16 = new de.julielab.jcore.ae.opennlp.chunk.convert.ToIOBConverter.ChunkRecord(r8, r0, r0, r13, r12);
     */
    /* JADX WARN: Code restructure failed: missing block: B:71:0x018b, code lost:
    
        if (r13.equals("B-") == false) goto L39;
     */
    /* JADX WARN: Code restructure failed: missing block: B:72:0x018e, code lost:
    
        r13 = "I-";
     */
    /* JADX WARN: Code restructure failed: missing block: B:74:0x019c, code lost:
    
        if (r12.equals("O") == false) goto L44;
     */
    /* JADX WARN: Code restructure failed: missing block: B:75:0x019f, code lost:
    
        r0 = (java.lang.String) r0.peek();
     */
    /* JADX WARN: Code restructure failed: missing block: B:76:0x01b0, code lost:
    
        if (r0.equals("CC") != false) goto L44;
     */
    /* JADX WARN: Code restructure failed: missing block: B:77:0x01b3, code lost:
    
        r12 = r0;
        r13 = "B-";
     */
    /* JADX WARN: Code restructure failed: missing block: B:79:0x02c5, code lost:
    
        r14 = r12;
     */
    /* JADX WARN: Code restructure failed: missing block: B:92:0x031b, code lost:
    
        switch(r21) {
            case 0: goto L116;
            case 1: goto L117;
            default: goto L122;
        };
     */
    /* JADX WARN: Code restructure failed: missing block: B:94:0x0334, code lost:
    
        r12 = "O";
        r13 = "";
        r0.pop();
     */
    /* JADX WARN: Code restructure failed: missing block: B:98:0x0345, code lost:
    
        r22 = false;
        r0 = r0.iterator();
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /*
     * Placeholder for the stream-based conversion: the decompiler could not reconstruct this
     * method, so the body below unconditionally throws UnsupportedOperationException and every
     * caller in this class (main and the File-based convert overload) fails when it reaches it.
     *
     * NOTE(review): the decompiler warnings preceding this method suggest that the lost logic
     * reads elements carrying a "cat" attribute, builds ChunkRecord instances and writes their
     * record lines to the writer - restore the body from the original source rather than from
     * those fragments.
     */
    public void convert(java.io.InputStream r9, java.io.BufferedWriter r10) throws java.lang.Exception {
        /*
            Method dump skipped, instructions count: 1024
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: de.julielab.jcore.ae.opennlp.chunk.convert.ToIOBConverter.convert(java.io.InputStream, java.io.BufferedWriter):void");
    }

    /**
     * Re-aligns the IOB prefixes around position {@code i} after the constituent tag of
     * {@code chunkRecord} has been decided.
     *
     * <p>The prefix of {@code chunkRecord} becomes "I-" when the preceding record carries the
     * same constituent tag, and "B-" when it carries a different one and {@code chunkRecord}
     * is not tagged "O". If a following record exists, shares the constituent tag and is not
     * tagged "O", its prefix is set to "I-".
     *
     * @param list        the records of the current sequence
     * @param i           the position the record is compared at; the element at {@code i - 1}
     *                    is read unconditionally, so {@code i} must be at least 1
     * @param chunkRecord the record whose prefix is updated
     */
    public void repairEnvironment(List<ChunkRecord> list, int i, ChunkRecord chunkRecord) {
        ChunkRecord previous = list.get(i - 1);
        boolean continuesPrevious = previous.consTag.equals(chunkRecord.consTag);
        if (continuesPrevious) {
            chunkRecord.iobState = "I-";
        } else if (!chunkRecord.consTag.equals("O")) {
            chunkRecord.iobState = "B-";
        }

        boolean hasFollower = i < list.size() - 1;
        if (!hasFollower) {
            return;
        }
        ChunkRecord following = list.get(i + 1);
        boolean sameConstituent = following.consTag.equals(chunkRecord.consTag);
        if (sameConstituent && !following.consTag.equals("O")) {
            // The follower now continues the chunk of chunkRecord.
            following.iobState = "I-";
        }
    }

    /**
     * Tells whether a tag is one of the punctuation tags handled by this converter.
     *
     * @param str the tag to check; must not be {@code null}
     * @return {@code true} for {@code .}, {@code ,}, {@code :}, the opening and closing
     *         quote tags and the two bracket tags {@code -LRB-} / {@code -RRB-};
     *         {@code false} for everything else
     */
    private boolean isPunctuation(String str) {
        switch (str) {
            case ".":
            case ",":
            case ":":
            case "``":
            case "''":
            case "-LRB-":
            case "-RRB-":
                return true;
            default:
                return false;
        }
    }

    /**
     * Merges tokens so that parentheses opened inside a token are closed inside the same token.
     *
     * <p>Opening and closing parentheses are counted cumulatively over the whole list. While
     * more have been opened than closed, the texts of the following tokens are appended to the
     * current one; the merged token keeps the tag of its first part. A list that is empty or
     * whose first entry is tagged {@code -LRB-} is returned unchanged.
     *
     * @param list pairs of token text (index 0) and tag (index 1)
     * @return the given list if nothing has to be done, otherwise a new list of merged pairs
     */
    private List<String[]> balanceParenthesis(List<String[]> list) {
        if (list.isEmpty() || list.get(0)[1].equals("-LRB-")) {
            return list;
        }
        List<String[]> balanced = new ArrayList<>(list.size());
        int opening = 0;
        int closing = 0;
        int index = 0;
        while (index < list.size()) {
            String[] head = list.get(index);
            StringBuilder text = new StringBuilder(head[0]);
            opening += countOccurrences(head[0], '(');
            closing += countOccurrences(head[0], ')');
            // Swallow following tokens until the parentheses are balanced or the list ends.
            while (opening > closing && index < list.size() - 1) {
                index++;
                String continuation = list.get(index)[0];
                opening += countOccurrences(continuation, '(');
                closing += countOccurrences(continuation, ')');
                text.append(continuation);
            }
            balanced.add(new String[]{text.toString(), head[1]});
            index++;
        }
        return balanced;
    }

    /** Counts how often {@code symbol} occurs in {@code text}. */
    private static int countOccurrences(String text, char symbol) {
        int count = 0;
        for (int pos = 0; pos < text.length(); pos++) {
            if (text.charAt(pos) == symbol) {
                count++;
            }
        }
        return count;
    }

    /**
     * Maps a part-of-speech tag to the phrase type used for a token of that tag.
     *
     * @param str the part-of-speech tag; must not be {@code null}
     * @return {@code "VP"} for the modal and verb tags, {@code "NP"} for the noun and
     *         pronoun tags, {@code "PP"} for {@code IN}, and the empty string for any
     *         other tag
     */
    public String mapPosTagToPhraseType(String str) {
        switch (str) {
            case "MD":
            case "VB":
            case "VBC":
            case "VBD":
            case "VBF":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ":
                return "VP";
            case "NN":
            case "NNS":
            case "NNP":
            case "NNPS":
            case "PRP$":
            case "PRP":
            case "WP":
            case "WP$":
                return "NP";
            case "IN":
                return "PP";
            default:
                return "";
        }
    }

    /**
     * Translates the spelled-out tag names of the input into the tag symbols used in the output.
     *
     * @param str the tag as found in the input; must not be {@code null}
     * @return the replacement symbol for the nine spelled-out names handled here
     *         ({@code COLON}, {@code COMMA}, {@code LQT}, {@code LRB}, {@code PERIOD},
     *         {@code PRPP}, {@code RQT}, {@code RRB}, {@code WPP}); any other tag is
     *         returned unchanged
     */
    public String mapPennTreebankToPennBioIETag(String str) {
        switch (str) {
            case "COLON":
                return ":";
            case "COMMA":
                return ",";
            case "LQT":
                return "``";
            case "LRB":
                return "-LRB-";
            case "PERIOD":
                return ".";
            case "PRPP":
                return "PRP$";
            case "RQT":
                return "''";
            case "RRB":
                return "-RRB-";
            case "WPP":
                return "WP$";
            default:
                return str;
        }
    }

    /**
     * Overrides the tag of tokens that receive a fixed tag regardless of their input tag.
     *
     * @param str  the token text; must not be {@code null}
     * @param str2 the tag determined for the token so far
     * @return {@code "HYPH"} when the token is a single hyphen, otherwise {@code str2}
     */
    public String mapTokenToPennBioIETag(String str, String str2) {
        if (str.equals("-")) {
            return "HYPH";
        }
        return str2;
    }

    /**
     * Splits a text into tokens and tags them by running it through the two analysis engines.
     *
     * <p>The shared JCas is reset, filled with the text, annotated with one sentence spanning
     * the whole text and then processed by the token boundary detector followed by the
     * part-of-speech tagger.
     *
     * @param str the text to tokenize
     * @return one pair per token annotation: the covered text (index 0) and the value of the
     *         token's first part-of-speech tag (index 1)
     * @throws AnalysisEngineProcessException if one of the engines fails
     */
    private List<String[]> tokenize(String str) throws AnalysisEngineProcessException {
        this.jCas.reset();
        this.jCas.setDocumentText(str);
        new Sentence(this.jCas, 0, str.length()).addToIndexes();
        this.jtbd.process(this.jCas.getCas());
        this.pennbioIEPosTagger.process(this.jCas.getCas());
        return JCasUtil.select(this.jCas, Token.class).stream()
                .map(token -> new String[]{token.getCoveredText(), token.getPosTag(0).getValue()})
                .collect(Collectors.toList());
    }
}
