package de.julielab.jcore.reader.file.main;

import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.types.pubmed.Header;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/* loaded from: input_file:de/julielab/jcore/reader/file/main/FileReader.class */
public class FileReader extends CollectionReader_ImplBase {
    /** Parameter name: directory whose files are read as documents. */
    public static final String DIRECTORY_INPUT = "InputDirectory";
    /** Parameter name: if true, a Header with the (extension-less) file name as document ID is added to each CAS. */
    public static final String FILENAME_AS_DOC_ID = "UseFilenameAsDocId";
    /** Parameter name: file that is searched for a publication date belonging to the document ID. */
    public static final String PUBLICATION_DATES_FILE = "PublicationDatesFile";
    /** Parameter name: file extensions that are accepted; when none are given every file is accepted. */
    public static final String ALLOWED_FILE_EXTENSIONS = "AllowedFileExtensions";
    /** Parameter name: if true, every non-blank line of the input becomes a Sentence annotation. */
    public static final String SENTENCE_PER_LINE = "SentencePerLine";
    /** Parameter name: if true, every whitespace-delimited chunk of the input becomes a Token annotation. */
    public static final String TOKEN_BY_TOKEN = "TokenByToken";
    /** Parameter name: if true, the file name is additionally cut at its last underscore when deriving a name. */
    public static final String FILE_NAME_SPLIT_UNDERSCORE = "FileNameSplitUnderscore";
    /** Parameter name: if true, sub-directories of the input directory are traversed as well. */
    public static final String DIRECTORY_SUBDIRS = "ReadSubDirs";
    /** Parameter name: folder holding the original texts; when set, their content becomes the document text. */
    public static final String DIRECTORY_ORIG_FILES = "OriginalFolder";
    /** Parameter name: extension of the files in the original folder (defaults to "txt"). */
    public static final String ORIG_FILES_EXT = "OriginalFileExt";
    // Files collected during initialize(), in the order they will be delivered.
    private ArrayList<File> files;
    // Index into 'files' of the next document to deliver.
    private int fileIndex;

    // Directory that is scanned for input files.
    @ConfigurationParameter(name = DIRECTORY_INPUT, mandatory = true)
    private File inputDirectory;

    // Whether getNext() adds a Header carrying the file name as document ID.
    @ConfigurationParameter(name = FILENAME_AS_DOC_ID, mandatory = false)
    private boolean useFilenameAsDocId;

    // Optional file consulted by addDateForID(); may stay null.
    @ConfigurationParameter(name = PUBLICATION_DATES_FILE, mandatory = false)
    private File publicationDatesFile;

    // Whether each non-blank input line is annotated as a Sentence.
    @ConfigurationParameter(name = SENTENCE_PER_LINE, mandatory = false)
    private boolean sentencePerLine;

    // Whether each whitespace-delimited chunk is annotated as a Token.
    @ConfigurationParameter(name = TOKEN_BY_TOKEN, mandatory = false)
    private boolean tokenByToken;

    // Whether getFileName() also strips everything from the last underscore on.
    @ConfigurationParameter(name = FILE_NAME_SPLIT_UNDERSCORE, mandatory = false)
    private boolean fileNameSplitUnderscore;

    // Raw extension filter as configured; null when the parameter is not set.
    @ConfigurationParameter(name = ALLOWED_FILE_EXTENSIONS, mandatory = false)
    private String[] allowedExtensionsArray;

    // Whether sub-directories are traversed while collecting files.
    @ConfigurationParameter(name = DIRECTORY_SUBDIRS, mandatory = false)
    private boolean useSubDirs;

    // Folder with the original texts, or null when annotations refer to the input file itself.
    @ConfigurationParameter(name = DIRECTORY_ORIG_FILES, mandatory = false)
    private File origFolder;

    // Extension (without leading dot) appended to the derived name to locate the original file.
    @ConfigurationParameter(name = ORIG_FILES_EXT, mandatory = false)
    private String origFileExt;

    /**
     * Reads all configuration parameters and collects the files that will be delivered by
     * {@link #getNext(CAS)}.
     *
     * <p>Optional boolean parameters default to {@code false}, the original-file extension
     * defaults to {@code "txt"} (a leading dot is stripped), and an absent extension filter
     * accepts every file.
     *
     * @throws ResourceInitializationException if the mandatory input directory parameter is
     *         missing, the input directory does not exist, or the directory cannot be listed
     */
    @Override
    public void initialize() throws ResourceInitializationException {
        String inputDirectoryValue = (String) getConfigParameterValue(DIRECTORY_INPUT);
        if (inputDirectoryValue == null) {
            // Report the missing mandatory parameter instead of failing with a bare NullPointerException.
            throw new ResourceInitializationException(
                    ResourceInitializationException.CONFIG_SETTING_ABSENT, new Object[]{DIRECTORY_INPUT});
        }
        this.inputDirectory = new File(inputDirectoryValue.trim());

        String datesFileValue = (String) getConfigParameterValue(PUBLICATION_DATES_FILE);
        if (datesFileValue != null) {
            this.publicationDatesFile = new File(datesFileValue.trim());
        }

        this.sentencePerLine = booleanParameter(SENTENCE_PER_LINE);
        this.tokenByToken = booleanParameter(TOKEN_BY_TOKEN);
        this.fileNameSplitUnderscore = booleanParameter(FILE_NAME_SPLIT_UNDERSCORE);
        this.useFilenameAsDocId = booleanParameter(FILENAME_AS_DOC_ID);
        this.useSubDirs = booleanParameter(DIRECTORY_SUBDIRS);

        this.allowedExtensionsArray = (String[]) getConfigParameterValue(ALLOWED_FILE_EXTENSIONS);
        Set<String> allowedExtensions = new HashSet<>();
        if (this.allowedExtensionsArray != null) {
            for (String extension : this.allowedExtensionsArray) {
                allowedExtensions.add(extension);
            }
        }

        String origFolderValue = (String) getConfigParameterValue(DIRECTORY_ORIG_FILES);
        this.origFolder = origFolderValue == null ? null : new File(origFolderValue.trim());

        String origExtValue = (String) getConfigParameterValue(ORIG_FILES_EXT);
        if (origExtValue == null) {
            this.origFileExt = "txt";
        } else {
            // The extension is stored without its leading dot; getNext() adds the dot itself.
            this.origFileExt = origExtValue.startsWith(".") ? origExtValue.substring(1) : origExtValue;
        }

        if (!this.inputDirectory.exists()) {
            throw new ResourceInitializationException("annotator_resource_not_found", new Object[]{this.inputDirectory.getAbsolutePath()});
        }

        this.fileIndex = 0;
        this.files = new ArrayList<>();
        try {
            createFileListByType(this.inputDirectory, allowedExtensions);
        } catch (IOException e) {
            // A reader that could not discover its input must not start silently empty.
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Returns the value of an optional Boolean configuration parameter.
     *
     * @param name the parameter name
     * @return the configured value, or {@code false} when the parameter is not set
     */
    private boolean booleanParameter(String name) {
        Boolean value = (Boolean) getConfigParameterValue(name);
        return value != null && value.booleanValue();
    }

    /**
     * Tells whether there is at least one collected file that has not been delivered yet.
     *
     * @return {@code true} while the delivery index is still inside the file list
     */
    @Override
    public boolean hasNext() {
        int total = this.files.size();
        return total > this.fileIndex;
    }

    /** One maximal run of non-whitespace characters, i.e. one token in token-by-token mode. */
    private static final Pattern TOKEN_PATTERN =
            Pattern.compile("[^\\s]+", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);

    /** A line that is empty or consists of whitespace only. */
    private static final Pattern BLANK_LINE_PATTERN = Pattern.compile("\\s*");

    /**
     * Fills the CAS with the next file of the collection.
     *
     * <p>The input file is read as UTF-8. If an original folder is configured, the file with the
     * same derived name and the configured extension is read from there; its content becomes the
     * document text and sentence/token annotations are aligned to it. Otherwise the input file
     * itself is the document text. Depending on the configuration, Sentence annotations (one per
     * non-blank line), Token annotations (one per whitespace-delimited chunk) and a Header with
     * the file name as document ID are added.
     *
     * @param cas the CAS to populate
     * @throws IOException if the input file or the original file cannot be read
     * @throws CollectionException if no JCas can be obtained from the CAS
     */
    @Override
    public void getNext(CAS cas) throws IOException, CollectionException {
        try {
            JCas jCas = cas.getJCas();
            File file = this.files.get(this.fileIndex++);
            String text = FileUtils.readFileToString(file, "UTF-8");
            String origText = null;
            if (this.origFolder != null) {
                File origFile = new File(this.origFolder, getFileName(file) + "." + this.origFileExt);
                origText = FileUtils.readFileToString(origFile, "UTF-8");
            }

            if (this.sentencePerLine) {
                addSentences(jCas, text, origText);
            }
            if (this.tokenByToken) {
                addTokens(jCas, text, origText);
            }

            jCas.setDocumentText(origText != null ? origText : text);

            if (this.useFilenameAsDocId) {
                String docId = getFileName(file);
                Header header = new Header(jCas);
                header.setDocId(docId);
                addDateForID(header, jCas, docId);
                header.addToIndexes();
            }
        } catch (CASException e) {
            throw new CollectionException(e);
        }
    }

    /**
     * Adds one Sentence annotation per non-blank line of {@code text}.
     *
     * <p>Without an original text the sentence spans the line itself (line terminator excluded).
     * With an original text the line is searched in it, tolerating arbitrary whitespace between
     * the line's chunks; lines that cannot be found produce no annotation.
     */
    private void addSentences(JCas jCas, String text, String origText) {
        String componentId = getClass().getName() + " : Sentence per Line Mode";
        int textLength = text.length();
        int searchFrom = 0;
        int lineStart = 0;
        while (lineStart < textLength) {
            int lineEnd = lineStart;
            while (lineEnd < textLength && text.charAt(lineEnd) != '\n' && text.charAt(lineEnd) != '\r') {
                lineEnd++;
            }
            String line = text.substring(lineStart, lineEnd);
            if (!BLANK_LINE_PATTERN.matcher(line).matches()) {
                if (origText == null) {
                    addSentence(jCas, lineStart, lineEnd, componentId);
                } else {
                    Matcher matcher = alignmentPattern(line).matcher(origText);
                    if (matcher.find(searchFrom)) {
                        addSentence(jCas, matcher.start(), matcher.end(), componentId);
                        // Matcher.find(int) rejects start positions beyond the input length, so clamp.
                        searchFrom = Math.min(matcher.end() + 1, origText.length());
                    }
                }
            }
            // Step over the terminator; "\r\n" counts as one terminator but occupies two
            // characters, which must both be accounted for to keep later offsets correct.
            boolean crlf = lineEnd + 1 < textLength
                    && text.charAt(lineEnd) == '\r'
                    && text.charAt(lineEnd + 1) == '\n';
            lineStart = lineEnd + (crlf ? 2 : 1);
        }
    }

    /** Creates a Sentence with the given span and component ID and adds it to the indexes. */
    private void addSentence(JCas jCas, int begin, int end, String componentId) {
        Sentence sentence = new Sentence(jCas);
        sentence.setBegin(begin);
        sentence.setEnd(end);
        sentence.setComponentId(componentId);
        sentence.addToIndexes();
    }

    /**
     * Builds the pattern used to find a line in the original text: the line's
     * whitespace-separated chunks, quoted literally, joined by optional whitespace.
     */
    private static Pattern alignmentPattern(String line) {
        StringBuilder regex = new StringBuilder();
        boolean first = true;
        for (String chunk : line.split("\\s+")) {
            if (!first) {
                regex.append("\\s*");
            }
            regex.append(Pattern.quote(chunk));
            first = false;
        }
        return Pattern.compile(regex.toString(), Pattern.UNICODE_CHARACTER_CLASS);
    }

    /**
     * Adds one Token annotation per whitespace-delimited chunk of {@code text}.
     *
     * <p>Without an original text the token spans the chunk itself. With an original text the
     * chunk is searched in it, starting after the previously located token; chunks that cannot
     * be found produce no annotation and do not move the search position.
     */
    private void addTokens(JCas jCas, String text, String origText) {
        String componentId = getClass().getName() + " : Tokenized Mode";
        int searchFrom = 0;
        Matcher matcher = TOKEN_PATTERN.matcher(text);
        while (matcher.find()) {
            int begin;
            int end;
            if (origText == null) {
                begin = matcher.start();
                end = matcher.end();
            } else {
                String tokenText = matcher.group();
                begin = origText.indexOf(tokenText, searchFrom);
                if (begin < 0) {
                    // Not present in the original text: skip rather than index a token at offset -1.
                    continue;
                }
                end = begin + tokenText.length();
                searchFrom = end;
            }
            Token token = new Token(jCas);
            token.setBegin(begin);
            token.setEnd(end);
            token.setComponentId(componentId);
            token.addToIndexes();
        }
    }

    /**
     * Looks up the publication date for a document ID and, if found, adds it to the CAS as a
     * {@code de.julielab.jcore.types.Date} annotation.
     *
     * <p>Nothing happens unless the publication dates file is configured, exists and is a regular
     * file. The file is read line by line; a line matches when it consists of exactly two
     * whitespace-separated fields, the first equals {@code docId} and the second is seven
     * characters long. Characters 0-3 of the second field are parsed as the year and characters
     * 5-6 as the month (so a form like {@code YYYY-MM} is presumably expected; the character at
     * index 4 is ignored). Only the first matching line is considered. The date is indexed only
     * when a non-zero year could be parsed; the month is set only when it is non-zero.
     *
     * <p>I/O problems are reported on standard error and otherwise ignored, so a broken dates
     * file never prevents a document from being delivered.
     *
     * @param header the header of the current document (not modified by this method)
     * @param jCas the JCas to which the date annotation is added
     * @param docId the document ID to look up in the publication dates file
     */
    private void addDateForID(Header header, JCas jCas, String docId) {
        File datesFile = this.publicationDatesFile;
        if (datesFile == null || !datesFile.exists() || !datesFile.isFile()) {
            return;
        }
        // Fully qualified: the simple name FileReader denotes this class.
        // NOTE(review): java.io.FileReader decodes with the platform default charset.
        try (BufferedReader reader = new BufferedReader(new java.io.FileReader(datesFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\s+");
                if (fields.length != 2 || !fields[0].equals(docId) || fields[1].length() != 7) {
                    continue;
                }
                de.julielab.jcore.types.Date date = new de.julielab.jcore.types.Date(jCas);
                int year = 0;
                int month = 0;
                try {
                    year = Integer.parseInt(fields[1].substring(0, 4));
                    month = Integer.parseInt(fields[1].substring(5));
                } catch (NumberFormatException ignored) {
                    // Whatever was parsed before the failure is kept; the rest stays 0 (= unknown).
                }
                if (month != 0) {
                    date.setMonth(month);
                }
                if (year != 0) {
                    date.setYear(year);
                    date.addToIndexes();
                }
                // Only the first matching line is used, whether or not it yielded a date.
                break;
            }
        } catch (IOException e) {
            // Best effort: a missing or unreadable dates file must not fail the document.
            e.printStackTrace();
        }
    }

    /**
     * Closes the reader. This implementation does nothing.
     *
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void close() throws IOException {
        // Intentionally empty.
    }

    /**
     * Reports how far the collection has been processed.
     *
     * @return a single-element array holding the number of files delivered so far and the total
     *         number of collected files, measured in the unit {@code "entities"}
     */
    @Override
    public Progress[] getProgress() {
        int delivered = this.fileIndex;
        int total = this.files.size();
        Progress progress = new ProgressImpl(delivered, total, "entities");
        return new Progress[]{progress};
    }

    /**
     * Collects the regular files below {@code file} whose extension is accepted and appends them
     * to the list of files to deliver.
     *
     * <p>The extension of an entry is the part of its name after the last dot (the whole name if
     * it has no dot). Sub-directories are traversed only when sub-directory reading is enabled;
     * directories themselves are never added to the file list.
     *
     * @param file the directory to list
     * @param set the accepted extensions; an empty set accepts every file
     * @return the names of the direct entries of {@code file}
     * @throws IOException if {@code file} is not a directory or cannot be listed
     */
    private String[] createFileListByType(File file, Set<String> set) throws IOException {
        String[] entryNames = file.list();
        if (entryNames == null) {
            // File.list() signals "not a directory" and I/O errors by returning null.
            throw new IOException("Cannot list directory: " + file.getAbsolutePath());
        }
        for (String entryName : entryNames) {
            File entry = new File(file.getAbsolutePath() + "/" + entryName);
            if (entry.isDirectory()) {
                if (this.useSubDirs) {
                    createFileListByType(entry, set);
                }
                continue;
            }
            String extension = entryName.substring(entryName.lastIndexOf('.') + 1);
            if (set.isEmpty() || set.contains(extension)) {
                this.files.add(entry);
            }
        }
        return entryNames;
    }

    /**
     * Derives the base name of a file.
     *
     * <p>Everything from the last dot on is removed, unless that dot is the first character or
     * absent. If file-name splitting at underscores is enabled, everything from the last
     * underscore on is removed as well, under the same condition.
     *
     * @param file the file whose name is examined
     * @return the shortened file name
     */
    private String getFileName(File file) {
        String baseName = file.getName();
        int dotPosition = baseName.lastIndexOf('.');
        if (dotPosition > 0) {
            baseName = baseName.substring(0, dotPosition);
        }
        if (!this.fileNameSplitUnderscore) {
            return baseName;
        }
        int underscorePosition = baseName.lastIndexOf('_');
        return underscorePosition > 0 ? baseName.substring(0, underscorePosition) : baseName;
    }
}
