package de.julielab.jules.ae.genemapping.resources;

import de.julielab.jules.ae.genemapping.CandidateFilter;
import de.julielab.jules.ae.genemapping.utils.norm.TermNormalizer;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jules/ae/genemapping/resources/SynonymIndexGenerator.class */
/**
 * Builds a Lucene index of gene synonyms from a tab-separated dictionary file.
 *
 * <p>Each dictionary line is expected to hold three columns: the synonym, the
 * entry id and an integer priority. For every accepted line one document is
 * written with the fields {@code entry_id}, {@code original_name},
 * {@code indexed_syn}, {@code tax_id}, {@code priority} and one
 * {@code variant_name} / {@code stemmed_normalized_name} field per normalized
 * variant of the synonym.
 */
public class SynonymIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(SynonymIndexGenerator.class);
    /** When true, synonyms matched by a candidate filter pattern are not indexed at all. */
    private static final boolean OMIT_FILTERED = true;
    /** Maximum number of whitespace-separated tokens of a normalized synonym. */
    private static final int MAX_SYNLENGTH = 8;
    /** Minimum token count; shorter synonyms must have at least this many characters. */
    private static final int MIN_SYNLENGTH = 2;
    private static final boolean debug = false;

    private final File dictFile;
    /** Maps entry ids to taxonomy ids; starts empty so that lookups never hit a null map. */
    Map<String, String> id2tax = new HashMap<>();
    Directory indexDirectory;

    /**
     * Command line entry point.
     *
     * @param args resources directory, gene info file name (relative to the
     *             resources directory, gzip compressed) and the directory in
     *             which the {@code geneSynonymIndex} directory is created
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        if (args.length != 3) {
            System.err.println("Usage: SynonymIndexGenerator <resourcesDirectory> <gene_info file name> <geneSynonymIndicesDirectory>");
            System.exit(1);
        }
        String resourcesPath = args[0];
        if (!new File(resourcesPath).isDirectory()) {
            System.err.println("Could not find resources directory");
            System.exit(1);
        }
        if (!resourcesPath.endsWith(File.separator)) {
            resourcesPath = resourcesPath + File.separator;
        }
        File geneInfoFile = new File(resourcesPath + args[1]);
        if (!geneInfoFile.exists()) {
            System.err.println("Gene info file could not be found at " + geneInfoFile.getAbsolutePath());
            System.exit(1);
        }
        String indexBasePath = args[2];
        if (!indexBasePath.endsWith("/")) {
            indexBasePath = indexBasePath + "/";
        }
        File geneIndexDir = new File(indexBasePath + "geneSynonymIndex");
        // All three resource files must be present, although only gene.dict.eg is read here.
        checkFile(new File(resourcesPath + "gene.dict.up"));
        File egDictFile = new File(resourcesPath + "gene.dict.eg");
        checkFile(egDictFile);
        checkFile(new File(resourcesPath + "up2eg2tax.map"));
        try {
            SynonymIndexGenerator generator = new SynonymIndexGenerator(egDictFile, geneIndexDir);
            generator.readEgTaxMap(geneInfoFile);
            generator.createIndex();
        } catch (IOException e) {
            // Do not fall through to the success message when the index could not be written.
            log.error("Synonym index creation failed", e);
            System.exit(1);
        }
        System.out.println("Index created successfully! (" + ((System.currentTimeMillis() - startTime) / 1000) + " sec)");
    }

    /**
     * Ensures that the given path denotes an existing regular file.
     *
     * @throws IllegalArgumentException if it does not
     */
    private static void checkFile(File file) {
        if (!file.isFile()) {
            throw new IllegalArgumentException("File \"" + file.getAbsolutePath() + "\" could not be found.");
        }
    }

    /**
     * Creates a generator that reads the given dictionary and writes to the given index directory.
     *
     * @param dictFile the tab-separated dictionary to index
     * @param indexDir the file system directory of the Lucene index
     * @throws IOException if the index directory cannot be opened
     */
    public SynonymIndexGenerator(File dictFile, File indexDir) throws FileNotFoundException, IOException {
        System.out.println("Building synonym index from dictionary " + dictFile.getAbsolutePath());
        this.dictFile = dictFile;
        this.indexDirectory = createIndexDirectory(indexDir);
    }

    /**
     * Reads the dictionary line by line and writes one index document per accepted synonym.
     * Lines that do not have exactly three tab-separated columns are reported on stderr and
     * skipped. An existing index in the target directory is overwritten.
     *
     * @throws IOException if reading the dictionary or writing the index fails
     * @throws NumberFormatException if the priority column of a line is not an integer
     */
    public void createIndex() throws IOException {
        CandidateFilter candidateFilter = new CandidateFilter();
        TermNormalizer termNormalizer = new TermNormalizer();
        IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        int processed = 0;
        // try-with-resources closes writer and reader on every path, including failures.
        try (IndexWriter indexWriter = new IndexWriter(this.indexDirectory, writerConfig);
                BufferedReader reader = new BufferedReader(new FileReader(this.dictFile))) {
            System.out.println("Generating index now. This may take quite a while (up to several hours when input files are large) ...");
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length != 3) {
                    System.err.println("ERR: normalized dictionary not in expected format. \ncritical line: " + line);
                    continue;
                }
                String originalName = columns[0];
                String entryId = columns[1];
                int priority = Integer.parseInt(columns[2]);
                String normalizedName = termNormalizer.normalize(originalName);
                if (!hasAcceptableLength(normalizedName)) {
                    log.debug("Removed due to illegal length (too short or too long): {}", normalizedName);
                    continue;
                }
                boolean filtered = isFiltered(candidateFilter, normalizedName);
                if (filtered && OMIT_FILTERED) {
                    continue;
                }
                showDebug(entryId + "\t" + normalizedName);
                // Variants are only generated for synonyms that actually end up in the index.
                List<String> normalizedVariants = termNormalizer.generateVariants(originalName).stream()
                        .map(termNormalizer::normalize)
                        .collect(Collectors.toList());
                indexWriter.addDocument(
                        buildDocument(entryId, originalName, normalizedName, normalizedVariants, priority, filtered));
                processed++;
                if (processed % 10000 == 0) {
                    System.err.println("# entries processed: " + processed);
                }
            }
        }
    }

    /**
     * Checks the synonym length: at most {@link #MAX_SYNLENGTH} tokens, and either at least
     * {@link #MIN_SYNLENGTH} tokens or at least {@link #MIN_SYNLENGTH} characters.
     */
    private static boolean hasAcceptableLength(String normalizedName) {
        int tokenCount = normalizedName.split(" ").length;
        if (tokenCount > MAX_SYNLENGTH) {
            return false;
        }
        return tokenCount >= MIN_SYNLENGTH || normalizedName.length() >= MIN_SYNLENGTH;
    }

    /** Tells whether the synonym matches the domain/family or the unspecified pattern of the filter. */
    private static boolean isFiltered(CandidateFilter candidateFilter, String normalizedName) {
        boolean filtered = false;
        if (candidateFilter.patternDomainFamilies.matcher(normalizedName).matches()) {
            log.debug("DOMAIN/FAMILY REMOVED: |{}|", normalizedName);
            filtered = true;
        }
        if (candidateFilter.patternUnspecifieds.matcher(normalizedName).matches()) {
            log.debug("UNSPECIFIED REMOVED: |{}|", normalizedName);
            filtered = true;
        }
        return filtered;
    }

    /**
     * Assembles the index document of one dictionary entry. The taxonomy id is looked up in
     * {@link #id2tax}; entries without a known taxonomy id get the empty string.
     */
    private Document buildDocument(String entryId, String originalName, String normalizedName,
            List<String> normalizedVariants, int priority, boolean filtered) {
        String knownTaxId = this.id2tax.get(entryId);
        String taxId = knownTaxId != null ? knownTaxId : "";
        Document document = new Document();
        if (!OMIT_FILTERED) {
            int filteredFlag = filtered ? 1 : 0;
            document.add(new IntPoint("filtered", filteredFlag));
            document.add(new StoredField("filtered", filteredFlag));
        }
        document.add(new StringField("entry_id", entryId, Field.Store.YES));
        document.add(new TextField("original_name", originalName.toLowerCase(), Field.Store.YES));
        document.add(new TextField("indexed_syn", normalizedName, Field.Store.YES));
        document.add(new StringField("tax_id", taxId, Field.Store.YES));
        // The point field carries the indexed value, the stored field the retrievable one.
        document.add(new IntPoint("priority", priority));
        document.add(new StoredField("priority", priority));
        for (String variant : normalizedVariants) {
            document.add(new TextField("variant_name", variant, Field.Store.YES));
        }
        for (String variant : normalizedVariants) {
            document.add(new TextField("stemmed_normalized_name", variant, Field.Store.YES));
        }
        return document;
    }

    /**
     * Opens the file system directory that holds the index.
     *
     * @throws IOException if the directory cannot be opened
     */
    private FSDirectory createIndexDirectory(File file) throws IOException {
        return FSDirectory.open(file.toPath());
    }

    /** Prints the message when the {@link #debug} flag is enabled; a no-op by default. */
    private void showDebug(String str) {
        if (debug) {
            System.out.println(str);
        }
    }

    /**
     * Fills {@link #id2tax} from a three-column, tab-separated file, mapping the trimmed first
     * column to the trimmed third column.
     *
     * @throws IOException if the file cannot be read or a line does not have three columns
     */
    private void readUpTaxMap(File file) throws IOException {
        System.out.println("Reading up2eg2tax.map ...");
        Map<String, String> mapping = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length != 3) {
                    throw new IOException("up2eg2tax.map not in expected format. \ncritical line: " + line);
                }
                mapping.put(columns[0].trim(), columns[2].trim());
            }
        }
        this.id2tax = mapping;
    }

    /**
     * Fills {@link #id2tax} from a gzip-compressed, tab-separated file, mapping the second
     * column of each line to its first column. Lines with fewer than two columns are skipped
     * with a warning.
     *
     * @throws IOException if the file cannot be opened or is not valid gzip data
     * @throws IllegalStateException if the same second-column value occurs on more than one line
     */
    private void readEgTaxMap(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(new FileInputStream(file))))) {
            this.id2tax = reader.lines()
                    .map(line -> line.split("\\t", 3))
                    .filter(columns -> {
                        if (columns.length < 2) {
                            log.warn("Skipping line with fewer than two columns: {}", String.join("\t", columns));
                            return false;
                        }
                        return true;
                    })
                    .collect(Collectors.toMap(columns -> columns[1], columns -> columns[0]));
        }
    }
}
