package de.julielab.genemapper.resources;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.ProgressBar;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/genemapper/resources/GeneRecordIndexGenerator.class */
/**
 * Builds a Lucene index with one document per gene record.
 *
 * <p>The input is a tab-separated dictionary file with the columns
 * {@code synonym}, {@code gene id} and {@code priority}. Consecutive lines with the same gene id
 * are grouped into one record. Each record is turned into a Lucene document by
 * {@link #indexGeneRecord} on a fixed-size thread pool. Optional "extended information" files
 * (tab-separated, gene id in the first column) contribute additional fields.
 */
public class GeneRecordIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(GeneRecordIndexGenerator.class);
    /** When {@code true}, dictionary lines flagged as filtered are skipped instead of being marked. */
    private static final boolean OMIT_FILTERED = true;
    /** Number of worker threads that build and add documents concurrently. */
    private static final int INDEXING_THREADS = 20;
    /** Dictionary file name that switches {@link #main} to the "OriginalNames" index directories. */
    private static final String ORIGINAL_NAMES_DICTIONARY = "gene.dict.uniqueprioritynames.sortedbyid.eg";

    private final Directory indexDirectory;
    private final File dictFile;
    private final Map<String, File> extendedInformationFields;
    /** Gene id to taxonomy id; stays {@code null} until one of the tax map readers has run. */
    Map<String, String> id2tax;
    /** Soft references to every document handed to the index writer; not read within this class. */
    Set<SoftReference<Document>> documents = new HashSet<>();

    /**
     * Creates a generator and opens the target index directory.
     *
     * @param dictFile the tab-separated dictionary file to index
     * @param extendedInformationFields index field name mapped to the file that supplies its values;
     *        iteration order matters because {@code "go"} must be read before {@code "godesc"};
     *        {@code null} is treated as "no extended information"
     * @param indexFile the directory in which the Lucene index is created
     * @throws IOException if the index directory cannot be opened
     */
    public GeneRecordIndexGenerator(File dictFile, Map<String, File> extendedInformationFields, File indexFile) throws FileNotFoundException, IOException {
        this.extendedInformationFields = extendedInformationFields != null
                ? extendedInformationFields
                : new LinkedHashMap<String, File>();
        log.info("Building gene records index from dictionary {}", dictFile);
        this.dictFile = dictFile;
        this.indexDirectory = FSDirectory.open(indexFile.toPath());
    }

    /**
     * Command line entry point.
     *
     * @param args {@code <dictionary file> <resourcesDirectory> <geneRecordIndicesDirectory>};
     *        the dictionary file name is resolved relative to the resources directory
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        if (args.length != 3) {
            System.err.println("Usage: GeneRecordIndexGenerator <dictionary file> <resourcesDirectory> <geneRecordIndicesDirectory>");
            System.exit(1);
        }
        String dictFileName = args[0];
        String resourcesDir = args[1];
        if (!new File(resourcesDir).isDirectory()) {
            System.err.println("Could not find resources directory");
            System.exit(1);
        }
        if (!resourcesDir.endsWith(File.separator)) {
            resourcesDir = resourcesDir + File.separator;
        }
        String indicesDir = args[2];
        boolean originalNames = dictFileName.equals(ORIGINAL_NAMES_DICTIONARY);
        File geneIndexDir = new File(indicesDir, originalNames ? "geneNamesRecordsIndexOriginalNames" : "geneRecordsIndex");
        File proteinIndexDir = new File(indicesDir, originalNames ? "ProteinRecordsIndexOriginalNames" : "proteinRecordsIndex");
        if (geneIndexDir.exists()) {
            FileUtils.deleteQuietly(geneIndexDir);
        }
        // NOTE(review): the protein index directory is removed here but never rebuilt by this class.
        if (proteinIndexDir.exists()) {
            FileUtils.deleteQuietly(proteinIndexDir);
        }

        checkFile(new File(resourcesDir + "gene.dict.up"));
        File dictionary = new File(resourcesDir + dictFileName);
        checkFile(dictionary);

        // Insertion order is significant: "go" has to be read before "godesc".
        Map<String, File> extendedInformation = new LinkedHashMap<>();
        extendedInformation.put("chromosome", new File(resourcesDir + "eg2chromosome"));
        extendedInformation.put("description", new File(resourcesDir + "eg2description"));
        extendedInformation.put("generif", new File(resourcesDir + "eg2generif"));
        extendedInformation.put("go", new File(resourcesDir + "eg2go"));
        extendedInformation.put("godesc", new File(resourcesDir + "go_all"));
        extendedInformation.put("interaction", new File(resourcesDir + "eg2interaction"));
        extendedInformation.put("maplocation", new File(resourcesDir + "eg2maplocation"));
        extendedInformation.put("summary", new File(resourcesDir + "eg2summary"));
        extendedInformation.put("ecnumber", new File(resourcesDir + "eg2ecnumber-genexmldownloader.gz"));

        checkFile(new File(resourcesDir + "up2eg2tax.map"));
        File geneInfo = new File(resourcesDir + "gene_info_organism_filtered.gz");
        try {
            GeneRecordIndexGenerator generator = new GeneRecordIndexGenerator(dictionary, extendedInformation, geneIndexDir);
            generator.readEgTaxMap(geneInfo);
            generator.createIndex();
        } catch (IOException e) {
            // Do not report success after a failed run.
            log.error("Index creation failed", e);
            System.exit(1);
        }
        System.out.println("Index created successfully! (" + ((System.currentTimeMillis() - startTime) / 1000) + " sec)");
    }

    /**
     * Verifies that the given path denotes an existing regular file.
     *
     * @throws IllegalArgumentException if it does not
     */
    private static void checkFile(File file) {
        if (!file.isFile()) {
            throw new IllegalArgumentException("File \"" + file.getAbsolutePath() + "\" could not be found.");
        }
    }

    /**
     * Reads the dictionary file, groups consecutive lines by gene id and indexes each group as one
     * document using a pool of worker threads. After all workers have finished the index is
     * committed and merged down to at most five segments.
     *
     * @throws IOException if the dictionary cannot be read or the index cannot be written
     */
    public void createIndex() throws IOException {
        if (this.id2tax == null) {
            log.warn("No gene ID to taxonomy ID map has been loaded; all documents will get an empty tax_id field.");
        }
        CandidateFilter candidateFilter = new CandidateFilter();
        TermNormalizer termNormalizer = new TermNormalizer();
        Map<String, Multimap<String, String>> extendedInformation = readExtendedInformationFiles();

        FieldType notStoredTextType = new FieldType(TextField.TYPE_NOT_STORED);
        notStoredTextType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        FieldType storedTextType = new FieldType(TextField.TYPE_STORED);
        storedTextType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        FieldType storedStringType = new FieldType(StringField.TYPE_STORED);
        storedStringType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

        IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // First pass over the dictionary: only used to size the progress bar.
        log.info("Counting number of lines of the dictionary file {}", this.dictFile);
        long numLines;
        try (BufferedReader counter = new BufferedReader(new FileReader(this.dictFile))) {
            numLines = counter.lines().count();
        }
        log.info("Generating index now for {} synonyms.", numLines);
        ProgressBar progressBar = new ProgressBar(numLines, 80);
        AtomicInteger documentCounter = new AtomicInteger();

        boolean interrupted = false;
        try (IndexWriter indexWriter = new IndexWriter(this.indexDirectory, writerConfig)) {
            ExecutorService executor = Executors.newFixedThreadPool(INDEXING_THREADS);
            try (BufferedReader reader = new BufferedReader(new FileReader(this.dictFile))) {
                Map<String, Integer> synonym2priority = new HashMap<>();
                String currentId = null;
                // NOTE(review): this flag is not reset per line, so it keeps its previous value for
                // lines that skip the filter check. Inactive while OMIT_FILTERED is true.
                boolean filtered = false;
                int acceptedLines = 1;
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] columns = line.split("\t");
                    if (columns.length != 3) {
                        System.err.println("ERR: normalized dictionary not in expected format. \ncritical line: " + line);
                        continue;
                    }
                    String synonym = columns[0];
                    String geneId = columns[1];
                    int priority = Integer.parseInt(columns[2]);
                    boolean isFamilyRecord = geneId.contains("GENO:");
                    if (!OMIT_FILTERED && priority != -1 && !isFamilyRecord) {
                        filtered = DictionaryFamilyDomainFilter.isFiltered(geneId, candidateFilter, synonym);
                    }
                    if (filtered && OMIT_FILTERED) {
                        continue;
                    }
                    // A change of gene id closes the record collected so far.
                    if (currentId != null && !currentId.equals(geneId)) {
                        log.trace("Indexing gene record with ID {} and synonym/priority pairs {}", currentId, synonym2priority);
                        executor.submit(indexingJob(currentId, new HashMap<>(synonym2priority), extendedInformation, filtered,
                                termNormalizer, indexWriter, notStoredTextType, storedTextType, storedStringType, documentCounter));
                        synonym2priority.clear();
                    }
                    synonym2priority.put(synonym, priority);
                    currentId = geneId;
                    if (acceptedLines % 1000 == 0 && !log.isDebugEnabled()) {
                        progressBar.incrementDone(acceptedLines - progressBar.getDone(), true);
                    }
                    acceptedLines++;
                }
                // Flush the last record; there is none if no dictionary line was accepted.
                if (currentId != null) {
                    executor.submit(indexingJob(currentId, new HashMap<>(synonym2priority), extendedInformation, filtered,
                            termNormalizer, indexWriter, notStoredTextType, storedTextType, storedStringType, documentCounter));
                }
                log.info("Dictionary file {} has been consumed, all indexing jobs have been sent.", this.dictFile);
            } finally {
                // Workers hold a reference to the writer, so they must be done before it is closed.
                interrupted = shutdownAndAwait(executor);
            }
            log.info("Committing {} documents to the index.", documentCounter.get());
            indexWriter.commit();
            indexWriter.forceMerge(5);
        } finally {
            // Restored only after the writer is closed: an interrupted thread can make
            // file-channel based I/O fail.
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Wraps a call to {@link #indexGeneRecord} into a task that logs failures instead of letting
     * them vanish inside the executor's {@code Future}. Parameters mirror {@link #indexGeneRecord}.
     */
    private Runnable indexingJob(String geneId, Map<String, Integer> synonym2priority,
                                 Map<String, Multimap<String, String>> extendedInformation, boolean filtered,
                                 TermNormalizer termNormalizer, IndexWriter indexWriter, FieldType notStoredTextType,
                                 FieldType storedTextType, FieldType storedStringType, AtomicInteger documentCounter) {
        return () -> {
            try {
                indexGeneRecord(geneId, synonym2priority, extendedInformation, filtered, termNormalizer, indexWriter,
                        notStoredTextType, storedTextType, storedStringType, documentCounter);
            } catch (IOException e) {
                log.error("Could not create index document for gene id {}", geneId, e);
            } catch (RuntimeException e) {
                log.error("Unexpected error while indexing gene id {}", geneId, e);
            }
        };
    }

    /**
     * Initiates an orderly shutdown of the executor and waits for all submitted jobs. If the wait
     * is interrupted the remaining jobs are cancelled via {@code shutdownNow()}.
     *
     * @return {@code true} if the calling thread was interrupted while waiting
     */
    private static boolean shutdownAndAwait(ExecutorService executor) {
        boolean interrupted = false;
        try {
            log.info("Shutting down executor.");
            log.info("Waiting for running threads to terminate.");
            executor.shutdown();
            executor.awaitTermination(100L, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            log.warn("Waiting for running threads to finish has been interrupted. Shutting down the executor service now.");
            executor.shutdownNow();
            interrupted = true;
        }
        log.info("ExecutorService has been shut down.");
        return interrupted;
    }

    /**
     * Loads all extended information files into memory.
     *
     * <p>Every file is read as tab-separated rows of which the first two columns are used; rows
     * with fewer than two columns are skipped. The {@code "go"} file is not added to the result:
     * it only fills a lookup from its second column to its first column. For {@code "godesc"} the
     * first column of each row is replaced through that lookup before the row is stored, so the
     * {@code "go"} entry must precede {@code "godesc"} in the configured map. A file that cannot be
     * read is logged and skipped.
     *
     * @return first column (gene id) mapped to a multimap of field name to values
     */
    private Map<String, Multimap<String, String>> readExtendedInformationFiles() {
        // NOTE(review): a plain map keeps only one gene id per GO id (last row wins) — confirm intended.
        Map<String, String> goId2geneId = new HashMap<>();
        Map<String, Multimap<String, String>> geneId2information = new HashMap<>();
        log.info("Loading extended information files");
        for (Map.Entry<String, File> fieldAndFile : this.extendedInformationFields.entrySet()) {
            String fieldName = fieldAndFile.getKey();
            File file = fieldAndFile.getValue();
            log.info("Reading {}", file);
            try (BufferedReader reader = FileUtilities.getReaderFromFile(file)) {
                Stream<String[]> rows = reader.lines()
                        .map(row -> row.split("\t"))
                        // Blank or truncated rows would otherwise fail on columns[1].
                        .filter(columns -> columns.length >= 2);
                if (fieldName.equals("godesc")) {
                    rows = rows.map(columns -> {
                        // Unknown GO ids yield null and are dropped by the filter below.
                        columns[0] = goId2geneId.get(columns[0]);
                        return columns;
                    });
                }
                if (fieldName.equals("go")) {
                    rows.forEach(columns -> goId2geneId.put(columns[1].intern(), columns[0].intern()));
                } else {
                    String internedFieldName = fieldName.intern();
                    rows.filter(columns -> columns[0] != null && !columns[0].isBlank() && !columns[1].isBlank())
                            .forEach(columns -> geneId2information
                                    .computeIfAbsent(columns[0].intern(), id -> HashMultimap.<String, String>create())
                                    .put(internedFieldName, columns[1]));
                }
            } catch (IOException | java.io.UncheckedIOException e) {
                // BufferedReader.lines() reports read errors as UncheckedIOException.
                log.error("Could not read file {}. The respective extended information will not be added to the index", file, e);
            }
        }
        return geneId2information;
    }

    /**
     * Builds the Lucene document for one gene record and adds it to the index.
     *
     * <p>The document gets an {@code entity_type} field, two fields per synonym (named after the
     * synonym priority, plus an {@code _exact} variant), the normalized extended information
     * values, and the {@code entry_id} and {@code tax_id} fields. The record's entry is removed
     * from {@code extendedInformation} under a lock on that map, so the method may be called from
     * several threads sharing the same map.
     *
     * @param geneId the record id; ids containing {@code "GENO:"} are indexed as family names
     * @param synonym2priority the record's synonyms mapped to their priority ({@code -1} to {@code 7})
     * @param extendedInformation gene id to (field name to values); modified by this call
     * @param filtered filter flag of the record; only written when filtered entries are not omitted
     * @param termNormalizer normalizer applied to extended information values
     * @param indexWriter the writer receiving the document
     * @param notStoredTextType currently unused
     * @param storedTextType field type for synonym and extended information fields
     * @param storedStringType field type for the {@code _exact} synonym fields
     * @param documentCounter incremented once the document has been added
     * @throws IOException if the document cannot be added to the index
     * @throws IllegalArgumentException if a synonym has a priority outside {@code -1..7}
     */
    public void indexGeneRecord(String geneId, Map<String, Integer> synonym2priority, Map<String, Multimap<String, String>> extendedInformation, boolean filtered, TermNormalizer termNormalizer, IndexWriter indexWriter, FieldType notStoredTextType, FieldType storedTextType, FieldType storedStringType, AtomicInteger documentCounter) throws IOException {
        try {
            Map<String, String> taxMap = this.id2tax;
            String taxId = taxMap != null ? taxMap.get(geneId) : null;
            if (taxId == null) {
                taxId = "";
            }

            Document document = new Document();
            String entityType = geneId.contains("GENO:")
                    ? GeneMention.SpecificType.FAMILYNAME.name()
                    : GeneMention.SpecificType.GENE.name();
            document.add(new StringField("entity_type", entityType, Field.Store.YES));

            for (Map.Entry<String, Integer> synonymAndPriority : synonym2priority.entrySet()) {
                String synonym = synonymAndPriority.getKey();
                String fieldName = fieldNameForPriority(synonymAndPriority.getValue());
                log.trace("Now adding field {} for synonym {} for ID {}", fieldName, synonym, geneId);
                document.add(new Field(fieldName, synonym, storedTextType));
                document.add(new Field(fieldName + "_exact", synonym, storedStringType));
            }

            if (!OMIT_FILTERED) {
                int filteredFlag = filtered ? 1 : 0;
                document.add(new IntPoint("filtered", filteredFlag));
                document.add(new StoredField("filtered", filteredFlag));
            }

            // The map is shared by all indexing threads and is not thread-safe by itself, so the
            // lookup and the removal happen as a single step under a lock.
            Multimap<String, String> information;
            synchronized (extendedInformation) {
                information = extendedInformation.remove(geneId);
            }
            if (information != null) {
                for (String field : information.keySet()) {
                    for (String value : information.get(field)) {
                        if (field.equals("godesc")) {
                            // godesc values hold several entries separated by '|'.
                            for (String description : value.split("\\|")) {
                                document.add(new Field(field, termNormalizer.normalize(description), storedTextType));
                            }
                        } else {
                            document.add(new Field(field, termNormalizer.normalize(value), storedTextType));
                        }
                    }
                }
            }

            document.add(new StringField("entry_id", geneId, Field.Store.YES));
            document.add(new StringField("tax_id", taxId, Field.Store.YES));

            indexWriter.addDocument(document);
            documentCounter.incrementAndGet();
            synchronized (this.documents) {
                this.documents.add(new SoftReference<>(document));
            }
        } catch (Throwable th) {
            log.error("Error occurred", th);
            throw th;
        }
    }

    /**
     * Maps a synonym priority from the dictionary to the name of the index field it is stored in.
     *
     * @throws IllegalArgumentException for priorities outside {@code -1..7}
     */
    private static String fieldNameForPriority(int priority) {
        switch (priority) {
            case -1:
                return "symbol";
            case 0:
                return "symbol_from_nomenclature";
            case 1:
                return "full_names";
            case 2:
                return "synonyms";
            case 3:
                return "other_designations";
            case 4:
                return "protein_names";
            case 5:
                return "uniprot_names";
            case 6:
                return "xrefs";
            case 7:
                return "bio_thesaurus";
            default:
                throw new IllegalArgumentException("Unsupported synonym priority: " + priority);
        }
    }

    /**
     * Loads {@link #id2tax} from a three-column, tab-separated file: the first column becomes the
     * key and the third column the value. Not called from within this class.
     *
     * @throws IOException if the file cannot be read or a line does not have exactly three columns
     */
    private void readUpTaxMap(File file) throws IOException {
        log.info("Reading up2eg2tax.map ...");
        Map<String, String> taxMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length != 3) {
                    throw new IOException("ERR: up2eg2tax.map not in expected format. \ncritical line: " + line);
                }
                taxMap.put(columns[0].trim().intern(), columns[2].trim().intern());
            }
        }
        this.id2tax = taxMap;
    }

    /**
     * Loads {@link #id2tax} from a gzipped, tab-separated file: the second column becomes the key
     * (gene id) and the first column the value (taxonomy id). Lines without a second column are
     * skipped; if a gene id occurs more than once the first mapping is kept.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void readEgTaxMap(File file) throws IOException {
        log.info("Reading file gene ID to taxonomy ID map from {}", file);
        Map<String, String> geneId2taxId = new HashMap<>();
        // Separate resources so the file stream is closed even if the gzip header is invalid.
        try (FileInputStream fileStream = new FileInputStream(file);
             GZIPInputStream gzipStream = new GZIPInputStream(fileStream);
             BufferedReader reader = new BufferedReader(new InputStreamReader(gzipStream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\\t", 3);
                if (columns.length < 2) {
                    log.warn("Skipping line without gene ID column in {}: {}", file, line);
                    continue;
                }
                geneId2taxId.putIfAbsent(columns[1].intern(), columns[0].intern());
            }
        }
        this.id2tax = geneId2taxId;
    }
}
