package de.julielab.jules.ae.genemapping.resources;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jules.ae.genemapping.resources.ncbigene.GeneXMLUtils;
import de.julielab.jules.ae.genemapping.utils.ReaderInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jules/ae/genemapping/resources/GeneXMLDownloader.class */
public class GeneXMLDownloader {
    // Tool name identifying this program in the "tool" parameter of E-utilities request URLs.
    private static final String TOOL_NAME = "JulieLabGeneXMLDownloader";
    private static final Logger log = LoggerFactory.getLogger(GeneXMLDownloader.class);
    // Base URL the esearch.fcgi / efetch.fcgi request URLs are built from. Public and non-final, so it can be reassigned.
    public static String EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
    // Created and started lazily when the first download is about to happen; stays null if nothing is downloaded.
    private static DownloadRestricter downloadRestricter;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/julielab/jules/ae/genemapping/resources/GeneXMLDownloader$DownloadHandle.class */
    /**
     * Plain value holder for the result of a search request: the number of hits plus the
     * two tokens that are later sent as {@code query_key} and {@code WebEnv} when the
     * records are fetched.
     */
    public static class DownloadHandle {
        /** Number of entries in the search result. */
        int count;
        /** Value passed on as the {@code query_key} request parameter. */
        String queryKey;
        /** Value passed on as the {@code WebEnv} request parameter. */
        String webEnv;

        /** Instances are only created by the enclosing class. */
        private DownloadHandle() {
        }

        /** Renders all three fields for log output. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("DownloadHandle [count=");
            text.append(count);
            text.append(", queryKey=").append(queryKey);
            text.append(", webEnv=").append(webEnv);
            return text.append("]").toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/julielab/jules/ae/genemapping/resources/GeneXMLDownloader$DownloadRestricter.class */
    /**
     * Background thread that hands out "request tickets" through a bounded queue of
     * capacity 3. Callers block in {@link #waitForTicket()} until a ticket is available.
     * The thread keeps refilling the queue and sleeps for one second whenever the queue
     * is full; it terminates when it is interrupted during that sleep.
     */
    public static class DownloadRestricter extends Thread {
        /** Bounded ticket store; the capacity caps how many tickets can be pending at once. */
        private final ArrayBlockingQueue<Object> tickets = new ArrayBlockingQueue<>(3);

        /**
         * Refills the ticket queue until the thread is interrupted.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            while (true) {
                try {
                    // offer() does not block: a full queue means no ticket was consumed, so pause.
                    if (!this.tickets.offer(new Object())) {
                        Thread.sleep(1000L);
                    }
                } catch (InterruptedException e) {
                    GeneXMLDownloader.log.info("Download restricter has finished execution.");
                    return;
                }
            }
        }

        /**
         * Blocks until a ticket can be taken from the queue. If the calling thread is
         * interrupted while waiting, the method returns without a ticket and the thread's
         * interrupt status is restored so that the caller can still observe it.
         */
        public void waitForTicket() {
            try {
                GeneXMLDownloader.log.trace("Awaiting a free request ticket.");
                this.tickets.take();
                GeneXMLDownloader.log.trace("Ticket acquired.");
            } catch (InterruptedException e) {
                // Do not lose the interruption: re-set the flag instead of only printing a stack trace.
                Thread.currentThread().interrupt();
                GeneXMLDownloader.log.warn("Interrupted while waiting for a request ticket.", e);
            }
        }
    }

    /**
     * Command line entry point. For every taxonomy ID that still lacks gene meta
     * information, the gene XML is either downloaded from the E-utilities or read from
     * previously stored XML files, and the extracted information is written to the meta
     * files in the storage directory.
     *
     * <p>NOTE(review): this method was reconstructed from decompiled code in which the
     * output streams of meta files 1-3 had been collapsed into a single variable, so the
     * original wiring is not recoverable from it. The wiring used here is derived by
     * elimination and must be confirmed against {@code GeneXMLUtils}: the taxonomy IDs go
     * to meta file 2 (as its log message says), and the {@code GeneXMLUtils} write methods
     * receive the streams of meta files 0, 1 and 3, in that order.
     *
     * @param strArr {@code <taxonomy ids file> <storage directory> <eMail address>
     *               [gene_info file] [XML directory]}
     * @throws IllegalArgumentException if the third argument does not look like an eMail address
     * @throws Exception                if preparing or merging the meta files fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 3) {
            System.err.println("Usage: " + GeneXMLDownloader.class.getSimpleName() + " <taxonomy ids file> <extracted information storage directory> <eMail address for eUtils> [gene_info file] [XML directory]");
            System.err.println("If XML directory is given but empty, XML files with the downloaded batches will be stored there for later use. If the files already exist, no download will happen but the existing files will be read.");
            System.err.println("If the gene_info file is given, its creation date will be compared to the oldest XML file in the XML directory, if not empty. If the gene_info file is newer than the existing XML files, the files will be refreshed by downloading them.");
            System.exit(0);
        }
        final File taxIdsInputFile = new File(strArr[0]);
        final File storageDir = new File(strArr[1]);
        final String email = strArr[2];
        final File geneInfoFile = (strArr.length > 3 && !StringUtils.isBlank(strArr[3])) ? new File(strArr[3]) : null;
        final File xmlDir = (strArr.length > 4 && !StringUtils.isBlank(strArr[4])) ? new File(strArr[4]) : null;
        if (!email.contains("@") || !email.contains(".")) {
            throw new IllegalArgumentException(email + " does not appear to be a valid eMail address. Please provide a valid eMail address for the eUtils. NCBI would like to be able to contact you for the case that the script goes wild which should not be the case if you don't alter it.");
        }
        log.info("Taxonomy ID file: {}", taxIdsInputFile);
        log.info("Storage directory for created resource files: {}", storageDir);
        log.info("gene_info file to compare timestamp to (optional): {}", geneInfoFile);
        log.info("XML directory to store/read XML from to/from (optional): {}", xmlDir);

        final List<File> metaFiles = GeneXMLUtils.getMetaFiles(storageDir);
        final File summariesFile = metaFiles.get(0);
        final File proteinNamesFile = metaFiles.get(1);
        final File downloadedTaxIdsFile = metaFiles.get(2);
        final File fourthMetaFile = metaFiles.get(3);
        if (!storageDir.exists()) {
            log.info("Creating directory {}", storageDir);
            storageDir.mkdirs();
        }

        // The gene_info argument is optional: an omitted file is treated exactly like a
        // non-existing one instead of failing with a NullPointerException.
        // NOTE(review): confirm that GeneXMLUtils.determineMissingTaxIds accepts a null gene_info file.
        final boolean discardExistingMetaFiles = geneInfoFile == null || !geneInfoFile.exists()
                || downloadedTaxIdsFile.lastModified() < geneInfoFile.lastModified();
        final Set<String> missingTaxIds = GeneXMLUtils.determineMissingTaxIds(taxIdsInputFile, storageDir, geneInfoFile, downloadedTaxIdsFile);
        moveMetaFilesToTemporaryLocation(downloadedTaxIdsFile, metaFiles, discardExistingMetaFiles);
        System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism", "5");
        try {
            try (BufferedOutputStream summariesOut = FileUtilities.getOutputStreamToFile(summariesFile);
                 BufferedOutputStream proteinNamesOut = FileUtilities.getOutputStreamToFile(proteinNamesFile);
                 BufferedOutputStream downloadedTaxIdsOut = FileUtilities.getOutputStreamToFile(downloadedTaxIdsFile);
                 BufferedOutputStream fourthMetaOut = FileUtilities.getOutputStreamToFile(fourthMetaFile)) {
                log.info("Writing summaries into file {}", summariesFile);
                log.info("Writing protein names into file {}", proteinNamesFile);
                log.info("Writing taxonomy IDs were gene meta information has been downloaded for into file {}", downloadedTaxIdsFile);
                missingTaxIds.parallelStream().forEach(taxId -> processTaxonomyId(taxId, email, storageDir, geneInfoFile,
                        xmlDir, summariesOut, proteinNamesOut, fourthMetaOut, downloadedTaxIdsOut));
            }
            // Merge only after the streams above are closed: they are buffered, and the merge
            // reads and then replaces the very files they write to.
            mergeGeneMetaFiles(storageDir, metaFiles);
        } finally {
            stopDownloadRestricter();
        }
    }

    /**
     * Handles one taxonomy ID: downloads its gene XML or reads stored XML files, then
     * records the ID as done. I/O and XML errors are logged and end the processing of this
     * ID only; the ID is then not recorded as done.
     */
    private static void processTaxonomyId(String taxId, String email, File storageDir, File geneInfoFile, File xmlDir,
            BufferedOutputStream summariesOut, BufferedOutputStream proteinNamesOut, BufferedOutputStream fourthMetaOut,
            BufferedOutputStream downloadedTaxIdsOut) {
        log.debug("Processing taxonomy ID {}", taxId);
        try {
            boolean geneInfoIsNewer = isGeneInfoNewerThanXmlFiles(xmlDir, taxId, geneInfoFile);
            File[] storedXmlFiles = getXmlFilesInDirectoryForTaxId(xmlDir, taxId);
            if (xmlDir != null) {
                log.info("Found {} XML files for organism with taxonomy ID {} in directory {}", storedXmlFiles.length, taxId, xmlDir);
            }
            if (storedXmlFiles.length == 0 || geneInfoIsNewer) {
                downloadGeneXml(taxId, email, storageDir, xmlDir, geneInfoIsNewer, summariesOut, proteinNamesOut, fourthMetaOut);
            } else {
                readStoredGeneXml(xmlDir, storedXmlFiles, summariesOut, proteinNamesOut, fourthMetaOut);
            }
            log.info("Done extracting gene data from XML and writing result files for taxonomy ID {}.", taxId);
            IOUtils.write(taxId + "\n", downloadedTaxIdsOut, "UTF-8");
        } catch (IOException | XMLStreamException e) {
            log.error("Processing of taxonomy ID {} failed.", taxId, e);
        }
    }

    /**
     * Requests a download handle for the taxonomy ID and fetches the gene XML in batches of
     * 500 records. If an XML directory is given, each batch is also stored there gzipped.
     */
    private static void downloadGeneXml(String taxId, String email, File storageDir, File xmlDir, boolean geneInfoIsNewer,
            BufferedOutputStream summariesOut, BufferedOutputStream proteinNamesOut, BufferedOutputStream fourthMetaOut)
            throws IOException, XMLStreamException {
        final DownloadRestricter restricter = startedDownloadRestricter();
        // This method is only entered when a download is due, so batches are stored whenever a directory is given.
        final boolean storeXml = xmlDir != null;
        if (geneInfoIsNewer) {
            clearXmlFilesForTaxId(xmlDir, taxId);
        }
        if (storeXml && !xmlDir.exists()) {
            log.info("XML directory {} does not exist and is created.", xmlDir);
            xmlDir.mkdirs();
        }
        URL searchUrl = new URL(EUTILS + "esearch.fcgi?db=gene&retmax=1&usehistory=y&term=" + taxId + "[taxid]+AND+alive[properties]&tool=" + TOOL_NAME + "&email=" + email);
        log.trace("Request for download handle: {}", searchUrl);
        restricter.waitForTicket();
        DownloadHandle handle;
        try (InputStream searchResponse = searchUrl.openConnection().getInputStream()) {
            log.debug("Contacting E-Utils for download of XML gene information for taxonomy ID {}...", taxId);
            handle = readDownloadHandleXml(searchResponse);
        }
        log.debug("Got a download handle for a search result of {} entries for taxonomy ID {}", handle.count, taxId);
        if (handle.count == 0) {
            log.debug("Did not receive any entries for taxonomy ID {}. This could point to an error or just no available entries. This taxonomy ID is skipped. The request URL was {}", taxId, searchUrl);
            return;
        }
        if (storeXml) {
            log.info("Downloading Gene XML data to {}. This will take a few hours.", xmlDir);
        }
        log.info("Gene meta information will be written into directory {}", storageDir);
        final int batchSize = 500;
        for (int start = 0; start < handle.count; start += batchSize) {
            // Kept as in the stored file names of earlier runs: the label is capped at count, not count - 1.
            int batchEnd = Math.min((start + batchSize) - 1, handle.count);
            log.debug("Downloading gene XML records for taxonomy ID {}: {}", taxId, start + " - " + batchEnd);
            String fetchRequest = String.format(EUTILS + "efetch.fcgi?rettype=xml&retmode=text&retstart=%s&retmax=%s&db=gene&query_key=%s&WebEnv=%s&tool=%s&email=%s", start, batchSize, handle.queryKey, handle.webEnv, TOOL_NAME, email);
            log.trace("Request URL: {}", fetchRequest);
            URL fetchUrl = new URL(fetchRequest);
            restricter.waitForTicket();
            log.debug("Reading stream response and parsing the respective XML (this is the download step and will take a while)");
            try (InputStream fetchResponse = fetchUrl.openStream()) {
                InputStream geneXml = fetchResponse;
                if (storeXml) {
                    try {
                        // Buffer the batch so it can be written to disk and parsed afterwards.
                        String batchXml = IOUtils.toString(fetchResponse, "UTF-8");
                        File batchFile = new File(xmlDir.getAbsolutePath() + File.separator + "genes-taxid" + taxId + "-" + start + "-" + batchEnd + ".xml.gz");
                        try (GZIPOutputStream gzipOut = new GZIPOutputStream(new FileOutputStream(batchFile))) {
                            IOUtils.copy(new StringReader(batchXml), gzipOut, "UTF-8");
                        }
                        geneXml = new ReaderInputStream(new StringReader(batchXml), "UTF-8");
                    } catch (Exception e) {
                        throw new RuntimeException("Error while getting XML data for taxonomy ID " + taxId + " from NCBI eUtils.", e);
                    }
                }
                GeneXMLUtils.extractAndWriteGeneInfoToFile(summariesOut, proteinNamesOut, fourthMetaOut, geneXml);
            }
        }
    }

    /**
     * Extracts the gene information from already stored gzipped XML files and writes it to
     * the meta files. Files that cannot be read or written are logged and skipped.
     */
    private static void readStoredGeneXml(File xmlDir, File[] storedXmlFiles, BufferedOutputStream summariesOut,
            BufferedOutputStream proteinNamesOut, BufferedOutputStream fourthMetaOut) {
        log.info("Reading existing gene XML data from {}", xmlDir);
        Stream.of(storedXmlFiles).parallel().map(xmlFile -> {
            try (GZIPInputStream gzipIn = new GZIPInputStream(new FileInputStream(xmlFile))) {
                log.trace("Reading XML file {}", xmlFile);
                return GeneXMLUtils.extractGeneInfoFromXml(gzipIn);
            } catch (IOException | XMLStreamException e) {
                log.error("Could not read gene XML file {}", xmlFile, e);
                return null;
            }
        }).filter(geneInfo -> geneInfo != null).forEach(geneInfo -> {
            try {
                GeneXMLUtils.writeGeneInfoToFile(geneInfo, summariesOut, proteinNamesOut, fourthMetaOut);
            } catch (IOException e) {
                log.error("Could not write extracted gene information.", e);
            }
        });
    }

    /** Returns the shared download restricter, creating and starting it on first use. */
    private static synchronized DownloadRestricter startedDownloadRestricter() {
        if (null == downloadRestricter) {
            downloadRestricter = new DownloadRestricter();
            downloadRestricter.start();
        }
        return downloadRestricter;
    }

    /** Stops the download restricter thread, if one was ever started, and waits for it to end. */
    private static void stopDownloadRestricter() throws InterruptedException {
        // The restricter only exists if at least one download was attempted.
        if (downloadRestricter != null) {
            log.info("Interrupting download restriction thread.");
            downloadRestricter.interrupt();
            downloadRestricter.join();
        }
        log.info("Extraction of data from Gene XML complete.");
    }

    /**
     * Clears the way for a new run. Nothing happens unless {@code file} exists. Otherwise
     * each meta file is deleted when {@code z} is set (or when it does not exist anyway),
     * and moved to a temporary sibling otherwise.
     *
     * <p>The temporary name is the meta file path with {@code .gz}/{@code .gzip} removed and
     * {@code .tmp.gz} appended, which is the name {@code mergeGeneMetaFiles} looks for. The
     * suffix previously lacked the leading dot, so moved files were never found again.
     *
     * @param file marker file whose existence indicates that earlier results are present
     * @param list the meta files to delete or move
     * @param z    {@code true} to delete existing meta files instead of keeping them
     * @throws IOException if a meta file cannot be moved
     */
    private static void moveMetaFilesToTemporaryLocation(File file, List<File> list, boolean z) throws IOException {
        if (!file.exists()) {
            return;
        }
        for (File metaFile : list) {
            if (z || !metaFile.exists()) {
                metaFile.delete();
            } else {
                File temporaryFile = new File(metaFile.getAbsolutePath().replaceAll("\\.gz|\\.gzip", "") + ".tmp.gz");
                log.trace("Moving existing file {} to temporary file {}", metaFile, temporaryFile);
                Files.move(metaFile.toPath(), temporaryFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
            }
        }
    }

    /**
     * For each meta file that has a temporary sibling ({@code <path without .gz>.tmp.gz}),
     * writes the temporary file's content followed by the meta file's content into
     * {@code merging.tmp.gz} inside {@code file}, moves that result over the meta file and
     * deletes the temporary sibling. Meta files without a temporary sibling are left alone.
     *
     * @param file directory in which the intermediate merge file is created
     * @param list the meta files to merge
     * @throws IOException if reading, writing or moving a file fails
     */
    private static void mergeGeneMetaFiles(File file, List<File> list) throws IOException {
        for (File metaFile : list) {
            File temporaryFile = new File(metaFile.getAbsolutePath().replaceAll("\\.gz|\\.gzip", "") + ".tmp.gz");
            if (!temporaryFile.exists()) {
                continue;
            }
            File mergeFile = new File(file.getAbsolutePath() + File.separator + "merging.tmp.gz");
            log.trace("Merging temporary file {} into main file {}", temporaryFile, metaFile);
            // try-with-resources closes the writer (and each reader, see appendFile) on failure too.
            try (BufferedWriter mergedWriter = FileUtilities.getWriterToFile(mergeFile)) {
                appendFile(temporaryFile, mergedWriter);
                appendFile(metaFile, mergedWriter);
            }
            // The writer is closed at this point, so the merge file is complete before it replaces the meta file.
            Files.move(mergeFile.toPath(), metaFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
            temporaryFile.delete();
        }
    }

    /** Copies the whole character content of {@code source} to {@code target}; closes only the reader. */
    private static void appendFile(File source, BufferedWriter target) throws IOException {
        try (BufferedReader reader = FileUtilities.getReaderFromFile(source)) {
            char[] buffer = new char[2048];
            int charsRead;
            while ((charsRead = reader.read(buffer)) != -1) {
                target.write(buffer, 0, charsRead);
            }
        }
    }

    /**
     * Deletes every stored XML file that belongs to the given taxonomy ID.
     *
     * @param file directory holding the XML files
     * @param str  taxonomy ID whose files are removed
     */
    private static void clearXmlFilesForTaxId(File file, String str) {
        File[] obsoleteFiles = getXmlFilesInDirectoryForTaxId(file, str);
        log.debug("Deleting {} XML files in directory {}", obsoleteFiles.length, file);
        // The outcome of the individual deletions is not evaluated.
        Arrays.stream(obsoleteFiles).forEach(File::delete);
    }

    /**
     * Tells whether the gene_info file was modified after the oldest stored XML file of a
     * taxonomy ID.
     *
     * @param file  directory holding the XML files; may be {@code null}
     * @param str   taxonomy ID whose XML files are inspected
     * @param file2 gene_info file; may be {@code null}
     * @return {@code false} if the directory is {@code null} or missing, if there is no
     *         gene_info file, or if no XML file exists for the taxonomy ID; otherwise
     *         whether gene_info is newer than the oldest XML file
     */
    private static boolean isGeneInfoNewerThanXmlFiles(File file, String str, File file2) {
        boolean comparisonPossible = file != null && file.exists() && file2 != null;
        if (!comparisonPossible) {
            return false;
        }
        // Starts at the maximum so that an empty file list can never make gene_info "newer".
        long oldestXmlTimestamp = Long.MAX_VALUE;
        for (File xmlFile : getXmlFilesInDirectoryForTaxId(file, str)) {
            oldestXmlTimestamp = Math.min(oldestXmlTimestamp, xmlFile.lastModified());
        }
        boolean geneInfoIsNewer = oldestXmlTimestamp < file2.lastModified();
        String relation = geneInfoIsNewer ? "newer" : "older";
        log.debug("gene_info file at {} is {} than the oldest XML file for taxonomy ID {} in {}", file2, relation, str, file);
        return geneInfoIsNewer;
    }

    /**
     * Lists the stored XML batch files of one taxonomy ID, ordered by the numeric third
     * dash-separated part of the file name (the batch start in names such as
     * {@code genes-taxid9606-0-499.xml.gz}).
     *
     * @param file directory to search; may be {@code null}
     * @param str  taxonomy ID
     * @return the matching files in batch order; an empty array if the directory is
     *         {@code null}, does not exist or cannot be listed
     */
    private static File[] getXmlFilesInDirectoryForTaxId(File file, final String str) {
        if (file == null || !file.exists()) {
            return new File[0];
        }
        // The trailing dash terminates the ID: without it, ID 9 would also match the files of ID 9606.
        final String taxIdMarker = "taxid" + str + "-";
        File[] matches = file.listFiles((directory, name) -> {
            // Locale.ROOT keeps the lower-casing independent of the platform's default locale.
            String lowerCaseName = name.toLowerCase(java.util.Locale.ROOT);
            return lowerCaseName.contains(taxIdMarker) && lowerCaseName.endsWith("xml.gz");
        });
        if (matches == null) {
            return new File[0];
        }
        Arrays.sort(matches, Comparator.comparingInt((File xmlFile) -> Integer.parseInt(xmlFile.getName().split("-")[2])));
        return matches;
    }

    /**
     * Reads a search response and collects the values needed to fetch the result later:
     * the hit count, the query key and the web environment token.
     *
     * <p>NOTE(review): the decompiler could not restore this method's body. It has been
     * re-implemented from the decompiler's hints (a three-way switch on the element name
     * that assigns {@code count}, {@code queryKey} and {@code webEnv} from the element
     * text, with {@code count} only assigned while still unset) and from the publicly
     * documented eSearch XML element names {@code Count}, {@code QueryKey} and
     * {@code WebEnv}. Confirm against the original source if available.
     *
     * @param xmlStream the XML response; it is read but not closed by this method
     * @return a handle holding the values found; fields whose element is absent keep their defaults
     * @throws XMLStreamException    if the response is not well-formed XML
     * @throws NumberFormatException if the first {@code Count} element is not an integer
     */
    private static DownloadHandle readDownloadHandleXml(InputStream xmlStream) throws XMLStreamException {
        javax.xml.stream.XMLInputFactory factory = javax.xml.stream.XMLInputFactory.newInstance();
        // The response comes from the network; never resolve external entities it may reference.
        factory.setProperty(javax.xml.stream.XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        javax.xml.stream.XMLStreamReader reader = factory.createXMLStreamReader(xmlStream);
        DownloadHandle handle = new DownloadHandle();
        // Only the first Count element is used; presumably later ones are per-term counts
        // nested deeper in the response (TODO confirm against the eSearch output format).
        boolean countSeen = false;
        try {
            while (reader.hasNext()) {
                if (reader.next() != javax.xml.stream.XMLStreamConstants.START_ELEMENT) {
                    continue;
                }
                switch (reader.getLocalName()) {
                    case "Count":
                        if (!countSeen) {
                            handle.count = Integer.parseInt(reader.getElementText());
                            countSeen = true;
                        }
                        break;
                    case "QueryKey":
                        handle.queryKey = reader.getElementText();
                        break;
                    case "WebEnv":
                        handle.webEnv = reader.getElementText();
                        break;
                    default:
                        break;
                }
            }
        } finally {
            // Releases the parser; the underlying stream remains the caller's responsibility.
            reader.close();
        }
        return handle;
    }
}
