package net.maizegenetics.analysis.rna;

import java.awt.Frame;
import java.io.BufferedReader;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import javax.swing.ImageIcon;
import net.maizegenetics.analysis.gbs.Barcode;
import net.maizegenetics.analysis.gbs.v2.BarcodeTrie;
import net.maizegenetics.analysis.gbs.v2.GBSEnzyme;
import net.maizegenetics.analysis.gbs.v2.GBSUtils;
import net.maizegenetics.dna.BaseEncoder;
import net.maizegenetics.dna.tag.Tag;
import net.maizegenetics.dna.tag.TagBuilder;
import net.maizegenetics.dna.tag.TagDataSQLite;
import net.maizegenetics.dna.tag.TaxaDistBuilder;
import net.maizegenetics.dna.tag.TaxaDistribution;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.PluginParameter;
import net.maizegenetics.prefs.TasselPrefs;
import net.maizegenetics.taxa.TaxaList;
import net.maizegenetics.taxa.TaxaListIOUtils;
import net.maizegenetics.taxa.Taxon;
import net.maizegenetics.util.DirectoryCrawler;
import net.maizegenetics.util.Utils;
import org.apache.log4j.Logger;

/* loaded from: input_file:net/maizegenetics/analysis/rna/RNADeMultiPlexSeqToDBPlugin.class */
public class RNADeMultiPlexSeqToDBPlugin extends AbstractPlugin {
    // ---- User-settable plugin parameters (each is built in the constructors) ----
    // Directory that is searched for FASTQ files.
    private PluginParameter<String> myInputDir;
    // Key file listing the barcodes that distinguish the samples.
    private PluginParameter<String> myKeyFile;
    // Reads shorter than this (after adapter trimming) are counted as short and skipped.
    private PluginParameter<Integer> myMinKmerLength;
    // Kmers whose total depth is below this value are removed before the database is written.
    private PluginParameter<Integer> myMinKmerCount;
    // Path of the SQLite database file that processData creates.
    private PluginParameter<String> myOutputDB;
    // Minimum base quality; a value <= 0 disables the quality filter in processFastQ.
    private PluginParameter<Integer> myMinQualScore;
    // Glob used to select FASTQ files below the input directory.
    static final String inputFileGlob = "glob:*{.fq,fq.gz,fastq,fastq.txt,fastq.gz,fastq.txt.gz,_sequence.txt,_sequence.txt.gz}";
    // Key-file column used as the taxon name when the key file is read.
    static final String sampleNameField = "FullSampleName";
    // Key-file column names; not referenced elsewhere in this class.
    static final String flowcellField = "Flowcell";
    static final String laneField = "Lane";
    static final String barcodeField = "Barcode";
    // Shared kmer -> taxa-distribution map; re-created at the start of every processData call.
    private static TagDistributionMap tagCntMap;
    // Set by setTagLenException() when a worker hits a StringIndexOutOfBoundsException;
    // checked by processData after each batch.
    private static boolean taglenException;
    private static final Logger myLogger = Logger.getLogger(RNADeMultiPlexSeqToDBPlugin.class);
    // Approximate number of distinct tags: incremented on each new insertion and
    // re-synchronised with the map size after every batch.
    static LongAdder roughTagCnt = new LongAdder();
    // Enzyme name handed to GBSEnzyme when the barcode trie is built.
    private static String myEnzyme = "ignore";
    // Used to size the tag map and as the denominator in progress reporting.
    private static Integer myMaxKmerNumber = 50000000;
    // Number of FASTQ files processed (in parallel) per batch.
    private static Integer myBatchSize = 8;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:net/maizegenetics/analysis/rna/RNADeMultiPlexSeqToDBPlugin$TagDistributionMap.class */
    /**
     * Concurrent map from a kmer {@link Tag} to the {@link TaxaDistribution} that
     * records how often the kmer was seen in each taxon.
     */
    public static class TagDistributionMap extends ConcurrentHashMap<Tag, TaxaDistribution> {
        private final int maxTagNum;
        private int minDepthToRetainInMap;
        private final int minCount;

        /**
         * @param maxTagNum        expected maximum number of tags; the map is pre-sized to twice this
         * @param loadFactor       load factor passed to {@link ConcurrentHashMap}
         * @param concurrencyLevel concurrency level passed to {@link ConcurrentHashMap}
         * @param minCount         minimum kmer count configured for the run
         */
        TagDistributionMap(int maxTagNum, float loadFactor, int concurrencyLevel, int minCount) {
            super(maxTagNum * 2, loadFactor, concurrencyLevel);
            this.minDepthToRetainInMap = 2;
            this.maxTagNum = maxTagNum;
            this.minCount = minCount;
        }

        /** Stores the distribution for a tag; delegates directly to {@link ConcurrentHashMap#put}. */
        @Override // java.util.concurrent.ConcurrentHashMap, java.util.AbstractMap, java.util.Map
        public TaxaDistribution put(Tag tag, TaxaDistribution taxaDistribution) {
            return super.put(tag, taxaDistribution);
        }

        /**
         * Removes every tag whose total depth is strictly below the given threshold.
         *
         * @param i minimum total depth a tag needs in order to be kept
         */
        public synchronized void removeTagByCount(int i) {
            entrySet().parallelStream()
                    .filter(entry -> entry.getValue().totalDepth() < i)
                    .forEach(entry -> remove(entry.getKey()));
        }

        /**
         * Estimates the memory footprint of the map by sampling at most 10,001 entries
         * and scaling the sampled size up to the full map size.
         *
         * @return estimated size (same unit as {@code TaxaDistribution.memorySize()}),
         *         or 0 when the map is empty
         */
        public long estimateMapMemorySize() {
            long sampledBytes = 0;
            int sampled = 0;
            for (Map.Entry<Tag, TaxaDistribution> entry : entrySet()) {
                // 25 and 16 are the fixed per-entry overheads used by the original estimate.
                sampledBytes += 25 + 16 + entry.getValue().memorySize();
                sampled++;
                if (sampled > 10000) {
                    break;
                }
            }
            if (sampled == 0) {
                // Empty map: nothing to scale, and dividing by the sample count would throw.
                return 0;
            }
            // Multiply before dividing so the scale factor is not truncated to a whole number.
            return sampledBytes * size() / sampled;
        }

        /**
         * Builds a histogram of tag depths in base-2 bins: bin {@code k} counts the tags
         * whose total depth {@code d} satisfies {@code 2^k <= d < 2^(k+1)}.
         *
         * @return array of 34 bin counts
         */
        public long[] depthDistribution() {
            long[] bins = new long[34];
            for (Map.Entry<Tag, TaxaDistribution> entry : entrySet()) {
                int depth = entry.getValue().totalDepth();
                if (depth <= 0) {
                    // numberOfLeadingZeros(0) is 32, which would index bin -1.
                    continue;
                }
                bins[31 - Integer.numberOfLeadingZeros(depth)]++;
            }
            return bins;
        }
    }

    /**
     * Reads every FASTQ file below the input directory in batches, accumulates kmer
     * counts per taxon in the shared tag map, drops kmers below the minimum count and
     * writes the taxa list and the remaining tags to a freshly created SQLite database.
     *
     * <p>An existing file at the output database path is deleted first. Failures are
     * reported (log / standard output) rather than thrown.
     *
     * @param dataSet not used
     * @return always {@code null}
     */
    @Override // net.maizegenetics.plugindef.AbstractPlugin, net.maizegenetics.plugindef.Plugin
    public DataSet processData(DataSet dataSet) {
        tagCntMap = new TagDistributionMap(myMaxKmerNumber.intValue(), 0.95f, 128, minKmerCount().intValue());
        try {
            Path keyPath = Paths.get(keyFile()).toAbsolutePath();
            List<Path> fastqPaths = DirectoryCrawler.listPaths(inputFileGlob, Paths.get(inputDirectory()).toAbsolutePath());
            if (fastqPaths.isEmpty()) {
                myLogger.warn("No files matching:" + inputFileGlob);
                System.out.println("RNADeMultiPlex - no files matching " + inputFileGlob);
                return null;
            }
            final int batchSize = myBatchSize.intValue();
            final int batchCount = (fastqPaths.size() + batchSize - 1) / batchSize;
            final int minQuality = minimumQualityScore().intValue();
            final int minLength = minimumKmerLength().intValue();
            TaxaList masterTaxaList = TaxaListIOUtils.readTaxaAnnotationFile(keyFile(), sampleNameField, new HashMap<>(), true);

            Path dbPath = Paths.get(outputDatabaseFile());
            if (Files.exists(dbPath)) {
                try {
                    Files.delete(dbPath);
                } catch (Exception deleteFailure) {
                    System.out.println("Error when trying to delete database file: " + outputDatabaseFile());
                    System.out.println("File delete error: " + deleteFailure.getMessage());
                    return null;
                }
            }

            TagDataSQLite tagData = new TagDataSQLite(outputDatabaseFile());
            try {
                taglenException = false;
                for (int start = 0; start < fastqPaths.size(); start += batchSize) {
                    int end = Math.min(start + batchSize, fastqPaths.size());
                    int batchNumber = (start / batchSize) + 1;
                    List<Path> batch = fastqPaths.subList(start, end);

                    System.out.println("\nStart processing batch " + batchNumber);
                    batch.parallelStream().forEach(fastq -> {
                        try {
                            processFastQFile(masterTaxaList, keyPath, fastq, myEnzyme, minQuality, minLength, tagCntMap);
                        } catch (StringIndexOutOfBoundsException tagLengthFailure) {
                            myLogger.error(tagLengthFailure.getMessage(), tagLengthFailure);
                            setTagLenException();
                        }
                    });
                    if (taglenException) {
                        // Abort the run; the finally block still closes the database.
                        return null;
                    }

                    System.out.println("\nKmers are added from batch " + batchNumber + ". Total batch number: " + batchCount);
                    int currentSize = tagCntMap.size();
                    System.out.println("Current number: " + currentSize + ". Max kmer number: " + myMaxKmerNumber);
                    // Floating-point division: an integer quotient would print 0 until the cap is reached.
                    System.out.println(((float) currentSize / myMaxKmerNumber.intValue()) + " of max tag number");
                    if (currentSize > 0) {
                        calcTagMapStats(tagCntMap);
                        System.out.println();
                        roughTagCnt.reset();
                        roughTagCnt.add(tagCntMap.size());
                    } else {
                        System.out.println("WARNING: Current tagcntmap size is 0 after processing batch " + batchNumber);
                    }
                    Runtime runtime = Runtime.getRuntime();
                    System.out.println("Total memory: " + (((runtime.totalMemory() / 1024) / 1024) / 1024) + " Gb");
                    System.out.println("Free memory: " + (((runtime.freeMemory() / 1024) / 1024) / 1024) + " Gb");
                    System.out.println("Max memory: " + (((runtime.maxMemory() / 1024) / 1024) / 1024) + " Gb");
                    System.out.println("\n");
                }
                System.out.println("\nAll the batch are processed");
                tagCntMap.removeTagByCount(minKmerCount().intValue());
                System.out.println("By removing kmers with minCount of " + minKmerCount() + " Kmer number is reduced to " + tagCntMap.size() + "\n");
                tagData.putTaxaList(masterTaxaList);
                tagData.putAllTag(tagCntMap.keySet());
            } finally {
                // Release the database connection on every exit path, not only on success.
                tagData.close();
            }
            return null;
        } catch (Exception failure) {
            myLogger.error("RNADeMultiPlexSeqToDBPlugin: processing failed: " + failure.getMessage(), failure);
            return null;
        }
    }

    /**
     * Computes and prints summary statistics for the tag map.
     *
     * @param tagDistributionMap map to summarise
     * @return {@code {number of tags, estimated memory, total depth, average depth per tag}};
     *         the average is 0 for an empty map
     */
    private long[] calcTagMapStats(TagDistributionMap tagDistributionMap) {
        long totalDepth = 0;
        long memory = 0;
        int entries = 0;
        for (Map.Entry<Tag, TaxaDistribution> entry : tagDistributionMap.entrySet()) {
            memory += entry.getValue().memorySize() + 25;
            totalDepth += entry.getValue().totalDepth();
            entries++;
        }
        long tagCount = tagDistributionMap.size();
        // Long arithmetic: 32 bytes per entry overflows an int above ~67 million tags.
        long estimatedMemory = memory + (tagCount * 2L * 16L);
        // Guard the division so an empty map does not throw ArithmeticException.
        long averageDepth = entries == 0 ? 0 : totalDepth / entries;
        long[] stats = {tagCount, estimatedMemory, totalDepth, averageDepth};
        System.out.printf("Map Tags:%,d  Memory:%,d  TotalDepth:%,d  AvgDepthPerTag:%d%n", stats[0], stats[1], stats[2], stats[3]);
        return stats;
    }

    /**
     * Loads the taxa listed in the key file, builds a barcode trie from them and
     * de-multiplexes one FASTQ file into the tag map. Does nothing when the key
     * file yields no taxa.
     *
     * @param taxaList           master taxa list
     * @param path               key file
     * @param path2              FASTQ file to process
     * @param str                enzyme name passed to {@link GBSEnzyme}
     * @param i                  minimum quality score
     * @param i2                 minimum kmer length
     * @param tagDistributionMap map receiving the tag counts
     * @throws StringIndexOutOfBoundsException propagated unchanged from {@code processFastQ}
     */
    private static void processFastQFile(TaxaList taxaList, Path path, Path path2, String str, int i, int i2, TagDistributionMap tagDistributionMap) throws StringIndexOutOfBoundsException {
        String keyFilePath = path.toAbsolutePath().toString();
        ArrayList<Taxon> keyFileTaxa = TaxaListIOUtils.readTaxaAnnotationFileAL(keyFilePath, "FullSampleName", new HashMap<>());
        if (!keyFileTaxa.isEmpty()) {
            GBSEnzyme enzyme = new GBSEnzyme(str);
            BarcodeTrie barcodes = GBSUtils.initializeBarcodeTrie(keyFileTaxa, taxaList, enzyme);
            processFastQ(path2, barcodes, taxaList, tagDistributionMap, i, i2);
        }
    }

    /**
     * De-multiplexes one FASTQ file: each read is matched to a barcode, trimmed at the
     * adapter sequence, filtered by length and quality, converted to a {@link Tag} and
     * counted for the barcode's taxon in the tag map. A per-file summary is logged at the end.
     *
     * @param path               FASTQ file to read
     * @param barcodeTrie        trie used to look up the barcode of each read
     * @param taxaList           master taxa list (its size sizes new distributions)
     * @param tagDistributionMap shared map receiving the counts
     * @param i                  minimum quality score; values {@code <= 0} disable the quality filter
     * @param i2                 minimum read length after trimming
     * @throws StringIndexOutOfBoundsException rethrown so the caller can abort the run;
     *         every other exception is logged and ends processing of this file
     */
    private static void processFastQ(Path path, BarcodeTrie barcodeTrie, TaxaList taxaList, TagDistributionMap tagDistributionMap, int i, int i2) throws StringIndexOutOfBoundsException {
        int allReads = 0;
        int goodBarcodedReads = 0;
        int lowQualityReads = 0;
        int badBarcodeReads = 0;
        int nullTags = 0;
        int shortReads = 0;
        int taxaCount = taxaList.size();
        myLogger.info("processing file " + path.toString());
        try {
            int qualityScoreBase = GBSUtils.determineQualityScoreBase(path);
            // try-with-resources: the reader is closed even when a read fails part-way through.
            try (BufferedReader reader = Utils.getBufferedReader(path.toString(), 4194304)) {
                long startNanos = System.nanoTime();
                String[] block;
                while ((block = GBSUtils.readDeMultiPlexFastQBlock(reader, allReads)) != null) {
                    allReads++;
                    Barcode barcode = barcodeTrie.longestPrefix(block[2] + block[0]);
                    if (barcode == null) {
                        System.out.println("BC not found: " + block[0]);
                        badBarcodeReads++;
                        continue;
                    }
                    String sequence = block[0];
                    int adapterIndex = sequence.indexOf("AGATCGGA");
                    if (adapterIndex > 0) {
                        // NOTE(review): this also drops the base immediately before the adapter,
                        // and an adapter at index 0 is not trimmed - confirm both are intended.
                        sequence = sequence.substring(0, adapterIndex - 1);
                    }
                    if (sequence.length() < i2) {
                        shortReads++;
                        continue;
                    }
                    if (i > 0 && BaseEncoder.getFirstLowQualityPos(block[1], i, qualityScoreBase) < sequence.length()) {
                        lowQualityReads++;
                        continue;
                    }
                    Tag tag = TagBuilder.instance(sequence).build();
                    if (tag == null) {
                        nullTags++;
                        continue;
                    }
                    goodBarcodedReads++;
                    int taxaIndex = barcode.getTaxaIndex();
                    TaxaDistribution existing = tagDistributionMap.get(tag);
                    if (existing == null) {
                        // Files are processed in parallel on the same map: putIfAbsent keeps the
                        // first insertion so a concurrent insert of the same tag is not overwritten.
                        existing = tagDistributionMap.putIfAbsent(tag, TaxaDistBuilder.create(taxaCount, taxaIndex));
                    }
                    if (existing == null) {
                        roughTagCnt.increment();
                    } else {
                        existing.increment(taxaIndex);
                    }
                    if (allReads % 10000000 == 0) {
                        myLogger.info("Total Reads:" + allReads + " Reads with barcode and cut site overhang:" + goodBarcodedReads + " rate:" + ((System.nanoTime() - startNanos) / allReads) + " ns/read. Current tag count:" + roughTagCnt);
                    }
                }
                myLogger.info("Summary for " + path.toString() + "\nTotal number of reads in lane=" + allReads + "\nTotal number of good barcoded reads=" + goodBarcodedReads + "\nTotal number of low quality reads=" + lowQualityReads + "\nTotal number of short reads=" + shortReads + "\nTotal number of bad or no barcode found=" + badBarcodeReads + "\nTotal number of null tags created=" + nullTags + "\nTiming process (sorting, collapsing, and writing TagCount to file).\nProcess took " + ((System.nanoTime() - startNanos) / 1000000.0d) + " milliseconds.");
            }
        } catch (StringIndexOutOfBoundsException e) {
            // Must bypass the generic handler below so the caller can flag the tag-length failure.
            throw e;
        } catch (Exception e) {
            myLogger.error("Good Barcodes Read: " + goodBarcodedReads, e);
        }
    }

    /** Flags that a tag-length failure occurred so that {@code processData} stops after the current batch. */
    public static void setTagLenException() {
        RNADeMultiPlexSeqToDBPlugin.taglenException = true;
    }

    /** Creates a non-interactive plugin without a parent frame. */
    public RNADeMultiPlexSeqToDBPlugin() {
        this(null, false);
    }

    /**
     * Creates a non-interactive plugin.
     *
     * @param frame parent frame, may be {@code null}
     */
    public RNADeMultiPlexSeqToDBPlugin(Frame frame) {
        this(frame, false);
    }

    /**
     * Creates the plugin and builds all of its parameters with their defaults.
     * The other constructors delegate here so the parameter definitions exist only once.
     *
     * @param frame parent frame, may be {@code null}
     * @param z     whether the plugin runs interactively
     */
    public RNADeMultiPlexSeqToDBPlugin(Frame frame, boolean z) {
        super(frame, z);
        this.myInputDir = new PluginParameter.Builder<>("i", null, String.class).guiName("Input Directory").required(true).inDir().description("Input directory containing FASTQ files in text or gzipped text.\n     NOTE: Directory will be searched recursively and should\n     be written WITHOUT a slash after its name.").build();
        this.myKeyFile = new PluginParameter.Builder<>("k", null, String.class).guiName("Key File").required(true).inFile().description("Key file listing barcodes distinguishing the samples").build();
        this.myMinKmerLength = new PluginParameter.Builder<>("minKmerL", 20, Integer.class).guiName("Minimum Kmer Length").description("Minimum kmer Length after second cut site is removed").build();
        this.myMinKmerCount = new PluginParameter.Builder<>("c", 10, Integer.class).guiName("Min Kmer Count").description("Minimum kmer count").build();
        this.myOutputDB = new PluginParameter.Builder<>(TasselPrefs.GOBII_DB, null, String.class).guiName("Output Database File").required(true).outFile().description("Output Database File").build();
        this.myMinQualScore = new PluginParameter.Builder<>("mnQS", 0, Integer.class).guiName("Minimum quality score").required(false).description("Minimum quality score within the barcode and read length to be accepted").build();
    }

    /**
     * This plugin supplies no icon.
     *
     * @return always {@code null}
     */
    @Override // net.maizegenetics.plugindef.Plugin
    public ImageIcon getIcon() {
        return null;
    }

    /**
     * This plugin supplies no button name.
     *
     * @return always {@code null}
     */
    @Override // net.maizegenetics.plugindef.Plugin
    public String getButtonName() {
        return null;
    }

    /**
     * This plugin supplies no tool-tip text.
     *
     * @return always {@code null}
     */
    @Override // net.maizegenetics.plugindef.Plugin
    public String getToolTipText() {
        return null;
    }

    /**
     * @return the directory that is searched for FASTQ files
     */
    public String inputDirectory() {
        return myInputDir.value();
    }

    /**
     * Sets the directory that is searched for FASTQ files.
     *
     * @param str input directory
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin inputDirectory(String str) {
        myInputDir = new PluginParameter<>(myInputDir, str);
        return this;
    }

    /**
     * @return the path of the barcode key file
     */
    public String keyFile() {
        return myKeyFile.value();
    }

    /**
     * Sets the path of the barcode key file.
     *
     * @param str key file path
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin keyFile(String str) {
        myKeyFile = new PluginParameter<>(myKeyFile, str);
        return this;
    }

    /**
     * @return the minimum read length accepted after adapter trimming
     */
    public Integer minimumKmerLength() {
        return myMinKmerLength.value();
    }

    /**
     * Sets the minimum read length accepted after adapter trimming.
     *
     * @param num minimum kmer length
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin minimumKmerLength(Integer num) {
        myMinKmerLength = new PluginParameter<>(myMinKmerLength, num);
        return this;
    }

    /**
     * @return the minimum total depth a kmer needs in order to be written to the database
     */
    public Integer minKmerCount() {
        return myMinKmerCount.value();
    }

    /**
     * Sets the minimum total depth a kmer needs in order to be written to the database.
     *
     * @param num minimum kmer count
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin minKmerCount(Integer num) {
        myMinKmerCount = new PluginParameter<>(myMinKmerCount, num);
        return this;
    }

    /**
     * @return the path of the output database file
     */
    public String outputDatabaseFile() {
        return myOutputDB.value();
    }

    /**
     * Sets the path of the output database file.
     *
     * @param str database file path
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin outputDatabaseFile(String str) {
        myOutputDB = new PluginParameter<>(myOutputDB, str);
        return this;
    }

    /**
     * @return the minimum quality score; values {@code <= 0} disable the quality filter
     */
    public Integer minimumQualityScore() {
        return myMinQualScore.value();
    }

    /**
     * Sets the minimum quality score.
     *
     * @param num minimum quality score
     * @return this plugin, for call chaining
     */
    public RNADeMultiPlexSeqToDBPlugin minimumQualityScore(Integer num) {
        myMinQualScore = new PluginParameter<>(myMinQualScore, num);
        return this;
    }
}
