package de.charite.compbio.jannovar.impl.parse.refseq;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.UnmodifiableIterator;
import de.charite.compbio.jannovar.JannovarException;
import de.charite.compbio.jannovar.UncheckedJannovarException;
import de.charite.compbio.jannovar.data.ReferenceDictionary;
import de.charite.compbio.jannovar.datasource.TranscriptModelBuilderHGNCExtender;
import de.charite.compbio.jannovar.hgnc.AltGeneIDType;
import de.charite.compbio.jannovar.impl.parse.FASTAParser;
import de.charite.compbio.jannovar.impl.parse.FASTARecord;
import de.charite.compbio.jannovar.impl.parse.TranscriptParseException;
import de.charite.compbio.jannovar.impl.parse.TranscriptParser;
import de.charite.compbio.jannovar.impl.parse.gtfgff.FeatureRecord;
import de.charite.compbio.jannovar.impl.parse.gtfgff.GFFParser;
import de.charite.compbio.jannovar.impl.util.DNAUtils;
import de.charite.compbio.jannovar.impl.util.PathUtil;
import de.charite.compbio.jannovar.reference.GenomeInterval;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import de.charite.compbio.jannovar.reference.TranscriptModelBuilder;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.ini4j.Profile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

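/**
 * {@link TranscriptParser} implementation that builds {@link TranscriptModel}s from a GFF file via
 * {@link GFFParser}, extends them with HGNC information and assigns sequences from FASTA files.
 *
 * <p>Decompiled from: input_file:de/charite/compbio/jannovar/impl/parse/refseq/RefSeqParser.class
 */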
public class RefSeqParser implements TranscriptParser {
    /** Class-wide SLF4J logger; assigned in the static initializer of this class. */
    private static final Logger LOGGER;
    /** Reference dictionary passed to the constructor; used for contig lookups and for building genome intervals. */
    private ReferenceDictionary refDict;
    /** Contig name to numeric contig ID mapping, read from the reference dictionary at construction time. */
    private final ImmutableMap<String, Integer> contigDict;
    /** Path prefix onto which file names taken from the INI section are joined. */
    private String basePath;
    /** INI section that is queried for file-name keys ("gff", "rna", "faMT") and for boolean flags. */
    private Profile.Section iniSection;
    /** Identifiers used to filter the result of {@code run()}; {@code null} or empty keeps every transcript. */
    private final List<String> geneIdentifiers;
    /** Compiler-generated assertion switch; assigned in the static initializer of this class. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Creates a parser bound to a reference dictionary, a base path and an INI section.
     *
     * @param referenceDictionary dictionary whose contig-name-to-ID map is cached in this instance
     * @param str base path that INI-derived file names are joined onto
     * @param list identifiers used to filter the transcripts returned by {@link #run()};
     *     {@code null} or empty disables filtering
     * @param section INI section providing file names and flags
     */
    public RefSeqParser(ReferenceDictionary referenceDictionary, String str, List<String> list, Profile.Section section) {
        this.basePath = str;
        this.iniSection = section;
        this.geneIdentifiers = list;
        this.refDict = referenceDictionary;
        // Cache the lookup table once; it is consulted for every GFF record.
        this.contigDict = this.refDict.getContigNameToID();
    }

    /**
     * Loads transcript models from the configured GFF file, extends them with HGNC information,
     * assigns sequences from the configured FASTA files and returns the built models.
     *
     * <p>When gene identifiers were passed to the constructor, only transcripts whose accession,
     * gene ID or any alternative gene ID is among them are returned; otherwise all are returned.
     *
     * @return the built transcript models
     * @throws TranscriptParseException declared for problems while parsing the GFF/FASTA input
     * @throws UncheckedJannovarException if a {@link JannovarException} is raised inside the guarded section
     */
    @Override // de.charite.compbio.jannovar.impl.parse.TranscriptParser
    public ImmutableList<TranscriptModel> run() throws TranscriptParseException {
        Map<String, TranscriptModelBuilder> builders = loadTranscriptModels(PathUtil.join(this.basePath, getINIFileName("gff")));
        LOGGER.info("Assigning additional HGNC information to {} transcripts..", builders.size());
        // NOTE(review): this try block also spans the FASTA loading and model building below, so any
        // JannovarException raised there is reported with the HGNC-specific message. Kept as-is to
        // preserve the exception types callers observe; confirm whether a narrower scope is intended.
        try {
            new TranscriptModelBuilderHGNCExtender(
                this.basePath,
                hgncRecord -> Lists.newArrayList(hgncRecord.getEntrezID()),
                builder -> builder.getGeneID()).run(builders);

            // Fall back to the gene ID from the GFF when HGNC supplied no alternative gene IDs.
            for (TranscriptModelBuilder builder : builders.values()) {
                if (builder.getAltGeneIDs().isEmpty() && builder.getGeneID() != null) {
                    LOGGER.debug("Using UCSC Entrez ID {} for transcript {} as HGNC did not provide alternative gene ID", builder.getGeneID(), builder.getAccession());
                    builder.getAltGeneIDs().put(AltGeneIDType.ENTREZ_ID.toString(), builder.getGeneID());
                }
            }

            String rnaFastaPath = PathUtil.join(this.basePath, getINIFileName("rna"));
            loadMitochondrialFASTA(builders);
            loadFASTA(builders, rnaFastaPath);

            // Build the lookup set once instead of once per transcript. A HashSet tolerates null
            // look-ups, so a null alternative gene ID no longer aborts the filtering.
            final boolean keepAll = this.geneIdentifiers == null || this.geneIdentifiers.isEmpty();
            final HashSet<String> wanted = keepAll ? null : new HashSet<>(this.geneIdentifiers);

            ImmutableList.Builder<TranscriptModel> result = ImmutableList.builder();
            for (TranscriptModelBuilder builder : builders.values()) {
                final boolean selected = keepAll
                    || wanted.contains(builder.getAccession())
                    || wanted.contains(builder.getGeneID())
                    || builder.getAltGeneIDs().values().stream().anyMatch(wanted::contains);
                if (selected) {
                    result.add(builder.build());
                }
            }
            return result.build();
        } catch (JannovarException e) {
            throw new UncheckedJannovarException("Problem extending transcripts with HGNC information", e);
        }
    }

    /**
     * Assigns sequences to all transcripts located on contig "chrMT" by cutting them out of the
     * first record of the FASTA file configured under INI key "faMT".
     *
     * <p>The step is skipped (with a log message) when the reference has no "chrMT" contig, the INI
     * key is absent, or the file does not exist. Transcripts on the reverse strand receive the
     * reverse complement of the forward-strand slice.
     *
     * @param map transcript model builders to update in place
     * @throws TranscriptParseException if the FASTA file cannot be opened or read, or if a transcript
     *     region lies outside the sequence read from the file
     */
    private void loadMitochondrialFASTA(Map<String, TranscriptModelBuilder> map) throws TranscriptParseException {
        if (!this.refDict.getContigNameToID().containsKey("chrMT")) {
            LOGGER.info("The genome does not have a chrMT, skipping.");
            return;
        }
        if (!this.iniSection.containsKey("faMT")) {
            LOGGER.warn("Key for chrMT FASTA File does not exist, skipping.");
            return;
        }
        final String fastaPath = PathUtil.join(this.basePath, getINIFileName("faMT"));
        final File fastaFile = new File(fastaPath);
        if (!fastaFile.exists()) {
            LOGGER.warn("The chrMT FASTA File {} does not exist, skipping.", fastaPath);
            return;
        }
        final int mtContigId = this.refDict.getContigNameToID().get("chrMT");

        // Opening and reading are guarded separately so that each failure gets its own message;
        // previously both were inside one try and an open failure was reported as a read failure.
        // NOTE(review): the parser is never closed here - confirm whether FASTAParser needs closing.
        final FASTAParser fastaParser;
        try {
            fastaParser = new FASTAParser(fastaFile);
        } catch (IOException e) {
            throw new TranscriptParseException("Problem with opening FASTA file", e);
        }
        final FASTARecord firstRecord;
        try {
            firstRecord = fastaParser.next();
        } catch (IOException e) {
            throw new TranscriptParseException("Problem with reading FASTA file", e);
        }
        final String mtSequence = firstRecord != null ? firstRecord.getSequence() : "";

        int assigned = 0;
        for (TranscriptModelBuilder builder : map.values()) {
            if (builder.getTXRegion().getChr() != mtContigId) {
                continue;
            }
            final GenomeInterval forward = builder.getTXRegion().withStrand(Strand.FWD);
            final int begin = forward.getBeginPos();
            final int end = forward.getEndPos();
            // Report an empty/too short FASTA as a parse problem instead of a bare
            // StringIndexOutOfBoundsException from substring().
            if (begin < 0 || end < begin || end > mtSequence.length()) {
                throw new TranscriptParseException("chrMT transcript " + builder.getAccession() + " spans [" + begin + ", " + end
                    + ") but the sequence read from " + fastaPath + " has length " + mtSequence.length());
            }
            String txSequence = mtSequence.substring(begin, end);
            if (builder.getTXRegion().getStrand() == Strand.REV) {
                txSequence = DNAUtils.reverseComplement(txSequence);
            }
            builder.setSequence(txSequence);
            assigned++;
        }
        LOGGER.info("Successfully assigned sequence to {} chrMT transcripts.", assigned);
    }

    /**
     * Returns the bare file name configured under the given INI key: the last path component of
     * the configured value, truncated at the first '?' if there is one.
     *
     * @param str key to look up in the INI section
     * @return the file name without directory part and without anything from the first '?' on
     * @throws NullPointerException if the INI section has no value for {@code str}
     */
    private String getINIFileName(String str) {
        final String configured = Objects.requireNonNull(
            (String) this.iniSection.get(str), "No INI entry configured for key '" + str + "'");
        final String name = new File(configured).getName();
        // indexOf/substring instead of split("\\?")[0]: split drops trailing empty strings, so a
        // name consisting only of '?' characters yields an empty array and [0] would throw.
        final int queryStart = name.indexOf('?');
        return queryStart < 0 ? name : name.substring(0, queryStart);
    }

    /*
     * NOTE(review): the body of this method was NOT recovered by the decompiler. As it stands,
     * every call throws UnsupportedOperationException; run() calls it with the transcript model
     * builders and the path derived from INI key "rna", so run() cannot complete with this body.
     * Presumably the real implementation assigns sequences from that FASTA file to the builders -
     * confirm against the original bytecode or sources.
     *
     * Original decompiler diagnostics:
     *
     * JADX WARN: Code restructure failed: missing block: B:38:0x0115, code lost:
     *
     *     throw new java.lang.AssertionError();
     *
     * Code decompiled incorrectly, please refer to instructions dump.
     * To view partially-correct add '--show-bad-code' argument
     */
    private void loadFASTA(java.util.Map<java.lang.String, de.charite.compbio.jannovar.reference.TranscriptModelBuilder> r7, java.lang.String r8) throws de.charite.compbio.jannovar.impl.parse.TranscriptParseException {
        /*
            Method dump skipped, instructions count: 432
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in lieu of the real body.
        throw new UnsupportedOperationException("Method not decompiled: de.charite.compbio.jannovar.impl.parse.refseq.RefSeqParser.loadFASTA(java.util.Map, java.lang.String):void");
    }

    /**
     * Reads all feature records from the given GFF file and turns them into transcript model
     * builders keyed by transcript ID.
     *
     * <p>Per record: records whose {@code transcript_id} starts with "X" are skipped when the
     * "onlyCurated" flag is set; "gene" records feed the gene-name lookup tables; "exon", "CDS" and
     * "stop_codon" records with a {@code Parent} on a known contig create or extend the builder of
     * that parent; "cDNA_match" records add alignment parts to the builders of their target
     * transcript.
     *
     * @param str path of the GFF file
     * @return builders keyed by transcript ID, as produced by {@link #mapTranscriptIdsToTranscriptModels}
     * @throws TranscriptParseException if the file cannot be opened or parsed, or a cDNA_match
     *     record has a missing, malformed or non-'+' {@code Target}, or a length mismatch without
     *     a {@code Gap} attribute
     */
    private Map<String, TranscriptModelBuilder> loadTranscriptModels(String str) throws TranscriptParseException {
        LOGGER.info("Loading feature records");
        final GFFParser gffParser;
        try {
            gffParser = new GFFParser(new File(str));
        } catch (IOException e) {
            throw new TranscriptParseException("Problem opening GFF file", e);
        }

        final Map<String, TranscriptModelBuilder> buildersByParentId = new HashMap<>(200000);
        final Map<String, List<String>> parentIdsByTranscriptId = new HashMap<>();
        final Map<String, String> geneSymbolByGeneId = new HashMap<>();
        final Map<String, String> mtNameByGeneId = new HashMap<>();
        final ImmutableSet<String> transcriptFeatureTypes = ImmutableSet.of("exon", "CDS", "stop_codon");
        final boolean skipNonCurated = onlyCurated();
        int numRecords = 0;

        while (true) {
            final FeatureRecord record;
            try {
                record = gffParser.next();
            } catch (IOException e) {
                throw new TranscriptParseException("Problem parsing GFF file", e);
            }
            if (record == null) {
                break;
            }
            numRecords++;

            String transcriptId = (String) record.getAttributes().get("transcript_id");
            if (skipNonCurated && transcriptId != null && transcriptId.startsWith("X")) {
                LOGGER.debug("Skipping non-curated transcript {}", transcriptId);
                continue;
            }

            final boolean onChrMT = this.contigDict.containsKey("chrMT")
                && this.contigDict.get("chrMT").equals(this.contigDict.get(record.getSeqID()));

            if ("gene".equals(record.getType())) {
                recordGeneNames(record, onChrMT, geneSymbolByGeneId, mtNameByGeneId);
            }

            final String parentId = (String) record.getAttributes().get("Parent");
            if (parentId != null && this.contigDict.containsKey(record.getSeqID()) && transcriptFeatureTypes.contains(record.getType())) {
                TranscriptModelBuilder builder = buildersByParentId.get(parentId);
                if (builder != null) {
                    updateExonsTxRegionsCdsAndCdnaMatch(record, builder);
                } else {
                    // The builder is created with the transcript_id of the record; for chrMT the
                    // accession is replaced by the name collected from the gene record afterwards.
                    builder = createNewTranscriptModelBuilder(record, transcriptId, geneSymbolByGeneId);
                    buildersByParentId.put(parentId, builder);
                    if (onChrMT) {
                        if ("CDS".equals(record.getType())) {
                            // chrMT CDS: the CDS region doubles as transcript region and sole exon.
                            builder.setTXRegion(builder.getCDSRegion());
                            builder.addExonRegion(builder.getCDSRegion());
                            builder.getAltGeneIDs().put("protein_id", (String) record.getAttributes().get("protein_id"));
                        }
                        transcriptId = mtNameByGeneId.get(parseGeneID(record));
                        builder.setAccession(transcriptId);
                    }
                    updateTranscriptIdToParentIds(parentIdsByTranscriptId, transcriptId, parentId);
                }
                final String note = (String) record.getAttributes().get("Note");
                if (note != null) {
                    if (note.contains("substitution")) {
                        builder.setHasSubstitutions(true);
                    }
                    if (note.contains("indel")) {
                        builder.setHasIndels(true);
                    }
                }
            }

            if ("cDNA_match".equals(record.getType())) {
                attachCdnaMatch(record, buildersByParentId, parentIdsByTranscriptId);
            }
        }

        final Map<String, TranscriptModelBuilder> result = mapTranscriptIdsToTranscriptModels(buildersByParentId, parentIdsByTranscriptId);
        LOGGER.info("Parsed {} GFF records as {} TranscriptModels", numRecords, result.size());
        return result;
    }

    /**
     * Stores the names of a "gene" record: the {@code gene} attribute under its gene ID and, for
     * chrMT genes, the name later used as transcript accession (the last {@code gene_synonym}
     * starting with "MT", falling back to the {@code gene} attribute).
     */
    private void recordGeneNames(FeatureRecord record, boolean onChrMT, Map<String, String> geneSymbolByGeneId, Map<String, String> mtNameByGeneId) {
        final String geneId = parseGeneID(record);
        final String geneSymbol = (String) record.getAttributes().get("gene");
        geneSymbolByGeneId.put(geneId, geneSymbol);
        if (!onChrMT) {
            return;
        }
        String mtName = geneSymbol;
        final String synonyms = (String) record.getAttributes().get("gene_synonym");
        if (synonyms != null) {
            for (String synonym : synonyms.split(",")) {
                if (synonym.startsWith("MT")) {
                    mtName = synonym;
                }
            }
        }
        mtNameByGeneId.put(geneId, mtName);
    }

    /**
     * Parses the {@code Target} attribute of a "cDNA_match" record and adds the resulting alignment
     * part to every builder already registered for the target transcript; targets without a
     * registered builder are ignored.
     */
    private static void attachCdnaMatch(FeatureRecord record, Map<String, TranscriptModelBuilder> buildersByParentId, Map<String, List<String>> parentIdsByTranscriptId) throws TranscriptParseException {
        final String target = (String) record.getAttributes().get("Target");
        if (target == null) {
            throw new TranscriptParseException("Missing Target attribute for cDNA_match: " + record);
        }
        final String[] fields = target.split(" ");
        if (fields.length < 4) {
            throw new TranscriptParseException("Expected Target of the form 'id start end strand' for cDNA_match: " + record);
        }
        if (!"+".equals(fields[3])) {
            throw new TranscriptParseException("Can only handle Target on strand '+' for cDNA_match: " + record);
        }
        final String transcriptId = fields[0];
        final int txBegin;
        final int txEnd;
        try {
            // The start is shifted by one so that (txEnd - txBegin) is the aligned length.
            txBegin = Integer.parseInt(fields[1]) - 1;
            txEnd = Integer.parseInt(fields[2]);
        } catch (NumberFormatException e) {
            throw new TranscriptParseException("Non-numeric Target coordinates for cDNA_match: " + record, e);
        }
        final int refBegin = record.getBegin();
        final int refEnd = record.getEnd();
        final String gap = (String) record.getAttributes().get("Gap");
        if (refEnd - refBegin != txEnd - txBegin && gap == null) {
            throw new TranscriptParseException("ref len != tx len but no gap string: " + record);
        }
        final String alignment = gap == null ? "M" + (txEnd - txBegin) : gap;
        final List<String> parentIds = parentIdsByTranscriptId.get(transcriptId);
        if (parentIds == null) {
            return;
        }
        for (String parentId : parentIds) {
            buildersByParentId.get(parentId).getAlignmentParts().add(new TranscriptModelBuilder.AlignmentPart(refBegin, refEnd, txBegin, txEnd, alignment));
        }
    }

    /** Returns whether INI option "onlyCurated" is set to a value accepted by {@link #checkFlagInSection}. */
    private boolean onlyCurated() {
        final String configured = this.iniSection.fetch("onlyCurated");
        return checkFlagInSection(configured);
    }

    /** Returns whether INI option "preferPARTranscriptsOnChrX" is set to a value accepted by {@link #checkFlagInSection}. */
    private boolean preferPARTranscriptsOnChrX() {
        final String configured = this.iniSection.fetch("preferPARTranscriptsOnChrX");
        return checkFlagInSection(configured);
    }

    /** Returns whether INI option "allowNonCodingNm" is set to a value accepted by {@link #checkFlagInSection}. */
    private boolean allowNonCodingNm() {
        final String configured = this.iniSection.fetch("allowNonCodingNm");
        return checkFlagInSection(configured);
    }

    /**
     * Interprets an INI value as a boolean flag.
     *
     * @param str raw value, may be {@code null}
     * @return {@code true} iff the lower-cased value is exactly "true", "1" or "yes";
     *     {@code false} for {@code null} and everything else
     */
    private static boolean checkFlagInSection(String str) {
        if (str == null) {
            return false;
        }
        // A string switch replaces the per-call list allocation and raw-iterator scan.
        switch (str.toLowerCase()) {
            case "true":
            case "1":
            case "yes":
                return true;
            default:
                return false;
        }
    }

    /**
     * Registers {@code str2} as a parent ID of transcript {@code str}, creating the list on first
     * use and ignoring duplicates. Does nothing when the transcript ID is {@code null}.
     *
     * @param map transcript ID to parent IDs, updated in place
     * @param str transcript ID, may be {@code null}
     * @param str2 parent ID to register
     */
    private void updateTranscriptIdToParentIds(Map<String, List<String>> map, String str, String str2) {
        if (str == null) {
            return;
        }
        if (map.containsKey(str)) {
            final List<String> knownParents = map.get(str);
            if (!knownParents.contains(str2)) {
                LOGGER.debug("Adding new parentId {} for transcript {}", str2, str);
                knownParents.add(str2);
            }
        } else {
            map.put(str, Lists.newArrayList(str2));
        }
    }

    /**
     * Creates a builder for the transcript that the given feature record belongs to and applies
     * the record to it.
     *
     * @param featureRecord first record seen for the transcript
     * @param str transcript ID; used as accession and as the initial sequence value
     * @param map gene ID to gene symbol lookup, consulted when the record has no {@code gene} attribute
     * @return the initialised builder
     */
    private TranscriptModelBuilder createNewTranscriptModelBuilder(FeatureRecord featureRecord, String str, Map<String, String> map) {
        final TranscriptModelBuilder builder = new TranscriptModelBuilder();
        builder.setStrand(parseStrand(featureRecord));
        builder.setAccession(str);
        builder.setTxVersion((String) featureRecord.getAttributes().get("transcript_version"));

        final String geneId = parseGeneID(featureRecord);
        builder.setGeneID(geneId);

        String geneSymbol = (String) featureRecord.getAttributes().get("gene");
        if (geneSymbol == null && geneId != null) {
            // A missing key yields null, which leaves the symbol unset exactly as before.
            geneSymbol = map.get(geneId);
        }
        builder.setGeneSymbol(geneSymbol);

        builder.setSequence(str);
        updateExonsTxRegionsCdsAndCdnaMatch(featureRecord, builder);
        return builder;
    }

    /**
     * Extracts the gene ID from the {@code Dbxref} attribute of a record.
     *
     * <p>The attribute is split on ','; each entry is split once on ':' and the value of the first
     * entry whose key is "GeneID" is returned.
     *
     * @param featureRecord record to inspect
     * @return the gene ID, or {@code null} if the attribute is missing or has no "GeneID" entry
     */
    @Nullable
    private String parseGeneID(FeatureRecord featureRecord) {
        final String dbxref = (String) featureRecord.getAttributes().get("Dbxref");
        if (dbxref == null) {
            return null;
        }
        final Splitter keyValueSplitter = Splitter.on(':').limit(2);
        for (String entry : Splitter.on(',').split(dbxref)) {
            final List<String> keyAndValue = keyValueSplitter.splitToList(entry);
            if (keyAndValue.size() == 2 && "GeneID".equals(keyAndValue.get(0))) {
                return keyAndValue.get(1);
            }
        }
        return null;
    }

    /**
     * Applies one feature record to a builder: an "exon" extends the transcript region and is added
     * as exon region; a "CDS" or "stop_codon" extends the CDS region; other types are ignored.
     *
     * @param featureRecord record to apply
     * @param transcriptModelBuilder builder to update in place
     */
    private void updateExonsTxRegionsCdsAndCdnaMatch(FeatureRecord featureRecord, TranscriptModelBuilder transcriptModelBuilder) {
        final Strand strand = parseStrand(featureRecord);
        if (featureRecord.getType().equals("exon")) {
            final GenomeInterval exon = buildGenomeInterval(featureRecord, strand);
            final GenomeInterval txRegion = updateGenomeInterval(exon, transcriptModelBuilder.getTXRegion());
            transcriptModelBuilder.setTXRegion(txRegion);
            transcriptModelBuilder.addExonRegion(exon);
            return;
        }
        final boolean codingFeature = "CDS".equals(featureRecord.getType()) || "stop_codon".equals(featureRecord.getType());
        if (!codingFeature) {
            return;
        }
        final GenomeInterval coding = buildGenomeInterval(featureRecord, strand);
        transcriptModelBuilder.setCDSRegion(updateGenomeInterval(coding, transcriptModelBuilder.getCDSRegion()));
    }

    /**
     * Merges a new interval into an optional existing one.
     *
     * @param genomeInterval interval to add
     * @param genomeInterval2 interval accumulated so far, may be {@code null}
     * @return {@code genomeInterval} if nothing was accumulated yet, otherwise the union of both
     */
    private GenomeInterval updateGenomeInterval(GenomeInterval genomeInterval, GenomeInterval genomeInterval2) {
        if (genomeInterval2 != null) {
            return genomeInterval2.union(genomeInterval);
        }
        return genomeInterval;
    }

    /**
     * Maps the strand of a feature record to a {@link Strand}.
     *
     * @return {@link Strand#FWD} for {@code FeatureRecord.Strand.FORWARD}, {@link Strand#REV} for
     *     every other value
     */
    private Strand parseStrand(FeatureRecord featureRecord) {
        if (featureRecord.getStrand() == FeatureRecord.Strand.FORWARD) {
            return Strand.FWD;
        }
        return Strand.REV;
    }

    /**
     * Builds the genome interval covered by a feature record, constructed on the forward strand
     * from the record's begin/end and then converted to the requested strand.
     *
     * @param featureRecord record supplying contig name, begin and end
     * @param strand strand of the returned interval
     * @return the interval on {@code strand}
     */
    private GenomeInterval buildGenomeInterval(FeatureRecord featureRecord, Strand strand) {
        // Unboxing fails for contigs missing from the dictionary, as in the one-line form.
        final int contigId = this.contigDict.get(featureRecord.getSeqID());
        final GenomeInterval forward = new GenomeInterval(this.refDict, Strand.FWD, contigId, featureRecord.getBegin(), featureRecord.getEnd());
        return forward.withStrand(strand);
    }

    /**
     * Converts builders keyed by parent ID into builders keyed by transcript ID: first drops
     * builders without transcript region, then picks one builder per transcript ID.
     *
     * @param map builders keyed by parent ID
     * @param map2 transcript ID to parent IDs
     * @return builders keyed by transcript ID
     */
    private Map<String, TranscriptModelBuilder> mapTranscriptIdsToTranscriptModels(Map<String, TranscriptModelBuilder> map, Map<String, List<String>> map2) {
        final Map<String, TranscriptModelBuilder> withTxRegion = mapParentIdsToTranscriptModelsWithTxRegion(map);
        return mapNonRedundantTranscriptIdsToTranscriptModelBuilder(withTxRegion, map2);
    }

    /**
     * Copies all builders that have a transcript region into a new map. Builders without CDS
     * region are checked with {@link #failIfCdsRegionExpected} and then given a CDS region built
     * from the transcript region's begin position and length 0.
     *
     * @param map builders keyed by parent ID
     * @return new map holding only the builders that have a transcript region
     */
    private Map<String, TranscriptModelBuilder> mapParentIdsToTranscriptModelsWithTxRegion(Map<String, TranscriptModelBuilder> map) {
        final Map<String, TranscriptModelBuilder> kept = new HashMap<>(map.size());
        for (Map.Entry<String, TranscriptModelBuilder> entry : map.entrySet()) {
            final String parentId = entry.getKey();
            final TranscriptModelBuilder builder = entry.getValue();
            final GenomeInterval txRegion = builder.getTXRegion();
            if (txRegion == null) {
                LOGGER.debug("No transcript region for {}; skipping", parentId);
                continue;
            }
            if (builder.getCDSRegion() == null) {
                failIfCdsRegionExpected(builder);
                builder.setCDSRegion(new GenomeInterval(txRegion.getGenomeBeginPos(), 0));
            }
            kept.put(parentId, builder);
        }
        return kept;
    }

    /**
     * Reacts to a builder without CDS region whose accession starts with "NM_" or "XM_": logs a
     * warning when the "allowNonCodingNm" flag is set, otherwise fails. Other accessions (including
     * {@code null}) are accepted silently.
     *
     * @param transcriptModelBuilder builder that has no CDS region
     * @throws IllegalStateException if the accession has a coding prefix and the flag is not set
     */
    private void failIfCdsRegionExpected(TranscriptModelBuilder transcriptModelBuilder) {
        final String accession = transcriptModelBuilder.getAccession();
        final boolean codingAccession = accession != null && (accession.startsWith("NM_") || accession.startsWith("XM_"));
        if (!codingAccession) {
            return;
        }
        final String message = "No CDS region found for coding transcript '" + accession + "'.";
        if (allowNonCodingNm()) {
            LOGGER.warn(message);
        } else {
            throw new IllegalStateException(message);
        }
    }

    /**
     * Selects exactly one builder per transcript ID.
     *
     * <p>A transcript with one usable parent keeps that parent's builder. With several, if the
     * first two lie on contig IDs 23 and 24 the choice follows the "preferPARTranscriptsOnChrX"
     * flag; otherwise the builder with the most cDNA matches wins. Parent IDs that have no builder
     * in {@code map} are ignored, and a transcript without any usable parent is left out of the
     * result rather than mapped to {@code null}.
     *
     * @param map builders keyed by parent ID
     * @param map2 transcript ID to parent IDs
     * @return builders keyed by transcript ID; contains no {@code null} values
     */
    private Map<String, TranscriptModelBuilder> mapNonRedundantTranscriptIdsToTranscriptModelBuilder(Map<String, TranscriptModelBuilder> map, Map<String, List<String>> map2) {
        final Map<String, TranscriptModelBuilder> selected = new HashMap<>(map.size());
        final boolean preferX = preferPARTranscriptsOnChrX();
        if (preferX) {
            LOGGER.info("Pseudoautosomal transcripts will be assigned to X chromosome");
        }
        int duplicated = 0;
        for (Map.Entry<String, List<String>> entry : map2.entrySet()) {
            final String transcriptId = entry.getKey();
            final List<String> parentIds = entry.getValue();
            if (parentIds.isEmpty()) {
                continue;
            }
            // Parents may be absent from `map` (e.g. dropped for lacking a transcript region);
            // looking them up blindly produced null builders and NullPointerExceptions later on.
            final List<TranscriptModelBuilder> candidates = parentIds.stream()
                .map(map::get)
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
            if (candidates.isEmpty()) {
                LOGGER.debug("No usable transcript model for transcript {}; skipping", transcriptId);
                continue;
            }
            if (candidates.size() == 1) {
                selected.put(transcriptId, candidates.get(0));
                continue;
            }
            final TranscriptModelBuilder first = candidates.get(0);
            final TranscriptModelBuilder second = candidates.get(1);
            // NOTE(review): 23/24 are presumably the contig IDs of chrX/chrY - confirm against the
            // reference dictionary; only the first two candidates are inspected.
            if (first.getTXRegion().getChr() == 23 && second.getTXRegion().getChr() == 24) {
                final TranscriptModelBuilder chosen = preferX ? first : second;
                LOGGER.info("Assigning pseudoautosomal gene {} transcript {} to chromsome {}", chosen.getGeneSymbol(), chosen.getAccession(), chosen.getTXRegion().getChr());
                selected.put(transcriptId, chosen);
            } else {
                duplicated++;
                LOGGER.warn("Transcript {} has {} possible transcript models - picking best by cDNA_match", transcriptId, candidates.size());
                selected.put(transcriptId, findBestModelByCdnaMatch(candidates));
            }
        }
        if (duplicated != 0) {
            LOGGER.warn("{} duplicated transcript ids", duplicated);
        }
        return selected;
    }

    /**
     * Counts how many alignment parts coincide with the exon at the same position in the builder's
     * exon list. The i-th alignment part is compared with the i-th exon (on the forward strand);
     * alignment parts beyond the number of exons are ignored.
     *
     * @param transcriptModelBuilder builder whose alignment parts and exons are compared
     * @return number of positions where reference begin/end of the part equal begin/end of the exon
     */
    private static int countCdnaMatches(TranscriptModelBuilder transcriptModelBuilder) {
        int matches = 0;
        int exonIndex = 0;
        for (TranscriptModelBuilder.AlignmentPart part : transcriptModelBuilder.getAlignmentParts()) {
            if (exonIndex >= transcriptModelBuilder.getExonRegions().size()) {
                continue;
            }
            final GenomeInterval exon = transcriptModelBuilder.getExonRegions().get(exonIndex).withStrand(Strand.FWD);
            // Order the part's reference coordinates so that lower <= upper before comparing.
            final int lower = Math.min(part.refBeginPos, part.refEndPos);
            final int upper = Math.max(part.refBeginPos, part.refEndPos);
            if (exon.getBeginPos() == lower && exon.getEndPos() == upper) {
                matches++;
            }
            exonIndex++;
        }
        return matches;
    }

    /**
     * Returns the builder with the highest {@link #countCdnaMatches} score; on ties the earliest
     * one in the list wins.
     *
     * @param list candidate builders, must not be empty
     * @return the best-scoring builder
     */
    private TranscriptModelBuilder findBestModelByCdnaMatch(List<TranscriptModelBuilder> list) {
        TranscriptModelBuilder best = list.get(0);
        int bestScore = countCdnaMatches(best);
        for (int index = 1; index < list.size(); index++) {
            final TranscriptModelBuilder candidate = list.get(index);
            final int score = countCdnaMatches(candidate);
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
        }
        return best;
    }

    // Static initializer: assigns the two static final fields declared at the top of the class.
    static {
        // Assertions are disabled when the class's desired assertion status is false.
        $assertionsDisabled = !RefSeqParser.class.desiredAssertionStatus();
        LOGGER = LoggerFactory.getLogger(RefSeqParser.class);
    }
}
