package de.charite.compbio.jannovar.impl.parse.refseq;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.UnmodifiableIterator;
import de.charite.compbio.jannovar.JannovarException;
import de.charite.compbio.jannovar.UncheckedJannovarException;
import de.charite.compbio.jannovar.data.ReferenceDictionary;
import de.charite.compbio.jannovar.datasource.TranscriptModelBuilderHGNCExtender;
import de.charite.compbio.jannovar.hgnc.AltGeneIDType;
import de.charite.compbio.jannovar.impl.parse.TranscriptParseException;
import de.charite.compbio.jannovar.impl.parse.TranscriptParser;
import de.charite.compbio.jannovar.impl.parse.gtfgff.FeatureRecord;
import de.charite.compbio.jannovar.impl.parse.gtfgff.GFFParser;
import de.charite.compbio.jannovar.impl.util.PathUtil;
import de.charite.compbio.jannovar.reference.GenomeInterval;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import de.charite.compbio.jannovar.reference.TranscriptModelBuilder;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.ini4j.Profile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/charite/compbio/jannovar/impl/parse/refseq/RefSeqParser.class */
public class RefSeqParser implements TranscriptParser {
    /** Class logger; assigned in the static initializer at the bottom of this class. */
    private static final Logger LOGGER;
    /**
     * Feature type names assigned in the static initializer. NOTE(review): no method visible in
     * this class reads this constant - confirm whether it is still needed.
     */
    private static final ImmutableSet<String> TX_LEVEL_FEATURE_TYPES;
    /** Reference dictionary passed to every {@link GenomeInterval} built from a feature record. */
    private ReferenceDictionary refDict;
    /** Contig name to numeric contig ID, taken from {@link #refDict} in the constructor. */
    private final ImmutableMap<String, Integer> contigDict;
    /** Directory that the GFF and FASTA file names are joined onto. */
    private String basePath;
    /** INI section providing the "gff"/"rna" entries and the boolean flags read by this parser. */
    private Profile.Section iniSection;
    /** Identifiers used by run() to restrict the output; {@code null} or empty keeps every transcript. */
    private final List<String> geneIdentifiers;
    // Compiler-generated flag backing Java `assert` statements (surfaced by the decompiler).
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Creates a parser bound to one RefSeq data source.
     *
     * @param referenceDictionary reference dictionary; also the source of the contig-name-to-ID map
     * @param str                 base path that the GFF and FASTA file names are joined onto
     * @param list                identifiers used by {@link #run()} to restrict the result;
     *                            {@code null} or an empty list keeps every transcript
     * @param section             INI section holding the "gff" and "rna" entries and the boolean flags
     */
    public RefSeqParser(ReferenceDictionary referenceDictionary, String str, List<String> list, Profile.Section section) {
        this.basePath = str;
        this.iniSection = section;
        this.geneIdentifiers = list;
        this.refDict = referenceDictionary;
        this.contigDict = this.refDict.getContigNameToID();
    }

    /**
     * Loads the transcript models from the configured GFF file, augments them with HGNC
     * information, attaches sequences via {@code loadFASTA} and builds the final list.
     *
     * <p>When gene identifiers were supplied to the constructor, only transcripts whose accession,
     * gene ID or one of the alternative gene IDs equals one of those identifiers are returned.
     *
     * @return the built transcript models
     * @throws TranscriptParseException   if loading the GFF or FASTA file fails
     * @throws UncheckedJannovarException if extending the builders with HGNC information fails
     */
    @Override // de.charite.compbio.jannovar.impl.parse.TranscriptParser
    public ImmutableList<TranscriptModel> run() throws TranscriptParseException {
        Map<String, TranscriptModelBuilder> builders = loadTranscriptModels(PathUtil.join(this.basePath, getINIFileName("gff")));

        LOGGER.info("Assigning additional HGNC information to {} transcripts..", Integer.valueOf(builders.size()));
        // Only the HGNC step is wrapped: failures from later steps (e.g. loading the FASTA file)
        // must surface as themselves instead of being reported as an HGNC problem.
        try {
            new TranscriptModelBuilderHGNCExtender(
                this.basePath,
                hgncRecord -> Lists.newArrayList(hgncRecord.getEntrezID()),
                txBuilder -> txBuilder.getGeneID()).run(builders);
        } catch (JannovarException e) {
            throw new UncheckedJannovarException("Problem extending transcripts with HGNC information", e);
        }

        // Fall back to the gene ID parsed from the GFF when HGNC supplied no alternative IDs.
        for (TranscriptModelBuilder txBuilder : builders.values()) {
            if (txBuilder.getAltGeneIDs().isEmpty() && txBuilder.getGeneID() != null) {
                LOGGER.debug("Using UCSC Entrez ID {} for transcript {} as HGNC did not provide alternative gene ID", txBuilder.getGeneID(), txBuilder.getAccession());
                txBuilder.getAltGeneIDs().put(AltGeneIDType.ENTREZ_ID.toString(), txBuilder.getGeneID());
            }
        }

        loadFASTA(builders, PathUtil.join(this.basePath, getINIFileName("rna")));

        // The identifier set is loop-invariant, so it is built once rather than per transcript.
        final boolean keepAll = this.geneIdentifiers == null || this.geneIdentifiers.isEmpty();
        final Collection<String> wanted = keepAll ? new HashSet<String>() : new HashSet<String>(this.geneIdentifiers);

        ImmutableList.Builder<TranscriptModel> result = new ImmutableList.Builder<>();
        for (TranscriptModelBuilder txBuilder : builders.values()) {
            if (keepAll || matchesGeneIdentifier(txBuilder, wanted)) {
                result.add(txBuilder.build());
            }
        }
        return result.build();
    }

    /** Returns true if the accession, gene ID or any alternative gene ID of the builder is in {@code wanted}. */
    private static boolean matchesGeneIdentifier(TranscriptModelBuilder txBuilder, Collection<String> wanted) {
        if (wanted.contains(txBuilder.getAccession()) || wanted.contains(txBuilder.getGeneID())) {
            return true;
        }
        for (String altGeneId : txBuilder.getAltGeneIDs().values()) {
            if (wanted.contains(altGeneId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks up the given key in the INI section and returns only the last path component of the
     * configured value.
     *
     * @param str key within the INI section (this class uses "gff" and "rna")
     * @return the file name part of the configured value
     */
    private String getINIFileName(String str) {
        String configuredValue = this.iniSection.get(str);
        File configuredFile = new File(configuredValue);
        return configuredFile.getName();
    }

    /* JADX WARN: Code restructure failed: missing block: B:19:0x0093, code lost:
    
        throw new java.lang.AssertionError();
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the FASTA loading step: the decompiler could not recover this method's body,
     * so as written it unconditionally throws {@link UnsupportedOperationException}.
     *
     * <p>run() calls it with the transcript builders keyed by transcript ID and the path built from
     * the "rna" INI entry. NOTE(review): presumably the real implementation assigns each builder
     * its sequence from that file - restore it from the original sources before using this class.
     *
     * @param r7 transcript builders (first argument passed by run())
     * @param r8 path of the file named by the "rna" INI entry (second argument passed by run())
     * @throws de.charite.compbio.jannovar.impl.parse.TranscriptParseException declared by the
     *         signature; never thrown by the placeholder body
     */
    private void loadFASTA(java.util.Map<java.lang.String, de.charite.compbio.jannovar.reference.TranscriptModelBuilder> r7, java.lang.String r8) throws de.charite.compbio.jannovar.impl.parse.TranscriptParseException {
        /*
            Method dump skipped, instructions count: 302
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: de.charite.compbio.jannovar.impl.parse.refseq.RefSeqParser.loadFASTA(java.util.Map, java.lang.String):void");
    }

    /**
     * Reads the GFF file record by record and collects one {@link TranscriptModelBuilder} per
     * {@code Parent} attribute value, then reduces them to one builder per transcript ID.
     *
     * <p>Only "exon", "CDS" and "stop_codon" records on contigs known to the reference dictionary
     * and carrying a {@code Parent} attribute are used. When the "onlyCurated" flag is set, records
     * without a {@code transcript_id} or whose ID starts with "X" are skipped.
     *
     * @param str path of the GFF file
     * @return transcript builders keyed by transcript ID
     * @throws TranscriptParseException if the file cannot be opened or a record cannot be read
     */
    private Map<String, TranscriptModelBuilder> loadTranscriptModels(String str) throws TranscriptParseException {
        LOGGER.info("Loading feature records");

        GFFParser gffParser;
        try {
            gffParser = new GFFParser(new File(str));
        } catch (IOException e) {
            throw new TranscriptParseException("Problem opening GFF file", e);
        }

        // Typed maps: with raw types the builder lookup below yields Object and does not compile.
        Map<String, TranscriptModelBuilder> parentIdToBuilder = new HashMap<>(200000);
        // Default capacity: sizing this map from the (still empty) map above requested capacity 0.
        Map<String, List<String>> transcriptIdToParentIds = new HashMap<>();
        ImmutableSet<String> usedFeatureTypes = ImmutableSet.of("exon", "CDS", "stop_codon");
        boolean curatedOnly = onlyCurated();

        int recordCount = 0;
        try {
            for (FeatureRecord record = gffParser.next(); record != null; record = gffParser.next()) {
                recordCount++;
                String transcriptId = record.getAttributes().get("transcript_id");
                if (curatedOnly && (transcriptId == null || transcriptId.startsWith("X"))) {
                    LOGGER.debug("Skipping non-curated transcript {}", transcriptId);
                    continue;
                }

                String parentId = record.getAttributes().get("Parent");
                if (parentId == null || !this.contigDict.containsKey(record.getSeqID()) || !usedFeatureTypes.contains(record.getType())) {
                    continue;
                }

                // Builders are never stored as null, so a null lookup means "first record of this parent".
                TranscriptModelBuilder existing = parentIdToBuilder.get(parentId);
                if (existing != null) {
                    updateExonsTxRegionsAndCds(record, existing);
                } else {
                    parentIdToBuilder.put(parentId, createNewTranscriptModelBuilder(record, transcriptId));
                    updateTransciptIdToParentIds(transcriptIdToParentIds, transcriptId, parentId);
                }
            }
        } catch (IOException e) {
            throw new TranscriptParseException("Problem parsing GFF file", e);
        }

        Map<String, TranscriptModelBuilder> byTranscriptId = mapTranscriptIdsToTranscriptModels(parentIdToBuilder, transcriptIdToParentIds);
        LOGGER.info("Parsed {} GFF records as {} TranscriptModels", Integer.valueOf(recordCount), Integer.valueOf(byTranscriptId.size()));
        return byTranscriptId;
    }

    /** Returns whether the "onlyCurated" entry of the INI section holds a truthy flag value. */
    private boolean onlyCurated() {
        String configured = this.iniSection.fetch("onlyCurated");
        return checkFlagInSection(configured);
    }

    /** Returns whether the "preferPARTranscriptsOnChrX" entry of the INI section holds a truthy flag value. */
    private boolean preferPARTranscriptsOnChrX() {
        String configured = this.iniSection.fetch("preferPARTranscriptsOnChrX");
        return checkFlagInSection(configured);
    }

    /**
     * Interprets a configuration value as a boolean flag.
     *
     * <p>The comparison is case-insensitive and uses {@link java.util.Locale#ROOT}, so the result
     * does not depend on the JVM's default locale. The value is not trimmed.
     *
     * @param str raw configuration value; may be {@code null}
     * @return {@code true} if the value is "true", "1" or "yes" in any letter case;
     *         {@code false} otherwise, including for {@code null}
     */
    private static boolean checkFlagInSection(String str) {
        if (str == null) {
            return false;
        }
        String normalized = str.toLowerCase(java.util.Locale.ROOT);
        return "true".equals(normalized) || "1".equals(normalized) || "yes".equals(normalized);
    }

    /**
     * Records that a transcript ID was seen under the given parent ID.
     *
     * <p>Does nothing for a {@code null} transcript ID. A parent ID already listed for the
     * transcript is not added a second time.
     *
     * @param map  transcript ID to list of parent IDs; modified in place
     * @param str  transcript ID, may be {@code null}
     * @param str2 parent ID to register
     */
    private void updateTransciptIdToParentIds(Map<String, List<String>> map, String str, String str2) {
        if (str == null) {
            return;
        }
        if (map.containsKey(str)) {
            List<String> knownParents = map.get(str);
            if (!knownParents.contains(str2)) {
                LOGGER.debug("Adding new parentId {} for transcript {}", str2, str);
                knownParents.add(str2);
            }
        } else {
            map.put(str, Lists.newArrayList(str2));
        }
    }

    /**
     * Creates a builder from the first feature record seen for a parent ID.
     *
     * <p>Strand, gene ID, gene symbol and transcript version come from the record; the given
     * transcript ID is stored both as accession and as sequence value. The record itself is then
     * applied to the builder's exon/transcript/CDS regions.
     *
     * @param featureRecord first record of the transcript
     * @param str           transcript ID; may be {@code null}
     * @return the initialized builder
     */
    private TranscriptModelBuilder createNewTranscriptModelBuilder(FeatureRecord featureRecord, String str) {
        Strand strand = parseStrand(featureRecord);
        String geneId = parseGeneID(featureRecord);
        String txVersion = featureRecord.getAttributes().get("transcript_version");
        String geneSymbol = featureRecord.getAttributes().get("gene");

        TranscriptModelBuilder freshBuilder = new TranscriptModelBuilder();
        freshBuilder.setStrand(strand);
        freshBuilder.setAccession(str);
        freshBuilder.setTxVersion(txVersion);
        freshBuilder.setGeneID(geneId);
        freshBuilder.setGeneSymbol(geneSymbol);
        freshBuilder.setSequence(str);

        updateExonsTxRegionsAndCds(featureRecord, freshBuilder);
        return freshBuilder;
    }

    /**
     * Extracts the gene ID from the record's {@code Dbxref} attribute.
     *
     * <p>The attribute is split on commas, and each item on its first colon. The value of the first
     * item whose key is exactly "GeneID" is returned.
     *
     * @param featureRecord record whose attributes are inspected
     * @return the gene ID, or {@code null} if the record has no {@code Dbxref} attribute or the
     *         attribute has no "GeneID" item
     */
    @Nullable
    private String parseGeneID(FeatureRecord featureRecord) {
        String dbxref = featureRecord.getAttributes().get("Dbxref");
        if (dbxref == null) {
            // Splitter.split rejects null input; a missing attribute simply means "no gene ID".
            return null;
        }
        for (String crossReference : Splitter.on(',').split(dbxref)) {
            List<String> keyAndValue = Splitter.on(':').limit(2).splitToList(crossReference);
            if (keyAndValue.size() == 2 && "GeneID".equals(keyAndValue.get(0))) {
                return keyAndValue.get(1);
            }
        }
        return null;
    }

    /**
     * Applies one feature record to a builder.
     *
     * <p>An "exon" record is added as an exon region and merged into the transcript region. A
     * "CDS" or "stop_codon" record is merged into the CDS region. Other types are ignored.
     *
     * @param featureRecord          record to apply
     * @param transcriptModelBuilder builder to update
     */
    private void updateExonsTxRegionsAndCds(FeatureRecord featureRecord, TranscriptModelBuilder transcriptModelBuilder) {
        final Strand strand = parseStrand(featureRecord);
        final String featureType = featureRecord.getType();

        if (featureType.equals("exon")) {
            GenomeInterval exon = buildGenomeInterval(featureRecord, strand);
            GenomeInterval mergedTxRegion = updateGenomeInterval(exon, transcriptModelBuilder.getTXRegion());
            transcriptModelBuilder.setTXRegion(mergedTxRegion);
            transcriptModelBuilder.addExonRegion(exon);
            return;
        }

        if ("CDS".equals(featureType) || "stop_codon".equals(featureType)) {
            GenomeInterval codingPart = buildGenomeInterval(featureRecord, strand);
            GenomeInterval mergedCdsRegion = updateGenomeInterval(codingPart, transcriptModelBuilder.getCDSRegion());
            transcriptModelBuilder.setCDSRegion(mergedCdsRegion);
        }
    }

    /**
     * Merges a new interval into an accumulated one.
     *
     * @param genomeInterval  interval to add
     * @param genomeInterval2 interval accumulated so far, or {@code null} if there is none yet
     * @return {@code genomeInterval} when nothing was accumulated yet, otherwise the union of both
     */
    private GenomeInterval updateGenomeInterval(GenomeInterval genomeInterval, GenomeInterval genomeInterval2) {
        if (genomeInterval2 != null) {
            return genomeInterval2.union(genomeInterval);
        }
        return genomeInterval;
    }

    /**
     * Converts the record's strand into the reference-package {@link Strand}.
     *
     * @param featureRecord record to read the strand from
     * @return {@link Strand#FWD} for a forward-strand record, {@link Strand#REV} for anything else
     */
    private Strand parseStrand(FeatureRecord featureRecord) {
        if (featureRecord.getStrand() == FeatureRecord.Strand.FORWARD) {
            return Strand.FWD;
        }
        return Strand.REV;
    }

    /**
     * Builds the genome interval covered by a feature record.
     *
     * <p>The interval is first created on the forward strand from the record's contig, begin and
     * end, and then converted to the requested strand.
     *
     * @param featureRecord record supplying contig name, begin and end
     * @param strand        strand of the returned interval
     * @return the record's interval on {@code strand}
     */
    private GenomeInterval buildGenomeInterval(FeatureRecord featureRecord, Strand strand) {
        int contigId = this.contigDict.get(featureRecord.getSeqID()).intValue();
        GenomeInterval onForwardStrand = new GenomeInterval(this.refDict, Strand.FWD, contigId, featureRecord.getBegin(), featureRecord.getEnd());
        return onForwardStrand.withStrand(strand);
    }

    /**
     * Turns the per-parent builders into per-transcript builders: first drops builders without a
     * transcript region, then picks one builder for every transcript ID.
     *
     * @param map  builders keyed by parent ID
     * @param map2 transcript ID to the parent IDs it was seen under
     * @return builders keyed by transcript ID
     */
    private Map<String, TranscriptModelBuilder> mapTranscriptIdsToTranscriptModels(Map<String, TranscriptModelBuilder> map, Map<String, List<String>> map2) {
        Map<String, TranscriptModelBuilder> buildersWithTxRegion = mapParentIdsToTranscriptModelsWithTxRegion(map);
        return mapNonRedundantTranscriptIdsToTranscriptModelBuilder(buildersWithTxRegion, map2);
    }

    /**
     * Copies every builder that has a transcript region into a new map.
     *
     * <p>A kept builder without a CDS region receives one created from the transcript region's
     * begin position and the value 0.
     *
     * @param map builders keyed by parent ID
     * @return new map holding only the builders that have a transcript region
     */
    private Map<String, TranscriptModelBuilder> mapParentIdsToTranscriptModelsWithTxRegion(Map<String, TranscriptModelBuilder> map) {
        Map<String, TranscriptModelBuilder> kept = new HashMap<>(map.size());
        for (Map.Entry<String, TranscriptModelBuilder> entry : map.entrySet()) {
            String parentId = entry.getKey();
            TranscriptModelBuilder candidate = entry.getValue();
            GenomeInterval txRegion = candidate.getTXRegion();
            if (txRegion != null) {
                if (candidate.getCDSRegion() == null) {
                    candidate.setCDSRegion(new GenomeInterval(txRegion.getGenomeBeginPos(), 0));
                }
                kept.put(parentId, candidate);
            } else {
                LOGGER.debug("No transcript region for {}; skipping", parentId);
            }
        }
        return kept;
    }

    /**
     * Selects exactly one builder per transcript ID.
     *
     * <p>Parent IDs with no entry in {@code map} (their builder was dropped earlier) are ignored,
     * and a transcript left without any builder is skipped instead of being stored as {@code null}.
     * With one remaining builder that builder is used. With several, if the first two lie on
     * contigs 23 and 24 the "preferPARTranscriptsOnChrX" flag decides between them; otherwise the
     * builder with the longest transcript region wins.
     *
     * @param map  builders keyed by parent ID
     * @param map2 transcript ID to the parent IDs it was seen under
     * @return builders keyed by transcript ID; contains no {@code null} values
     */
    private Map<String, TranscriptModelBuilder> mapNonRedundantTranscriptIdsToTranscriptModelBuilder(Map<String, TranscriptModelBuilder> map, Map<String, List<String>> map2) {
        Map<String, TranscriptModelBuilder> byTranscriptId = new HashMap<>(map.size());
        boolean preferChrX = preferPARTranscriptsOnChrX();
        if (preferChrX) {
            LOGGER.info("Pseudoautosomal transcripts will be assigned to X chromosome");
        }

        int duplicatedIds = 0;
        for (Map.Entry<String, List<String>> entry : map2.entrySet()) {
            String transcriptId = entry.getKey();
            // Resolve parent IDs to builders, dropping parents that have no usable builder.
            List<TranscriptModelBuilder> candidates = entry.getValue().stream()
                .map(map::get)
                .filter(candidate -> candidate != null)
                .collect(Collectors.toList());

            if (candidates.isEmpty()) {
                LOGGER.debug("No usable transcript model for transcript {}; skipping", transcriptId);
                continue;
            }
            if (candidates.size() == 1) {
                byTranscriptId.put(transcriptId, candidates.get(0));
                continue;
            }

            TranscriptModelBuilder first = candidates.get(0);
            TranscriptModelBuilder second = candidates.get(1);
            // NOTE(review): 23 and 24 are presumably the contig IDs of chrX and chrY in the
            // reference dictionary - confirm against the dictionary in use.
            if (first.getTXRegion().getChr() == 23 && second.getTXRegion().getChr() == 24) {
                TranscriptModelBuilder chosen = preferChrX ? first : second;
                LOGGER.info("Assigning pseudoautosomal gene {} transcript {} to chromsome {}", chosen.getGeneSymbol(), chosen.getAccession(), Integer.valueOf(chosen.getTXRegion().getChr()));
                byTranscriptId.put(transcriptId, chosen);
            } else {
                duplicatedIds++;
                LOGGER.warn("Transcript {} has {} possible transcript models - using the longest model", transcriptId, Integer.valueOf(candidates.size()));
                byTranscriptId.put(transcriptId, findLongestTranscriptRegion(candidates));
            }
        }

        if (duplicatedIds != 0) {
            LOGGER.warn("{} duplicated transcript ids", Integer.valueOf(duplicatedIds));
        }
        return byTranscriptId;
    }

    /**
     * Returns the builder whose transcript region is longest; on equal length the earliest one in
     * the list wins.
     *
     * @param list candidate builders; must contain at least one element
     * @return the builder with the longest transcript region
     */
    private TranscriptModelBuilder findLongestTranscriptRegion(List<TranscriptModelBuilder> list) {
        TranscriptModelBuilder longest = list.get(0);
        for (int index = 0; index < list.size(); index++) {
            TranscriptModelBuilder candidate = list.get(index);
            boolean strictlyLonger = candidate.getTXRegion().length() > longest.getTXRegion().length();
            if (strictlyLonger) {
                longest = candidate;
            }
        }
        return longest;
    }

    // Static initializer: assigns the blank static finals declared at the top of the class.
    static {
        // Compiler-generated: caches whether `assert` statements are enabled for this class.
        $assertionsDisabled = !RefSeqParser.class.desiredAssertionStatus();
        LOGGER = LoggerFactory.getLogger((Class<?>) RefSeqParser.class);
        // The trailing empty array is the decompiler's rendering of an unused varargs tail.
        TX_LEVEL_FEATURE_TYPES = ImmutableSet.of("mRNA", "ncRNA", "rRNA", "tRNA", "primary_transcript", "transcript", new String[0]);
    }
}
