package de.julielab.jcore.reader.pmc.parser;

import com.ximpleware.NavException;
import com.ximpleware.VTDException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import de.julielab.xml.JulieXMLTools;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.jcas.JCas;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

/* loaded from: input_file:de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.class */
/**
 * Entry point for parsing a single NXML document with VTD-XML.
 *
 * <p>A parser instance is (re-)initialized for a document via one of the {@code reset} overloads,
 * which read the document into memory, detect its {@link Tagset} from the DOCTYPE declaration and
 * build the registry of element parsers. {@link #parse()} then delegates to the parser registered
 * for the first start tag.
 */
public class NxmlDocumentParser extends NxmlParser {
    private static final Logger log = LoggerFactory.getLogger(NxmlDocumentParser.class);

    /** Buffer size handed to {@code JulieXMLTools.readStream} when reading the document bytes. */
    private static final int READ_BUFFER_SIZE = 8192;

    protected JCas cas;
    private Map<String, NxmlElementParser> parserRegistry;
    private DefaultElementParser defaultElementParser;
    private Map<String, Map<String, Object>> tagProperties;
    private Tagset tagset;
    private Object currentSource;

    /** The tag sets that {@code setTagset()} can assign based on the document's DOCTYPE. */
    public enum Tagset {
        JATS_1_0,
        JATS_1_2_MATH_ML_3,
        JATS_1_3,
        NLM_2_3,
        NLM_3_0
    }

    /**
     * Re-initializes this parser for the given file.
     *
     * @param file the document file; read via its URI
     * @param jCas the CAS stored in {@link #cas} for this document
     * @throws DocumentParsingException if reading or parsing the document fails
     */
    public void reset(File file, JCas jCas) throws DocumentParsingException {
        reset(file.toURI(), jCas);
        // The URI overload records the URI as source; report the original File object instead.
        this.currentSource = file;
    }

    /**
     * Returns the source recorded by the most recent successful {@code reset} call: the
     * {@link File}, the {@link URI}, or the string {@code "<input stream>"}.
     *
     * @return the current source, or {@code null} if no reset has completed yet
     */
    public Object getCurrentSource() {
        return this.currentSource;
    }

    /**
     * Re-initializes this parser for the document at the given URI. The content is decompressed
     * with GZIP when the URI string ends with {@code .gz} or {@code .gzip}.
     *
     * @param uri  location of the document
     * @param jCas the CAS stored in {@link #cas} for this document
     * @throws DocumentParsingException if the URI cannot be opened or the document cannot be
     *                                  read or parsed
     */
    public void reset(URI uri, JCas jCas) throws DocumentParsingException {
        String location = uri.toString();
        boolean gzipped = location.endsWith(".gz") || location.endsWith(".gzip");
        try {
            URL url = uri.toURL();
            log.debug("Reading from URL {}", url);
            // This method opens the stream, so it is also responsible for closing it. Closing
            // the raw stream a second time through the GZIP wrapper is harmless.
            try (InputStream rawStream = url.openStream();
                 InputStream documentStream = gzipped ? new GZIPInputStream(rawStream) : rawStream) {
                reset(documentStream, jCas);
            }
            // Set after the stream overload, which records "<input stream>" as the source.
            this.currentSource = uri;
        } catch (IOException e) {
            throw new DocumentParsingException(e);
        }
    }

    /**
     * Re-initializes this parser from the given stream: reads all bytes, parses them with
     * VTD-XML (not namespace aware, ignorable white space enabled), determines the tag set and
     * sets up the element parser registry.
     *
     * <p>The stream is not closed explicitly by this method.
     *
     * @param inputStream the document content
     * @param jCas        the CAS stored in {@link #cas} for this document
     * @throws DocumentParsingException if reading or parsing the document fails
     */
    public void reset(InputStream inputStream, JCas jCas) throws DocumentParsingException {
        this.cas = jCas;
        try {
            byte[] documentBytes = JulieXMLTools.readStream(inputStream, READ_BUFFER_SIZE);
            VTDGen vtdGen = new VTDGen();
            vtdGen.setDoc(documentBytes);
            vtdGen.enableIgnoredWhiteSpace(true);
            vtdGen.parse(false);
            this.vn = vtdGen.getNav();
            setTagset();
            setupParserRegistry();
            this.currentSource = "<input stream>";
        } catch (IOException | VTDException e) {
            throw new DocumentParsingException(e);
        }
    }

    /**
     * Scans the token stream for the first DTD token and derives {@link #tagset} from it.
     *
     * <p>The field is cleared first so that a reused parser never reports the tag set of a
     * previously parsed document. If a DOCTYPE is present but not recognized, the tag set stays
     * {@code null} and a warning is logged.
     *
     * @throws NavException             on VTD navigation errors
     * @throws DocTypeNotFoundException if the document has no DTD token at all
     */
    private void setTagset() throws NavException, DocTypeNotFoundException {
        this.tagset = null;
        int tokenCount = this.vn.getTokenCount();
        for (int i = 0; i < tokenCount; i++) {
            if (this.vn.getTokenType(i) != VTDNav.TOKEN_DTD_VAL) {
                continue;
            }
            String docType = StringUtils.normalizeSpace(this.vn.toString(i)).replace('\'', '"');
            this.tagset = resolveTagset(docType);
            if (this.tagset == null) {
                log.warn("Unrecognized document type: {}. No tagset was assigned.", docType);
            }
            // Only the first DTD token is considered.
            return;
        }
        throw new DocTypeNotFoundException("Could not find a known doctype.");
    }

    /**
     * Maps a normalized DOCTYPE string to a tag set. Exact DTD file names are checked first;
     * afterwards, generic JATS / NLM markers fall back to the latest tag set of that family.
     *
     * @param docType the white-space-normalized DOCTYPE value
     * @return the matching tag set, or {@code null} if the DOCTYPE is not recognized
     */
    private static Tagset resolveTagset(String docType) {
        if (docType.contains("JATS-archivearticle1.dtd")) {
            return Tagset.JATS_1_0;
        }
        if (docType.contains("JATS-archivearticle1-mathml3.dtd")) {
            return Tagset.JATS_1_2_MATH_ML_3;
        }
        if (docType.contains("JATS-archivearticle1-3-mathml3.dtd")) {
            return Tagset.JATS_1_3;
        }
        if (docType.contains("journalpublishing.dtd") || docType.contains("archivearticle.dtd")) {
            return Tagset.NLM_2_3;
        }
        if (docType.contains("journalpublishing3.dtd") || docType.contains("archivearticle3.dtd")) {
            return Tagset.NLM_3_0;
        }
        if (docType.contains("JATS")) {
            log.warn("Unknown document type: {}. Assigning the latest JATS tagset in assumption of backward compatibility.", docType);
            return Tagset.JATS_1_3;
        }
        if (docType.contains("journalpublishing") || docType.contains("archivearticle")) {
            log.warn("Unknown document type: {}. Assigning the latest NLM tagset in assumption of backward compatibility.", docType);
            return Tagset.NLM_3_0;
        }
        return null;
    }

    /** Creates the default element parser and a fresh registry of element-specific parsers. */
    private void setupParserRegistry() {
        this.defaultElementParser = new DefaultElementParser(this);
        Map<String, NxmlElementParser> registry = new HashMap<>();
        registry.put("front", new FrontParser(this));
        registry.put("contrib-group", new ContribGroupParser(this));
        registry.put("contrib", new ContribParser(this));
        registry.put("sec", new SectionParser(this));
        registry.put("table-wrap", new TableWrapParser(this));
        registry.put("fig", new FigParser(this));
        registry.put("list", new ListParser(this));
        registry.put("xref", new XRefParser(this));
        this.parserRegistry = registry;
    }

    /**
     * @return the VTD navigator of the current document
     */
    public VTDNav getVn() {
        return this.vn;
    }

    /**
     * @return the tag set detected for the current document; {@code null} if the DOCTYPE was
     *         not recognized or no document has been loaded
     */
    public Tagset getTagset() {
        return this.tagset;
    }

    /**
     * @return the live (mutable) map from element name to the parser registered for it
     */
    public Map<String, NxmlElementParser> getParserRegistry() {
        return this.parserRegistry;
    }

    /**
     * Moves to the next start tag and delegates parsing to the parser registered for it. With
     * assertions enabled, that tag is asserted to be {@code article}.
     *
     * @return the parsing result of the element parser
     * @throws ElementParsingException  if an element parser fails
     * @throws DocumentParsingException if the document as a whole cannot be parsed
     */
    public ElementParsingResult parse() throws ElementParsingException, DocumentParsingException {
        try {
            String startTag = moveToNextStartingTag();
            assert startTag.equals("article") : "Did not encounter an article element as first start element";
            return getParser(startTag).parse();
        } catch (Exception e) {
            // Log the source for context, then rethrow the original exception unchanged.
            log.error("Exception while parsing document from source {}", this.currentSource);
            throw e;
        }
    }

    /**
     * Looks up the parser for an element name.
     *
     * @param str the element (tag) name
     * @return the registered parser, or the default element parser if none is registered
     */
    public NxmlElementParser getParser(String str) {
        return this.parserRegistry.getOrDefault(str, this.defaultElementParser);
    }

    /**
     * Returns the properties loaded for the given tag.
     *
     * @param str the element (tag) name
     * @return the tag's properties, or an empty map if no property file was loaded or the tag
     *         has no entry
     */
    public Map<String, Object> getTagProperties(String str) {
        if (this.tagProperties == null) {
            return Collections.emptyMap();
        }
        return this.tagProperties.getOrDefault(str, Collections.emptyMap());
    }

    /**
     * Loads tag properties from a YAML resource. The name is first resolved as a classpath
     * resource (a leading {@code /} is added if missing) and, failing that, as a file path.
     * If the YAML stream holds several documents, the last one wins; if it holds none, the
     * previously loaded properties are kept.
     *
     * @param str classpath resource name or file path of the YAML file
     * @throws IOException if the resource cannot be found or read
     */
    public void loadElementPropertyFile(String str) throws IOException {
        String classpathName = str.startsWith("/") ? str : "/" + str;
        InputStream resourceStream = getClass().getResourceAsStream(classpathName);
        if (resourceStream == null && new File(str).exists()) {
            resourceStream = new FileInputStream(str);
        }
        if (resourceStream == null) {
            throw new IOException("Resource " + str + " could neither be found as a file nor as a classpath resource");
        }
        // NOTE(review): a default-constructed Yaml may instantiate arbitrary types depending on
        // the SnakeYAML version; confirm that property files only come from trusted locations.
        Yaml yaml = new Yaml();
        // loadAll() is lazy, so the iteration must happen while the stream is still open.
        try (InputStream in = resourceStream) {
            for (Object document : yaml.loadAll(in)) {
                @SuppressWarnings("unchecked") // the property file is expected to map tag -> properties
                Map<String, Map<String, Object>> properties = (Map<String, Map<String, Object>>) document;
                this.tagProperties = properties;
            }
        }
    }
}
