package de.julielab.jcore.reader.pmc;

import de.julielab.jcore.reader.pmc.parser.DocumentParsingException;
import de.julielab.jcore.reader.pmc.parser.ElementParsingException;
import de.julielab.jcore.reader.pmc.parser.ElementParsingResult;
import de.julielab.jcore.reader.pmc.parser.NxmlDocumentParser;
import de.julielab.jcore.reader.pmc.parser.ParsingResult;
import de.julielab.jcore.reader.pmc.parser.TextParsingResult;
import de.julielab.jcore.types.Header;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/jcore/reader/pmc/CasPopulator.class */
/**
 * Parses a document with an {@link NxmlDocumentParser} and writes the resulting document text
 * and annotations into a {@link JCas}.
 *
 * <p>The document text is assembled recursively from the parsing results. Annotation offsets are
 * set from the position in the assembled text. The text is finally cut to at most
 * {@code truncationSize} characters; annotations reaching beyond the cut are removed, except
 * {@link Header} annotations, which are shortened to the cut instead.
 */
public class CasPopulator {
    private static final Logger log = LoggerFactory.getLogger(CasPopulator.class);
    private static final String LINESEP = System.getProperty("line.separator");
    /** Logged and used as exception message when an errored document cannot be skipped. */
    private static final String NO_NEXT_DOCUMENT_MESSAGE = "Cannot just skip the errored document because there is no next document currently available. Returning without adding any data to the CAS.";

    private final NxmlDocumentParser nxmlDocumentParser;
    /** Source of fallback URIs for documents that fail to parse; {@code null} for some constructors. */
    private final Iterator<URI> nxmlIterator;
    /** Maximum number of characters of document text that is set to the CAS. */
    private final int truncationSize;

    /**
     * Creates a populator and loads the element property resource into its parser.
     *
     * @param nxmlIterator      URIs to fall back to when a document fails to parse; may be {@code null}
     * @param omitBibReferences if {@code true}, {@code elementproperties-no-bib-refs.yml} is loaded,
     *                          otherwise {@code elementproperties.yml}; must not be {@code null}
     * @param truncationSize    maximum length of the document text
     * @throws IOException declared for loading the element property file
     */
    public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences, int truncationSize) throws IOException {
        this.nxmlIterator = nxmlIterator;
        this.truncationSize = truncationSize;
        this.nxmlDocumentParser = new NxmlDocumentParser();
        String propertyFile = omitBibReferences
                ? "/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml"
                : "/de/julielab/jcore/reader/pmc/resources/elementproperties.yml";
        this.nxmlDocumentParser.loadElementPropertyFile(propertyFile);
    }

    /**
     * Creates a populator without a fallback URI iterator.
     *
     * @param omitBibReferences selects the element property resource, see the three-argument constructor
     * @param truncationSize    maximum length of the document text
     * @throws IOException declared for loading the element property file
     */
    public CasPopulator(Boolean omitBibReferences, int truncationSize) throws IOException {
        this(null, omitBibReferences, truncationSize);
    }

    /**
     * Creates a populator without a fallback URI iterator and with a truncation size of
     * {@link Integer#MAX_VALUE}.
     *
     * @param omitBibReferences selects the element property resource, see the three-argument constructor
     * @throws IOException declared for loading the element property file
     */
    public CasPopulator(Boolean omitBibReferences) throws IOException {
        this(null, omitBibReferences, Integer.MAX_VALUE);
    }

    /**
     * Creates a populator with a fallback URI iterator and a truncation size of
     * {@link Integer#MAX_VALUE}.
     *
     * @param nxmlIterator      URIs to fall back to when a document fails to parse
     * @param omitBibReferences selects the element property resource, see the three-argument constructor
     * @throws IOException declared for loading the element property file
     */
    public CasPopulator(Iterator<URI> nxmlIterator, boolean omitBibReferences) throws IOException {
        this(nxmlIterator, Boolean.valueOf(omitBibReferences), Integer.MAX_VALUE);
    }

    /**
     * Parses the document at {@code uri} into {@code jCas}. If parsing fails, the next URI of the
     * iterator given at construction is tried, until a document could be parsed.
     *
     * @param uri  location of the document to read first
     * @param jCas the CAS to populate
     * @throws ElementParsingException  declared; propagated from the parser
     * @throws NoDataAvailableException if no document could be parsed, i.e. {@code uri} (or a
     *                                  fallback URI) is {@code null}, or parsing failed and there
     *                                  is no iterator or no further URI to fall back to
     */
    public void populateCas(URI uri, JCas jCas) throws ElementParsingException, NoDataAvailableException {
        ElementParsingResult result = null;
        URI currentUri = uri;
        while (currentUri != null && result == null) {
            try {
                nxmlDocumentParser.reset(currentUri, jCas);
                result = nxmlDocumentParser.parse();
            } catch (DocumentParsingException e) {
                log.warn("Error occurred when trying to read from URI {} (ASCII string: {}): {}. Skipping document.", currentUri, currentUri.toASCIIString(), e.getMessage());
                // The iterator is null when one of the iterator-less constructors was used.
                if (nxmlIterator == null || !nxmlIterator.hasNext()) {
                    log.warn(NO_NEXT_DOCUMENT_MESSAGE);
                    throw new NoDataAvailableException(NO_NEXT_DOCUMENT_MESSAGE);
                }
                currentUri = nxmlIterator.next();
            }
        }
        if (result == null) {
            // Only reachable when the initial or a fallback URI was null; fail with the declared
            // exception instead of a NullPointerException further down.
            throw new NoDataAvailableException("No document could be parsed because no URI was available. Returning without adding any data to the CAS.");
        }
        truncateTextAndAnnotations(populateCas(result, new StringBuilder()).toString(), jCas);
    }

    /**
     * Sets the (possibly truncated) document text to the CAS and, if the text was truncated,
     * repairs the annotation index: {@link Header} annotations ending after the cut are
     * shortened to it, all other annotations ending after the cut are removed.
     *
     * @param str  the complete document text
     * @param jCas the CAS holding the annotations created during parsing
     */
    private void truncateTextAndAnnotations(String str, JCas jCas) {
        String documentText = truncateText(str);
        jCas.setDocumentText(documentText);
        if (documentText.length() == str.length()) {
            return;
        }
        int textEnd = documentText.length();
        ArrayList<Annotation> headersToShorten = new ArrayList<>();
        ArrayList<Annotation> annotationsToRemove = new ArrayList<>();
        // Only collect here; the index is modified after the iteration has finished so the
        // iterator never observes a changing index.
        FSIterator<Annotation> it = jCas.getAnnotationIndex().iterator();
        while (it.hasNext()) {
            Annotation annotation = it.next();
            if (annotation.getEnd() <= textEnd) {
                continue;
            }
            if (annotation instanceof Header) {
                headersToShorten.add(annotation);
            } else {
                annotationsToRemove.add(annotation);
            }
        }
        for (Annotation header : headersToShorten) {
            // Offsets take part in index ordering, so change them only while un-indexed.
            header.removeFromIndexes();
            if (header.getBegin() > textEnd) {
                header.setBegin(0);
            }
            header.setEnd(textEnd);
            header.addToIndexes();
        }
        for (Annotation annotation : annotationsToRemove) {
            annotation.removeFromIndexes();
        }
    }

    /**
     * Cuts {@code str} to at most {@code truncationSize} characters.
     *
     * @param str the text to truncate
     * @return {@code str} itself if it is short enough, otherwise its prefix of length {@code truncationSize}
     */
    private String truncateText(String str) {
        return str.length() > this.truncationSize ? str.substring(0, this.truncationSize) : str;
    }

    /**
     * Parses the document read from {@code inputStream} into {@code jCas}. There is no fallback
     * to another document.
     *
     * @param inputStream the stream to read the document from
     * @param jCas        the CAS to populate
     * @throws ElementParsingException  declared; propagated from the parser
     * @throws NoDataAvailableException if the document could not be parsed; wraps the parsing exception
     */
    public void populateCas(InputStream inputStream, JCas jCas) throws ElementParsingException, NoDataAvailableException {
        try {
            nxmlDocumentParser.reset(inputStream, jCas);
            ElementParsingResult result = nxmlDocumentParser.parse();
            truncateTextAndAnnotations(populateCas(result, new StringBuilder()).toString(), jCas);
        } catch (DocumentParsingException e) {
            throw new NoDataAvailableException(e);
        }
    }

    /**
     * Recursively appends the text of {@code parsingResult} to {@code sb} and sets the offsets
     * of the annotations found on the way.
     *
     * @param parsingResult the result (sub)tree to process
     * @param sb            the builder collecting the document text
     * @return {@code sb}
     */
    private StringBuilder populateCas(ParsingResult parsingResult, StringBuilder sb) {
        switch (parsingResult.getResultType()) {
            case ELEMENT:
                appendElement((ElementParsingResult) parsingResult, sb);
                break;
            case TEXT:
                appendText(((TextParsingResult) parsingResult).getText(), sb);
                break;
        }
        return sb;
    }

    /**
     * Appends all sub results of an element, puts block elements on their own lines and sets
     * the offsets of the element's annotation to the appended span.
     */
    private void appendElement(ElementParsingResult element, StringBuilder sb) {
        boolean blockElement = element.isBlockElement()
                || ((Boolean) this.nxmlDocumentParser.getTagProperties(element.getElementName()).getOrDefault(ElementProperties.BLOCK_ELEMENT, false)).booleanValue();
        if (blockElement) {
            ensureTrailingNewline(sb);
        }
        int begin = sb.length();
        for (ParsingResult subResult : element.getSubResults()) {
            populateCas(subResult, sb);
        }
        // The end offset is taken before the closing newline so it is not part of the annotation.
        int end = sb.length();
        if (blockElement) {
            ensureTrailingNewline(sb);
        }
        Annotation annotation = element.getAnnotation();
        if (annotation != null) {
            annotation.setBegin(begin);
            annotation.setEnd(end);
            if (element.addAnnotationToIndexes()) {
                annotation.addToIndexes();
            }
        }
    }

    /** Appends a newline unless the builder is empty or already ends with one. */
    private static void ensureTrailingNewline(StringBuilder sb) {
        if (sb.length() > 0 && sb.charAt(sb.length() - 1) != '\n') {
            sb.append("\n");
        }
    }

    /**
     * Appends whitespace-normalized text. A single space is kept in place of leading or trailing
     * whitespace of {@code text}, unless the text starts or ends with the line separator; the
     * leading space is omitted if the builder already ends with whitespace.
     */
    private static void appendText(String text, StringBuilder sb) {
        // NOTE(review): LINESEP is the platform line separator while the builder uses '\n';
        // behavior may differ on platforms where these are not the same - confirm intent.
        boolean leadingSpace = !text.isEmpty() && Character.isWhitespace(text.charAt(0)) && !text.startsWith(LINESEP);
        boolean trailingSpace = !text.isEmpty() && Character.isWhitespace(text.charAt(text.length() - 1)) && !text.endsWith(LINESEP);
        boolean builderEndsWithWhitespace = sb.length() > 0 && Character.isWhitespace(sb.charAt(sb.length() - 1));
        if (leadingSpace && !builderEndsWithWhitespace) {
            sb.append(" ");
        }
        sb.append(StringUtils.normalizeSpace(text));
        if (trailingSpace) {
            sb.append(" ");
        }
    }
}
