package de.julielab.jcore.consumer.cas2iob.main;

import de.julielab.jcore.consumer.cas2iob.utils.UIMAUtils;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Paragraph;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jcore.utility.index.Comparators;
import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
import de.julielab.jcore.utility.index.TermGenerators;
import de.julielab.segmentationEvaluator.IOBToken;
import de.julielab.segmentationEvaluator.IOToken;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name = "JCoRe IOB Writer", description = "This component help to write CAS entity or chunk annotations into a text file in IOB format.")
/* loaded from: input_file:de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.class */
public class ToIOBConsumer extends JCasAnnotator_ImplBase {
    public static final String PARAM_LABELS = "labels";
    public static final String PARAM_OUTFOLDER = "outFolder";
    public static final String PARAM_LABEL_METHODS = "labelNameMethods";
    public static final String PARAM_IOB_LABEL_NAMES = "iobLabelNames";
    public static final String PARAM_TYPE_PATH = "typePath";
    public static final String PARAM_MODE = "mode";
    public static final String PARAM_ADD_POS = "addPos";
    public static final String PARAM_COLUMN_SEPARATOR = "columnSeparator";
    public static final String PARAM_IOB_MARK_SEPARATOR = "iobMarkSeparator";
    private static final Logger LOGGER = LoggerFactory.getLogger(ToIOBConsumer.class);
    private final String SENTENCE_END_MARK = "SENTENCE_END_MARKER";
    private final String PARAGRAPH_END_MARK = "PARAGRAPH_END_MARKER";

    @ConfigurationParameter(name = PARAM_OUTFOLDER, description = "Path to folder where IOB-files should be written to.")
    String outFolder = null;

    @ConfigurationParameter(name = PARAM_TYPE_PATH, mandatory = false, description = "The path of the UIMA types, e.g. \"de.julielab.jcore.types.\" (with terminating \".\"!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String \"\".")
    String typePath = null;

    @ConfigurationParameter(name = PARAM_LABELS, mandatory = false, description = "The labels NOT to be exported into IOB format. Label does here not refer to an UIMA type but to the specific label aquired by the labelNameMethod.")
    String[] labels = null;
    HashMap<String, String> objNameMethMap = null;
    Map<String, String> labelIOBMap = null;
    int id = 1;

    @ConfigurationParameter(name = PARAM_MODE, mandatory = false, description = "This parameter determines whether the IOB or IO annotation schema should be used. The parameter defaults to IOB, the value is not case sensitive.", defaultValue = {"IOB"})
    private String mode = null;

    @ConfigurationParameter(name = PARAM_LABEL_METHODS, description = "This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: <annotationName>[\\s=/\\\\|]<method Name>. The annotation name is fully qualified name of the UIMA type. For abbreviation purposes, the \"typePath\" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix \"de.julielab.jcore.types.\" will allow to use the \"specificType\" feature of the \"de.julielab.jcore.types.Gene\" type by providing \"Gene=getSpecificType\".  If the name of the annotation class itself is to be being used as label, only the class name is expected: <annotationName> (here, again, applies the use of the \"typePath\" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.")
    private String[] labelNameMethods;

    @ConfigurationParameter(name = PARAM_IOB_LABEL_NAMES, mandatory = false, description = "Pairs of label names in UIMA (aquired by the methods given in labelNameMethods) and the name the label is supposed to get in the outcoming IOB file. Format: <UIMA label name>[\\s=/\\\\|]&lt;IOB label name&gt;")
    private String[] iobLabelNames;

    @ConfigurationParameter(name = PARAM_ADD_POS, mandatory = false, description = "If set to true and if annotations of (sub-)type de.julielab.jcore.types.POSTag are present in the CAS, the PoS tags will be added to the output file as the second column. Defaults to false.")
    private Boolean addPos;

    @ConfigurationParameter(name = PARAM_COLUMN_SEPARATOR, mandatory = false, description = "The string given with this parameter will be used to separate the columns in the output file. Defaults to a single tab character.", defaultValue = {"\\t"})
    private String separator;

    @ConfigurationParameter(name = PARAM_IOB_MARK_SEPARATOR, mandatory = false, description = "This string will be used to separate the IO(B) mark - i. e. I or B - from the entity or chunk label in the output file. Defaults to an underscore character.")
    private String iobMarkSeparator;

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        LOGGER.info("Initializing...");
        this.labels = (String[]) Optional.ofNullable((String[]) uimaContext.getConfigParameterValue(PARAM_LABELS)).orElse(new String[0]);
        this.outFolder = (String) uimaContext.getConfigParameterValue(PARAM_OUTFOLDER);
        this.labelNameMethods = (String[]) uimaContext.getConfigParameterValue(PARAM_LABEL_METHODS);
        this.iobLabelNames = (String[]) uimaContext.getConfigParameterValue(PARAM_IOB_LABEL_NAMES);
        this.typePath = (String) uimaContext.getConfigParameterValue(PARAM_TYPE_PATH);
        if (this.typePath == null) {
            this.typePath = "";
        }
        this.addPos = (Boolean) Optional.ofNullable((Boolean) uimaContext.getConfigParameterValue(PARAM_ADD_POS)).orElse(false);
        this.separator = (String) Optional.ofNullable((String) uimaContext.getConfigParameterValue(PARAM_COLUMN_SEPARATOR)).orElse("\t");
        this.separator = this.separator.replaceAll("\\\\t", "\t");
        this.iobMarkSeparator = (String) Optional.ofNullable((String) uimaContext.getConfigParameterValue(PARAM_IOB_MARK_SEPARATOR)).orElse("_");
        this.mode = (String) uimaContext.getConfigParameterValue(PARAM_MODE);
        if (this.mode.equals("IOB") || this.mode.equals("iob")) {
            this.mode = "IOB";
        } else {
            if (!this.mode.equals("IO") && !this.mode.equals("io")) {
                throw new ResourceInitializationException();
            }
            this.mode = "IO";
        }
        if (this.labelNameMethods != null) {
            this.objNameMethMap = new HashMap<>();
            for (int i = 0; i < this.labelNameMethods.length; i++) {
                String[] split = this.labelNameMethods[i].split("[\\s=/\\|]");
                if (split.length == 1) {
                    this.objNameMethMap.put(this.typePath + split[0], null);
                } else {
                    this.objNameMethMap.put(this.typePath + split[0], split[1]);
                }
            }
        }
        if (this.iobLabelNames == null) {
            this.labelIOBMap = Collections.emptyMap();
            return;
        }
        this.labelIOBMap = new HashMap();
        for (int i2 = 0; i2 < this.iobLabelNames.length; i2++) {
            String[] split2 = this.iobLabelNames[i2].split("[\\s=/\\|]");
            this.labelIOBMap.put(split2[0], split2[1]);
        }
    }

    public void process(JCas jCas) {
        LOGGER.trace("Converting CAS to IO(B)Tokens...");
        IOToken[] convertToIOB = convertToIOB(jCas);
        LOGGER.trace("Writing IO(B) file...");
        String str = Paths.get(this.outFolder, getDocumentId(jCas)).toString() + ".iob";
        if (Files.notExists(Paths.get(this.outFolder, new String[0]), new LinkOption[0])) {
            new File(this.outFolder).mkdirs();
        }
        try {
            BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(str));
            for (IOToken iOToken : convertToIOB) {
                if (iOToken.getText().equals("") || iOToken.getText().equals("SENTENCE_END_MARKER")) {
                    bufferedWriter.newLine();
                } else if (iOToken.getText().equals("") || iOToken.getText().equals("PARAGRAPH_END_MARKER")) {
                    bufferedWriter.newLine();
                } else {
                    Stream.Builder builder = Stream.builder();
                    builder.accept(iOToken.getText());
                    builder.accept(iOToken.getPos());
                    builder.accept(iOToken.getIobMark().equals("O") ? iOToken.getIobMark() : iOToken.getIobMark() + this.iobMarkSeparator + iOToken.getLabel());
                    bufferedWriter.write((String) builder.build().filter((v0) -> {
                        return Objects.nonNull(v0);
                    }).collect(Collectors.joining(this.separator)));
                    bufferedWriter.newLine();
                }
            }
            bufferedWriter.newLine();
            if (bufferedWriter != null) {
                bufferedWriter.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        LOGGER.trace("The IO(B) file was written to " + str);
    }

    public IOToken[] convertToIOB(JCas jCas) {
        ArrayList arrayList = new ArrayList();
        Iterator[] itArr = new Iterator[this.objNameMethMap.size()];
        Iterator<String> it = this.objNameMethMap.keySet().iterator();
        int i = 0;
        while (it.hasNext()) {
            itArr[i] = jCas.getAnnotationIndex(jCas.getTypeSystem().getType(it.next())).iterator();
            i++;
        }
        TreeMap<Integer, IOToken> treeMap = new TreeMap<>();
        tokenLabeling(treeMap, itArr, jCas);
        FSIterator it2 = jCas.getAnnotationIndex(Paragraph.type).iterator();
        ArrayList arrayList2 = new ArrayList();
        while (it2.hasNext()) {
            arrayList2.add((Paragraph) it2.next());
        }
        Paragraph paragraph = null;
        if (arrayList2.isEmpty()) {
            try {
                paragraph = (Paragraph) JCoReAnnotationTools.getAnnotationByClassName(jCas, Paragraph.class.getName());
            } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InstantiationException | NoSuchMethodException | SecurityException | InvocationTargetException e) {
                e.printStackTrace();
            }
            paragraph.setBegin(0);
            paragraph.setEnd(jCas.getDocumentText().length());
            paragraph.setComponentId(ToIOBConsumer.class.getCanonicalName());
            paragraph.addToIndexes(jCas);
            arrayList2.add(paragraph);
        }
        int i2 = 0;
        FSIterator it3 = jCas.getAnnotationIndex(Sentence.type).iterator();
        while (it3.hasNext()) {
            Sentence sentence = (Sentence) it3.next();
            int i3 = 0;
            Paragraph paragraph2 = null;
            Iterator it4 = arrayList2.iterator();
            while (it4.hasNext()) {
                Paragraph paragraph3 = (Paragraph) it4.next();
                if (paragraph3.getBegin() <= sentence.getBegin() && paragraph3.getEnd() >= sentence.getEnd()) {
                    paragraph2 = paragraph3;
                }
                ArrayList arrayList3 = (ArrayList) UIMAUtils.getAnnotations(jCas, sentence, new Token(jCas, 0, 0).getClass());
                for (int i4 = 0; i4 < arrayList3.size(); i4++) {
                    Token token = (Token) arrayList3.get(i4);
                    if (i4 == 0 && i2 > 0) {
                        treeMap.put(Integer.valueOf(token.getBegin() - 1), paragraph2 != null ? new IOBToken("PARAGRAPH_END_MARKER", "PARAGRAPH_END_MARKER") : new IOBToken("SENTENCE_END_MARKER", "SENTENCE_END_MARKER"));
                    }
                    if (!treeMap.containsKey(Integer.valueOf(token.getBegin()))) {
                        treeMap.put(Integer.valueOf(token.getBegin()), new IOBToken(token.getCoveredText(), "O", (!this.addPos.booleanValue() || token.getPosTag().size() <= 0) ? null : token.getPosTag(0).getValue()));
                    }
                }
                i2++;
                i3++;
            }
        }
        Iterator<Integer> it5 = treeMap.keySet().iterator();
        while (it5.hasNext()) {
            arrayList.add(treeMap.get(it5.next()));
        }
        IOToken[] iOTokenArr = new IOToken[arrayList.size()];
        if (this.mode.equals("IOB")) {
            iOTokenArr = (IOToken[]) arrayList.toArray(iOTokenArr);
        } else {
            for (int i5 = 0; i5 < arrayList.size(); i5++) {
                iOTokenArr[i5] = ((IOBToken) arrayList.get(i5)).toXIoToken();
            }
        }
        if (paragraph != null) {
            paragraph.removeFromIndexes();
        }
        return iOTokenArr;
    }

    private void tokenLabeling(TreeMap<Integer, IOToken> treeMap, Iterator[] itArr, JCas jCas) {
        Token token;
        loop0: for (Iterator it : itArr) {
            JCoReTreeMapAnnotationIndex jCoReTreeMapAnnotationIndex = new JCoReTreeMapAnnotationIndex(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, Token.type);
            while (it.hasNext()) {
                Annotation annotation = (Annotation) it.next();
                String annotationLabel = getAnnotationLabel(annotation);
                Iterator it2 = jCoReTreeMapAnnotationIndex.searchFuzzy(annotation).iterator();
                try {
                    token = (Token) it2.next();
                } catch (NoSuchElementException e) {
                    LOGGER.warn("no token annotation in label annotation: " + annotation.getCoveredText() + ", " + annotation);
                }
                if (this.addPos.booleanValue() && token.getPosTag() == null) {
                    throw new IllegalStateException("The IOB consumer is configured to add the part of speech tag to each token but the token \"" + token.getCoveredText() + "\", " + token + " doesn't have any (the PoS list is null).");
                    break loop0;
                }
                String value = (!this.addPos.booleanValue() || token.getPosTag().size() <= 0) ? null : token.getPosTag(0).getValue();
                Integer valueOf = Integer.valueOf(token.getBegin());
                if (treeMap.containsKey(valueOf)) {
                    handleCompetingAnnotations(treeMap, annotationLabel, it2, token, valueOf, value);
                } else {
                    treeMap.put(valueOf, new IOBToken(token.getCoveredText(), "B_" + annotationLabel, value));
                    while (it2.hasNext()) {
                        Token token2 = (Token) it2.next();
                        treeMap.put(Integer.valueOf(token2.getBegin()), new IOBToken(token2.getCoveredText(), "I_" + annotationLabel, value));
                    }
                }
            }
        }
    }

    private void handleCompetingAnnotations(TreeMap<Integer, IOToken> treeMap, String str, Iterator it, Token token, Integer num, String str2) {
        int i = 0;
        for (Integer num2 : treeMap.keySet()) {
            IOToken iOToken = treeMap.get(num2);
            if (num2.intValue() >= num.intValue()) {
                if (!iOToken.getLabel().equals(str) || (!iOToken.getIobMark().equals("I") && i > 0)) {
                    break;
                } else {
                    i++;
                }
            }
        }
        HashMap hashMap = new HashMap();
        hashMap.put(new IOBToken(token.getCoveredText(), "B_" + str, str2), num);
        while (it.hasNext()) {
            Token token2 = (Token) it.next();
            hashMap.put(new IOBToken(token2.getCoveredText(), "I_" + str, str2), Integer.valueOf(token2.getBegin()));
        }
        if (hashMap.size() > i) {
            for (IOBToken iOBToken : hashMap.keySet()) {
                treeMap.put((Integer) hashMap.get(iOBToken), iOBToken);
            }
        }
    }

    private String getAnnotationLabel(Annotation annotation) {
        String str = null;
        Class<?> cls = annotation.getClass();
        String str2 = this.objNameMethMap.get(cls.getName());
        try {
            str = str2 == null ? cls.getName() : (String) cls.getMethod(str2, new Class[0]).invoke(annotation, (Object[]) null);
        } catch (NoSuchMethodException e) {
            LOGGER.error("The class \"" + cls.getName() + "\" does not have a method \"" + str2 + "\".");
            e.printStackTrace();
        } catch (Exception e2) {
            e2.printStackTrace();
        }
        if (str != null) {
            for (String str3 : this.labels) {
                if (str.equals(str3)) {
                    str = null;
                }
            }
        }
        if (str != null && this.labelIOBMap.get(str) != null) {
            str = this.labelIOBMap.get(str);
        }
        return str;
    }

    private String getDocumentId(JCas jCas) {
        Header header = null;
        try {
            header = (Header) jCas.getAnnotationIndex(Header.type).iterator().next();
        } catch (NoSuchElementException e) {
            LOGGER.trace("No annotation of type {} found in current CAS", Header.class.getCanonicalName());
        }
        if (header != null) {
            return header.getDocId();
        }
        int i = this.id;
        this.id = i + 1;
        return String.valueOf(i);
    }
}
