package gate.corpora;

import gate.AnnotationSet;
import gate.Document;
import gate.GateConstants;
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleResource;
import gate.util.DocumentFormatException;
import gate.util.InvalidOffsetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Document format for column-oriented CoNLL/IOB style text, registered for the MIME type
 * {@code text/x-conll} and the file suffixes {@code conll} and {@code iob}.
 *
 * <p>Each input line is split on whitespace. The first column is taken as the token text; it is
 * appended to the rebuilt document content followed by a single space. Every further column is
 * read as a tag:
 * <ul>
 *   <li>{@code O} closes every span that is currently open;</li>
 *   <li>{@code U-X} closes any open {@code X} span and records a span of type {@code X} covering
 *       just this token;</li>
 *   <li>{@code B-X} closes any open {@code X} span and opens a new one at this token;</li>
 *   <li>{@code I-X} extends the open {@code X} span to this token, or opens one if none is
 *       open;</li>
 *   <li>{@code L-X} behaves like {@code I-X} and then closes the span;</li>
 *   <li>any other value is recorded as a span over this token whose type is the column value
 *       itself.</li>
 * </ul>
 * The collected spans are added as annotations to the annotation set named by
 * {@link GateConstants#ORIGINAL_MARKUPS_ANNOT_SET_NAME}.
 */
@CreoleResource(name = "GATE CoNLL Document Format", isPrivate = true, autoinstances = {@AutoInstance(hidden = true)})
public class ConllDocumentFormat extends TextualDocumentFormat {
    private static final long serialVersionUID = 5756433194230855515L;

    // NOTE(review): neither feature name is used in this class; presumably they are consumed by
    // Annotandum when it builds annotation features -- confirm against that class.
    public static final String ANNOTATION_COLUMN_FEATURE = "column";
    public static final String ANNOTATION_KIND_FEATURE = "kind";

    private static final boolean DEBUG = false;

    /**
     * Replaces the document's content with the text rebuilt from the first column of every line
     * and adds the annotations derived from the remaining columns.
     *
     * @param document the document to convert
     * @throws DocumentFormatException if {@code document} is {@code null}, if it has neither a
     *     source URL nor content, or if an annotation or the content edit is rejected with an
     *     {@link InvalidOffsetException}
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document) throws DocumentFormatException {
        if (document == null || (document.getSourceUrl() == null && document.getContent() == null)) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }
        setNewLineProperty(document);

        // Runs of line terminators are collapsed by this split, so completely empty lines never
        // reach the loop below.
        String[] lines = document.getContent().toString().split("[\\n\\r]+");
        StringBuilder newContent = new StringBuilder();

        // Everything that will become an annotation once the new content is in place.
        List<Annotandum> annotanda = new ArrayList<Annotandum>();
        // Spans that have been opened but not yet closed, keyed by annotation type.
        Map<String, Annotandum> inProgress = new HashMap<String, Annotandum>();

        long end = 0;
        for (String line : lines) {
            long previousEnd = end;
            long start = newContent.length();
            String[] columns = line.split("\\s+");

            if (columns.length == 0) {
                // No columns at all (e.g. a whitespace-only line): emit a newline and close every
                // open span.
                newContent.append("\n");
                end = newContent.length();
                finishAllTags(inProgress, annotanda, previousEnd);
                continue;
            }

            String token = columns[0];
            newContent.append(token);
            end = newContent.length();
            newContent.append(' ');
            annotanda.add(Annotandum.makeToken(start, end, token));
            annotanda.add(Annotandum.makeSpaceToken(end));

            for (int column = 1; column < columns.length; column++) {
                String tag = columns[column];
                // A prefixed tag needs at least one character after the two-character prefix.
                boolean hasType = tag.length() > 2;

                if (tag.equals("O")) {
                    finishAllTags(inProgress, annotanda, previousEnd);
                } else if (hasType && tag.startsWith("U-")) {
                    String type = tag.substring(2);
                    finishTag(type, inProgress, annotanda, previousEnd);
                    annotanda.add(new Annotandum(type, Long.valueOf(start), Long.valueOf(end), column, true));
                } else if (hasType && tag.startsWith("L-")) {
                    String type = tag.substring(2);
                    extendOrOpenTag(type, inProgress, start, end, column);
                    finishTag(type, inProgress, annotanda, end);
                } else if (hasType && tag.startsWith("B-")) {
                    String type = tag.substring(2);
                    finishTag(type, inProgress, annotanda, previousEnd);
                    inProgress.put(type, new Annotandum(type, Long.valueOf(start), Long.valueOf(end), column, true));
                } else if (hasType && tag.startsWith("I-")) {
                    extendOrOpenTag(tag.substring(2), inProgress, start, end, column);
                } else {
                    // Not a recognised span tag: the column value itself becomes the type.
                    annotanda.add(new Annotandum(tag, Long.valueOf(start), Long.valueOf(end), column, false));
                }
            }
        }
        // Close whatever is still open at the end of the input.
        finishAllTags(inProgress, annotanda, end);

        try {
            // Swap the rebuilt text in first, then add the collected annotations to it.
            document.edit(0L, document.getContent().size(), new DocumentContentImpl(newContent.toString()));
            AnnotationSet originalMarkups = document.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
            for (Annotandum annotandum : annotanda) {
                originalMarkups.add(annotandum.startOffset, annotandum.endOffset, annotandum.type, annotandum.features);
            }
        } catch (InvalidOffsetException e) {
            throw new DocumentFormatException(e);
        }
    }

    /**
     * Extends the open span of the given type so that it ends at {@code end}; if no such span is
     * open, opens a new one covering {@code start} to {@code end}.
     */
    private void extendOrOpenTag(String type, Map<String, Annotandum> inProgress, long start, long end, int column) {
        Annotandum open = inProgress.get(type);
        if (inProgress.containsKey(type)) {
            open.endOffset = Long.valueOf(end);
        } else {
            inProgress.put(type, new Annotandum(type, Long.valueOf(start), Long.valueOf(end), column, true));
        }
    }

    /**
     * Moves every open span from {@code map} to {@code list} and empties {@code map}. A span
     * whose end offset is still {@code null} is given {@code j} as its end offset.
     */
    private void finishAllTags(Map<String, Annotandum> map, List<Annotandum> list, long j) {
        for (Annotandum annotandum : map.values()) {
            if (annotandum.endOffset == null) {
                annotandum.endOffset = Long.valueOf(j);
            }
            list.add(annotandum);
        }
        map.clear();
    }

    /**
     * Moves the open span registered under {@code str}, if any, from {@code map} to {@code list}.
     * A span whose end offset is still {@code null} is given {@code j} as its end offset.
     */
    private void finishTag(String str, Map<String, Annotandum> map, List<Annotandum> list, long j) {
        Annotandum remove = map.remove(str);
        if (remove != null) {
            if (remove.endOffset == null) {
                remove.endOffset = Long.valueOf(j);
            }
            list.add(remove);
        }
    }

    /**
     * Registers this instance as the handler for the {@code text/x-conll} MIME type, maps the
     * {@code conll} and {@code iob} suffixes to that type, and sets it as this format's MIME type.
     *
     * @return this instance
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        MimeType mimeType = new MimeType("text", "x-conll");
        String mimeString = mimeType.getType() + "/" + mimeType.getSubtype();
        mimeString2ClassHandlerMap.put(mimeString, this);
        mimeString2mimeTypeMap.put(mimeString, mimeType);
        suffixes2mimeTypeMap.put("conll", mimeType);
        suffixes2mimeTypeMap.put("iob", mimeType);
        setMimeType(mimeType);
        return this;
    }
}
