package com.gengoai.hermes.format;

import com.gengoai.ParamMap;
import com.gengoai.ParameterDef;
import com.gengoai.collection.Maps;
import com.gengoai.conversion.Cast;
import com.gengoai.conversion.Val;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.AnnotationType;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.DocumentFactory;
import com.gengoai.hermes.Types;
import com.gengoai.io.resource.Resource;
import com.gengoai.string.StringLike;
import com.gengoai.tuple.Tuples;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/format/TaggedFormat.class */
public class TaggedFormat extends WholeFileTextFormat implements OneDocPerFileFormat, Serializable {
    private static final long serialVersionUID = 1L;

    /**
     * Matches one inline tag of the form {@code <name>text</name>}, case-insensitively. Group 1 is the tag
     * name (letters and underscores), group 2 is the enclosed text, which may not contain angle brackets.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile("<([a-z_]+)>([^<>]+)</\\1>", Pattern.CASE_INSENSITIVE);

    /** Matches a maximal run of non-whitespace characters. */
    private static final Pattern WORD_PATTERN = Pattern.compile("\\S+");

    /** Parameter definition named {@code "annotationType"} holding an {@link AnnotationType}. */
    public static final ParameterDef<AnnotationType> ANNOTATION_TYPE = ParameterDef.param("annotationType", AnnotationType.class);

    /** Boolean parameter definition named {@code "isTokenized"}. */
    public static final ParameterDef<Boolean> IS_TOKENIZED = ParameterDef.boolParam("isTokenized");

    /** The parameters this format instance was constructed with. */
    private final TaggedParameters parameters;

    /* loaded from: input_file:com/gengoai/hermes/format/TaggedFormat$Provider.class */
    /**
     * {@link DocFormatProvider} for the format named {@code "TAGGED"}; creates {@link TaggedFormat}
     * instances from {@link TaggedParameters}.
     */
    public static class Provider implements DocFormatProvider {
        /**
         * Builds a {@link TaggedFormat} from the supplied parameters.
         *
         * @param docFormatParameters the format parameters; must be a {@link TaggedParameters} instance
         * @return a new {@link TaggedFormat} using the given parameters
         * @throws NullPointerException     if {@code docFormatParameters} is null
         * @throws IllegalArgumentException if {@code docFormatParameters} is not a {@link TaggedParameters}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public DocFormat create(@NonNull DocFormatParameters docFormatParameters) {
            if (docFormatParameters == null) {
                throw new NullPointerException("parameters is marked non-null but is null");
            }
            if (!(docFormatParameters instanceof TaggedParameters)) {
                final String expected = TaggedParameters.class.getName();
                final String received = docFormatParameters.getClass().getName();
                throw new IllegalArgumentException("Invalid parameter class, expecting: " + expected + ", but received: " + received);
            }
            return new TaggedFormat((TaggedParameters) docFormatParameters);
        }

        /**
         * @return a freshly constructed {@link TaggedParameters}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public DocFormatParameters getDefaultFormatParameters() {
            return new TaggedParameters();
        }

        /**
         * @return the format name, {@code "TAGGED"}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public String getName() {
            return "TAGGED";
        }

        /**
         * @return always {@code true}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public boolean isWriteable() {
            return true;
        }
    }

    /* loaded from: input_file:com/gengoai/hermes/format/TaggedFormat$TaggedParameters.class */
    /**
     * Parameters for {@link TaggedFormat}: the common {@link DocFormatParameters} plus the two
     * tagged-format specific parameters declared below.
     */
    public static class TaggedParameters extends DocFormatParameters {
        /**
         * The annotation type used for tagged spans; registered under
         * {@link TaggedFormat#ANNOTATION_TYPE} with {@code Types.ENTITY} as the value passed at registration.
         */
        public final ParamMap<DocFormatParameters>.Parameter<AnnotationType> annotationType = parameter(TaggedFormat.ANNOTATION_TYPE, Types.ENTITY);
        /**
         * Whether the reader should also create sentence and token annotations; registered under
         * {@link TaggedFormat#IS_TOKENIZED} with {@code false} as the value passed at registration.
         */
        public final ParamMap<DocFormatParameters>.Parameter<Boolean> isTokenized = parameter(TaggedFormat.IS_TOKENIZED, false);
    }

    /**
     * Creates a format instance that uses the given parameters.
     *
     * @param taggedParameters the parameters to store on this instance; no null check is performed here
     */
    TaggedFormat(TaggedParameters taggedParameters) {
        this.parameters = taggedParameters;
    }

    /**
     * Finds the exclusive end offset of the line that starts at {@code i}, with trailing whitespace removed.
     *
     * @param i          the offset at which the line starts
     * @param stringLike the text being scanned
     * @return the trimmed end offset of the line, or {@code -1} if {@code i} is at or past the end of the
     *         text or nothing but whitespace lies between {@code i} and the end of the line
     */
    private int getNextEndOfLine(int i, StringLike stringLike) {
        final int length = stringLike.length();
        if (i >= length) {
            return -1;
        }
        final int newline = stringLike.indexOf("\n", i);
        // No further newline: the line runs to the end of the text.
        final int lineEnd = newline == -1 ? length : newline;
        return gobbleEndWhiteSpace(i, lineEnd, stringLike);
    }

    /**
     * Returns the parameters this format was constructed with.
     *
     * @return the {@link TaggedParameters} instance held by this format
     */
    @Override // com.gengoai.hermes.format.DocFormat
    public DocFormatParameters getParameters() {
        return this.parameters;
    }

    /**
     * Moves an exclusive end offset backwards over trailing whitespace.
     *
     * @param i            the inclusive lower bound; the end offset is never moved below it
     * @param i2           the exclusive end offset to start from
     * @param charSequence the text being scanned
     * @return the adjusted end offset if it is still greater than {@code i}, otherwise {@code -1}
     */
    private int gobbleEndWhiteSpace(int i, int i2, CharSequence charSequence) {
        int end = i2;
        for (; end > i; end--) {
            if (!Character.isWhitespace(charSequence.charAt(end - 1))) {
                break;
            }
        }
        return end > i ? end : -1;
    }

    /**
     * Moves an offset forwards over leading whitespace.
     *
     * @param i            the offset to start from
     * @param charSequence the text being scanned
     * @return the offset of the first non-whitespace character at or after {@code i}, or the first offset
     *         at which {@code i} is no longer less than the text length if only whitespace remains
     */
    private int gobbleStartWhiteSpace(int i, CharSequence charSequence) {
        final int limit = charSequence.length();
        int cursor = i;
        while (cursor < limit) {
            if (!Character.isWhitespace(charSequence.charAt(cursor))) {
                break;
            }
            cursor++;
        }
        return cursor;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v19, types: [java.lang.CharSequence, com.gengoai.hermes.Document, com.gengoai.string.StringLike, java.lang.Object] */
    @Override // com.gengoai.hermes.format.WholeFileTextFormat
    protected Stream<Document> readSingleFile(String str) {
        DocumentFactory documentFactory = this.parameters.getDocumentFactory();
        AnnotationType annotationType = (AnnotationType) this.parameters.annotationType.value();
        int i = 0;
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        Matcher matcher = TAG_PATTERN.matcher(str);
        StringBuilder sb = new StringBuilder();
        while (matcher.find()) {
            if (matcher.start() != i) {
                sb.append((CharSequence) str, i, matcher.start());
            }
            i = matcher.end();
            arrayList.add(Integer.valueOf(sb.length()));
            arrayList2.add(Integer.valueOf(sb.length() + matcher.group(2).length()));
            arrayList3.add(matcher.group(1));
            sb.append(matcher.group(2));
        }
        if (i != str.length()) {
            sb.append((CharSequence) str, i, str.length());
        }
        ?? createRaw = documentFactory.createRaw(sb.toString());
        for (int i2 = 0; i2 < arrayList.size(); i2++) {
            createRaw.createAnnotation(annotationType, ((Integer) arrayList.get(i2)).intValue(), ((Integer) arrayList2.get(i2)).intValue(), Maps.hashMapOf(new Map.Entry[]{Tuples.$(annotationType.getTagAttribute(), Val.of(arrayList3.get(i2)).as(annotationType.getTagAttribute().getValueType()))}));
        }
        if (((Boolean) this.parameters.isTokenized.value()).booleanValue()) {
            int i3 = 0;
            int gobbleStartWhiteSpace = gobbleStartWhiteSpace(0, createRaw);
            int nextEndOfLine = getNextEndOfLine(gobbleStartWhiteSpace, createRaw);
            while (true) {
                int i4 = nextEndOfLine;
                if (i4 < 0) {
                    break;
                }
                StringLike createAnnotation = createRaw.createAnnotation(Types.SENTENCE, gobbleStartWhiteSpace, i4, Maps.hashMapOf(new Map.Entry[]{Tuples.$(Types.INDEX, Integer.valueOf(i3))}));
                i3++;
                Matcher matcher2 = WORD_PATTERN.matcher(createAnnotation);
                while (matcher2.find()) {
                    createRaw.createAnnotation(Types.TOKEN, gobbleStartWhiteSpace + matcher2.start(), gobbleStartWhiteSpace + matcher2.end(), Collections.emptyMap());
                }
                gobbleStartWhiteSpace = gobbleStartWhiteSpace(i4 + 1, createRaw);
                nextEndOfLine = getNextEndOfLine(gobbleStartWhiteSpace, createRaw);
            }
            createRaw.setCompleted(Types.TOKEN, "Provided");
            createRaw.setCompleted(Types.SENTENCE, "Provided");
        }
        createRaw.setCompleted(annotationType, "Provided");
        return Stream.of(createRaw);
    }

    /**
     * Writes the document as text in which every annotation of the configured annotation type is wrapped in
     * {@code <label>...</label>}, where the label comes from the annotation's tag. Text outside those
     * annotations is copied unchanged.
     *
     * @param document the document to serialise
     * @param resource the resource that receives the resulting text
     * @throws IOException declared by the overridden method
     */
    @Override // com.gengoai.hermes.format.DocFormat
    public void write(Document document, Resource resource) throws IOException {
        final AnnotationType type = this.parameters.annotationType.value();
        final StringBuilder out = new StringBuilder();
        int cursor = 0;
        for (Annotation annotation : document.annotations(type)) {
            if (annotation.start() != cursor) {
                // Copy the untagged text between the previous annotation and this one.
                out.append(document.substring(cursor, annotation.start()).toString());
            }
            out.append("<");
            out.append(annotation.getTag().label());
            out.append(">");
            out.append(annotation.toString());
            out.append("</");
            out.append(annotation.getTag().label());
            out.append(">");
            cursor = annotation.end();
        }
        if (cursor < document.end()) {
            out.append(document.substring(cursor, document.end()).toString());
        }
        resource.write(out.toString());
    }
}
