package com.gengoai.hermes.extraction;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.gengoai.Validation;
import com.gengoai.conversion.Cast;
import com.gengoai.function.SerializablePredicate;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.AnnotationType;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.extraction.MultiPhaseExtractor;
import com.gengoai.hermes.extraction.lyre.LyreExpression;
import com.gengoai.hermes.ml.feature.ValueCalculator;
import com.gengoai.stream.Streams;
import com.gengoai.tuple.Tuple;
import com.gengoai.tuple.Tuple0;
import com.gengoai.tuple.Tuples;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.NonNull;

@JsonDeserialize(as = NGramExtractor.class)
/* loaded from: input_file:com/gengoai/hermes/extraction/NGramExtractor.class */
public class NGramExtractor extends MultiPhaseExtractor {
    /** Version identifier used by Java serialization. */
    private static final long serialVersionUID = 1;
    /** Upper bound on the n-gram order; assigned by the private constructor and exposed by {@link #getMaxOrder()}. */
    private int maxOrder;
    /** Lower bound on the n-gram order; assigned by the private constructor and exposed by {@link #getMinOrder()}. */
    private int minOrder;

    /**
     * Fluent builder for {@link NGramExtractor}.
     *
     * <p>Both orders start at 1. {@link #minOrder(int)} and {@link #maxOrder(int)} each adjust the other bound
     * when needed so that {@code minOrder <= maxOrder} always holds. The remaining setters delegate to the
     * superclass and are re-declared here only so that call chains keep the {@code Builder} type.
     */
    public static class Builder extends MultiPhaseExtractor.MultiPhaseExtractorBuilder<NGramExtractor, Builder> {
        private int maxOrder = 1;
        private int minOrder = 1;

        /**
         * Creates the extractor from the current builder state.
         *
         * <p>Fails the {@code Validation.checkArgument} checks when {@code minOrder < 1} or
         * {@code maxOrder < minOrder}.
         *
         * @return a new extractor configured with this builder's orders and inherited settings
         */
        @Override
        public NGramExtractor build() {
            Validation.checkArgument(this.minOrder > 0, "minOrder must be greater than or equal to 1");
            Validation.checkArgument(this.maxOrder >= this.minOrder, "maxOrder must be greater than or equal to minOrder");
            return new NGramExtractor(
                this.minOrder,
                this.maxOrder,
                this.annotationTypes,
                this.filter,
                this.prefix,
                this.toString,
                this.trim,
                this.valueCalculator);
        }

        /**
         * Copies the settings of an existing extractor into this builder, including its minimum and maximum
         * order. The argument is cast to {@link NGramExtractor} before anything is copied.
         *
         * @param multiPhaseExtractor the extractor to copy from; must not be null
         * @return this builder
         * @throws NullPointerException if {@code multiPhaseExtractor} is null
         */
        @Override
        public Builder fromExtractor(@NonNull MultiPhaseExtractor multiPhaseExtractor) {
            if (multiPhaseExtractor == null) {
                throw new NullPointerException("extractor is marked non-null but is null");
            }
            NGramExtractor source = (NGramExtractor) Cast.as(multiPhaseExtractor);
            Builder copied = (Builder) super.fromExtractor(multiPhaseExtractor);
            return copied.minOrder(source.minOrder).maxOrder(source.maxOrder);
        }

        /**
         * Sets the maximum n-gram order. If the current minimum order is larger than {@code i}, the minimum is
         * lowered to {@code i}.
         *
         * @param i the maximum order; must be greater than 0
         * @return this builder
         */
        public Builder maxOrder(int i) {
            Validation.checkArgument(i > 0, "Max Order must be greater than 0");
            this.maxOrder = i;
            // Keep the invariant minOrder <= maxOrder.
            this.minOrder = Math.min(this.minOrder, i);
            return this;
        }

        /**
         * Sets the minimum n-gram order. If the current maximum order is smaller than {@code i}, the maximum is
         * raised to {@code i}.
         *
         * @param i the minimum order; must be greater than 0
         * @return this builder
         */
        public Builder minOrder(int i) {
            Validation.checkArgument(i > 0, "Min Order must be greater than 0");
            this.minOrder = i;
            // Keep the invariant minOrder <= maxOrder.
            this.maxOrder = Math.max(i, this.maxOrder);
            return this;
        }

        /** Delegates to the superclass {@code valueCalculator(ValueCalculator)}. */
        @Override
        public Builder valueCalculator(ValueCalculator valueCalculator) {
            return super.valueCalculator(valueCalculator);
        }

        /** Delegates to the superclass {@code trim(LyreExpression)}. */
        @Override
        public Builder trim(LyreExpression lyreExpression) {
            return super.trim(lyreExpression);
        }

        /** Delegates to the superclass {@code trim(String)}. */
        @Override
        public Builder trim(String str) {
            return super.trim(str);
        }

        /** Delegates to the superclass {@code toString(LyreExpression)}. */
        @Override
        public Builder toString(LyreExpression lyreExpression) {
            return super.toString(lyreExpression);
        }

        /** Delegates to the superclass {@code toString(String)}. */
        @Override
        public Builder toString(String str) {
            return super.toString(str);
        }

        /** Delegates to the superclass {@code toLowerCase()}. */
        @Override
        public Builder toLowerCase() {
            return super.toLowerCase();
        }

        /** Delegates to the superclass {@code toLemma()}. */
        @Override
        public Builder toLemma() {
            return super.toLemma();
        }

        /** Delegates to the superclass {@code prefix(String)}. */
        @Override
        public Builder prefix(String str) {
            return super.prefix(str);
        }

        /** Delegates to the superclass {@code ignoreStopwords()}. */
        @Override
        public Builder ignoreStopwords() {
            return super.ignoreStopwords();
        }

        /** Delegates to the superclass {@code filter(String)}. */
        @Override
        public Builder filter(String str) {
            return super.filter(str);
        }

        /** Delegates to the superclass {@code filter(LyreExpression)}. */
        @Override
        public Builder filter(LyreExpression lyreExpression) {
            return super.filter(lyreExpression);
        }

        /**
         * Delegates to the superclass {@code annotations} overload taking an array of annotation types.
         * Declared with varargs so that calls listing the types individually resolve against this class.
         */
        @Override
        public Builder annotations(AnnotationType... annotationTypeArr) {
            return super.annotations(annotationTypeArr);
        }

        /** Delegates to the superclass {@code annotations} overload taking a list of annotation types. */
        @Override
        public Builder annotations(List<AnnotationType> list) {
            return super.annotations(list);
        }
    }

    /**
     * Iterator over n-gram spans built from a list of annotations.
     *
     * <p>For each start index, the start annotation is unioned with the annotation at every end index from
     * {@code start + minOrder - 1} up to, but excluding, {@code start + maxOrder}, bounded by the list size.
     * Unions reported as empty are skipped. Spans for one start index are buffered and handed out in order
     * before the next start index is processed.
     */
    private class NGramHStringIterator implements Iterator<HString> {
        private final List<Annotation> source;
        private final LinkedList<HString> pending = new LinkedList<>();
        private int start = 0;

        /** Stores the annotation list and eagerly buffers the first batch of spans. */
        private NGramHStringIterator(List<Annotation> list) {
            this.source = list;
            advance();
        }

        /**
         * Refills the buffer when it is empty by moving through start positions until one yields a span or
         * the list is exhausted.
         *
         * @return true when at least one span is buffered
         */
        private boolean advance() {
            while (pending.isEmpty() && start < source.size()) {
                int end = start + NGramExtractor.this.getMinOrder() - 1;
                while (end < source.size() && end < start + NGramExtractor.this.getMaxOrder()) {
                    HString span = source.get(start).union(source.get(end));
                    if (!span.isEmpty()) {
                        pending.add(span);
                    }
                    end++;
                }
                start++;
            }
            return !pending.isEmpty();
        }

        @Override
        public boolean hasNext() {
            return advance();
        }

        @Override
        public HString next() {
            if (!advance()) {
                throw new NoSuchElementException();
            }
            return pending.removeFirst();
        }
    }

    /**
     * Iterator over n-grams represented as tuples of annotations.
     *
     * <p>For each start index a tuple is grown one annotation at a time; every intermediate tuple whose degree
     * lies in {@code [minOrder, maxOrder]} is copied into a buffer. Tuples for one start index are handed out in
     * order before the next start index is processed.
     */
    public class NGramTupleIterator implements Iterator<Tuple> {
        private final List<Annotation> annotations;
        private final LinkedList<Tuple> buffer = new LinkedList<>();
        private int i = 0;

        /** Stores the annotation list and eagerly buffers the first batch of tuples. */
        private NGramTupleIterator(List<Annotation> list) {
            this.annotations = list;
            advance();
        }

        /**
         * Buffers a copy of the tuple when its degree is within the configured order range.
         *
         * @param tuple the candidate tuple
         * @return the same tuple that was passed in, so the caller can keep extending it
         */
        private Tuple add(Tuple tuple) {
            final int degree = tuple.degree();
            if (degree >= NGramExtractor.this.getMinOrder() && degree <= NGramExtractor.this.getMaxOrder()) {
                this.buffer.add((Tuple) tuple.copy());
            }
            return tuple;
        }

        /**
         * Refills the buffer when it is empty by moving through start positions until one yields a tuple or
         * the list is exhausted.
         *
         * @return true when at least one tuple is buffered
         */
        private boolean advance() {
            final int size = this.annotations.size();
            while (this.i < size && this.buffer.isEmpty()) {
                Tuple current = add(Tuples.$(this.annotations.get(this.i)));
                // After appending the annotation at offset k the tuple has degree k + 1, so offsets stop at
                // maxOrder - 1; a longer tuple would always be rejected by add().
                for (int offset = 1; offset < NGramExtractor.this.getMaxOrder() && this.i + offset < size; offset++) {
                    current = add(current.appendRight(this.annotations.get(this.i + offset)));
                }
                this.i++;
            }
            return !this.buffer.isEmpty();
        }

        @Override
        public boolean hasNext() {
            return advance();
        }

        @Override
        public Tuple next() {
            if (advance()) {
                return this.buffer.removeFirst();
            }
            throw new NoSuchElementException();
        }
    }

    /**
     * Creates a builder whose minimum and maximum order are both 2.
     *
     * @return a new builder configured for bigrams
     */
    public static Builder bigrams() {
        final int order = 2;
        return builder(order, order);
    }

    /**
     * Creates a builder in its initial state.
     *
     * @return a new builder
     */
    public static Builder builder() {
        final Builder fresh = new Builder();
        return fresh;
    }

    /**
     * Creates a builder with the given minimum order.
     *
     * @param i the minimum order, applied through {@link Builder#minOrder(int)}
     * @return a new builder
     */
    public static Builder builder(int i) {
        final Builder configured = new Builder();
        return configured.minOrder(i);
    }

    /**
     * Creates a builder with the given minimum and maximum order. The minimum is applied first, then the
     * maximum.
     *
     * @param i  the minimum order, applied through {@link Builder#minOrder(int)}
     * @param i2 the maximum order, applied through {@link Builder#maxOrder(int)}
     * @return a new builder
     */
    public static Builder builder(int i, int i2) {
        final Builder configured = new Builder();
        return configured.minOrder(i).maxOrder(i2);
    }

    /**
     * Creates a builder whose minimum and maximum order are both 3.
     *
     * @return a new builder configured for trigrams
     */
    public static Builder trigrams() {
        final int order = 3;
        return builder(order, order);
    }

    /**
     * Creates an extractor from explicit settings; invoked by {@link Builder#build()}.
     *
     * @param i                 the minimum n-gram order
     * @param i2                the maximum n-gram order
     * @param annotationTypeArr forwarded to the superclass constructor (the builder's annotation types)
     * @param lyreExpression    forwarded to the superclass constructor (the builder's filter)
     * @param str               forwarded to the superclass constructor (the builder's prefix)
     * @param lyreExpression2   forwarded to the superclass constructor (the builder's toString expression)
     * @param lyreExpression3   forwarded to the superclass constructor (the builder's trim expression)
     * @param valueCalculator   forwarded to the superclass constructor
     */
    private NGramExtractor(
            int i,
            int i2,
            AnnotationType[] annotationTypeArr,
            LyreExpression lyreExpression,
            String str,
            LyreExpression lyreExpression2,
            LyreExpression lyreExpression3,
            ValueCalculator valueCalculator) {
        super(annotationTypeArr, lyreExpression, str, lyreExpression2, lyreExpression3, valueCalculator);
        this.maxOrder = i2;
        this.minOrder = i;
    }

    /**
     * Creates the stream of n-gram spans for the given text by iterating over its interleaved annotations of
     * the configured annotation types.
     *
     * @param hString the text to extract n-grams from
     * @return a stream backed by an iterator over the n-gram spans
     */
    @Override
    protected Stream<HString> createStream(HString hString) {
        final List<Annotation> tokens = hString.interleaved(getAnnotationTypes());
        final Iterator<HString> spans = new NGramHStringIterator(tokens);
        return Streams.asStream(spans);
    }

    /**
     * Extracts the n-grams of the given text as tuples, mapping every tuple value through the
     * {@code applyAsString} method of the expression returned by {@code getToString()}.
     *
     * <p>The expression is looked up once per tuple while the stream is consumed, so a null expression only
     * raises a {@link NullPointerException} when there is at least one tuple to convert.
     *
     * @param hString the text to extract n-grams from; must not be null
     * @return the list of converted tuples
     * @throws NullPointerException if {@code hString} is null
     */
    public List<Tuple> extractStringTuples(@NonNull HString hString) {
        if (hString == null) {
            throw new NullPointerException("hString is marked non-null but is null");
        }
        return tupleStream(hString)
            .<Tuple>map(tuple -> tuple.mapValues(getToString()::applyAsString))
            .collect(Collectors.toList());
    }

    /**
     * Creates a builder pre-populated with this extractor's settings.
     *
     * @return a new builder initialised through {@link Builder#fromExtractor(MultiPhaseExtractor)}
     */
    @Override
    public Builder toBuilder() {
        final MultiPhaseExtractor self = this;
        final Builder fresh = builder();
        return fresh.fromExtractor(self);
    }

    /**
     * Renders the extractor's configuration: both orders, the annotation types, and the toString, filter,
     * trim and value-calculator settings.
     *
     * @return a single-line description of this extractor
     */
    @Override
    public String toString() {
        final String orders = "NGramExtractor{maxOrder=" + this.maxOrder + ", minOrder=" + this.minOrder;
        final String types = ", annotationTypes=" + Arrays.toString(getAnnotationTypes());
        final String expressions = ", toString=" + getToString() + ", filter=" + getFilter() + ", trim=" + getTrim();
        final String calculator = ", valueCalculator=" + getValueCalculator() + "}";
        return orders + types + expressions + calculator;
    }

    /**
     * Builds the stream of annotation tuples for the given text.
     *
     * <p>The annotations come from {@code hString.interleaved(...)} when more than one annotation type is
     * configured and from {@code hString.annotations(...)} on the first type otherwise. Two optional stages
     * follow:
     * <ul>
     *   <li>when {@code getTrim()} is non-null, every element for which the trim expression tests true is
     *       dropped from each tuple, and tuples left empty are discarded;</li>
     *   <li>when {@code getFilter()} is non-null, a tuple is kept only if the negated filter accepts the span
     *       formed from its first and last element (or its only element for a tuple of degree 1).</li>
     * </ul>
     *
     * @param hString the text to extract n-grams from
     * @return the stream of tuples after trimming and filtering
     */
    private Stream<Tuple> tupleStream(HString hString) {
        final AnnotationType[] types = getAnnotationTypes();
        final List<Annotation> tokens = types.length > 1
            ? hString.interleaved(types)
            : hString.annotations(types[0]);
        Stream<Tuple> ngrams = Streams.asStream(new NGramTupleIterator(tokens));

        if (getTrim() != null) {
            ngrams = ngrams.map(candidate -> {
                // Rebuild the tuple from scratch, keeping only the elements the trim expression rejects.
                Tuple kept = Tuple0.INSTANCE;
                for (Object element : candidate) {
                    HString token = (HString) Cast.as(element);
                    if (!getTrim().test(token)) {
                        kept = kept.appendRight(token);
                    }
                }
                return kept;
            }).filter(remaining -> remaining.degree() > 0);
        }

        if (getFilter() != null) {
            final SerializablePredicate<HString> negated = getFilter().negate();
            ngrams = ngrams.filter(candidate -> {
                final HString first = (HString) candidate.get(0);
                final HString span = candidate.degree() == 1
                    ? first
                    : HString.union(first, (HString) candidate.get(candidate.degree() - 1), new HString[0]);
                return negated.test(span);
            });
        }
        return ngrams;
    }

    /**
     * Returns the maximum n-gram order.
     *
     * @return the value of the {@code maxOrder} field
     */
    public int getMaxOrder() {
        return maxOrder;
    }

    /**
     * Returns the minimum n-gram order.
     *
     * @return the value of the {@code minOrder} field
     */
    public int getMinOrder() {
        return minOrder;
    }

    /**
     * No-argument constructor that leaves {@code minOrder} and {@code maxOrder} at their default value of 0.
     * NOTE(review): presumably present for deserialization or subclassing; confirm against callers.
     */
    protected NGramExtractor() {
        super();
    }
}
