package com.gengoai.hermes.corpus;

import com.gengoai.collection.Sets;
import com.gengoai.collection.counter.Counter;
import com.gengoai.conversion.Cast;
import com.gengoai.function.SerializableConsumer;
import com.gengoai.hermes.AnnotatableType;
import com.gengoai.hermes.AnnotationPipeline;
import com.gengoai.hermes.AttributeType;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.HString;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.extraction.caduceus.CaduceusProgram;
import com.gengoai.hermes.extraction.regex.TokenMatch;
import com.gengoai.hermes.extraction.regex.TokenRegex;
import com.gengoai.hermes.format.DocFormatService;
import com.gengoai.hermes.lexicon.Lexicon;
import com.gengoai.hermes.morphology.StandardTokenizer;
import com.gengoai.hermes.workflow.Context;
import com.gengoai.hermes.workflow.SequentialWorkflow;
import com.gengoai.io.Resources;
import com.gengoai.io.resource.Resource;
import com.gengoai.specification.Specification;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/corpus/Corpus.class */
public interface Corpus extends DocumentCollection {
    /**
     * Opens a corpus stored at the file that the given resource resolves to.
     *
     * <p>The corpus returned is a {@code LuceneCorpus} built on {@code resource.asFile()}; if the
     * resource cannot supply a file, the failure raised by {@code orElseThrow()} propagates.
     *
     * @param resource the location of the corpus; must not be null
     * @return the opened corpus
     * @throws NullPointerException if {@code resource} is null
     */
    static Corpus open(@NonNull Resource resource) {
        Objects.requireNonNull(resource, "resource is marked non-null but is null");
        final File location = (File) resource.asFile().orElseThrow();
        return new LuceneCorpus(location);
    }

    /**
     * Opens a corpus from a resource specification string.
     *
     * <p>The string is converted with {@code Resources.from} and the result is handed to
     * {@link #open(Resource)}.
     *
     * @param str the resource specification of the corpus location; must not be null
     * @return the opened corpus
     * @throws NullPointerException if {@code str} is null
     */
    static Corpus open(@NonNull String str) {
        Objects.requireNonNull(str, "resource is marked non-null but is null");
        final Resource location = Resources.from(str);
        return open(location);
    }

    /**
     * Adds a single document to this corpus.
     *
     * @param document the document to add
     * @return a status flag defined by the implementation (presumably {@code true} when the
     *     document was added - confirm against implementations)
     */
    boolean add(Document document);

    /**
     * Adds every document produced by the given iterable by calling {@link #add(Document)} once per
     * element. The boolean results of the individual {@code add} calls are discarded.
     *
     * @param iterable the documents to add; must not be null
     * @throws NullPointerException if {@code iterable} is null
     */
    default void addAll(@NonNull Iterable<Document> iterable) {
        Objects.requireNonNull(iterable, "documents is marked non-null but is null");
        iterable.forEach(document -> add(document));
    }

    /**
     * Annotates the documents of this corpus with the requested types.
     *
     * <p>Only the difference between the requested types and {@link #getCompleted()} is passed to
     * the {@code AnnotationPipeline}. When the pipeline reports that no update is required the
     * corpus is returned untouched; otherwise the pipeline is run over the documents through
     * {@code update("Annotate", ...)}.
     *
     * @param annotatableTypeArr the annotation types to apply; must not be null
     * @return this corpus when nothing needs annotating, otherwise the result of the update
     * @throws NullPointerException if {@code annotatableTypeArr} is null
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    default Corpus annotate(@NonNull AnnotatableType... annotatableTypeArr) {
        Objects.requireNonNull(annotatableTypeArr, "annotatableTypes is marked non-null but is null");
        final AnnotationPipeline pipeline = new AnnotationPipeline(
                Sets.difference(Arrays.asList(annotatableTypeArr), getCompleted()));
        if (pipeline.requiresUpdate()) {
            return update("Annotate", pipeline::annotate);
        }
        return this;
    }

    /**
     * Applies the lexicon to this corpus by delegating to the {@code DocumentCollection} default
     * implementation and narrowing its result to {@code Corpus}.
     *
     * @param lexicon the lexicon forwarded to the inherited implementation
     * @param serializableConsumer the consumer forwarded to the inherited implementation
     * @return the result of the inherited implementation, cast to {@code Corpus}
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    default Corpus apply(Lexicon lexicon, SerializableConsumer<HString> serializableConsumer) {
        // Inside an interface the inherited default must be reached through the qualified
        // InterfaceName.super form; an unqualified super call is rejected by the compiler.
        return (Corpus) Cast.as(DocumentCollection.super.apply(lexicon, serializableConsumer));
    }

    /**
     * Applies the token regular expression to this corpus by delegating to the
     * {@code DocumentCollection} default implementation and narrowing its result to {@code Corpus}.
     *
     * @param tokenRegex the pattern forwarded to the inherited implementation
     * @param serializableConsumer the consumer forwarded to the inherited implementation
     * @return the result of the inherited implementation, cast to {@code Corpus}
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    default Corpus apply(TokenRegex tokenRegex, SerializableConsumer<TokenMatch> serializableConsumer) {
        // Inside an interface the inherited default must be reached through the qualified
        // InterfaceName.super form; an unqualified super call is rejected by the compiler.
        return (Corpus) Cast.as(DocumentCollection.super.apply(tokenRegex, serializableConsumer));
    }

    /**
     * Labels every document with a {@code Types.SPLIT} attribute of either {@code "TRAIN"} or
     * {@code "TEST"}.
     *
     * <p>The number of TRAIN documents is {@code floor(d * size())}. The first that many documents
     * visited by {@code update} receive {@code "TRAIN"}; every later document receives
     * {@code "TEST"}. No shuffling happens in this method, so which documents land in which split
     * depends on the order in which {@code update} visits them.
     *
     * <p>NOTE(review): the counter is captured by the consumer passed to {@code update}; if an
     * implementation copies or distributes that consumer the count may not be shared - confirm.
     *
     * @param d the fraction of the corpus to label TRAIN (for example {@code 0.8})
     */
    default void assignRandomSplit(double d) {
        // Keep the target as a long: an int cast would clamp the count for very large corpora.
        final AtomicLong remainingTrain = new AtomicLong((long) Math.floor(d * size()));
        update("AssignSplit", document -> {
            // getAndDecrement tests the value before decrementing, so exactly the target number
            // of documents are labelled TRAIN (decrementAndGet would label one fewer).
            if (remainingTrain.getAndDecrement() > 0) {
                document.attribute(Types.SPLIT, "TRAIN");
            } else {
                document.attribute(Types.SPLIT, "TEST");
            }
        });
    }

    /**
     * Compacts this corpus. The default implementation performs no work and simply returns this
     * corpus; implementations may override it.
     *
     * @return this corpus
     */
    default Corpus compact() {
        return this;
    }

    /**
     * Returns a counter over the values of the given attribute type.
     *
     * @param attributeType the attribute whose values are counted; must not be null
     * @param <T> the value type of the attribute
     * @return a counter keyed by attribute value (presumably tallied across the documents of this
     *     corpus - confirm against implementations)
     */
    <T> Counter<T> getAttributeValueCount(@NonNull AttributeType<T> attributeType);

    /**
     * Returns the attribute types associated with this corpus.
     *
     * @return the set of attribute types
     */
    Set<AttributeType<?>> getAttributes();

    /**
     * Returns the annotatable types recorded as completed for this corpus. {@code annotate}
     * subtracts this set from the requested types before building its pipeline.
     *
     * @return the set of completed annotatable types
     */
    Set<AnnotatableType> getCompleted();

    /**
     * Looks up a document by id by scanning the parallel stream of this corpus.
     *
     * @param str the id to search for
     * @return a document whose id equals {@code str}, or {@code null} when none matches
     */
    default Document getDocument(String str) {
        return (Document) parallelStream()
                .filter(candidate -> candidate.getId().equals(str))
                .first()
                .orElse(null);
    }

    /**
     * Collects the ids of all documents in this corpus.
     *
     * @return the document ids, sorted via {@code sorted(true)} (presumably ascending - confirm
     *     against the stream API)
     */
    default List<String> getIds() {
        return parallelStream()
                .map(Document::getId)
                .sorted(true)
                .collect();
    }

    /**
     * Imports documents described by a specification string into this corpus.
     *
     * <p>The string is parsed into a {@code Specification}; a document format is created from it
     * via {@code DocFormatService.create}, the resource at the specification's path is read with
     * that format, and every document read is added through {@link #addAll(Iterable)}.
     *
     * @param str the specification describing the format and location of the documents; must not
     *     be null
     * @return this corpus
     * @throws IOException if reading the documents fails
     * @throws NullPointerException if {@code str} is null
     */
    default Corpus importDocuments(@NonNull String str) throws IOException {
        Objects.requireNonNull(str, "specification is marked non-null but is null");
        final Specification specification = Specification.parse(str);
        final Resource source = Resources.from(specification.getPath());
        addAll(DocFormatService.create(specification).read(source));
        return this;
    }

    /**
     * Runs the given workflow with a fresh {@code Context} and returns this corpus.
     *
     * <p>The value returned by the workflow is discarded.
     *
     * @param sequentialWorkflow the workflow to run; must not be null
     * @return this corpus
     * @throws Exception if the workflow fails
     * @throws NullPointerException if {@code sequentialWorkflow} is null
     */
    default Corpus process(@NonNull SequentialWorkflow sequentialWorkflow) throws Exception {
        Objects.requireNonNull(sequentialWorkflow, "processor is marked non-null but is null");
        // NOTE(review): the workflow receives null as its first argument rather than this corpus.
        // Confirm against SequentialWorkflow.process whether `this` was intended.
        final Context context = new Context();
        sequentialWorkflow.process(null, context);
        return this;
    }

    /**
     * Removes the given document from this corpus.
     *
     * @param document the document to remove
     * @return a status flag defined by the implementation (presumably {@code true} when a document
     *     was removed - confirm against implementations)
     */
    boolean remove(Document document);

    /**
     * Removes a document identified by a string.
     *
     * @param str the identifying string (presumably the document id - confirm against
     *     implementations)
     * @return a status flag defined by the implementation (presumably {@code true} when a document
     *     was removed - confirm against implementations)
     */
    boolean remove(String str);

    /**
     * Repartitions this corpus. The default implementation ignores the requested value and returns
     * this corpus unchanged.
     *
     * @param i the requested number of partitions (unused by this default implementation)
     * @return this corpus
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    default Corpus repartition(int i) {
        return this;
    }

    /**
     * Applies the given consumer to the documents of this corpus, narrowing the inherited return
     * type to {@code Corpus}.
     *
     * <p>Within this interface it is invoked with an operation label such as {@code "Annotate"} or
     * {@code "AssignSplit"} and a per-document consumer.
     *
     * @param str the label of the operation; must not be null
     * @param serializableConsumer the operation to run on each document; must not be null
     * @return the resulting corpus
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    Corpus update(@NonNull String str, @NonNull SerializableConsumer<Document> serializableConsumer);

    /**
     * Runs the given Caduceus program over this corpus by delegating to the
     * {@code DocumentCollection} default implementation and narrowing its result to {@code Corpus}.
     *
     * @param caduceusProgram the program to run; must not be null
     * @return the result of the inherited implementation, cast to {@code Corpus}
     * @throws NullPointerException if {@code caduceusProgram} is null
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    default Corpus update(@NonNull CaduceusProgram caduceusProgram) {
        if (caduceusProgram == null) {
            throw new NullPointerException("program is marked non-null but is null");
        }
        // Inside an interface the inherited default must be reached through the qualified
        // InterfaceName.super form; an unqualified super call is rejected by the compiler.
        return (Corpus) Cast.as(DocumentCollection.super.update(caduceusProgram));
    }

    /**
     * Updates a single document in this corpus.
     *
     * @param document the document to update
     * @return a status flag defined by the implementation (presumably {@code true} when the
     *     document was updated - confirm against implementations)
     */
    boolean update(Document document);

    /**
     * Bridge method (marked bridge/synthetic) that forwards the raw-typed, {@code DocumentCollection}
     * returning form of {@code update} to the {@code Corpus}-returning overload.
     *
     * <p>NOTE(review): this has the same erasure as the typed {@code update(String,
     * SerializableConsumer<Document>)} declared in this interface; it looks like compiler-generated
     * output and likely should not be kept in hand-maintained source - confirm.
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    /* bridge */ /* synthetic */ default DocumentCollection update(@NonNull String str, @NonNull SerializableConsumer serializableConsumer) {
        return update(str, (SerializableConsumer<Document>) serializableConsumer);
    }

    /**
     * Bridge method (marked bridge/synthetic) that forwards the raw-typed, {@code DocumentCollection}
     * returning form of {@code apply(TokenRegex, ...)} to the {@code Corpus}-returning overload.
     *
     * <p>NOTE(review): this has the same erasure as the typed overload declared in this interface;
     * it looks like compiler-generated output and likely should not be kept in hand-maintained
     * source - confirm.
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    /* bridge */ /* synthetic */ default DocumentCollection apply(TokenRegex tokenRegex, SerializableConsumer serializableConsumer) {
        return apply(tokenRegex, (SerializableConsumer<TokenMatch>) serializableConsumer);
    }

    /**
     * Bridge method (marked bridge/synthetic) that forwards the raw-typed, {@code DocumentCollection}
     * returning form of {@code apply(Lexicon, ...)} to the {@code Corpus}-returning overload.
     *
     * <p>NOTE(review): this has the same erasure as the typed overload declared in this interface;
     * it looks like compiler-generated output and likely should not be kept in hand-maintained
     * source - confirm.
     */
    @Override // com.gengoai.hermes.corpus.DocumentCollection
    /* bridge */ /* synthetic */ default DocumentCollection apply(Lexicon lexicon, SerializableConsumer serializableConsumer) {
        return apply(lexicon, (SerializableConsumer<HString>) serializableConsumer);
    }

    /**
     * Synthetic hook used when a serialized lambda of this interface is deserialized.
     *
     * <p>The implementation method name stored in {@code serializedLambda} selects one of four
     * known lambdas / method references (the {@code getDocument} predicate, the
     * {@code AnnotationPipeline.annotate} reference, the {@code Document.getId} reference and the
     * {@code assignRandomSplit} consumer). The recorded method kind, functional interface,
     * implementing class and signatures are then verified before the lambda is rebuilt from its
     * captured arguments.
     *
     * <p>NOTE(review): {@code z} is declared {@code boolean} but assigned {@code -1}, {@code 2} and
     * {@code 3}, and the second switch repeats {@code case true}. This appears to be decompiler
     * output and will not compile as written.
     *
     * @param serializedLambda the serialized form of the lambda being restored
     * @return the reconstructed lambda
     * @throws IllegalArgumentException if the serialized form matches none of the known lambdas
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // First pass: map the implementation method name (hash first, then equals) to a selector.
        switch (implMethodName.hashCode()) {
            case -1668284069:
                if (implMethodName.equals("lambda$assignRandomSplit$bac5e9c3$1")) {
                    z = 3;
                    break;
                }
                break;
            case -649662132:
                if (implMethodName.equals("annotate")) {
                    z = true;
                    break;
                }
                break;
            case 98245393:
                if (implMethodName.equals("getId")) {
                    z = 2;
                    break;
                }
                break;
            case 247895942:
                if (implMethodName.equals("lambda$getDocument$3ec5d09$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second pass: verify the recorded shape of the selected lambda and rebuild it.
        switch (z) {
            // Predicate from getDocument: captures the id being searched for.
            case StandardTokenizer.YYINITIAL /* 0 */:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("com/gengoai/function/SerializablePredicate") && serializedLambda.getFunctionalInterfaceMethodName().equals("test") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Z") && serializedLambda.getImplClass().equals("com/gengoai/hermes/corpus/Corpus") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;Lcom/gengoai/hermes/Document;)Z")) {
                    String str = (String) serializedLambda.getCapturedArg(0);
                    return document -> {
                        return document.getId().equals(str);
                    };
                }
                break;
            // Bound method reference to AnnotationPipeline.annotate: captures the pipeline.
            case true:
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("com/gengoai/function/SerializableConsumer") && serializedLambda.getFunctionalInterfaceMethodName().equals("accept") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)V") && serializedLambda.getImplClass().equals("com/gengoai/hermes/AnnotationPipeline") && serializedLambda.getImplMethodSignature().equals("(Lcom/gengoai/hermes/Document;)Z")) {
                    AnnotationPipeline annotationPipeline = (AnnotationPipeline) serializedLambda.getCapturedArg(0);
                    return annotationPipeline::annotate;
                }
                break;
            // Unbound reference to Document.getId: nothing captured.
            case true:
                if (serializedLambda.getImplMethodKind() == 9 && serializedLambda.getFunctionalInterfaceClass().equals("com/gengoai/function/SerializableFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("apply") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("com/gengoai/hermes/Document") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/String;")) {
                    return (v0) -> {
                        return v0.getId();
                    };
                }
                break;
            // Consumer from assignRandomSplit: captures the shared AtomicLong counter.
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("com/gengoai/function/SerializableConsumer") && serializedLambda.getFunctionalInterfaceMethodName().equals("accept") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)V") && serializedLambda.getImplClass().equals("com/gengoai/hermes/corpus/Corpus") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/concurrent/atomic/AtomicLong;Lcom/gengoai/hermes/Document;)V")) {
                    AtomicLong atomicLong = (AtomicLong) serializedLambda.getCapturedArg(0);
                    return document2 -> {
                        if (atomicLong.decrementAndGet() > 0) {
                            document2.attribute(Types.SPLIT, "TRAIN");
                        } else {
                            document2.attribute(Types.SPLIT, "TEST");
                        }
                    };
                }
                break;
        }
        // No known lambda matched the serialized form.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
