package com.gengoai.hermes.format;

import com.gengoai.Tag;
import com.gengoai.collection.Maps;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.Relation;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.morphology.PartOfSpeech;
import com.gengoai.io.resource.Resource;
import com.gengoai.parsing.Lexer;
import com.gengoai.parsing.TokenDef;
import com.gengoai.parsing.TokenStream;
import com.gengoai.string.Strings;
import com.gengoai.tuple.Tuples;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.function.Consumer;
import java.util.stream.Stream;
import lombok.NonNull;

/* loaded from: input_file:com/gengoai/hermes/format/PennTreebankFormat.class */
public class PennTreebankFormat extends WholeFileTextFormat implements OneDocPerFileFormat {
    private final DocFormatParameters parameters;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/gengoai/hermes/format/PennTreebankFormat$Node.class */
    /**
     * Mutable node of a parsed bracketed tree.
     *
     * <p>A node carries a {@code tag}, a {@code word}, or both, plus an ordered list of
     * children and a link to its parent. {@code start} is the character offset of the
     * node's word in the reconstructed document text; it stays {@code -1} until an
     * offset has been assigned.
     */
    public static class Node {
        /** Child nodes in source order. */
        private final List<Node> children = new ArrayList<>();
        /** Enclosing node, or {@code null} for a root. */
        private Node parent;
        /** Character offset of {@link #word}; {@code -1} while unassigned. */
        private int start = -1;
        /** Bracket label of this node, or {@code null} for a bare word. */
        private String tag;
        /** Surface text of this node, or {@code null} when the node has none. */
        private String word;

        private Node() {
        }

        /**
         * Creates a node that only has a tag.
         *
         * @param str the tag to assign
         * @return a new node whose word is {@code null}
         */
        public static Node tag(String str) {
            Node tagged = new Node();
            tagged.tag = str;
            return tagged;
        }

        /**
         * Creates a node that only has a word.
         *
         * @param str the word to assign
         * @return a new node whose tag is {@code null}
         */
        public static Node word(String str) {
            Node leaf = new Node();
            leaf.word = str;
            return leaf;
        }

        /**
         * Computes the end offset of this node.
         *
         * @return {@code start + word.length()} when the node has a word, otherwise the
         *         end offset of its last child
         * @throws IndexOutOfBoundsException if the node has neither a word nor children
         */
        public int end() {
            if (word != null) {
                return start + word.length();
            }
            Node lastChild = children.get(children.size() - 1);
            return lastChild.end();
        }

        /**
         * Computes the start offset of this node.
         *
         * @return the node's own offset when one has been assigned, otherwise the start
         *         offset of its first child
         * @throws IndexOutOfBoundsException if no offset is assigned and there are no children
         */
        public int start() {
            if (start != -1) {
                return start;
            }
            return children.get(0).start();
        }

        @Override
        public String toString() {
            return "(" + word + " / " + tag + ") ";
        }

        /**
         * Visits every ancestor of this node, nearest parent first, ending at the root.
         * The node itself is not visited.
         *
         * @param consumer callback invoked once per ancestor
         */
        public void traverseParentToRoot(Consumer<Node> consumer) {
            for (Node ancestor = parent; ancestor != null; ancestor = ancestor.parent) {
                consumer.accept(ancestor);
            }
        }
    }

    /* loaded from: input_file:com/gengoai/hermes/format/PennTreebankFormat$Provider.class */
    /**
     * Provider that registers this format under the name {@code "PTB"} and builds
     * {@link PennTreebankFormat} instances. The format is reported as not writeable.
     */
    public static class Provider implements DocFormatProvider {
        /**
         * @return the format name, {@code "PTB"}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public String getName() {
            return "PTB";
        }

        /**
         * @return always {@code false}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public boolean isWriteable() {
            return false;
        }

        /**
         * Builds a new format instance bound to the given parameters.
         *
         * @param docFormatParameters the parameters handed to the new format
         * @return a new {@link PennTreebankFormat}
         */
        @Override // com.gengoai.hermes.format.DocFormatProvider
        public DocFormat create(DocFormatParameters docFormatParameters) {
            return new PennTreebankFormat(docFormatParameters);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/gengoai/hermes/format/PennTreebankFormat$TT.class */
    /**
     * Token definitions handed to the lexer: the two parenthesis characters and
     * a catch-all for any run of characters that is neither whitespace nor a parenthesis.
     */
    public enum TT implements TokenDef {
        /** An opening parenthesis. */
        OPEN_PARENS("\\("),
        /** A closing parenthesis. */
        CLOSE_PARENS("\\)"),
        /** One or more characters that are not whitespace and not a parenthesis. */
        OTHER("[^\\s\\)\\(]+");

        /** Pattern text associated with this token type. */
        private final String pattern;

        TT(String regex) {
            this.pattern = regex;
        }

        /**
         * @return the pattern text of this token type
         */
        public String getPattern() {
            return pattern;
        }
    }

    /**
     * Creates a format bound to the given parameters.
     *
     * @param docFormatParameters the parameters for this format; must not be {@code null}
     * @throws NullPointerException if {@code docFormatParameters} is {@code null}
     */
    PennTreebankFormat(@NonNull DocFormatParameters docFormatParameters) {
        this.parameters = java.util.Objects.requireNonNull(docFormatParameters, "parameters is marked non-null but is null");
    }

    /**
     * @return the parameters this format was constructed with
     */
    @Override // com.gengoai.hermes.format.DocFormat
    public DocFormatParameters getParameters() {
        return parameters;
    }

    /**
     * Parses the bracketed content of one file into a single document.
     *
     * <p>Trees are read one after another until the input is exhausted or a tree cannot be
     * produced. The words of every tree are appended, space separated, to the document text;
     * each tree that yields at least one token becomes a sentence. Sentence, token,
     * non-terminal and syntactic-head annotations are then created on the document.
     *
     * @param str the complete text of the file
     * @return a stream containing exactly one document
     */
    @Override // com.gengoai.hermes.format.WholeFileTextFormat
    protected Stream<Document> readSingleFile(String str) {
        TokenStream lex = Lexer.create(TT.values()).lex(str);
        List<List<Node>> sentences = new ArrayList<>();
        StringBuilder text = new StringBuilder();
        while (lex.hasNext()) {
            Node tree = recurse(lex);
            if (tree == null) {
                break;
            }
            List<Node> tokens = toTokens(text, tree);
            if (!tokens.isEmpty()) {
                sentences.add(tokens);
            }
            // An unlabeled wrapper "( (S ...) )" leaves its own closing parenthesis
            // unread after the inner tree has been parsed; drop it here.
            if (lex.peek().getType().isInstance(TT.CLOSE_PARENS)) {
                lex.consume();
            }
        }
        Document document = getParameters().getDocumentFactory().create(text.toString().strip());
        for (List<Node> sentence : sentences) {
            annotateSentence(document, sentence);
        }
        document.setCompleted(Types.SENTENCE, "PROVIDED");
        document.setCompleted(Types.TOKEN, "PROVIDED");
        document.setCompleted(Types.PART_OF_SPEECH, "PROVIDED");
        document.setCompleted(Types.CONSTITUENT_PARSE, "PROVIDED");
        return Stream.of(document);
    }

    /**
     * Adds the sentence, token, non-terminal and syntactic-head annotations for one sentence.
     *
     * @param document the document receiving the annotations
     * @param sentence the non-empty, ordered token nodes of the sentence, with offsets assigned
     */
    private void annotateSentence(Document document, List<Node> sentence) {
        document.createAnnotation(Types.SENTENCE, sentence.get(0).start, sentence.get(sentence.size() - 1).end(), Collections.emptyMap());

        // Node has identity equality, so this maps each tree node to its own annotation.
        Map<Node, Annotation> annotations = new HashMap<>();
        for (Node token : sentence) {
            annotations.put(token, document.createAnnotation(Types.TOKEN, token.start, token.end(), Maps.hashMapOf(new Map.Entry[]{Tuples.$(Types.PART_OF_SPEECH, PartOfSpeech.valueOf(token.tag))})));
        }

        // Create one non-terminal annotation per distinct ancestor of any token.
        for (Node token : sentence) {
            token.traverseParentToRoot(ancestor -> {
                if (annotations.containsKey(ancestor)) {
                    return;
                }
                // The part of speech is the tag up to the first '-' or '='.
                Annotation constituent = document.createAnnotation(Types.NON_TERMINAL_NODE, ancestor.start(), ancestor.end(), Maps.hashMapOf(new Map.Entry[]{Tuples.$(Types.PART_OF_SPEECH, PartOfSpeech.valueOf(ancestor.tag.replaceAll("[-=].*$", "")))}));
                // Whatever follows the first '-' is stored as the syntactic function.
                String function = ancestor.tag.replaceAll("^[^\\-]+-", "");
                if (Strings.isNotNullOrBlank(function) && !function.equalsIgnoreCase(ancestor.tag)) {
                    constituent.put(Types.SYNTACTIC_FUNCTION, function);
                }
                annotations.put(ancestor, constituent);
            });
        }

        // Link every child to its parent constituent. Tokens share ancestors, so each
        // constituent is processed only the first time it is reached; revisiting it for
        // every token would rebuild and re-add the very same relations.
        Set<Node> linked = new HashSet<>();
        for (Node token : sentence) {
            token.traverseParentToRoot(ancestor -> {
                if (!linked.add(ancestor)) {
                    return;
                }
                Annotation head = annotations.get(ancestor);
                for (Node child : ancestor.children) {
                    annotations.get(child).add(new Relation(Types.SYNTACTIC_HEAD, ancestor.tag, head.getId()));
                }
            });
        }
    }

    /**
     * Reads the next element from the token stream and turns it into a node.
     *
     * <ul>
     *   <li>A closing parenthesis is consumed and reported as {@code null}.</li>
     *   <li>Any token other than a parenthesis becomes a word-only node.</li>
     *   <li>An opening parenthesis starts a bracket. A bracket without a label, or one
     *       labelled {@code -NONE-} (whose content is discarded up to and including its
     *       closing parenthesis), is skipped and parsing continues with what follows.
     *       A label directly followed by a word yields a tagged leaf; otherwise the
     *       children are read recursively until the closing parenthesis.</li>
     * </ul>
     *
     * @param tokenStream the stream to read from; tokens are consumed as they are parsed
     * @return the parsed node, an empty placeholder node (no tag, word or children) for a
     *         bracket that ended up with no content, or {@code null} when a closing
     *         parenthesis was read
     */
    private Node recurse(TokenStream tokenStream) {
        TT type = tokenStream.peek().getType();
        if (type == TT.CLOSE_PARENS) {
            tokenStream.consume();
            return null;
        }
        if (type != TT.OPEN_PARENS) {
            return Node.word(tokenStream.consume().getText());
        }

        tokenStream.consume();
        String label = "";
        if (tokenStream.peek().isInstance(new Tag[]{TT.OTHER})) {
            label = tokenStream.consume().getText();
        }
        if (Strings.isNullOrBlank(label)) {
            // Unlabeled bracket: parse whatever follows the opening parenthesis instead.
            return recurse(tokenStream);
        }
        if (label.equalsIgnoreCase("-NONE-")) {
            // Discard everything up to and including this bracket's closing parenthesis.
            while (!tokenStream.peek().isInstance(new Tag[]{TT.CLOSE_PARENS})) {
                tokenStream.consume();
            }
            tokenStream.consume();
            return recurse(tokenStream);
        }

        Node constituent = Node.tag(label);
        if (tokenStream.peek().getType() == TT.OTHER) {
            // "(TAG word)": a leaf; the token after the word is consumed as its closing parenthesis.
            constituent.word = tokenStream.consume().getText();
            tokenStream.consume();
            return constituent;
        }

        // A null child means the closing parenthesis of this bracket has been consumed.
        for (Node child = recurse(tokenStream); child != null; child = recurse(tokenStream)) {
            child.parent = constituent;
            boolean placeholder = child.word == null && child.tag == null && child.children.isEmpty();
            if (placeholder) {
                continue;
            }
            if (child.tag == null && constituent.tag == null) {
                constituent.word = child.word;
            } else {
                constituent.children.add(child);
            }
        }

        boolean hasContent = constituent.word != null || !constituent.children.isEmpty();
        return hasContent ? constituent : new Node();
    }

    /**
     * Collects the word-bearing descendants of a tree in left-to-right order and appends
     * their text to the document buffer.
     *
     * <p>For every descendant that has a word, the word is first passed through
     * {@code POSCorrection.word}, the node's start offset is set to the current buffer
     * length, and the word followed by a single space is appended to the buffer. The
     * given root node itself is never emitted as a token; only its descendants are.
     *
     * @param sb   buffer holding the document text built so far; modified in place
     * @param node root of the tree to flatten
     * @return the word-bearing descendants in document order; empty if there are none
     */
    private List<Node> toTokens(StringBuilder sb, Node node) {
        Deque<Node> pending = new ArrayDeque<>();
        pushChildren(pending, node);
        List<Node> tokens = new ArrayList<>();
        while (!pending.isEmpty()) {
            Node current = pending.pop();
            if (current.word != null) {
                current.word = POSCorrection.word(current.word, current.tag);
                current.start = sb.length();
                tokens.add(current);
                sb.append(current.word).append(" ");
            }
            pushChildren(pending, current);
        }
        return tokens;
    }

    /**
     * Pushes the children of {@code parent} last-to-first so that the first child is popped first.
     */
    private static void pushChildren(Deque<Node> pending, Node parent) {
        for (int i = parent.children.size() - 1; i >= 0; i--) {
            pending.push(parent.children.get(i));
        }
    }

    /**
     * Writing is not supported by this format.
     *
     * @param document ignored
     * @param resource ignored
     * @throws UnsupportedOperationException always
     */
    @Override // com.gengoai.hermes.format.DocFormat
    public void write(Document document, Resource resource) {
        throw new UnsupportedOperationException("The PTB format is read-only; writing documents is not supported");
    }
}
