package org.culturegraph.mf.mediawiki.analyzer;

import de.fau.cs.osr.ptk.common.ast.AstNode;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.culturegraph.mf.framework.DefaultObjectPipe;
import org.culturegraph.mf.framework.StreamReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.culturegraph.mf.mediawiki.converter.WikiTextParser;
import org.culturegraph.mf.mediawiki.type.WikiPage;
import org.culturegraph.mf.mediawiki.util.TextExtractor;
import org.culturegraph.mf.mediawiki.util.TraverseTree;
import org.sweble.wikitext.lazy.preprocessor.Template;
import org.sweble.wikitext.lazy.preprocessor.TemplateArgument;

/**
 * Walks the AST of a {@link WikiPage} and emits one entity per template whose
 * name fully matches a configurable regular expression.
 *
 * <p>For every processed page a record is opened whose identifier is the page
 * id. Each matching template becomes an entity named after the sanitized
 * template name; the entity contains a marker literal followed by one literal
 * per <em>named</em> template argument.
 *
 * <p>Note: the name is tested with {@link Matcher#matches()}, i.e. the whole
 * trimmed template name has to match. The no-argument constructor installs the
 * empty pattern, which therefore only matches templates with an empty name;
 * call {@link #setNamePattern(String)} to select real templates.
 *
 * <p>A single {@link Matcher} instance is reused for all templates, so an
 * instance must not be used by several threads at once.
 */
@Description("Extracts all templates from the wiki page whose name matches a pattern.")
@In(WikiPage.class)
@Out(StreamReceiver.class)
/* loaded from: input_file:org/culturegraph/mf/mediawiki/analyzer/TemplateExtractor.class */
public final class TemplateExtractor extends DefaultObjectPipe<WikiPage, StreamReceiver> implements Analyzer {
    /** Name of the marker literal emitted as first element of every template entity. */
    private static final String TEMPLATE_MARKER = "_TEMPLATE_";

    /** Anything enclosed in angle brackets; removed from argument values. */
    private static final Pattern MARKUP_TAG = Pattern.compile("<[^>]+>");

    /** Double-bracketed link; group 1 is the last pipe-separated segment. */
    private static final Pattern INTERNAL_LINK = Pattern.compile("\\[\\[(?:.*?\\|)*?([^|]*?)\\]\\]");

    /** Single-bracketed link; group 1 is the last space-separated token. */
    private static final Pattern EXTERNAL_LINK = Pattern.compile("\\[(?:.*? )*?([^ ]*?)\\]");

    /** Reusable matcher for the configured name pattern; reset for each template. */
    private Matcher nameMatcher;
    private final TemplateVisitor visitor;

    /**
     * Tree visitor that forwards matching templates and their named arguments
     * to the receiver of the enclosing {@link TemplateExtractor}.
     */
    /* loaded from: input_file:org/culturegraph/mf/mediawiki/analyzer/TemplateExtractor$TemplateVisitor.class */
    public class TemplateVisitor extends TraverseTree {
        private final TextExtractor textExtractor = new TextExtractor();

        public TemplateVisitor() {
        }

        /**
         * Emits an entity for the template if its trimmed name matches the
         * configured pattern; templates that do not match are ignored.
         *
         * @param template the template node being visited
         */
        @Override // org.culturegraph.mf.mediawiki.util.TraverseTree
        public final void visit(Template template) {
            final String rawName = extractText(template.getName());
            final Matcher matcher = TemplateExtractor.this.nameMatcher;
            matcher.reset(rawName.trim());
            if (!matcher.matches()) {
                return;
            }
            final StreamReceiver receiver = TemplateExtractor.this.getReceiver();
            receiver.startEntity(sanitizeName(rawName));
            // Marker with an empty value so that the entity is never empty.
            receiver.literal(TEMPLATE_MARKER, "");
            // Hands the arguments back to the traversal; named ones end up in
            // visit(TemplateArgument) below.
            iterate(template.getArgs());
            receiver.endEntity();
        }

        /**
         * Emits a literal for a named template argument. Arguments without a
         * name are skipped.
         *
         * @param templateArgument the argument node being visited
         */
        @Override // org.culturegraph.mf.mediawiki.util.TraverseTree
        public final void visit(TemplateArgument templateArgument) {
            if (!templateArgument.getHasName()) {
                return;
            }
            final String name = sanitizeName(extractText(templateArgument.getName()));
            final String value = sanitizeValue(extractText(templateArgument.getValue()));
            TemplateExtractor.this.getReceiver().literal(name, value);
        }

        /** Runs the text extractor on the node and returns its result as a string. */
        private String extractText(AstNode astNode) {
            return (String) this.textExtractor.go(astNode);
        }

        /** Trims the name and replaces every space with an underscore. */
        private String sanitizeName(String str) {
            return str.trim().replace(" ", "_");
        }

        /**
         * Strips markup from an argument value: angle-bracket tags are removed,
         * double-bracketed links are reduced to their last pipe-separated
         * segment, single-bracketed links to their last space-separated token,
         * and the result is trimmed.
         *
         * <p>Uses the precompiled patterns of the enclosing class instead of
         * {@link String#replaceAll(String, String)}, which would recompile each
         * expression on every call.
         */
        private String sanitizeValue(String str) {
            final String withoutTags = MARKUP_TAG.matcher(str).replaceAll("");
            final String withoutInternalLinks = INTERNAL_LINK.matcher(withoutTags).replaceAll("$1");
            final String withoutExternalLinks = EXTERNAL_LINK.matcher(withoutInternalLinks).replaceAll("$1");
            return withoutExternalLinks.trim();
        }
    }

    /**
     * Creates an extractor with the empty name pattern (see the class
     * description for what that pattern matches).
     */
    public TemplateExtractor() {
        this("");
    }

    /**
     * Creates an extractor for templates whose name matches the given pattern.
     *
     * @param str regular expression the whole trimmed template name must match
     */
    public TemplateExtractor(String str) {
        this.visitor = new TemplateVisitor();
        setNamePattern(str);
    }

    /**
     * Replaces the pattern used to select templates.
     *
     * @param str regular expression the whole trimmed template name must match
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setNamePattern(String str) {
        this.nameMatcher = Pattern.compile(str).matcher("");
    }

    /**
     * Returns the regular expression currently used to select templates.
     *
     * @return the pattern string as passed to {@link #setNamePattern(String)}
     */
    public String getNamePattern() {
        return this.nameMatcher.pattern().pattern();
    }

    /**
     * Emits one record for the page, identified by the page id, containing an
     * entity for every matching template found in the page AST.
     *
     * @param wikiPage the page to analyse
     */
    public void process(WikiPage wikiPage) {
        final StreamReceiver receiver = getReceiver();
        receiver.startRecord(Long.toString(wikiPage.getPageId()));
        this.visitor.go(wikiPage.getWikiAst().getPage());
        receiver.endRecord();
    }

    /** {@inheritDoc} This analyzer always returns {@code false}. */
    @Override // org.culturegraph.mf.mediawiki.analyzer.Analyzer
    public boolean wikiTextOnly() {
        return false;
    }

    /** {@inheritDoc} This analyzer always returns {@code PREPROCESS}. */
    @Override // org.culturegraph.mf.mediawiki.analyzer.Analyzer
    public WikiTextParser.ParseLevel requiredParseLevel() {
        return WikiTextParser.ParseLevel.PREPROCESS;
    }
}
