package gate.termraider.bank;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
import gate.Resource;
import gate.Utils;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.gui.ActionsPublisher;
import gate.termraider.gui.ActionSaveCsv;
import gate.termraider.util.ScoreType;
import gate.termraider.util.Term;
import gate.termraider.util.Utilities;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.swing.Action;
import org.apache.commons.lang.StringEscapeUtils;

/**
 * Termbank that stores, for every term, the number of counting units in which
 * the term occurs. A counting unit is a whole document, or one annotation of
 * type {@link #getSegmentAnnotationType()} when that parameter is set to a
 * non-empty value.
 *
 * <p>Counts are gathered from the configured corpora and, additionally, from
 * other {@code DocumentFrequencyBank} instances supplied through
 * {@link #setInputBanks(Set)}.
 */
@CreoleResource(name = "DocumentFrequencyBank", icon = "termbank-lr.png", comment = "Document frequency counter derived from corpora and other DFBs", helpURL = "http://gate.ac.uk/userguide/sec:creole:termraider:docfrequency")
public class DocumentFrequencyBank extends AbstractTermbank implements ActionsPublisher {
    private static final long serialVersionUID = 8486379203429800194L;

    /** Other banks whose counts are merged into this one during {@link #init()}. */
    private Set<DocumentFrequencyBank> inputBanks;

    /** Maps a term string to every stored term that has exactly that string. */
    private Map<String, Set<Term>> stringLookupTable;

    /** GUI actions, built lazily by {@link #getActions()}; not serialized. */
    protected transient List<Action> actionsList;

    /** Annotation type of the counting unit; null or empty means whole documents. */
    protected String segmentAnnotationType;

    /**
     * Sets the annotation type used as counting unit.
     *
     * @param str annotation type name; null or empty selects whole documents
     */
    @CreoleParameter(comment = "input segment annotations (default = whole documents)", defaultValue = "")
    public void setSegmentAnnotationType(String str) {
        this.segmentAnnotationType = str;
    }

    /**
     * @return the annotation type used as counting unit (may be null or empty)
     */
    public String getSegmentAnnotationType() {
        return this.segmentAnnotationType;
    }

    /**
     * Builds the bank: prepares the inputs, resets all state, merges the input
     * banks, processes the corpora and finally computes the scores.
     *
     * @return this resource
     * @throws ResourceInstantiationException if preparation fails
     */
    @Override
    public Resource init() throws ResourceInstantiationException {
        prepare();
        initializeScoreTypes();
        resetScores();
        processInputBanks();
        processCorpora();
        calculateScores();
        return this;
    }

    /** Delegates to the superclass; this class holds nothing extra to release. */
    @Override
    public void cleanup() {
        super.cleanup();
    }

    /**
     * Replaces unset (null) input collections with empty sets so that the
     * processing loops can iterate over them unconditionally.
     */
    @Override
    protected void prepare() throws ResourceInstantiationException {
        if (this.corpora == null) {
            this.corpora = new HashSet<Corpus>();
        }
        if (this.inputBanks == null) {
            this.inputBanks = new HashSet<DocumentFrequencyBank>();
        }
    }

    /**
     * Discards all accumulated data: one empty score map per score type, a
     * zero document count and empty language, type, lookup and term-document
     * tables.
     */
    // The inherited fields below are declared outside this file, so their
    // exact type arguments are not visible here; raw constructors are kept.
    @SuppressWarnings({"rawtypes", "unchecked"})
    @Override
    protected void resetScores() {
        this.scores = new HashMap();
        for (ScoreType scoreType : this.scoreTypes) {
            this.scores.put(scoreType, new HashMap());
        }
        this.documentCount = 0;
        this.languages = new HashSet();
        this.types = new HashSet();
        this.stringLookupTable = new HashMap<String, Set<Term>>();
        this.termDocuments = new HashMap();
    }

    /** Creates the action list, containing a single "Save as CSV..." action. */
    @Override
    protected void createActions() {
        this.actionsList = new ArrayList<Action>();
        this.actionsList.add(new ActionSaveCsv("Save as CSV...", this));
    }

    /** Processes every configured corpus, logging each one in debug mode. */
    @Override
    protected void processCorpora() {
        for (Corpus corpus : this.corpora) {
            processCorpus(corpus);
            if (this.debugMode) {
                System.out.println("Termbank: added corpus " + corpus.getName() + " with " + corpus.size() + " documents");
            }
        }
    }

    /**
     * Merges every input bank into this one: adds its document count and, for
     * each of its terms, increments this bank's default score by the input
     * bank's strict frequency.
     */
    // NOTE(review): terms that come only from input banks are not added to
    // types, languages or stringLookupTable here, so getFrequencyLax() cannot
    // find them through a close match. Also, calculateScores() later calls
    // Utilities.setScoreTermValue for terms found in the corpora; if that call
    // overwrites rather than adds, counts merged here are lost for such terms.
    // Confirm against Utilities before relying on merged banks.
    protected void processInputBanks() {
        for (DocumentFrequencyBank bank : this.inputBanks) {
            this.documentCount += bank.documentCount;
            for (Term term : bank.getTerms()) {
                Utilities.incrementMap(getDefaultScores(), term, bank.getFrequencyStrict(term));
            }
        }
    }

    /**
     * Processes one document either as a whole or segment by segment,
     * depending on whether a segment annotation type has been configured.
     *
     * @param document the document to process
     * @param i        index of the document, passed on to the identifier builder
     */
    @Override
    protected void processDocument(Document document, int i) {
        // The null test must come first: the original order dereferenced the
        // field before checking it and threw a NullPointerException.
        if (this.segmentAnnotationType == null || this.segmentAnnotationType.isEmpty()) {
            processWholeDocument(document, i);
        } else {
            processDocumentSegments(document, i);
        }
    }

    /**
     * Treats every annotation of the segment type as one counting unit. Each
     * segment increments the document count and is identified as
     * {@code "<document identifier> [<segment annotation id>]"}.
     *
     * @param document the document to process
     * @param i        index of the document, passed on to the identifier builder
     */
    protected void processDocumentSegments(Document document, int i) {
        String docIdentifier = Utilities.docIdentifier(document, this.idDocumentFeature, i);
        AnnotationSet segments = document.getAnnotations(this.inputASName).get(this.segmentAnnotationType);
        AnnotationSet candidates = document.getAnnotations(this.inputASName).get(this.inputAnnotationTypes);
        for (Annotation segment : segments) {
            this.documentCount++;
            String segmentIdentifier = String.format("%s [%d]", docIdentifier, segment.getId());
            recordTerms(Utils.getContainedAnnotations(candidates, segment), document, segmentIdentifier);
        }
    }

    /**
     * Treats the whole document as a single counting unit.
     *
     * @param document the document to process
     * @param i        index of the document, passed on to the identifier builder
     */
    protected void processWholeDocument(Document document, int i) {
        this.documentCount++;
        String docIdentifier = Utilities.docIdentifier(document, this.idDocumentFeature, i);
        AnnotationSet candidates = document.getAnnotations(this.inputASName).get(this.inputAnnotationTypes);
        recordTerms(candidates, document, docIdentifier);
    }

    /**
     * Converts the given annotations to terms, removes duplicates and
     * registers the unit identifier under each distinct term.
     *
     * @param termAnnotations annotations to turn into terms
     * @param document        document the annotations belong to
     * @param unitIdentifier  identifier of the document or segment being counted
     */
    private void recordTerms(AnnotationSet termAnnotations, Document document, String unitIdentifier) {
        // Collect into a set first so that a term occurring several times in
        // the same unit is registered only once for that unit.
        Set<Term> distinctTerms = new HashSet<Term>();
        for (Annotation annotation : termAnnotations) {
            distinctTerms.add(makeTerm(annotation, document));
        }
        for (Term term : distinctTerms) {
            Utilities.addToMapSet(this.termDocuments, term, unitIdentifier);
        }
    }

    /**
     * For every term seen in the corpora, records its type and language code,
     * stores the number of unit identifiers collected for it as its default
     * score, and indexes it by term string for lax lookups.
     */
    @Override
    protected void calculateScores() {
        for (Term term : this.termDocuments.keySet()) {
            this.types.add(term.getType());
            this.languages.add(term.getLanguageCode());
            Utilities.setScoreTermValue(this.scores, getDefaultScoreType(), term, Integer.valueOf(this.termDocuments.get(term).size()));
            storeStringLookup(term);
        }
        if (this.debugMode) {
            System.out.println("Termbank: nbr of terms = " + getTerms().size());
        }
    }

    /**
     * Returns the stored frequency of exactly this term.
     *
     * @param term the term to look up
     * @return the frequency, or 0 when the term has no stored score
     */
    public int getFrequencyStrict(Term term) {
        Number score = getDefaultScores().get(term);
        return score == null ? 0 : score.intValue();
    }

    /**
     * Returns the stored frequency of this term or, when the term itself is
     * not stored, of the first stored term with the same term string for which
     * {@code closeMatch} holds.
     *
     * @param term the term to look up
     * @return the frequency, or 0 when neither an exact nor a close match exists
     */
    public int getFrequencyLax(Term term) {
        Number exact = getDefaultScores().get(term);
        if (exact != null) {
            return exact.intValue();
        }
        Set<Term> sameString = this.stringLookupTable.get(term.getTermString());
        if (sameString == null) {
            return 0;
        }
        for (Term candidate : sameString) {
            if (candidate.closeMatch(term)) {
                Number score = getDefaultScores().get(candidate);
                if (score != null) {
                    return score.intValue();
                }
            }
        }
        return 0;
    }

    /**
     * Sets the banks whose counts are merged into this one.
     *
     * @param set the input banks; null is replaced by an empty set in {@link #prepare()}
     */
    @CreoleParameter(comment = "Other DFBs to compile into the new one")
    public void setInputBanks(Set<DocumentFrequencyBank> set) {
        this.inputBanks = set;
    }

    /**
     * @return the banks whose counts are merged into this one
     */
    public Set<DocumentFrequencyBank> getInputBanks() {
        return this.inputBanks;
    }

    /**
     * Returns the GUI actions, creating them on first use (the list is
     * transient and therefore null until then).
     *
     * @return the list of actions
     */
    @Override
    public List<Action> getActions() {
        if (this.actionsList == null) {
            createActions();
        }
        return this.actionsList;
    }

    /**
     * Adds the term to the set of terms sharing its term string, creating the
     * set on first use.
     *
     * @param term the term to index
     */
    private void storeStringLookup(Term term) {
        String termString = term.getTermString();
        Set<Term> sameString = this.stringLookupTable.get(termString);
        if (sameString == null) {
            sameString = new HashSet<Term>();
            this.stringLookupTable.put(termString, sameString);
        }
        sameString.add(term);
    }

    /** Declares a single score type named after the score property. */
    @Override
    protected void initializeScoreTypes() {
        this.scoreTypes = new ArrayList<ScoreType>();
        this.scoreTypes.add(new ScoreType(this.scoreProperty));
    }

    /**
     * Sets the name of the main score.
     *
     * @param str the score name
     */
    @Override
    @CreoleParameter(comment = "name of main score", defaultValue = "documentFrequency")
    public void setScoreProperty(String str) {
        this.scoreProperty = str;
    }

    /**
     * Returns summary figures for display: the document count, the number of
     * scored terms and the number of distinct term strings.
     *
     * @return a map from label to value
     */
    @Override
    public Map<String, String> getMiscDataForGui() {
        Map<String, String> summary = new HashMap<String, String>();
        summary.put("nbr of documents", String.valueOf(this.documentCount));
        summary.put("nbr of terms", String.valueOf(getDefaultScores().size()));
        summary.put("nbr of distinct term strings", String.valueOf(this.stringLookupTable.size()));
        return summary;
    }

    /**
     * Builds the CSV sub-header: a newline followed by one row with an empty
     * first field, the {@code _TOTAL_DOCS_} marker, two empty fields and the
     * total document count.
     *
     * @return the sub-header text
     */
    @Override
    public String getCsvSubheader() {
        StringBuilder sb = new StringBuilder();
        sb.append('\n');
        sb.append(',').append(StringEscapeUtils.escapeCsv("_TOTAL_DOCS_"));
        sb.append(',').append(StringEscapeUtils.escapeCsv(""));
        sb.append(',').append(StringEscapeUtils.escapeCsv(""));
        sb.append(',').append(StringEscapeUtils.escapeCsv(Integer.toString(getDocumentCount())));
        return sb.toString();
    }
}
