package gate.termraider.bank;

import gate.Annotation;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.gui.ActionsPublisher;
import gate.termraider.modes.IdfCalculation;
import gate.termraider.modes.Normalization;
import gate.termraider.modes.TfCalculation;
import gate.termraider.util.ScoreType;
import gate.termraider.util.Term;
import gate.termraider.util.Utilities;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

/**
 * Termbank whose default score for a term is the normalized product of a
 * term-frequency component and an inverse-document-frequency component.
 *
 * <p>Term frequencies are counted over the documents handed to
 * {@link #processDocument(Document, int)}. Document frequencies and the
 * reference document count are read from {@link #getDocFreqSource()}; when no
 * such bank has been set, {@link #prepare()} creates one from this termbank's
 * own corpora.
 */
@CreoleResource(name = "TfIdfTermbank", icon = "termbank-lr.png", comment = "TermRaider Termbank derived from vectors in document features", helpURL = "http://gate.ac.uk/userguide/sec:creole:termraider:tfidf")
public class TfIdfTermbank extends AbstractTermbank implements ActionsPublisher {
    private static final long serialVersionUID = 2256964300070167978L;

    /** Mode passed to {@code TfCalculation.calculate} for the TF component. */
    private TfCalculation tfCalculation;

    /** Mode passed to {@code IdfCalculation.calculate} for the IDF component. */
    private IdfCalculation idfCalculation;

    /** Mode passed to {@code Normalization.calculate} to derive the default score from the raw score. */
    private Normalization normalization;

    /** Bank supplying reference document frequencies; created in {@link #prepare()} when unset. */
    private DocumentFrequencyBank docFreqSource;

    /** Score type holding the un-normalized TF * IDF product. */
    private ScoreType rawScoreST;

    /** Score type holding the number of occurrences counted for each term. */
    private ScoreType termFrequencyST;

    /** Score type holding the number of locally processed documents containing each term. */
    private ScoreType localDocFrequencyST;

    /** Score type holding each term's frequency as reported by {@link #docFreqSource}. */
    private ScoreType refDocFrequencyST;

    /**
     * Counts one document: every annotation of the configured input types in
     * the configured annotation set is turned into a term, the term's
     * frequency is incremented by one, and the document's identifier is
     * recorded against the term.
     *
     * @param document the document to scan
     * @param i        index forwarded to {@code Utilities.docIdentifier} when
     *                 building the document's identifier
     */
    @Override
    protected void processDocument(Document document, int i) {
        this.documentCount++;
        String documentId = Utilities.docIdentifier(document, this.idDocumentFeature, i);
        for (Annotation candidate : document.getAnnotations(this.inputASName).get(this.inputAnnotationTypes)) {
            Term term = makeTerm(candidate, document);
            Utilities.incrementScoreTermValue(this.scores, this.termFrequencyST, term, 1);
            Utilities.addToMapSet(this.termDocuments, term, documentId);
        }
    }

    /**
     * Registers the score types of this bank, in order: the default score
     * (named by {@code scoreProperty}), the raw score, the term frequency, the
     * local document frequency and the reference document frequency.
     */
    @Override
    protected void initializeScoreTypes() {
        this.scoreTypes = new ArrayList<>();
        // The first entry is the score type named after scoreProperty itself.
        this.scoreTypes.add(new ScoreType(this.scoreProperty));

        this.rawScoreST = new ScoreType(this.scoreProperty + AbstractTermbank.RAW_SUFFIX);
        this.scoreTypes.add(this.rawScoreST);

        this.termFrequencyST = new ScoreType("termFrequency");
        this.scoreTypes.add(this.termFrequencyST);

        this.localDocFrequencyST = new ScoreType("localDocFrequency");
        this.scoreTypes.add(this.localDocFrequencyST);

        this.refDocFrequencyST = new ScoreType("refDocFrequency");
        this.scoreTypes.add(this.refDocFrequencyST);
    }

    /**
     * Derives all remaining scores from the collected term frequencies.
     *
     * <p>For every term that has a term frequency this records its language
     * code and type, stores the reference and local document frequencies,
     * stores the raw score (TF component multiplied by IDF component) and
     * stores the normalized raw score under the default score type. In debug
     * mode the number of terms is printed to standard output afterwards.
     */
    @Override
    protected void calculateScores() {
        for (Term term : this.scores.get(this.termFrequencyST).keySet()) {
            this.languages.add(term.getLanguageCode());
            this.types.add(term.getType());

            int termFrequency = this.scores.get(this.termFrequencyST).get(term).intValue();
            int referenceDocFrequency = this.docFreqSource.getFrequencyLax(term);
            Utilities.setScoreTermValue(this.scores, this.refDocFrequencyST, term, Integer.valueOf(referenceDocFrequency));

            // Local document frequency = number of distinct document identifiers recorded for the term.
            int localDocFrequency = this.termDocuments.get(term).size();
            Utilities.setScoreTermValue(this.scores, this.localDocFrequencyST, term, Integer.valueOf(localDocFrequency));

            double tfComponent = TfCalculation.calculate(this.tfCalculation, termFrequency);
            double idfComponent = IdfCalculation.calculate(this.idfCalculation, referenceDocFrequency, this.docFreqSource.getDocumentCount());
            double rawScore = tfComponent * idfComponent;
            Utilities.setScoreTermValue(this.scores, this.rawScoreST, term, Double.valueOf(rawScore));

            Utilities.setScoreTermValue(this.scores, getDefaultScoreType(), term, Double.valueOf(Normalization.calculate(this.normalization, Double.valueOf(rawScore))));
        }

        if (this.debugMode) {
            System.out.println("Termbank: nbr of terms = " + getTerms().size());
        }
    }

    /**
     * Discards all collected data: the term-to-documents map, the document
     * counter, the per-score-type maps (one fresh empty map per registered
     * score type) and the sets of seen types and languages.
     */
    @Override
    protected void resetScores() {
        this.termDocuments = new HashMap<>();
        this.documentCount = 0;
        this.scores = new HashMap<>();
        for (ScoreType scoreType : this.scoreTypes) {
            // Explicit type arguments: a diamond in argument position is not inferred before Java 8.
            this.scores.put(scoreType, new HashMap<Term, Number>());
        }
        this.types = new HashSet<>();
        this.languages = new HashSet<>();
    }

    /**
     * @return the number of documents counted by {@link #processDocument(Document, int)}
     *         since the last {@link #resetScores()}
     */
    public int getDocCount() {
        return this.documentCount;
    }

    /**
     * Sets the bank that supplies reference document frequencies. Optional:
     * when left unset, {@link #prepare()} creates one from this termbank's corpora.
     *
     * @param documentFrequencyBank the reference bank, or {@code null} to have one created
     */
    @CreoleParameter(comment = "document frequency bank (unset = create from these corpora)")
    @Optional
    public void setDocFreqSource(DocumentFrequencyBank documentFrequencyBank) {
        this.docFreqSource = documentFrequencyBank;
    }

    /** @return the bank supplying reference document frequencies, or {@code null} if none is set yet */
    public DocumentFrequencyBank getDocFreqSource() {
        return this.docFreqSource;
    }

    /**
     * Sets the normalization applied to the raw score to obtain the default score.
     *
     * @param normalization the normalization mode
     */
    @CreoleParameter(comment = "score normalization", defaultValue = "Sigmoid")
    public void setNormalization(Normalization normalization) {
        this.normalization = normalization;
    }

    /** @return the normalization mode */
    public Normalization getNormalization() {
        return this.normalization;
    }

    /**
     * Sets how the term-frequency component of the raw score is calculated.
     *
     * @param tfCalculation the term-frequency calculation mode
     */
    @CreoleParameter(comment = "term frequency calculation", defaultValue = "Logarithmic")
    public void setTfCalculation(TfCalculation tfCalculation) {
        this.tfCalculation = tfCalculation;
    }

    /** @return the term-frequency calculation mode */
    public TfCalculation getTfCalculation() {
        return this.tfCalculation;
    }

    /**
     * Sets how the inverse-document-frequency component of the raw score is calculated.
     *
     * @param idfCalculation the inverse-document-frequency calculation mode
     */
    @CreoleParameter(comment = "inverted document frequency calculation", defaultValue = "LogarithmicScaled")
    public void setIdfCalculation(IdfCalculation idfCalculation) {
        this.idfCalculation = idfCalculation;
    }

    /** @return the inverse-document-frequency calculation mode */
    public IdfCalculation getIdfCalculation() {
        return this.idfCalculation;
    }

    /**
     * Delegates to the superclass setter; overridden only to declare this
     * resource's own default value for the parameter.
     *
     * @param str the name of the score property
     */
    @Override
    @CreoleParameter(defaultValue = "tfIdf")
    public void setScoreProperty(String str) {
        super.setScoreProperty(str);
    }

    /**
     * Validates the configuration and makes sure a document frequency bank is
     * available. When none has been set, a {@link DocumentFrequencyBank} is
     * created from this termbank's annotation-set name, language feature,
     * input annotation feature, corpora and debug flag.
     *
     * @throws ResourceInstantiationException if no corpora were given, or if
     *         creating the document frequency bank fails
     */
    @Override
    protected void prepare() throws ResourceInstantiationException {
        if (this.corpora == null || this.corpora.isEmpty()) {
            throw new ResourceInstantiationException("No corpora given");
        }
        if (this.docFreqSource != null) {
            return;
        }

        FeatureMap bankParameters = Factory.newFeatureMap();
        bankParameters.put("inputASName", this.inputASName);
        bankParameters.put("languageFeature", this.languageFeature);
        bankParameters.put("inputAnnotationFeature", this.inputAnnotationFeature);
        bankParameters.put("corpora", this.corpora);
        bankParameters.put("debugMode", Boolean.valueOf(this.debugMode));
        setDocFreqSource((DocumentFrequencyBank) Factory.createResource(DocumentFrequencyBank.class.getName(), bankParameters));
    }

    /**
     * Builds a label-to-value map for display: the local document count, the
     * reference document count and the number of terms carrying a default score.
     *
     * @return a new map of display labels to stringified counts
     */
    @Override
    public Map<String, String> getMiscDataForGui() {
        Map<String, String> miscData = new HashMap<>();
        miscData.put("nbr of local documents", String.valueOf(this.documentCount));
        miscData.put("nbr of reference documents", String.valueOf(this.docFreqSource.getDocumentCount()));
        miscData.put("nbr of terms", String.valueOf(getDefaultScores().size()));
        return miscData;
    }
}
