package cc.mallet.pipe;

import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureCounter;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import gnu.trove.TIntIntHashMap;

/* loaded from: input_file:cc/mallet/pipe/FeatureDocFreqPipe.class */
/**
 * A pass-through pipe that tracks document frequency: for each feature index it
 * counts the number of instances in which that feature appeared at least once,
 * together with the total number of instances seen. The collected statistics can
 * then be used to add overly common words to a tokenizer's stoplist via
 * {@link #addPrunedWordsToStoplist(SimpleTokenizer, double)}.
 *
 * <p>Instances are returned unchanged by {@link #pipe(Instance)}.
 */
public class FeatureDocFreqPipe extends Pipe {
    /** Per-feature tally; incremented once per instance that contains the feature. */
    FeatureCounter counter;
    /** Number of instances that have been passed through {@link #pipe(Instance)}. */
    int numInstances;
    static final long serialVersionUID = 1;

    /**
     * Creates a pipe with a fresh data alphabet and no second (target) alphabet.
     */
    public FeatureDocFreqPipe() {
        super(new Alphabet(), null);
        this.counter = new FeatureCounter(getDataAlphabet());
        this.numInstances = 0;
    }

    /**
     * Creates a pipe over existing alphabets.
     *
     * @param alphabet  first alphabet handed to {@link Pipe}; also the alphabet the
     *                  feature counter is built on
     * @param alphabet2 second alphabet handed to {@link Pipe} (presumably the target
     *                  alphabet; this class never reads it)
     */
    public FeatureDocFreqPipe(Alphabet alphabet, Alphabet alphabet2) {
        super(alphabet, alphabet2);
        this.counter = new FeatureCounter(alphabet);
        this.numInstances = 0;
    }

    /**
     * Records which features occur in the given instance and returns the instance
     * unmodified.
     *
     * @param instance an instance whose data is a {@link FeatureSequence}
     * @return the same {@code instance}
     * @throws IllegalArgumentException if the instance's data is {@code null} or is
     *                                  not a {@link FeatureSequence}
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        Object data = instance.getData();
        if (!(data instanceof FeatureSequence)) {
            // Guard the null case explicitly: calling getClass() on null data would
            // raise a NullPointerException instead of the intended error.
            throw new IllegalArgumentException(
                    "Looking for a FeatureSequence, found a " + (data == null ? "null" : data.getClass()));
        }
        FeatureSequence featureSequence = (FeatureSequence) data;

        // Collapse repeated occurrences so each distinct feature is counted once
        // for this instance; only the map's key set is used afterwards.
        TIntIntHashMap seenFeatures = new TIntIntHashMap();
        for (int position = 0; position < featureSequence.size(); position++) {
            seenFeatures.adjustOrPutValue(featureSequence.getIndexAtPosition(position), 1, 1);
        }
        for (int featureIndex : seenFeatures.keys()) {
            this.counter.increment(featureIndex);
        }
        this.numInstances++;
        return instance;
    }

    /**
     * Adds to the tokenizer's stoplist every word in the data alphabet whose
     * document frequency (instances containing the word divided by total instances
     * seen) is strictly greater than {@code d}. Does nothing if no instance has
     * been piped yet.
     *
     * @param simpleTokenizer tokenizer whose stoplist receives the frequent words
     * @param d               document-frequency cutoff, as a fraction of instances
     */
    public void addPrunedWordsToStoplist(SimpleTokenizer simpleTokenizer, double d) {
        if (this.numInstances == 0) {
            // No instances seen, so no document frequency is defined; also avoids
            // dividing by zero below.
            return;
        }
        Alphabet dataAlphabet = getDataAlphabet();
        for (int i = 0; i < dataAlphabet.size(); i++) {
            // Force floating-point division: without the cast an integral count
            // divided by the int instance total would truncate to 0 for any
            // feature not present in every instance.
            double docFrequency = (double) this.counter.get(i) / this.numInstances;
            if (docFrequency > d) {
                simpleTokenizer.stop((String) dataAlphabet.lookupObject(i));
            }
        }
    }
}
