package cc.mallet.pipe.tsf;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

/* loaded from: input_file:cc/mallet/pipe/tsf/TokenTextNGrams.class */
/**
 * Pipe that adds character n-gram features to every {@link Token} of a {@link TokenSequence}.
 *
 * <p>For each configured gram size, every contiguous substring of that length in the token's
 * text becomes a feature named {@code prefix + substring} with value {@code 1.0}. When
 * {@link #distinguishBorders} is set, the text is first wrapped in {@link #startBorderChar} and
 * {@link #endBorderChar}, so n-grams touching a token boundary get distinct names.
 */
public class TokenTextNGrams extends Pipe implements Serializable {
    /** Character prepended to the token text when {@link #distinguishBorders} is enabled. */
    static char startBorderChar = '>';
    /** Character appended to the token text when {@link #distinguishBorders} is enabled. */
    static char endBorderChar = '<';

    /** Prefix used by the no-argument constructor. */
    private static final String DEFAULT_PREFIX = "CHARBIGRAM=";
    /** Gram size used by the no-argument constructor. */
    private static final int DEFAULT_GRAM_SIZE = 2;

    /** String prepended to every generated feature name. */
    String prefix;
    /** Lengths of the character n-grams to generate. */
    int[] gramSizes;
    /** Whether the token text is wrapped in border characters before n-grams are extracted. */
    boolean distinguishBorders;

    private static final long serialVersionUID = 1;
    /**
     * Version tag written at the start of this class's serialized form. Version 0 streams
     * contain only the tag; version 1 streams also contain the configuration fields.
     */
    private static final int CURRENT_SERIAL_VERSION = 1;

    /**
     * Creates a pipe that generates n-grams of the given sizes.
     *
     * @param prefix string prepended to every generated feature name
     * @param gramSizes n-gram lengths to generate; the array is stored, not copied
     */
    public TokenTextNGrams(String prefix, int[] gramSizes) {
        this.distinguishBorders = false;
        this.prefix = prefix;
        this.gramSizes = gramSizes;
    }

    /** Creates a pipe that generates character bigrams with the prefix {@code "CHARBIGRAM="}. */
    public TokenTextNGrams() {
        this(DEFAULT_PREFIX, new int[]{DEFAULT_GRAM_SIZE});
    }

    /**
     * Adds the n-gram features to every token in the instance's data.
     *
     * @param instance instance whose data is a {@link TokenSequence}
     * @return the same instance, with its tokens modified in place
     */
    @Override
    public Instance pipe(Instance instance) {
        TokenSequence tokens = (TokenSequence) instance.getData();
        for (int i = 0; i < tokens.size(); i++) {
            Token token = (Token) tokens.get(i);
            String text = token.getText();
            if (this.distinguishBorders) {
                text = startBorderChar + text + endBorderChar;
            }
            int length = text.length();
            for (int size : this.gramSizes) {
                // Inclusive upper bound: the last n-gram starts at (length - size).
                // Tokens shorter than the gram size produce no feature for that size.
                for (int start = 0; start + size <= length; start++) {
                    token.setFeatureValue(this.prefix + text.substring(start, start + size), 1.0d);
                }
            }
        }
        return instance;
    }

    /**
     * Writes the version tag followed by the pipe's configuration.
     *
     * <p>This class does not use default field serialization, so every field that must
     * survive a round trip is written explicitly here.
     */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
        objectOutputStream.writeObject(this.prefix);
        objectOutputStream.writeObject(this.gramSizes);
        objectOutputStream.writeBoolean(this.distinguishBorders);
    }

    /**
     * Restores the pipe's configuration from a stream.
     *
     * <p>Version 0 streams carry no configuration, so the no-argument constructor's defaults
     * are restored for them; otherwise {@code gramSizes} would be {@code null} and
     * {@link #pipe(Instance)} would fail.
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        int version = objectInputStream.readInt();
        if (version >= 1) {
            this.prefix = (String) objectInputStream.readObject();
            this.gramSizes = (int[]) objectInputStream.readObject();
            this.distinguishBorders = objectInputStream.readBoolean();
        } else {
            this.prefix = DEFAULT_PREFIX;
            this.gramSizes = new int[]{DEFAULT_GRAM_SIZE};
            this.distinguishBorders = false;
        }
    }
}
