package eu.stratosphere.sopremo.tokenizer;

import eu.stratosphere.sopremo.type.CachingArrayNode;
import eu.stratosphere.sopremo.type.TextNode;
import it.unimi.dsi.fastutil.chars.CharOpenHashSet;
import it.unimi.dsi.fastutil.chars.CharSet;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import javolution.text.TextFormat;

/* loaded from: input_file:eu/stratosphere/sopremo/tokenizer/DelimiterTokenizer.class */
/**
 * Tokenizer that cuts a character sequence at every occurrence of one of a
 * configurable set of delimiter characters.
 *
 * <p>Delimiter characters never end up inside a token, and consecutive
 * delimiters do not produce empty tokens. Each token is reported through the
 * inherited {@code addToken} method as a {@code [start, end)} index range of
 * the input.
 */
public class DelimiterTokenizer extends AbstractTokenizer implements Tokenizer {
    /**
     * Tokenizer preconfigured with space, line feed, carriage return and tab
     * as delimiters.
     *
     * <p>Note: this is a single shared instance and it is mutable; calling
     * {@link #addDelimiter(char)} or {@link #setDelimiters(Collection)} on it
     * changes it for every user of the constant.
     */
    public static final DelimiterTokenizer WHITESPACES = new DelimiterTokenizer(' ', '\n', '\r', '\t');

    /** Characters at which the input is split. */
    private final CharSet delimiters = new CharOpenHashSet();

    /**
     * Creates a tokenizer with an empty delimiter set.
     */
    public DelimiterTokenizer() {
    }

    /**
     * Creates a tokenizer that splits at the given characters.
     *
     * @param chArr the delimiter characters
     */
    public DelimiterTokenizer(Character... chArr) {
        this(Arrays.asList(chArr));
    }

    /**
     * Creates a tokenizer that splits at the given characters.
     *
     * @param collection the delimiter characters
     */
    public DelimiterTokenizer(Collection<Character> collection) {
        this.delimiters.addAll(collection);
    }

    /**
     * Adds one more character to the delimiter set.
     *
     * @param c the additional delimiter
     * @return this tokenizer, to allow call chaining
     */
    public DelimiterTokenizer addDelimiter(char c) {
        this.delimiters.add(c);
        return this;
    }

    /**
     * Writes a textual description of this tokenizer, including its
     * delimiter set, to the given target.
     *
     * @param appendable the target to write to
     * @throws IOException if the target fails to accept the output
     */
    public void appendAsString(Appendable appendable) throws IOException {
        appendable.append("DelimiterTokenizer [").append("delimiters=");
        TextFormat.getInstance(CharSet.class).format(this.delimiters, appendable);
        appendable.append("]");
    }

    /**
     * Replaces the current delimiter set with the given characters.
     *
     * @param collection the new delimiter characters
     * @throws NullPointerException if {@code collection} is {@code null}
     */
    public void setDelimiters(Collection<Character> collection) {
        if (collection == null) {
            throw new NullPointerException("delimiters must not be null");
        }

        // Validate first so a null argument leaves the current set untouched.
        this.delimiters.clear();
        this.delimiters.addAll(collection);
    }

    /**
     * Splits the given text at the configured delimiters and stores the
     * resulting tokens in the given array node, after resetting the node's
     * size to zero.
     *
     * @param charSequence     the text to tokenize
     * @param cachingArrayNode the node that receives the tokens
     */
    @Override
    public void tokenizeInto(CharSequence charSequence, CachingArrayNode<TextNode> cachingArrayNode) {
        cachingArrayNode.setSize(0);

        // Index of the first character of the token currently being scanned.
        int tokenStart = 0;
        int pos;
        for (pos = 0; pos < charSequence.length(); pos++) {
            if (!this.delimiters.contains(charSequence.charAt(pos))) {
                continue;
            }
            // A delimiter closes the pending token; an empty range means the
            // delimiter directly follows another one (or starts the input).
            if (tokenStart < pos) {
                addToken(cachingArrayNode, charSequence, tokenStart, pos);
            }
            tokenStart = pos + 1;
        }

        // Flush the trailing token when the input does not end with a delimiter.
        if (tokenStart < pos) {
            addToken(cachingArrayNode, charSequence, tokenStart, pos);
        }
    }
}
