package edu.jhuapl.dorset.nlp;

import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:edu/jhuapl/dorset/nlp/RuleBasedTokenizer.class */
/**
 * Splits text into tokens using a small set of character-level rules.
 *
 * <p>Runs of letters and digits form a token. An apostrophe between two letters
 * ({@code don't}) and a period or comma between two digits ({@code 1,000.50}) are kept
 * inside the surrounding token. Whitespace separates tokens and is never emitted. Every
 * other character ends the current token and is emitted as its own single-character
 * token, unless punctuation is being ignored.
 */
public class RuleBasedTokenizer implements Tokenizer {
    /** When true, standalone punctuation characters are dropped instead of emitted. */
    private final boolean ignorePunctuation;

    /** Creates a tokenizer that emits punctuation characters as separate tokens. */
    public RuleBasedTokenizer() {
        this(false);
    }

    /**
     * Creates a tokenizer.
     *
     * @param ignorePunctuation {@code true} to drop standalone punctuation characters
     *     from the output, {@code false} to emit each as its own token
     */
    public RuleBasedTokenizer(boolean ignorePunctuation) {
        this.ignorePunctuation = ignorePunctuation;
    }

    /**
     * Tokenizes the given text.
     *
     * @param str the text to tokenize; leading and trailing whitespace is trimmed first
     * @return the tokens in order of appearance, never containing empty strings; an
     *     empty array if {@code str} is {@code null} or contains no tokens
     */
    @Override // edu.jhuapl.dorset.nlp.Tokenizer
    public String[] tokenize(String str) {
        if (str == null) {
            return new String[0];
        }
        char[] chars = str.trim().toCharArray();
        List<String> tokens = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (int i = 0; i < chars.length; i++) {
            // Neighbours default to a space at the text boundaries so that the
            // "between two letters/digits" rules cannot match there.
            char prev = i > 0 ? chars[i - 1] : ' ';
            char curr = chars[i];
            char next = i + 1 < chars.length ? chars[i + 1] : ' ';

            if (Character.isLetterOrDigit(curr) || isWordInternal(prev, curr, next)) {
                current.append(curr);
                continue;
            }

            // Any other character terminates the token being built.
            flush(current, tokens);
            if (!Character.isWhitespace(curr) && !this.ignorePunctuation) {
                tokens.add(String.valueOf(curr));
            }
        }
        flush(current, tokens);
        return tokens.toArray(new String[0]);
    }

    /** Returns true for an apostrophe between letters or a '.'/',' between digits. */
    private static boolean isWordInternal(char prev, char curr, char next) {
        if (curr == '\'') {
            return Character.isLetter(prev) && Character.isLetter(next);
        }
        if (curr == '.' || curr == ',') {
            return Character.isDigit(prev) && Character.isDigit(next);
        }
        return false;
    }

    /** Moves the buffered token, if non-empty, into the token list and clears the buffer. */
    private static void flush(StringBuilder current, List<String> tokens) {
        if (current.length() > 0) {
            tokens.add(current.toString());
            current.setLength(0);
        }
    }
}
