package com.gengoai.hermes.morphology;

import com.gengoai.collection.Iterables;
import com.gengoai.hermes.morphology.Tokenizer;
import com.gengoai.io.Resources;
import com.gengoai.string.Strings;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.text.BreakIterator;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;

/* loaded from: input_file:com/gengoai/hermes/morphology/BreakIteratorTokenizer.class */
/**
 * A {@link Tokenizer} that splits text on the word boundaries reported by
 * {@link BreakIterator#getWordInstance(Locale)} for a fixed {@link Locale}.
 *
 * <p>Leading whitespace of each boundary segment is dropped, segments that are empty or blank
 * produce no token, and every emitted token carries its character offsets in the input and a
 * zero-based running index.
 */
public class BreakIteratorTokenizer implements Tokenizer, Serializable {
    private static final long serialVersionUID = 1L;

    /** Locale handed to {@link BreakIterator#getWordInstance(Locale)} for every tokenization. */
    private final Locale locale;

    /**
     * Lazily produces tokens from a string by walking the boundaries of a word
     * {@link BreakIterator}. A token is computed on demand and cached in {@code nextToken}
     * until it is consumed by {@link #next()}.
     */
    private static class BreakIteratorStream implements Iterator<Tokenizer.Token> {
        private final String input;
        private final BreakIterator iterator;
        /** Token computed by {@link #advance()} that has not been handed out yet, or null. */
        private Tokenizer.Token nextToken;
        /** Running index assigned to the next emitted token. */
        private int index = 0;
        /** Offset in {@code input} where the next token may begin; -1 once the iterator is done. */
        private int start = 0;

        private BreakIteratorStream(String str, Locale locale) {
            this.input = str;
            this.iterator = BreakIterator.getWordInstance(locale);
            this.iterator.setText(str);
        }

        /**
         * Ensures {@code nextToken} holds the next token if one is available.
         *
         * @return the pending token, or null when the input is exhausted
         */
        private Tokenizer.Token advance() {
            while (this.nextToken == null && this.start >= 0 && this.start < this.input.length()) {
                final int end = this.iterator.next();
                if (end < 0) {
                    // A negative boundary (BreakIterator.DONE) means there is nothing left.
                    this.start = -1;
                    return null;
                }
                // Skip leading whitespace. The bound must be strict: 'end' may equal
                // input.length(), so reading charAt(start) with start == end would throw
                // StringIndexOutOfBoundsException on input that ends in whitespace.
                while (this.start < end && Character.isWhitespace(this.input.charAt(this.start))) {
                    this.start++;
                }
                if (this.start < end) {
                    final String text = this.input.substring(this.start, end);
                    if (!Strings.isNullOrBlank(text)) {
                        this.nextToken = new Tokenizer.Token(text, determineType(text), this.start, end, this.index);
                        this.index++;
                        this.start = end;
                    }
                    // NOTE(review): when the segment is blank according to Strings.isNullOrBlank
                    // but was not skipped by Character.isWhitespace, 'start' is not advanced, so
                    // those characters become a prefix of the following token - confirm intended.
                }
            }
            return this.nextToken;
        }

        /**
         * Classifies a token by the kinds of characters it contains.
         *
         * <p>Checks are applied in order: digits and letters give ALPHA_NUMERIC; digits only give
         * NUMBER; letters with a '.' give ACRONYM; letters with a "'" give CONTRACTION; other
         * letter-only text gives ALPHA_NUMERIC; text for which {@code Strings.isPunctuation}
         * holds gives PUNCTUATION; anything else is UNKNOWN.
         *
         * @param str the token text
         * @return the token type selected by the rules above
         */
        private static TokenType determineType(String str) {
            final boolean hasLetter = Strings.hasLetter(str);
            final boolean hasDigit = Strings.hasDigit(str);
            if (hasDigit) {
                return hasLetter ? TokenType.ALPHA_NUMERIC : TokenType.NUMBER;
            }
            if (hasLetter) {
                if (str.contains(".")) {
                    return TokenType.ACRONYM;
                }
                if (str.contains("'")) {
                    return TokenType.CONTRACTION;
                }
                return TokenType.ALPHA_NUMERIC;
            }
            if (Strings.isPunctuation(str)) {
                return TokenType.PUNCTUATION;
            }
            return TokenType.UNKNOWN;
        }

        @Override
        public boolean hasNext() {
            return advance() != null;
        }

        /**
         * Returns the pending token and clears it so the following call computes a new one.
         *
         * @throws NoSuchElementException when no further token is available
         */
        @Override
        public Tokenizer.Token next() {
            final Tokenizer.Token token = advance();
            if (token == null) {
                throw new NoSuchElementException();
            }
            this.nextToken = null;
            return token;
        }
    }

    /**
     * Creates a tokenizer whose word boundaries are determined for the given locale.
     *
     * @param locale the locale passed to {@link BreakIterator#getWordInstance(Locale)}
     */
    public BreakIteratorTokenizer(Locale locale) {
        this.locale = locale;
    }

    /**
     * Reads the reader's content into a string and tokenizes it.
     *
     * @param reader the source of the text to tokenize
     * @return the tokens of the text, produced lazily during iteration
     * @throws RuntimeException wrapping the {@link IOException} raised while reading
     */
    @Override
    public Iterable<Tokenizer.Token> tokenize(Reader reader) {
        try {
            return Iterables.asIterable(new BreakIteratorStream(Resources.fromReader(reader).readToString(), this.locale));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tokenizes the given string.
     *
     * <p>NOTE(review): the result wraps a single iterator instance, so it can presumably be
     * traversed only once - confirm against {@code Iterables.asIterable}.
     *
     * @param str the text to tokenize
     * @return the tokens of the text, produced lazily during iteration
     */
    @Override
    public Iterable<Tokenizer.Token> tokenize(String str) {
        return Iterables.asIterable(new BreakIteratorStream(str, this.locale));
    }
}
