package cc.redpen.parser;

import cc.redpen.config.Configuration;
import cc.redpen.config.SymbolTable;
import cc.redpen.config.SymbolType;
import cc.redpen.parser.latex.Token;
import cc.redpen.util.EndOfSentenceDetector;
import cc.redpen.util.Pair;
import cc.redpen.validator.section.ParagraphStartWithValidator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cc/redpen/parser/SentenceExtractor.class */
/**
 * Splits text into sentences by locating end-of-sentence positions.
 *
 * <p>An end-of-sentence regular expression is built from a list of full-stop
 * characters and a list of right-quotation characters. The actual position
 * lookup is delegated to an {@link EndOfSentenceDetector} constructed with that
 * pattern and the {@link #WHITE_WORDS} list.
 */
public class SentenceExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(SentenceExtractor.class);

    /**
     * Words handed to the {@link EndOfSentenceDetector} together with the
     * end-of-sentence pattern.
     *
     * <p>NOTE(review): "Feb." is listed twice and "B.C" has no trailing period
     * (unlike "A.D."); both are kept exactly as they were — confirm whether
     * this is intentional.
     */
    private static final List<String> WHITE_WORDS = generateUmList("Mr.", "Mrs.", "Dr.", "genn.ai", "Co., Ltd.", "Miss.", "a.m.", "U.S.A.", "Jan.", "Feb.", "Mar.", "Apr.", "May.", "Jun.", "Jul.", "Aug.", "Sep.", "Oct.", "Nov.", "Dec.", "Feb.", "B.C", "A.D.");

    /**
     * Characters that are prefixed with a backslash when placed into the
     * end-of-sentence pattern. All of them except '!' are regular-expression
     * metacharacters; '!' is included only so that the generated pattern text
     * stays identical to what was produced before for '.', '?' and '!'.
     */
    private static final String ESCAPED_CHARACTERS = "\\^$.|?*+()[]{}!";

    private final Pattern fullStopPattern;
    private final char[] fullStopList;
    private final char[] rightQuotationList;
    private final EndOfSentenceDetector endOfSentenceDetector;
    private final SymbolTable symbolTable;

    /**
     * Creates an extractor for the given full-stop characters, taking the
     * right-quotation characters from the symbol table of a default-built
     * {@link Configuration}.
     *
     * @param cArr full-stop characters; must be non-null and non-empty
     * @throws IllegalArgumentException if {@code cArr} is null or empty
     */
    SentenceExtractor(char[] cArr) {
        this(cArr, extractRightQuotations(new Configuration.ConfigurationBuilder().build().getSymbolTable()));
    }

    /**
     * Creates an extractor whose full-stop and right-quotation characters are
     * read from the given symbol table. The table is retained and consulted by
     * {@link #getBrokenLineSeparator()}.
     *
     * @param symbolTable source of the sentence-delimiting symbols
     */
    public SentenceExtractor(SymbolTable symbolTable) {
        this(extractPeriods(symbolTable), extractRightQuotations(symbolTable), symbolTable);
    }

    /**
     * Creates an extractor from explicit character lists, without a symbol table.
     *
     * @param cArr  full-stop characters; must be non-null and non-empty
     * @param cArr2 right-quotation characters; {@code null} is treated as "none"
     * @throws IllegalArgumentException if {@code cArr} is null or empty
     */
    SentenceExtractor(char[] cArr, char[] cArr2) {
        this(cArr, cArr2, null);
    }

    /** Shared initialisation for all other constructors. */
    private SentenceExtractor(char[] fullStops, char[] rightQuotations, SymbolTable table) {
        this.symbolTable = table;
        this.fullStopList = fullStops;
        // A missing quotation list previously caused a NullPointerException
        // while building the pattern; treat it as an empty list instead.
        this.rightQuotationList = rightQuotations == null ? new char[0] : rightQuotations;
        this.fullStopPattern = constructEndSentencePattern();
        this.endOfSentenceDetector = new EndOfSentenceDetector(this.fullStopPattern, WHITE_WORDS);
    }

    /**
     * Reads the full stop, question mark and exclamation mark from the symbol
     * table (via {@code getValueOrFallbackToDefault}) and logs them.
     */
    private static char[] extractPeriods(SymbolTable symbolTable) {
        char[] periods = {
            symbolTable.getValueOrFallbackToDefault(SymbolType.FULL_STOP),
            symbolTable.getValueOrFallbackToDefault(SymbolType.QUESTION_MARK),
            symbolTable.getValueOrFallbackToDefault(SymbolType.EXCLAMATION_MARK)
        };
        LOG.info("\"{}\" are added as a end of sentence characters", Arrays.toString(periods));
        return periods;
    }

    /**
     * Reads the right single and right double quotation marks from the symbol
     * table (via {@code getValueOrFallbackToDefault}) and logs them.
     */
    private static char[] extractRightQuotations(SymbolTable symbolTable) {
        char[] quotations = {
            symbolTable.getValueOrFallbackToDefault(SymbolType.RIGHT_SINGLE_QUOTATION_MARK),
            symbolTable.getValueOrFallbackToDefault(SymbolType.RIGHT_DOUBLE_QUOTATION_MARK)
        };
        LOG.info("\"{}\" are added as a right quotation characters", Arrays.toString(quotations));
        return quotations;
    }

    /**
     * Appends one alternative per full-stop character, each followed by the
     * given quotation character (for example {@code \.'}).
     */
    private static void generateQuotationPattern(char[] fullStops, StringBuilder sb, char quotation) {
        // The quotation character is escaped too: left raw, a metacharacter
        // here would alter or break the whole alternation.
        String quotationPart = handleSpecialCharacter(quotation);
        for (char fullStop : fullStops) {
            appendPattern(sb, handleSpecialCharacter(fullStop) + quotationPart);
        }
    }

    /** Appends one alternative per full-stop character on its own. */
    private static void generateSimplePattern(char[] fullStops, StringBuilder sb) {
        for (char fullStop : fullStops) {
            appendPattern(sb, handleSpecialCharacter(fullStop));
        }
    }

    /** Appends {@code str} to the alternation, inserting {@code |} between entries. */
    private static void appendPattern(StringBuilder sb, String str) {
        if (sb.length() > 0) {
            sb.append("|");
        }
        sb.append(str);
    }

    /**
     * Returns the regular-expression text that matches {@code c} literally.
     *
     * <p>Characters listed in {@link #ESCAPED_CHARACTERS} get a leading
     * backslash; every other character is returned unchanged. Earlier only
     * '.', '?' and '!' were escaped, so a symbol such as '*' or '(' produced an
     * invalid or unintended pattern.
     */
    private static String handleSpecialCharacter(char c) {
        if (ESCAPED_CHARACTERS.indexOf(c) >= 0) {
            return "\\" + c;
        }
        return String.valueOf(c);
    }

    /** Builds a mutable list holding the given elements in order. */
    @SafeVarargs
    private static <E> List<E> generateUmList(E... eArr) {
        return new ArrayList<>(Arrays.asList(eArr));
    }

    /**
     * Finds every sentence in {@code str} and appends its offsets to {@code list}.
     *
     * <p>Each appended pair holds the start offset of a sentence and the offset
     * one past its end position as reported by the detector. Scanning stops
     * when the detector reports a negative position.
     *
     * @param str  text to scan
     * @param list receives one (start, end-exclusive) pair per detected sentence
     * @return the offset just past the last detected sentence end, or 0 if no
     *         sentence end was found
     */
    public int extract(String str, List<Pair<Integer, Integer>> list) {
        int start = 0;
        int end = this.endOfSentenceDetector.getSentenceEndPosition(str);
        while (end >= 0) {
            int next = end + 1;
            list.add(new Pair<>(Integer.valueOf(start), Integer.valueOf(next)));
            start = next;
            end = this.endOfSentenceDetector.getSentenceEndPosition(str, start);
        }
        return start;
    }

    /**
     * Returns the position reported by the underlying detector for the first
     * sentence end in {@code str}.
     *
     * @param str text to scan
     * @return the detector's result for {@code str}
     */
    public int getSentenceEndPosition(String str) {
        return this.endOfSentenceDetector.getSentenceEndPosition(str);
    }

    /**
     * Returns the separator to use when joining a sentence broken across lines.
     *
     * <p>Yields {@code Token.BLANK_LINE} when a symbol table is present and its
     * language is "ja"; otherwise
     * {@code ParagraphStartWithValidator.DEFAULT_PARAGRAPH_START_WITH}.
     * NOTE(review): these constant references look like decompiler substitutions
     * for string literals (presumably "" and " ") — confirm against the constants.
     *
     * @return the separator string
     */
    public String getBrokenLineSeparator() {
        // Constant-first equals: a table without a language no longer throws.
        boolean japanese = this.symbolTable != null && "ja".equals(this.symbolTable.getLang());
        return japanese ? Token.BLANK_LINE : ParagraphStartWithValidator.DEFAULT_PARAGRAPH_START_WITH;
    }

    /**
     * Builds the end-of-sentence pattern as an alternation: first every
     * full stop followed by each right-quotation character, then every full
     * stop on its own.
     *
     * @return the compiled pattern
     * @throws IllegalArgumentException if no full-stop character is configured
     */
    Pattern constructEndSentencePattern() {
        if (this.fullStopList == null || this.fullStopList.length == 0) {
            throw new IllegalArgumentException("No end character is specified");
        }
        StringBuilder alternation = new StringBuilder();
        // Quoted forms go first so that "full stop + quotation" is tried before
        // the bare full stop at the same position.
        for (char quotation : this.rightQuotationList) {
            generateQuotationPattern(this.fullStopList, alternation, quotation);
        }
        generateSimplePattern(this.fullStopList, alternation);
        return Pattern.compile(alternation.toString());
    }
}
