package org.conqat.lib.commons.string;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.conqat.lib.commons.collections.CollectionUtils;

/* loaded from: input_file:org/conqat/lib/commons/string/SimpleNLPUtils.class */
/**
 * Small, regex-based helpers for lightweight natural-language processing:
 * splitting text into words and sentences, blanking out ignored substrings
 * and checking whether a text range is delimited by word boundaries.
 */
public class SimpleNLPUtils {

    /**
     * Regex source for a word that contains at least two consecutive groups of
     * "one or more vowels ({@code a e i o u y}) followed by one or more
     * non-vowel, non-whitespace characters". The character classes list
     * lowercase letters only.
     */
    public static final String REGEX_WORD_WITH_TWO_SYLLABLES = "\\b\\w*?([aeiouy]+[^aeiouy\\s]+){2,}\\w*?\\b";

    /** One or more whitespace or Unicode separator ({@code \p{Z}}) characters. */
    private static final Pattern WORD_SEPARATION_PATTERN = Pattern.compile("[\\s\\p{Z}]+");

    /** Irregular superlative forms. The list is unmodifiable. */
    public static final List<String> IRREGULAR_SUPERLATIVES = Collections
            .unmodifiableList(Arrays.asList("best", "worst", "least", "furthest", "farthest"));

    /** Irregular comparative forms. The list is unmodifiable. */
    public static final List<String> IRREGULAR_COMPARATIVES = Collections
            .unmodifiableList(Arrays.asList("better", "worse", "less", "further", "farther"));

    /**
     * Matches one sentence: leading whitespace is skipped, then group 1
     * captures (lazily) from the first non-whitespace character up to and
     * including a {@code .}, {@code !} or {@code ?} that is followed either by
     * whitespace plus an uppercase letter or by the end of the input.
     * {@link Pattern#DOTALL} lets a sentence span line breaks.
     */
    private static final Pattern SENTENCE_END_PATTERN = Pattern.compile("\\s*(\\S.*?[.!?])(?=\\s+\\p{Lu}|$)",
            Pattern.DOTALL);

    /** Immutable holder for a sentence together with its offsets in the source text. */
    public static class SentenceDetails {
        private final String sentence;
        private final int startOffset;
        private final int endOffset;

        /**
         * @param str the sentence text
         * @param i   the start offset of the sentence
         * @param i2  the end offset of the sentence
         */
        public SentenceDetails(String str, int i, int i2) {
            this.sentence = str;
            this.startOffset = i;
            this.endOffset = i2;
        }

        /** @return the sentence text */
        public String getSentence() {
            return this.sentence;
        }

        /** @return the start offset passed to the constructor */
        public int getStartOffset() {
            return this.startOffset;
        }

        /** @return the end offset passed to the constructor */
        public int getEndOffset() {
            return this.endOffset;
        }
    }

    /**
     * Splits the text at runs of whitespace/separator characters and drops
     * blank pieces (e.g. the empty piece produced by a leading separator).
     *
     * @param str the text to split
     * @return the words in order of appearance; an empty list when
     *         {@code StringUtils.isEmpty(str)} holds
     */
    public static List<String> splitIntoWords(String str) {
        if (StringUtils.isEmpty(str)) {
            return CollectionUtils.emptyList();
        }
        return Arrays.stream(WORD_SEPARATION_PATTERN.split(str))
                .filter(word -> !org.apache.commons.lang3.StringUtils.isBlank(word))
                .collect(Collectors.toList());
    }

    /**
     * Overwrites every match of {@code pattern} in {@code str} with
     * {@code StringUtils.repeat(StringUtils.SPACE, matchLength)}. The
     * replacement is presumably a run of spaces as long as the match, so that
     * character offsets in the result line up with the input (confirm against
     * {@code StringUtils}).
     *
     * @param pattern the pattern describing the substrings to blank out
     * @param str     the text to process, may be {@code null}
     * @return the processed text, or {@code StringUtils.EMPTY_STRING} when
     *         {@code str} is {@code null}
     */
    public static String removeIgnoredSubstrings(Pattern pattern, String str) {
        if (str == null) {
            return StringUtils.EMPTY_STRING;
        }
        StringBuilder result = new StringBuilder(str);
        // The matcher runs over the untouched input, not over the builder.
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            result.replace(start, end, StringUtils.repeat(StringUtils.SPACE, end - start));
        }
        return result.toString();
    }

    /** A character counts as a word boundary when it is neither a letter nor a digit. */
    private static boolean isWordBoundary(char c) {
        return !Character.isLetterOrDigit(c);
    }

    /**
     * Checks whether the range of {@code i2} characters starting at index
     * {@code i} is delimited on both sides by a word boundary. The start and
     * the end of the text count as boundaries.
     *
     * @param i   the start index of the range in {@code str}
     * @param i2  the length of the range
     * @param str the text containing the range
     * @return {@code true} if the character before the range (if any) and the
     *         character after the range (if any) are both non-alphanumeric
     */
    public static boolean hasWordBoundaries(int i, int i2, String str) {
        boolean boundaryBefore = i <= 0 || isWordBoundary(str.charAt(i - 1));
        if (!boundaryBefore) {
            return false;
        }
        // Sum in long: an int overflow of i + i2 would wrap to a negative index
        // and throw instead of being treated as "range reaches the end of the text".
        long indexAfterRange = (long) i + i2;
        return indexAfterRange >= str.length() || isWordBoundary(str.charAt((int) indexAfterRange));
    }

    /**
     * Splits the text into sentences using {@link #SENTENCE_END_PATTERN}.
     * Each entry carries the sentence text, the index of its first character
     * and the index of its last character (the end offset is inclusive).
     * Trailing text that does not end in {@code .}, {@code !} or {@code ?} is
     * not returned.
     *
     * @param str the text to split
     * @return the detected sentences in order of appearance; an empty list
     *         when {@code StringUtils.isEmpty(str)} holds
     */
    public static List<SentenceDetails> splitIntoSentences(String str) {
        if (StringUtils.isEmpty(str)) {
            return CollectionUtils.emptyList();
        }
        List<SentenceDetails> sentences = new ArrayList<>();
        Matcher matcher = SENTENCE_END_PATTERN.matcher(str);
        while (matcher.find()) {
            // matcher.end(1) is exclusive; subtract one for the inclusive end offset.
            sentences.add(new SentenceDetails(matcher.group(1), matcher.start(1), matcher.end(1) - 1));
        }
        return sentences;
    }
}
