package com.fnklabs.draenei.analytics;

import com.codahale.metrics.Timer;
import com.fnklabs.draenei.MetricsFactory;
import com.fnklabs.draenei.analytics.MorphologyFactory;
import com.google.common.util.concurrent.Futures;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.morphology.WrongCharaterException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Splits free text into lower-cased word tokens, using a morphology dictionary and a
 * stop-word store to drop tokens that should not be kept.
 *
 * <p>Every operation is timed through the injected {@link MetricsFactory}; timers are
 * always stopped, including when the timed call throws.
 */
public class TextUtils {
    private static final Logger LOGGER = LoggerFactory.getLogger(TextUtils.class);

    /** Every character of this string is treated as a token separator by the splitter. */
    private static final String SEPARATOR_CHARS = "/*!@#$%^&*()\\\"{}_:– -[]|\\\\?/<>,.«»—=\r\n\t";

    private static final Set<Character> SPECIAL_CHARACTERS = new HashSet<>();

    /**
     * A word is rejected when the first morphology info entry contains any of these
     * substrings. NOTE(review): these look like part-of-speech tags of the Russian and
     * English morphology dictionaries (interjection, preposition, particle, pronoun,
     * conjunction, article) - confirm against the dictionary grammar.
     */
    private static final String[] EXCLUDED_MORPH_TAGS = {
        "МЕЖД", "ПРЕДЛ", "ЧАСТ", "МС", "СОЮЗ", "ARTICLE", "PREP", "PN", "CONJ"
    };

    static {
        for (char c : SEPARATOR_CHARS.toCharArray()) {
            SPECIAL_CHARACTERS.add(c);
        }
    }

    private final MorphologyFactory morphologyFactory;
    private final MetricsFactory metricsFactory;
    private final StopWordsDao stopWordsDao;

    /** Timer identifiers used by this class. */
    public enum MetricsType implements MetricsFactory.Type {
        TEXT_UTILS_TOKENIZE_TEXT,
        TEXT_UTILS_IS_NORMAL_WORD,
        TEXT_UTILS_FIND_ONE,
        TEXT_UTILS_SPLIT,
        TEXT_UTILS_FILTER_WORDS,
        TEXT_UTILS_GET_MORPH_INFO,
        TEXT_UTILS_GET_NORMAL_FORMS,
        TEXT_UTILS_IS_STOP_WORD,
        TEXT_UTILS_ANALYZE_TEXT
    }

    /**
     * Creates the helper with its collaborators.
     *
     * @param morphologyFactory source of per-language morphology dictionaries
     * @param stopWordsDao      store queried to decide whether a token is a stop word
     * @param metricsFactory    source of the timers used to measure each operation
     * @throws IOException not thrown by this constructor body; the declaration is kept so
     *                     existing callers that catch it keep compiling
     */
    public TextUtils(MorphologyFactory morphologyFactory, StopWordsDao stopWordsDao, MetricsFactory metricsFactory) throws IOException {
        this.morphologyFactory = morphologyFactory;
        this.stopWordsDao = stopWordsDao;
        this.metricsFactory = metricsFactory;
    }

    /**
     * Returns the normal forms the morphology dictionary reports for a word.
     *
     * @param str      word to normalize
     * @param language language whose dictionary is used
     * @return a new mutable set of normal forms; empty when the dictionary rejects the
     *         word with a {@link WrongCharaterException} (logged at warn level)
     */
    @NotNull
    public Set<String> getNormalForms(String str, MorphologyFactory.Language language) {
        Timer.Context time = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_GET_NORMAL_FORMS).time();
        try {
            Set<String> normalForms = new HashSet<>();
            try {
                normalForms.addAll(this.morphologyFactory.getMorphology(language).getNormalForms(str));
            } catch (WrongCharaterException e) {
                LOGGER.warn("Cant get normal form of word", e);
            }
            return normalForms;
        } finally {
            time.stop();
        }
    }

    /**
     * Splits text into lower-cased tokens, keeping only tokens that pass the word filter.
     *
     * @param str      text to tokenize; {@code null} or empty yields an empty list
     * @param language language whose dictionary is used for filtering
     * @return a new mutable list of tokens in order of appearance
     */
    public List<String> tokenizeText(String str, MorphologyFactory.Language language) {
        Timer.Context time = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_TOKENIZE_TEXT).time();
        try {
            if (StringUtils.isEmpty(str)) {
                return new ArrayList<>();
            }
            return splitText(str, language);
        } finally {
            time.stop();
        }
    }

    /**
     * Decides whether a token should be kept based on its morphology info.
     *
     * @param str      token to check
     * @param language language whose dictionary is used
     * @return {@code false} for a {@code null}/empty token, a token starting with
     *         {@code http://} or {@code https://}, or a token whose first morphology info
     *         entry contains one of the excluded tags; {@code true} otherwise, including
     *         when the morphology lookup fails (the failure is logged at warn level)
     */
    public boolean isNormalWord(String str, MorphologyFactory.Language language) {
        Timer.Context time = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_IS_NORMAL_WORD).time();
        try {
            if (StringUtils.isEmpty(str) || StringUtils.startsWith(str, "http://") || StringUtils.startsWith(str, "https://")) {
                return false;
            }
            String firstInfo = firstMorphInfo(str, language);
            for (String tag : EXCLUDED_MORPH_TAGS) {
                if (StringUtils.contains(firstInfo, tag)) {
                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            // Best effort: a word the dictionary cannot analyse is kept rather than dropped.
            LOGGER.warn("Can't get morph info: {" + str + "} ", e);
            return true;
        } finally {
            time.stop();
        }
    }

    /**
     * Checks whether the stop-word store contains the given token. Blocks until the
     * asynchronous lookup completes.
     *
     * @param str token to look up
     * @return {@code true} when the lookup yields a non-null result
     */
    public boolean isStopWord(@NotNull String str) {
        Timer.Context time = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_IS_STOP_WORD).time();
        try {
            return Futures.getUnchecked(this.stopWordsDao.findOneAsync(str)) != null;
        } finally {
            time.stop();
        }
    }

    /** Returns the first morphology info entry for a word, timing the dictionary lookup. */
    private String firstMorphInfo(String word, MorphologyFactory.Language language) {
        Timer.Context lookupTime = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_GET_MORPH_INFO).time();
        try {
            List<String> morphInfo = this.morphologyFactory.getMorphology(language).getMorphInfo(word);
            return morphInfo.get(0);
        } finally {
            lookupTime.stop();
        }
    }

    /**
     * Cuts the text at separator characters and collects the tokens that pass the filter.
     * The token left over at the end of the text goes through the same filter as every
     * other token.
     */
    @NotNull
    private List<String> splitText(String str, MorphologyFactory.Language language) {
        Timer.Context time = this.metricsFactory.getTimer(MetricsType.TEXT_UTILS_SPLIT).time();
        try {
            List<String> tokens = new ArrayList<>();
            StringBuilder current = new StringBuilder();
            for (int i = 0; i < str.length(); i++) {
                char c = str.charAt(i);
                if (!SPECIAL_CHARACTERS.contains(c)) {
                    current.append(c);
                } else if (current.length() > 0) {
                    addIfAccepted(tokens, current.toString(), language);
                    current.setLength(0);
                }
            }
            if (current.length() > 0) {
                addIfAccepted(tokens, current.toString(), language);
            }
            return tokens;
        } finally {
            time.stop();
        }
    }

    /**
     * Appends the lower-cased word to {@code tokens} when it is longer than one character,
     * passes {@link #isNormalWord} and is not a stop word.
     */
    private void addIfAccepted(List<String> tokens, String word, MorphologyFactory.Language language) {
        // Cheap length check first so single characters never hit the dictionary or the DAO.
        if (StringUtils.length(word) > 1 && isNormalWord(word, language) && !isStopWord(word)) {
            // Locale.ROOT keeps the result independent of the JVM default locale.
            tokens.add(word.toLowerCase(Locale.ROOT));
        }
    }

    /**
     * Removes quotes, commas, spaces, dots, hashes, parentheses, exclamation and question
     * marks from the text.
     *
     * @param str text to clean
     * @return the text without those characters; {@code null} when {@code str} is {@code null}
     */
    @Nullable
    private static String cleanText(String str) {
        String[] removed = {"\"", "'", ",", " ", ".", "#", "(", ")", "!", "?"};
        String[] replacements = new String[removed.length];
        for (int i = 0; i < replacements.length; i++) {
            replacements[i] = "";
        }
        return StringUtils.replaceEach(str, removed, replacements);
    }
}
