package org.snu.ids.kkma.index;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import javax.swing.JLabel;
import javax.swing.JProgressBar;
import org.snu.ids.kkma.constants.POSTag;
import org.snu.ids.kkma.ma.CharSetType;
import org.snu.ids.kkma.ma.MCandidate;
import org.snu.ids.kkma.ma.MExpression;
import org.snu.ids.kkma.ma.Morpheme;
import org.snu.ids.kkma.ma.MorphemeAnalyzer;
import org.snu.ids.kkma.ma.Token;
import org.snu.ids.kkma.ma.Tokenizer;
import org.snu.ids.kkma.util.StringSet;
import org.snu.ids.kkma.util.Util;

/* loaded from: input_file:org/snu/ids/kkma/index/KeywordExtractor.class */
public class KeywordExtractor extends MorphemeAnalyzer {
    /** Word dictionary loaded from {@code /dic/ecat/UOM.dic}; merged morpheme runs found here are retagged {@code POSTag.NNM}. */
    static WordDic UOMDic;
    /** Word dictionary loaded from {@code /dic/ecat/ChemFormula.dic}; merged morpheme runs found here are retagged {@code POSTag.UN}. */
    static WordDic ChemFormulaDic;
    /** Word dictionary loaded from {@code /dic/ecat/CompNoun.dic}; merged morpheme runs found here are retagged {@code POSTag.NNG}. */
    static WordDic CompNounDic;
    /** Word dictionary loaded from {@code /dic/ecat/VerbNoun.dic}; consulted for the stem of verbs ending in 하/되. */
    static WordDic VerbNounDic;
    /** Word dictionary loaded from {@code /dic/ecat/JunkWord.dic}; strings found here are not emitted as keywords. */
    static WordDic JunkWordDic;
    /** Word dictionary loaded from {@code /dic/ecat/VerbJunkWord.dic}; not referenced elsewhere in this class. */
    static WordDic VerbJunkWordDic;
    /** How many morphemes before the current end position the dictionary merge window in {@code extractKeyword} may start. */
    static final int MAX_UOM_SIZE = 7;
    /** Connector string that {@link #getFormatedUOMValues(String)} writes in place of any {@link #MULTIPLYERS} token. */
    public static final String STD_UOM_CONNECTOR = "*";
    /** Token strings that {@code isUOMConnector} accepts as a connector. */
    public static final StringSet MULTIPLYERS = new StringSet(new String[]{STD_UOM_CONNECTOR, "x", "X", "×", "Ⅹ"});
    /** Additional token strings accepted by {@code isUOMConnector2}; the value of {@code MCandidate.DLMT_BCL} is defined outside this file. */
    public static final StringSet RANGE_INDICATOR = new StringSet(new String[]{"-", "±", MCandidate.DLMT_BCL, "+"});

    /**
     * Extracts keywords from a multi-line text, one line at a time, optionally
     * reporting progress through Swing widgets.
     *
     * <p>The text is split on {@code "\n"}. Each line accepted by
     * {@code Util.valid} is passed to {@link #extractKeyword(String, boolean)},
     * and the index of every keyword found is shifted by the character offset of
     * its line (each preceding line's length plus one for the newline), so that
     * indices refer to positions in the whole text.
     *
     * @param jProgressBar progress bar to update after each line, or {@code null}
     *                     to skip all progress reporting
     * @param jLabel       label that receives the number of processed lines; only
     *                     updated when {@code jProgressBar} is non-null; may be
     *                     {@code null}
     * @param str          text to analyse; {@code null} yields {@code null}
     * @param z            forwarded unchanged to
     *                     {@link #extractKeyword(String, boolean)}
     * @return the merged keyword list, or {@code null} when no line produced any
     *         keyword
     */
    public KeywordList extractKeyword(JProgressBar jProgressBar, JLabel jLabel, String str, boolean z) {
        if (str == null) {
            return null;
        }
        String[] lines = str.split("\n");
        // The label is only ever driven together with the progress bar.
        boolean reportProgress = jProgressBar != null;
        boolean reportCount = reportProgress && jLabel != null;
        if (reportProgress) {
            jProgressBar.setIndeterminate(false);
            jProgressBar.setMaximum(lines.length);
            jProgressBar.setStringPainted(true);
        }
        if (reportCount) {
            jLabel.setText("0");
        }

        KeywordList merged = null;
        int lineOffset = 0;
        for (int lineNo = 0; lineNo < lines.length; lineNo++) {
            String line = lines[lineNo];
            if (Util.valid(line)) {
                KeywordList found = extractKeyword(line, z);
                // Check for a usable result before touching it.
                if (found != null && found.size() > 0) {
                    if (lineOffset > 0) {
                        // Convert line-relative indices to text-relative ones.
                        for (int k = 0; k < found.size(); k++) {
                            Keyword keyword = found.get(k);
                            keyword.setIndex(lineOffset + keyword.getIndex());
                        }
                    }
                    if (merged == null) {
                        merged = new KeywordList(found);
                    } else {
                        merged.addAll((List<Keyword>) found);
                    }
                }
            }
            if (reportProgress) {
                jProgressBar.setValue(lineNo + 1);
            }
            if (reportCount) {
                jLabel.setText((lineNo + 1) + "");
            }
            // +1 accounts for the "\n" removed by split.
            lineOffset += line.length() + 1;
        }
        if (reportProgress) {
            jProgressBar.setStringPainted(false);
        }
        return merged;
    }

    /**
     * Extracts keywords from a single piece of text.
     *
     * <p>Processing steps, in order:
     * <ol>
     *   <li>analyse the text and keep the first candidate of each best
     *       expression, flattened into one morpheme list;</li>
     *   <li>merge runs of morphemes whose concatenation is listed in the UOM,
     *       chemical-formula or compound-noun dictionary;</li>
     *   <li>lower-case every morpheme and emit keywords for those that pass the
     *       tag and junk-word filters;</li>
     *   <li>emit extra composed keywords for runs of two to four directly
     *       adjacent {@code POSTag.NN} morphemes;</li>
     *   <li>drop keywords tagged XP/XS/VX or whose own string is a junk word;</li>
     *   <li>add the dictionary components of every composed keyword;</li>
     *   <li>sort by index, then by string length.</li>
     * </ol>
     *
     * <p>Any exception is reported on {@code System.err} together with the input
     * and the keywords collected so far are returned (best effort).
     *
     * @param str text to analyse
     * @param z   when {@code true}, only morphemes matching {@code POSTag.N} are
     *            considered in step 3
     * @return a new {@code KeywordList}, never {@code null}, possibly empty
     */
    public KeywordList extractKeyword(String str, boolean z) {
        ArrayList<Keyword> keywords = new ArrayList<Keyword>();
        try {
            List<Morpheme> morphemes = flattenBestCandidates(leaveJustBest(postProcess(analyze(str))));
            mergeDictionaryTerms(morphemes);
            collectMorphemeKeywords(morphemes, z, keywords);
            collectAdjacentNounCompounds(morphemes, keywords);
            dropAffixAndJunkKeywords(keywords);
            keywords.addAll(splitComposedKeywords(keywords));
            Collections.sort(keywords, new Comparator<Keyword>() {
                @Override
                public int compare(Keyword left, Keyword right) {
                    if (left.getIndex() == right.getIndex()) {
                        return left.getString().length() - right.getString().length();
                    }
                    return left.getIndex() - right.getIndex();
                }
            });
        } catch (Exception e) {
            // Deliberate best effort: report the failing input and return what we have.
            System.err.println(str);
            e.printStackTrace();
        }
        return new KeywordList(keywords);
    }

    /** Flattens the first candidate of each expression into one morpheme list. */
    private static List<Morpheme> flattenBestCandidates(List<MExpression> expressions) {
        List<Morpheme> morphemes = new ArrayList<Morpheme>();
        int count = expressions == null ? 0 : expressions.size();
        for (int i = 0; i < count; i++) {
            MExpression expression = expressions.get(i);
            MCandidate candidate = expression.get(0);
            int parts = candidate.size();
            if (parts == 1) {
                // A single-morpheme candidate takes the expression's surface form.
                Morpheme only = (Morpheme) candidate.get(0);
                only.setString(expression.getExp());
                morphemes.add(only);
            } else {
                for (int j = 0; j < parts; j++) {
                    morphemes.add((Morpheme) candidate.get(j));
                }
            }
        }
        return morphemes;
    }

    /**
     * Scans end positions from the back of the list; for each one, tries the
     * longest window first and merges the window into its first morpheme when
     * the concatenated string is found in a dictionary.
     */
    private static void mergeDictionaryTerms(List<Morpheme> morphemes) {
        int end = morphemes.size() - 1;
        while (end > 0) {
            for (int start = Math.max(end - MAX_UOM_SIZE, 0); start < end; start++) {
                StringBuilder buffer = new StringBuilder();
                for (int k = start; k <= end; k++) {
                    buffer.append(morphemes.get(k).getString());
                }
                String joined = buffer.toString();

                if (UOMDic.contains(joined)) {
                    Morpheme head = collapseOnto(morphemes, start, end);
                    head.setString(joined);
                    head.setCharSet(CharSetType.COMBINED);
                    head.setTag(POSTag.NNM);
                    end = start;
                    break;
                } else if (ChemFormulaDic.contains(joined)) {
                    Morpheme head = collapseOnto(morphemes, start, end);
                    head.setString(joined);
                    head.setCharSet(CharSetType.COMBINED);
                    head.setTag(POSTag.UN);
                    end = start;
                    break;
                } else if (CompNounDic.contains(joined)) {
                    Morpheme head = collapseOnto(morphemes, start, end);
                    // NOTE(review): a junk compound is still collapsed but the head keeps
                    // its original string and tag — confirm this is intended.
                    if (!JunkWordDic.contains(joined)) {
                        head.setString(joined);
                        head.setCharSet(CharSetType.COMBINED);
                        head.setTag(POSTag.NNG);
                        head.setComposed(true);
                    }
                    end = start;
                    break;
                }
            }
            end--;
        }
    }

    /** Removes the morphemes at {@code start+1..end} and returns the one left at {@code start}. */
    private static Morpheme collapseOnto(List<Morpheme> morphemes, int start, int end) {
        morphemes.subList(start + 1, end + 1).clear();
        return morphemes.get(start);
    }

    /** Lower-cases each morpheme in place and appends the keywords it yields to {@code out}. */
    private static void collectMorphemeKeywords(List<Morpheme> morphemes, boolean nounsOnly, List<Keyword> out) {
        final char ha = '\uD558';  // 하
        final char doe = '\uB418'; // 되
        for (int i = 0; i < morphemes.size(); i++) {
            Morpheme morpheme = morphemes.get(i);
            morpheme.setString(morpheme.getString().toLowerCase());
            if (nounsOnly && !morpheme.isTagOf(POSTag.N)) {
                continue;
            }
            if (JunkWordDic.contains(morpheme.getString())) {
                continue;
            }
            if (morpheme.isTagOf(POSTag.UN) && morpheme.getCharSet() == CharSetType.ENGLISH) {
                Keyword english = new Keyword(morpheme);
                english.setString(english.getString().toLowerCase());
                out.add(english);
            } else if (morpheme.isTagOf(POSTag.V)) {
                String verb = morpheme.getString();
                int length = verb.length();
                // Only look at the last character once we know the string is long enough.
                if (length > 2) {
                    char last = verb.charAt(length - 1);
                    if (last == ha || last == doe) {
                        String stem = verb.substring(0, length - 1);
                        if (VerbNounDic.contains(stem)) {
                            // The stem is a known noun: emit it in addition to the verb.
                            Keyword noun = new Keyword(morpheme);
                            noun.setString(stem);
                            noun.setTag(POSTag.NNG);
                            out.add(noun);
                        }
                    }
                }
                out.add(new Keyword(morpheme));
            } else {
                out.add(new Keyword(morpheme));
            }
        }
    }

    /** Appends composed keywords for runs of 2–4 directly adjacent NN morphemes. */
    private static void collectAdjacentNounCompounds(List<Morpheme> morphemes, List<Keyword> out) {
        int size = morphemes.size();
        int i = 0;
        while (i < size) {
            Morpheme first = morphemes.get(i);
            int skip = 0;
            if (i + 1 < size && first.isTagOf(POSTag.NN)) {
                Morpheme second = morphemes.get(i + 1);
                if (isAdjacentNoun(first, second)) {
                    if (i + 2 < size) {
                        Morpheme third = morphemes.get(i + 2);
                        if (isAdjacentNoun(second, third)) {
                            if (i + 3 < size) {
                                Morpheme fourth = morphemes.get(i + 3);
                                if (isAdjacentNoun(third, fourth)) {
                                    out.add(composedKeyword(first, first.getString() + second.getString()
                                            + third.getString() + fourth.getString()));
                                    skip = 3;
                                }
                            }
                            out.add(composedKeyword(first,
                                    first.getString() + second.getString() + third.getString()));
                            skip += 2;
                        }
                    }
                    out.add(composedKeyword(first, first.getString() + second.getString()));
                    skip += 1;
                }
            }
            // NOTE(review): skip accumulates to 1, 3 or 6 for runs of 2, 3 or 4 nouns,
            // i.e. more than the run length for the longer runs — kept as found; confirm.
            i += skip + 1;
        }
    }

    /** True when {@code next} is an NN morpheme starting exactly where {@code prev} ends. */
    private static boolean isAdjacentNoun(Morpheme prev, Morpheme next) {
        return next.isTagOf(POSTag.NN) && prev.getIndex() + prev.getString().length() == next.getIndex();
    }

    /** Creates a composed keyword based on {@code head} carrying {@code text}. */
    private static Keyword composedKeyword(Morpheme head, String text) {
        Keyword keyword = new Keyword(head);
        keyword.setComposed(true);
        keyword.setString(text);
        return keyword;
    }

    /** Removes keywords tagged XP/XS/VX and keywords whose own string is a junk word. */
    private static void dropAffixAndJunkKeywords(List<Keyword> keywords) {
        for (int i = keywords.size() - 1; i >= 0; i--) {
            Keyword keyword = keywords.get(i);
            // Test this keyword's string, not a morpheme left over from an earlier loop.
            if (keyword.isTagOf(POSTag.XP | POSTag.XS | POSTag.VX) || JunkWordDic.contains(keyword.getString())) {
                keywords.remove(i);
            }
        }
    }

    /** Builds one keyword per non-junk dictionary component of every composed keyword. */
    private List<Keyword> splitComposedKeywords(List<Keyword> keywords) {
        List<Keyword> components = new ArrayList<Keyword>();
        for (int i = 0; i < keywords.size(); i++) {
            Keyword compound = keywords.get(i);
            if (!compound.isComposed()) {
                continue;
            }
            String[] parts = this.dic.getCompNoun(compound.getString());
            if (parts == null) {
                continue;
            }
            int offset = 0;
            for (int j = 0; j < parts.length; j++) {
                String part = parts[j];
                if (!JunkWordDic.contains(part)) {
                    Keyword component = new Keyword(compound);
                    component.setVocTag("E");
                    component.setString(part);
                    component.setComposed(false);
                    component.setIndex(compound.getIndex() + offset);
                    components.add(component);
                }
                // Advance past skipped junk parts too, so later parts keep correct positions.
                offset += part.length();
            }
        }
        return components;
    }

    /**
     * Returns the given list unchanged.
     *
     * <p>Despite its name this method performs no filtering; junk words are
     * filtered inside {@code extractKeyword}.
     *
     * @param keywordList list to return; may be {@code null}
     * @return {@code keywordList} itself
     */
    public KeywordList removeJunkWord(KeywordList keywordList) {
        return keywordList;
    }

    /**
     * Builds a composed keyword from the first run of consecutive
     * {@code POSTag.NN} morphemes in a candidate.
     *
     * <p>The keyword is created from the first NN morpheme and the strings of
     * the NN morphemes that directly follow it are appended. The keyword is
     * returned as soon as a non-NN morpheme ends a run of at least two nouns.
     * If a noun appears after the first run was broken, {@code null} is
     * returned.
     *
     * <p>NOTE(review): when the candidate ends with a run of exactly one noun
     * that was never broken, that single-noun keyword is returned (marked
     * composed) — confirm whether this is intended.
     *
     * @param mCandidate candidate to inspect; may be {@code null}
     * @return the composed keyword, or {@code null} when the candidate is
     *         {@code null}, has fewer than two morphemes, or no usable run exists
     */
    public Keyword getCompositeNoun(MCandidate mCandidate) {
        if (mCandidate == null || mCandidate.size() < 2) {
            return null;
        }
        Keyword composite = null;
        int nounRun = 0;
        for (int pos = 0; pos < mCandidate.size(); pos++) {
            Morpheme current = (Morpheme) mCandidate.get(pos);
            if (current.isTagOf(POSTag.NN)) {
                if (composite == null) {
                    // First noun seen: it becomes the head of the composite.
                    composite = new Keyword(current);
                    composite.setComposed(true);
                } else if (nounRun == 0) {
                    // A noun after the first run was interrupted: give up.
                    return null;
                } else {
                    composite.setString(composite.getString() + current.getString());
                }
                nounRun++;
                continue;
            }
            // Non-noun: a run of two or more nouns is complete.
            if (composite != null && nounRun > 1) {
                return composite;
            }
            nounRun = 0;
        }
        return nounRun == 0 ? null : composite;
    }

    /**
     * Rewrites a unit-of-measure expression into a compact form.
     *
     * <p>The text is tokenized; number tokens are copied as they are, tokens
     * accepted by {@code isUOMConnector} are replaced by
     * {@link #STD_UOM_CONNECTOR}, single space and tab tokens are dropped, and
     * every other token is copied as it is.
     *
     * @param str text to format
     * @return the concatenation of the kept and replaced token strings
     */
    public static String getFormatedUOMValues(String str) {
        StringBuilder formatted = new StringBuilder();
        List<Token> tokens = Tokenizer.tokenize(str);
        for (Token token : tokens) {
            if (token.isCharSetOf(CharSetType.NUMBER)) {
                formatted.append(token.getString());
                continue;
            }
            if (isUOMConnector(token.getString())) {
                formatted.append(STD_UOM_CONNECTOR);
                continue;
            }
            boolean isSpace = token.getString().equals(" ");
            if (!isSpace && !token.getString().equals("\t")) {
                formatted.append(token.getString());
            }
        }
        return formatted.toString();
    }

    /**
     * Tells whether a token string is one of the {@link #MULTIPLYERS}.
     *
     * @param str token string to test
     * @return {@code true} when {@code MULTIPLYERS} contains {@code str}
     */
    private static boolean isUOMConnector(String str) {
        final boolean isMultiplier = MULTIPLYERS.contains(str);
        return isMultiplier;
    }

    /**
     * Tells whether a token string is one of the {@link #MULTIPLYERS} or one of
     * the {@link #RANGE_INDICATOR} strings.
     *
     * @param str token string to test
     * @return {@code true} when either set contains {@code str}
     */
    private static boolean isUOMConnector2(String str) {
        if (MULTIPLYERS.contains(str)) {
            return true;
        }
        return RANGE_INDICATOR.contains(str);
    }

    /**
     * Demo entry point: extracts keywords from a built-in sample text and
     * prints each one to standard output as {@code position<TAB>keyword}.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        String sampleText = "문서 엔터티의 개념이 명확하지 못하다. 즉, 문서 엔터티에 저장되는 단위개체인 문서가 다른 부서로 발신을 하면 다른 문서가 되는 것인지 수정을 할 때는 문서가 새로 생성되지 않는 것인지, 혹은 결재선으로 발신하면 문서가 그대로 있는 것인지 등에 대한 명확한 정의가 없다. 개발 담당자 마저도 이러한 개념을 명확히 설명하지 못하고 있다.\n사용노즐 : Variojet 045\n작동압력 : 10∼135 bar\n최대압력 : 150 bar\n물토출량 : 1400 rpm 11 L/min\n물흡입허용최고온도 : 70 ℃\n최대물흡입높이 : 2.5 m\n소비전력(시작) : 3.1 kW\n소비전력(정상작동) : 2.3 kW\n크기 : 350×330×900 mm\n무게 : 32 kg\n세제흡입가능 HClO4 ClO4 KClO4 CH3OC6H4OH H2(SO4)2";
        KeywordExtractor extractor = new KeywordExtractor();
        KeywordList keywords = extractor.extractKeyword(sampleText, false);
        if (keywords == null) {
            return;
        }
        int total = keywords.size();
        for (int position = 0; position < total; position++) {
            System.out.println(position + "\t" + keywords.get(position));
        }
    }

    // Loads every dictionary once, when the class is initialised. The fields are
    // null by default, so no explicit reset is needed before assignment.
    static {
        UOMDic = new WordDic("/dic/ecat/UOM.dic");
        ChemFormulaDic = new WordDic("/dic/ecat/ChemFormula.dic");
        CompNounDic = new WordDic("/dic/ecat/CompNoun.dic");
        VerbNounDic = new WordDic("/dic/ecat/VerbNoun.dic");
        JunkWordDic = new WordDic("/dic/ecat/JunkWord.dic");
        VerbJunkWordDic = new WordDic("/dic/ecat/VerbJunkWord.dic");
    }
}
