package com.shikhir.lsh.shingling;

import com.shikhir.hash.MurmurHash;
import com.shikhir.util.stringops.StringOperations;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeSet;
import opennlp.tools.ngram.NGramModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.StringList;

/* loaded from: input_file:com/shikhir/lsh/shingling/ShinglingSet.class */
/**
 * A sorted set of 32-bit hashes, one per n-gram ("shingle") extracted from text.
 *
 * <p>Hashes are computed with {@link MurmurHash#hash32} over the string form of each
 * n-gram produced by an OpenNLP {@link NGramModel}. The set is ordered ascending by
 * hash value, so {@link #getAllTokens()} and {@link #subset(int)} return hashes in
 * ascending order.
 *
 * <p>No synchronization is performed by this class.
 */
public class ShinglingSet {
    /** N-gram length (used as both minimum and maximum) for text containing CJK characters. */
    private static final int CJK_NGRAM = 3;

    /** Distinct shingle hashes collected so far, in ascending order. */
    private final TreeSet<Integer> shinglingSet = new TreeSet<>();

    /** Creates an empty shingling set. */
    public ShinglingSet() {
    }

    /**
     * Creates a shingling set pre-populated with the shingles of {@code str}.
     *
     * @param str text to shingle
     * @param i   minimum n-gram length, forwarded to {@link #getTokensForMessage}
     * @param i2  maximum n-gram length, forwarded to {@link #getTokensForMessage}
     */
    ShinglingSet(String str, int i, int i2) {
        // Add directly instead of calling the overridable addShingling() from a constructor.
        this.shinglingSet.addAll(Arrays.asList(getTokensForMessage(str, i, i2)));
    }

    /**
     * Computes the distinct shingle hashes of a message.
     *
     * <p>If the text contains at least one CJK character (see {@link #hasCjk}), the raw
     * text is added to the n-gram model with a fixed length of {@value #CJK_NGRAM} and the
     * {@code i}/{@code i2} arguments are ignored. Otherwise the text is lower-cased,
     * tokenized with {@link SimpleTokenizer}, and token n-grams with lengths from
     * {@code i} to {@code i2} are generated.
     *
     * @param str text to shingle
     * @param i   minimum n-gram length passed to the n-gram model (non-CJK text only)
     * @param i2  maximum n-gram length passed to the n-gram model (non-CJK text only)
     * @return the distinct n-gram hashes in ascending order; empty when the text yields no tokens
     */
    public static Integer[] getTokensForMessage(String str, int i, int i2) {
        NGramModel nGramModel = new NGramModel();
        if (hasCjk(str)) {
            nGramModel.add(str, CJK_NGRAM, CJK_NGRAM);
        } else {
            // Locale.ROOT keeps the hashes independent of the JVM's default locale.
            String[] words = SimpleTokenizer.INSTANCE.tokenize(str.toLowerCase(Locale.ROOT));
            if (words.length == 0) {
                // No tokens means no shingles; avoids handing an empty token array to
                // StringList (NOTE(review): StringList appears to reject empty arrays - confirm).
                return new Integer[0];
            }
            nGramModel.add(new StringList(words), i, i2);
        }

        TreeSet<Integer> hashes = new TreeSet<>();
        for (StringList gram : nGramModel) {
            hashes.add(Integer.valueOf(MurmurHash.hash32(gram.toString())));
        }
        return hashes.toArray(new Integer[0]);
    }

    /**
     * Adds the shingle hashes of {@code str} to this set; hashes already present are kept once.
     *
     * @param str text to shingle
     * @param i   minimum n-gram length, forwarded to {@link #getTokensForMessage}
     * @param i2  maximum n-gram length, forwarded to {@link #getTokensForMessage}
     */
    public void addShingling(String str, int i, int i2) {
        this.shinglingSet.addAll(Arrays.asList(getTokensForMessage(str, i, i2)));
    }

    /**
     * @return the number of distinct shingle hashes in this set
     */
    public int size() {
        return this.shinglingSet.size();
    }

    /**
     * Tests whether a hash is present in this set.
     *
     * @param num hash to look up; {@code null} is never contained
     * @return {@code true} if {@code num} is non-null and present
     */
    public boolean contains(Integer num) {
        // A naturally ordered TreeSet throws NullPointerException on a null lookup.
        return num != null && this.shinglingSet.contains(num);
    }

    /**
     * Tests whether the text contains at least one CJK character, using the same check
     * that {@link #getTokensForMessage} uses to pick its n-gram strategy.
     *
     * @param str text to inspect; {@code null} yields {@code false}
     * @return {@code true} if {@code StringOperations.countCJKCharecters} reports a positive count
     */
    public static boolean hasCjk(String str) {
        return str != null && StringOperations.countCJKCharecters(str) > 0;
    }

    /**
     * @return a new array holding every hash in this set, in ascending order
     */
    public Integer[] getAllTokens() {
        return this.shinglingSet.toArray(new Integer[0]);
    }

    /**
     * Returns the first {@code i} hashes of this set in ascending order.
     *
     * <p>The returned array always has length {@code i}. When the set holds fewer than
     * {@code i} hashes, the trailing slots are left {@code null}.
     *
     * @param i length of the returned array
     * @return a new array of length {@code i}
     * @throws NegativeArraySizeException if {@code i} is negative
     */
    public Integer[] subset(int i) {
        Integer[] result = new Integer[i];
        int filled = 0;
        for (Integer token : this.shinglingSet) {
            if (filled >= i) {
                break;
            }
            result[filled++] = token;
        }
        return result;
    }
}
