package epic.dense;

import breeze.linalg.Counter;
import breeze.linalg.Counter$;
import breeze.storage.Zero$DoubleZero$;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import scala.Array$;
import scala.Console$;
import scala.Predef$;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.Range;
import scala.collection.immutable.Range$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.StringBuilder;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Numeric$IntIsIntegral$;
import scala.math.Ordering$Double$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.util.Random;

/* compiled from: Word2Vec.scala */
/* loaded from: input_file:epic/dense/Word2Vec$.class */
/**
 * Singleton collection of helpers for loading word-embedding vectors into a
 * {@code word -> float[]} map. This class is the Java view of the Scala object compiled from
 * Word2Vec.scala; use {@link #MODULE$} to reach the single instance.
 *
 * <p>The methods declare no checked exceptions (the Scala origin does not declare any), so I/O
 * failures from the underlying streams propagate to the caller as thrown.
 */
public final class Word2Vec$ {
    /** The single instance, created once when this class is initialized. */
    public static final Word2Vec$ MODULE$ = new Word2Vec$();

    /** Matches a hyphenated token; group 2 is the final word segment of the match. */
    private final Pattern hyphenPattern;

    private Word2Vec$() {
        this.hyphenPattern = Pattern.compile("(\\w+-)+(\\w+)");
    }

    /**
     * Builds one vector per vocabulary word by combining several embedding sources.
     *
     * <p>The per-source work is delegated to generated function classes that are not visible
     * here; this method derives the total vector size, drives the iteration over {@code set},
     * and prints a summary (word count, source count, total size, number of words found in no
     * source, and the fifty highest-count misses).
     *
     * @param seq     the embedding sources; the summary message reports its size as "sources"
     * @param set     the vocabulary to build vectors for
     * @param counter passed through to the per-word callback (presumably word frequencies used
     *                for the miss statistics; confirm against the callback)
     * @param i       upper bound on the total embedding size
     * @param z       when true, one extra slot is added to the total size before capping
     * @param z2      passed through to the per-word callback; its meaning is not visible here
     * @return the map filled in by the per-word callback
     */
    public HashMap<String, float[]> smartLoadVectorsForVocabulary(Seq<String> seq, Set<String> set, Counter<String, Object> counter, int i, boolean z, boolean z2) {
        Seq perSource = (Seq) seq.map(new Word2Vec$$anonfun$6(set), Seq$.MODULE$.canBuildFrom());
        Seq perSourceSizes = (Seq) perSource.map(new Word2Vec$$anonfun$7(), Seq$.MODULE$.canBuildFrom());
        int summedSize = BoxesRunTime.unboxToInt(perSourceSizes.sum(Numeric$IntIsIntegral$.MODULE$)) + (z ? 1 : 0);
        int totalSize = Math.min(i, summedSize);

        HashMap<String, float[]> vectors = new HashMap<>();
        // Fixed seed so any randomly generated vectors are reproducible across runs.
        Random rng = new Random(0);
        Counter missCounts = Counter$.MODULE$.apply(Zero$DoubleZero$.MODULE$);
        IntRef numMissing = IntRef.create(0);
        set.foreach(new Word2Vec$$anonfun$smartLoadVectorsForVocabulary$1(counter, z, z2, perSource, perSourceSizes, totalSize, vectors, rng, missCounts, numMissing));

        Predef$.MODULE$.println("Read embeddings for " + set.size() + " words from " + seq.size() + " sources, "
                + "total embedding size = " + totalSize + ", " + numMissing.elem + " present in no source");
        Predef$.MODULE$.println("Fifty most common misses: "
                + missCounts.argtopk(50, Ordering$Double$.MODULE$).map(new Word2Vec$$anonfun$smartLoadVectorsForVocabulary$2(missCounts), IndexedSeq$.MODULE$.canBuildFrom()));
        return vectors;
    }

    /** Default for the third argument of {@code smartLoadVectorsForVocabulary}: an empty counter. */
    public Counter<String, Object> smartLoadVectorsForVocabulary$default$3() {
        return Counter$.MODULE$.apply(Zero$DoubleZero$.MODULE$);
    }

    /** Default for the fourth argument of {@code smartLoadVectorsForVocabulary}: no size cap. */
    public int smartLoadVectorsForVocabulary$default$4() {
        return Integer.MAX_VALUE;
    }

    /** Default for the sixth argument of {@code smartLoadVectorsForVocabulary}: {@code true}. */
    public boolean smartLoadVectorsForVocabulary$default$6() {
        return true;
    }

    /**
     * Creates a vector for every vocabulary word without reading any embedding file.
     *
     * <p>The vectors themselves are produced by a generated callback not visible here; it
     * receives a random generator with fixed seed 0.
     *
     * @param set the vocabulary
     * @param i   the base vector size
     * @param z   when true, the size handed to the callback is {@code i + 1}
     * @return the map filled in by the callback
     */
    public HashMap<String, float[]> makeRandomVectorsForVocabulary(Set<String> set, int i, boolean z) {
        HashMap<String, float[]> vectors = new HashMap<>();
        int vectorSize = i + (z ? 1 : 0);
        set.foreach(new Word2Vec$$anonfun$makeRandomVectorsForVocabulary$1(z, vectors, vectorSize, new Random(0)));
        return vectors;
    }

    /**
     * Loads word2vec vectors from {@code str} and then adds entries for the vocabulary words the
     * file did not provide.
     *
     * @param str path of the word2vec file
     * @param set the vocabulary; an empty set makes the reader keep every word in the file
     * @param z   when true, each loaded vector gets one extra trailing slot set to 1.0
     * @return a map with the loaded and the added vectors
     * @throws RuntimeException if no vector at all could be loaded from the file
     */
    public HashMap<String, float[]> loadVectorsForVocabulary(String str, Set<String> set, boolean z) {
        HashMap<String, float[]> loaded = readWord2Vec(str, set, z);
        if (loaded.isEmpty()) {
            throw new RuntimeException("No word2vec vectors loaded");
        }
        return augmentVectorsToCompleteVocabulary(loaded, set, z);
    }

    /**
     * Loads text-format ("Bansal") vectors from {@code str} and then adds entries for the
     * vocabulary words the file did not provide.
     *
     * @param str path of the embeddings file
     * @param set the vocabulary; an empty set makes the reader keep every word in the file
     * @param z   forwarded to the reader and to the completion step
     * @return a map with the loaded and the added vectors
     * @throws RuntimeException if no vector at all could be loaded from the file
     */
    public HashMap<String, float[]> loadBansalVectorsForVocabulary(String str, Set<String> set, boolean z) {
        HashMap<String, float[]> loaded = readBansalEmbeddings(str, set, z);
        if (loaded.isEmpty()) {
            throw new RuntimeException("No Bansal vectors loaded");
        }
        return augmentVectorsToCompleteVocabulary(loaded, set, z);
    }

    /**
     * Adds an entry to {@code hashMap} for every word of {@code set} that is not yet a key.
     *
     * <p>The new vectors are produced by a generated callback not visible here; it is given the
     * length of an existing vector and a random generator with fixed seed 0. The map must be
     * non-empty, because the vector length is taken from its first value; both callers in this
     * class check that before calling.
     *
     * @return the same {@code hashMap} instance, after mutation
     */
    private HashMap<String, float[]> augmentVectorsToCompleteVocabulary(HashMap<String, float[]> hashMap, Set<String> set, boolean z) {
        int vectorLength = ((float[]) hashMap.values().head()).length;
        set.$minus$minus(hashMap.keySet()).foreach(new Word2Vec$$anonfun$augmentVectorsToCompleteVocabulary$1(hashMap, z, vectorLength, new Random(0)));
        return hashMap;
    }

    /**
     * Reads a word2vec file through {@code Word2VecUtils}.
     *
     * <p>The file starts with two strings parsed as integers: the number of entries and the
     * number of floats per entry. Each entry is a word string followed by that many floats.
     * Every entry is read in full even when its word is not wanted, so that the stream stays
     * positioned on the next entry. Progress is printed every 1,000,000 entries and a summary
     * is printed at the end. The input stream is closed before returning, also on failure.
     *
     * @param str path of the word2vec file
     * @param set words to keep; when empty, every word in the file is kept
     * @param z   when true, each vector has one extra trailing element set to 1.0
     * @return the vectors of the kept words
     */
    public HashMap<String, float[]> readWord2Vec(String str, Set<String> set, boolean z) {
        HashMap<String, float[]> vectors = new HashMap<>();
        try (DataInputStream in = new DataInputStream(new BufferedInputStream(new FileInputStream(str)))) {
            int numEntries = new StringOps(Word2VecUtils.readString(in)).toInt();
            int dimension = new StringOps(Word2VecUtils.readString(in)).toInt();
            for (int entry = 0; entry < numEntries; entry++) {
                if (entry % 1000000 == 0) {
                    Console$.MODULE$.println("On line " + entry);
                }
                String word = Word2VecUtils.readString(in);
                float[] vector = new float[z ? dimension + 1 : dimension];
                for (int k = 0; k < dimension; k++) {
                    vector[k] = Word2VecUtils.readFloat(in);
                }
                if (z) {
                    vector[dimension] = 1.0f;
                }
                if (set.isEmpty() || set.contains(word)) {
                    vectors.put(word, vector);
                }
            }
        }
        Console$.MODULE$.println("Loaded " + vectors.size() + " word2vec representations out of " + set.size() + " attempted words");
        return vectors;
    }

    /** Returns the compiled pattern used by {@link #convertWord} to detect hyphenated tokens. */
    public Pattern hyphenPattern() {
        return this.hyphenPattern;
    }

    /**
     * Normalizes a token before it is looked up in an embedding vocabulary.
     *
     * <p>If the raw token contains a hyphenated run of word characters, the result is the final
     * segment of the first such run. Otherwise the result is the token with the bracket
     * escapes {@code -LRB- -RRB- -LSB- -RSB- -LCB- -RCB-} replaced by the bracket characters,
     * or the literal {@code "fifteen"} when that text consists solely of 2 to 15 digits,
     * commas and periods with an optional leading minus sign.
     *
     * <p>NOTE(review): the hyphen test runs on the raw {@code str}, not on the bracket-replaced
     * text, so a token such as {@code x-LRB-y} yields {@code y}. Kept as is; confirm intent.
     *
     * @param str the raw token
     * @param z   when true, the result is lower-cased using the default locale
     * @return the normalized token
     */
    public String convertWord(String str, boolean z) {
        String normalized = str
                .replace("-LRB-", "(")
                .replace("-RRB-", ")")
                .replace("-LSB-", "[")
                .replace("-RSB-", "]")
                .replace("-LCB-", "{")
                .replace("-RCB-", "}")
                .replaceAll("^-?[0-9,.]{2,15}$", "fifteen");
        Matcher hyphenated = hyphenPattern().matcher(str);
        String result = hyphenated.find() ? hyphenated.group(2) : normalized;
        return z ? result.toLowerCase() : result;
    }

    /** Default for the second argument of {@code convertWord}: do not lower-case. */
    public boolean convertWord$default$2() {
        return false;
    }

    /**
     * Reads a text embeddings file with one {@code word<sep>v1 v2 ...} entry per line.
     *
     * <p>The word is separated from its values by a tab if the line contains one, otherwise by
     * the first space; the values are separated by single spaces. A first line that splits
     * into exactly two whitespace-separated fields is treated as a header and skipped. Lines
     * that contain neither a tab nor a space carry no vector and are skipped. The conversion
     * of the value strings into floats is done by generated callbacks not visible here. The
     * underlying source is closed before returning, also on failure.
     *
     * @param str path of the embeddings file
     * @param set words to keep; when empty, every word in the file is kept
     * @param z   when true, each vector is allocated with one extra element (forwarded to the
     *            callback that fills the vector)
     * @return the vectors of the kept words
     */
    public HashMap<String, float[]> readBansalEmbeddings(String str, Set<String> set, boolean z) {
        HashMap<String, float[]> vectors = new HashMap<>();
        scala.io.Source source = Source$.MODULE$.fromFile(new File(str), Codec$.MODULE$.fallbackSystemCodec());
        try {
            Iterator lines = source.getLines();
            boolean firstLine = true;
            while (lines.hasNext()) {
                String line = (String) lines.next();
                if (firstLine) {
                    firstLine = false;
                    if (line.split("\\s+").length == 2) {
                        Predef$.MODULE$.println("Skipping first line: " + line);
                        continue;
                    }
                    Predef$.MODULE$.println("Not skipping first line: " + line);
                }

                int tabIndex = line.indexOf("\t");
                if (tabIndex >= 0) {
                    String word = line.substring(0, tabIndex);
                    if (set.isEmpty() || set.contains(word)) {
                        String[] values = line.substring(tabIndex + 1).split(" ");
                        int length = z ? values.length + 1 : values.length;
                        vectors.put(word, (float[]) Array$.MODULE$.tabulate(length, new Word2Vec$$anonfun$4(z, values), ClassTag$.MODULE$.Float()));
                    }
                } else {
                    int spaceIndex = line.indexOf(" ");
                    if (spaceIndex < 0) {
                        // No separator at all (e.g. a blank line): nothing to parse.
                        continue;
                    }
                    String word = line.substring(0, spaceIndex);
                    if (set.isEmpty() || set.contains(word)) {
                        String[] values = line.substring(spaceIndex + 1).split(" ");
                        int length = z ? values.length + 1 : values.length;
                        vectors.put(word, (float[]) Array$.MODULE$.tabulate(length, new Word2Vec$$anonfun$5(z, values), ClassTag$.MODULE$.Float()));
                    }
                }
            }
        } finally {
            source.close();
        }
        Predef$.MODULE$.println("Loaded " + vectors.size() + " Bansal representations out of " + set.size() + " attempted words");
        return vectors;
    }
}
