package com.yahoo.language.wordpiece;

import com.yahoo.collections.Tuple2;
import com.yahoo.language.Language;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.Tokenizer;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:com/yahoo/language/wordpiece/Model.class */
/**
 * A WordPiece vocabulary loaded from a text file with one token per line.
 *
 * <p>The zero-based line number of a token is its token id. Text is split into
 * vocabulary tokens by repeatedly taking the longest vocabulary entry that is a
 * prefix of the remaining text, prepending the subword prefix to every
 * continuation.
 */
public class Model {
    /** Marker prepended to the remainder of a word after its first piece has been consumed. */
    private final String subwordPrefix;
    /** File the vocabulary was read from; used only by {@link #toString()}. */
    private final Path source;
    private final Language language;
    /**
     * Token to token id, sorted in reverse natural order so that
     * {@code tailMap(s, true)} starts at the greatest key that is {@code <= s}.
     */
    private final NavigableMap<String, Integer> vocabulary = new TreeMap<>(Collections.reverseOrder());
    /** Token id to token, for turning ids back into text in {@link #segment}. */
    private final Map<Integer, String> tokenId2Token = new HashMap<>();

    /**
     * Reads the vocabulary at {@code path} as UTF-8, assigning each line its
     * zero-based line number as token id.
     *
     * @param str the subword prefix used to mark word continuations
     * @param language the language passed to the tokenizer when embedding
     * @param path the vocabulary file to read
     * @throws IllegalArgumentException if the file cannot be read
     */
    public Model(String str, Language language, Path path) {
        this.subwordPrefix = str;
        this.source = path;
        this.language = language;
        // try-with-resources closes the reader on every path, including a failing readLine().
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path.toFile()), StandardCharsets.UTF_8))) {
            int tokenId = 0;
            String token;
            while ((token = reader.readLine()) != null) {
                this.vocabulary.put(token, tokenId);
                this.tokenId2Token.put(tokenId, token);
                tokenId++;
            }
        } catch (IOException e) {
            throw new IllegalArgumentException("Could not read a WordPiece model from " + path, e);
        }
    }

    /** Returns the language this model was created with. */
    public Language language() {
        return this.language;
    }

    /**
     * Converts text to a list of vocabulary token ids.
     *
     * <p>The text is lowercased, tokenized with {@code tokenizer}, and each
     * token is then consumed greedily from the left using the longest matching
     * vocabulary entry. Processing of a token stops at the first remainder that
     * has no match; the ids found up to that point are kept.
     *
     * @param str the text to embed
     * @param tokenizer the tokenizer used to split the text into words
     * @return the token ids, in order of occurrence
     */
    public List<Integer> embed(String str, Tokenizer tokenizer) {
        List<Integer> ids = new ArrayList<>();
        // Locale.ROOT keeps lowercasing independent of the JVM default locale,
        // so the same text always maps to the same vocabulary entries.
        String lowercased = str.toLowerCase(Locale.ROOT);
        for (Token token : tokenizer.tokenize(lowercased, this.language, StemMode.NONE, true)) {
            String original = token.getTokenString();
            String candidate = original;
            int pieces = 0;
            while (!candidate.isEmpty() && !candidate.equals(this.subwordPrefix)) {
                Tuple2<String, Integer> match = findLongestSubstring(candidate);
                if (match == null) {
                    break;
                }
                ids.add(match.second);
                candidate = this.subwordPrefix + candidate.substring(match.first.length());
                // Bound the pieces per token so the loop ends even when a match
                // does not make the candidate shorter.
                if (pieces++ > original.length()) {
                    break;
                }
            }
        }
        return ids;
    }

    /**
     * Splits text into vocabulary tokens.
     *
     * @param str the text to segment
     * @param tokenizer the tokenizer used to split the text into words
     * @return the vocabulary tokens for the ids produced by {@link #embed}
     */
    public List<String> segment(String str, Tokenizer tokenizer) {
        return embed(str, tokenizer).stream().map(this.tokenId2Token::get).toList();
    }

    /**
     * Finds the longest vocabulary entry that {@code str} starts with.
     *
     * @param str the text to match; callers pass a non-empty string
     * @return the matching token and its id, or {@code null} if no entry matches
     */
    private Tuple2<String, Integer> findLongestSubstring(String str) {
        NavigableMap<String, Integer> candidates = this.vocabulary.tailMap(str, true);
        Map.Entry<String, Integer> best = candidates.firstEntry();
        if (best == null) {
            return null;
        }
        int prefixLength = Math.min(str.length(), best.getKey().length());
        while (!str.startsWith(best.getKey())) {
            // Retry with a one-character-shorter prefix of str as the upper bound.
            prefixLength--;
            candidates = candidates.tailMap(str.substring(0, prefixLength), true);
            best = candidates.firstEntry();
            if (best == null) {
                return null;
            }
        }
        return new Tuple2<>(best.getKey(), best.getValue());
    }

    @Override
    public String toString() {
        return "WordPiece model for " + this.language + ": '" + this.source + "'";
    }
}
