package ivory.sqe.querygenerator;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import edu.umd.cloud9.io.map.HMapSFW;
import edu.umd.cloud9.util.map.MapKF;
import ivory.core.ConfigurationException;
import ivory.core.RetrievalEnvironment;
import ivory.core.tokenize.BigramChineseTokenizer;
import ivory.core.tokenize.Tokenizer;
import ivory.core.tokenize.TokenizerFactory;
import ivory.sqe.retrieval.Constants;
import ivory.sqe.retrieval.StructuredQuery;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/sqe/querygenerator/SCFGQueryGenerator.class */
/**
 * {@link QueryGenerator} that rewrites a whitespace-delimited query into a
 * {@code #combine} structured query, one clause per query token.
 *
 * <p>Each token is looked up in a translation table built during
 * {@link #init(FileSystem, Configuration)}. Depending on configuration, a
 * token is represented either by its single highest-probability translation
 * (a plain JSON string) or by a {@code #weight} node built from all of its
 * translations. Tokens absent from the table are passed through unchanged.
 *
 * <p>Instances hold mutable state ({@code length} is overwritten on every
 * {@link #parseQuery(String)} call); no synchronization is performed here.
 */
public class SCFGQueryGenerator implements QueryGenerator {
    private static final Logger LOG = Logger.getLogger(SCFGQueryGenerator.class);

    private Tokenizer queryLangTokenizer;
    private Tokenizer docLangTokenizer;
    private Tokenizer bigramTokenizer;
    /** Translation table: source token -> (translation -> probability). */
    private Map<String, HMapSFW> probMap;
    /** Number of tokens in the most recently parsed query. */
    private int length;
    private int numTransPerToken;
    private boolean bigramSegment;
    private RetrievalEnvironment env;
    private String queryLang;
    private String docLang;

    /**
     * Reads the generator's settings from {@code configuration}, creates the
     * query- and document-language tokenizers, and builds the translation
     * table used by {@link #parseQuery(String)}.
     *
     * @param fileSystem    file system handed to the retrieval environment,
     *                      the tokenizer factory and the translation-table loader
     * @param configuration job configuration holding the {@link Constants} keys
     *                      read below, plus the {@code "index"} path
     * @throws IOException if any of the underlying loaders fails
     */
    @Override
    public void init(FileSystem fileSystem, Configuration configuration) throws IOException {
        LOG.info(configuration.get(Constants.DocLanguage));
        LOG.info(configuration.get(Constants.DocTokenizerData));
        LOG.info(configuration.get(Constants.MinWindow));
        LOG.info(configuration.get(Constants.MaxWindow));
        LOG.info(configuration.get(Constants.SCFGWeight));
        LOG.info("Stemmed stopword list file in query-language:" + configuration.get(Constants.StemmedStopwordListQ));
        LOG.info("Stemmed stopword list file in doc-language:" + configuration.get(Constants.StemmedStopwordListD));

        this.numTransPerToken = configuration.getInt(Constants.NumTransPerToken, Integer.MAX_VALUE);
        this.queryLang = configuration.get(Constants.QueryLanguage);
        this.docLang = configuration.get(Constants.DocLanguage);
        float lexicalProbThreshold = configuration.getFloat(Constants.LexicalProbThreshold, 0.0f);
        float cumulativeProbThreshold = configuration.getFloat(Constants.CumulativeProbThreshold, 1.0f);

        // Bigram segmentation is enabled only by the exact value "on".
        this.bigramSegment = "on".equals(configuration.get(Constants.BigramSegment));
        if (this.bigramSegment) {
            this.bigramTokenizer = new BigramChineseTokenizer();
        }
        LOG.info("Bigram segmentation = " + this.bigramSegment);

        try {
            this.env = new RetrievalEnvironment(configuration.get("index"), fileSystem);
            this.env.initialize(true);
        } catch (ConfigurationException e) {
            // Stay best-effort as before (nothing else in this class reads env),
            // but report through the logger instead of stderr.
            LOG.error("Failed to initialize retrieval environment", e);
        }

        this.queryLangTokenizer = TokenizerFactory.createTokenizer(
                fileSystem,
                configuration,
                this.queryLang,
                configuration.get(Constants.QueryTokenizerData),
                false,
                null,
                null,
                null);
        this.docLangTokenizer = TokenizerFactory.createTokenizer(
                fileSystem,
                configuration,
                this.docLang,
                configuration.get(Constants.DocTokenizerData),
                true,
                null,
                configuration.get(Constants.StemmedStopwordListD),
                null);

        this.probMap = Utils.generateTranslationTable(fileSystem, configuration, this.docLangTokenizer);
        // NOTE(review): the literal 30 is presumably a cap on translations kept
        // per token -- confirm against Utils.normalize.
        Utils.normalize(this.probMap, lexicalProbThreshold, cumulativeProbThreshold, 30);
    }

    /**
     * Builds a {@code #combine} structured query from a whitespace-delimited
     * query string.
     *
     * <p>When {@code numTransPerToken} is 1 and bigram segmentation is off,
     * each token contributes its single best translation as a JSON string;
     * otherwise each token contributes a {@code #weight} node over all of its
     * translations. Tokens for which no clause can be produced are skipped.
     *
     * @param str raw query text; tokens are separated by runs of whitespace
     * @return the structured query together with the number of query tokens
     */
    @Override
    public StructuredQuery parseQuery(String str) {
        // Split on runs of whitespace so repeated spaces/tabs do not yield
        // empty tokens that would inflate the length and add bogus clauses.
        String[] tokens = str.trim().split("\\s+");
        this.length = tokens.length;

        boolean bestTranslationOnly = this.numTransPerToken == 1 && !this.bigramSegment;
        JsonArray clauses = new JsonArray();
        for (String token : tokens) {
            if (bestTranslationOnly) {
                String best = getBestTranslation(token);
                if (best != null) {
                    clauses.add(new JsonPrimitive(best));
                }
            } else {
                JsonArray weighted = Utils.createJsonArrayFromProbabilities(getTranslations(token, null));
                if (weighted != null) {
                    JsonObject weightNode = new JsonObject();
                    weightNode.add("#weight", weighted);
                    clauses.add(weightNode);
                }
            }
        }

        JsonObject queryJson = new JsonObject();
        queryJson.add("#combine", clauses);
        return new StructuredQuery(queryJson, this.length);
    }

    /**
     * Returns the translation of {@code str} with the highest probability.
     *
     * @param str source token
     * @return {@code str} itself when the token is not in the translation
     *         table; otherwise the highest-probability translation, or
     *         {@code null} when no translation has a probability above zero
     */
    private String getBestTranslation(String str) {
        HMapSFW translations = this.probMap.get(str);
        if (translations == null) {
            return str;
        }
        float bestProb = 0.0f;
        String best = null;
        for (MapKF.Entry<String> entry : translations.entrySet()) {
            float prob = entry.getValue();
            // Strict comparison: on ties the first entry encountered wins.
            if (prob > bestProb) {
                bestProb = prob;
                best = entry.getKey();
            }
        }
        return best;
    }

    /**
     * Returns the translation distribution for {@code str}.
     *
     * <p>When the token is missing from the translation table, a single-entry
     * distribution with probability 1 is returned. Its key is the value that
     * {@code map} holds for the token if there is one, otherwise the token
     * itself.
     *
     * @param str source token
     * @param map optional token-to-replacement mapping consulted only for
     *            tokens missing from the translation table; may be {@code null}
     * @return the table's distribution for the token (the stored instance, not
     *         a copy), or a fresh single-entry distribution
     */
    public HMapSFW getTranslations(String str, Map<String, String> map) {
        HMapSFW known = this.probMap.get(str);
        if (known != null) {
            return known;
        }
        String replacement = (map == null) ? null : map.get(str);
        HMapSFW passThrough = new HMapSFW();
        passThrough.put(replacement != null ? replacement : str, 1.0f);
        return passThrough;
    }
}
