package ivory.sqe.querygenerator;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import edu.umd.cloud9.io.map.HMapSFW;
import edu.umd.cloud9.io.pair.PairOfStrings;
import ivory.core.tokenize.Tokenizer;
import ivory.core.tokenize.TokenizerFactory;
import ivory.sqe.retrieval.Constants;
import ivory.sqe.retrieval.PairOfFloatMap;
import ivory.sqe.retrieval.StructuredQuery;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/sqe/querygenerator/MtNQueryGenerator.class */
/**
 * Query generator that turns the output of
 * {@code TranslationFactory.readTranslationsFromNBest} into a JSON-backed
 * {@link StructuredQuery}.
 *
 * <p>Two output shapes are produced by {@link #parseQuery}:
 * <ul>
 *   <li>When {@code kBest == 1}: a {@code #weight} array holding the token weight, a
 *       {@code #combine} of the target tokens, the phrase weight and a {@code #combine}
 *       of the phrases.</li>
 *   <li>Otherwise: a {@code #combweight} array holding scaled phrase weights followed by
 *       the token weight and a {@code #combine} of per-token {@code #weight} clauses. Each
 *       per-token clause mixes up to three translation sources, weighted by
 *       {@code Constants.BitextWeight}, {@code Constants.GrammarWeight} and
 *       {@code Constants.MTWeight}.</li>
 * </ul>
 *
 * <p>{@link #init} must be called before {@link #parseQuery}; it populates every
 * configuration-derived field used below.
 */
public class MtNQueryGenerator implements QueryGenerator {
    private static final Logger LOG = Logger.getLogger(MtNQueryGenerator.class);

    // Tokenizers built in init(). The boolean passed to TokenizerFactory is true for
    // docLangTokenizer and queryLangTokenizerWithStemming and false for queryLangTokenizer
    // (presumably a stemming switch -- confirm against TokenizerFactory).
    private Tokenizer docLangTokenizer;
    private Tokenizer queryLangTokenizerWithStemming;
    private Tokenizer queryLangTokenizer;

    // Never assigned in this class, so the StructuredQuery is always created with length 0.
    private int length;
    // Value of Constants.KBest (default 1); selects the output shape in parseQuery().
    private int kBest;
    // Not read anywhere in this class.
    private boolean bigramSegment = false;

    // Delegate generators; created once on the first init() call and reused afterwards.
    private ProbabilisticStructuredQueryGenerator clGenerator;
    private SCFGQueryGenerator scfgGenerator;

    // Interpolation weights of the three per-token translation sources.
    private float mtWeight;
    private float bitextWeight;
    private float scfgWeight;

    // Weights of the token-based and phrase-based parts of the final query.
    private float tokenWeight;
    private float phraseWeight;

    // Passed through to TranslationFactory.readTranslationsFromNBest.
    private float alpha;
    // Threshold handed to Utils.scaleProbMap / Utils.combineProbMaps.
    private float lexProbThreshold;
    private String queryLang;
    private String docLang;
    // When true, each token clause is scaled by (source token count / translation count).
    private boolean scaling;
    private Set<String> unknownWords;

    /**
     * Reads all settings from {@code configuration}, builds the three tokenizers, loads the
     * unknown-word set and initializes the two delegate generators.
     *
     * <p>If {@code Constants.Quiet} is set, logging for this class is switched off. The
     * delegate generators are only created and initialized when they do not exist yet, so a
     * repeated call refreshes this object's own settings but not the delegates'.
     *
     * @param fileSystem    file system handed to the tokenizer factory, the unknown-word
     *                      reader and the delegate generators
     * @param configuration source of all {@code Constants.*} settings
     * @throws IOException declared by the interface; may be raised by the helpers called here
     */
    @Override // ivory.sqe.querygenerator.QueryGenerator
    public void init(FileSystem fileSystem, Configuration configuration) throws IOException {
        if (configuration.getBoolean(Constants.Quiet, false)) {
            LOG.setLevel(Level.OFF);
        }
        this.queryLang = configuration.get(Constants.QueryLanguage);
        this.docLang = configuration.get(Constants.DocLanguage);
        LOG.info("Stemmed stopword list file in query-language:" + configuration.get(Constants.StemmedStopwordListQ));
        LOG.info("Stemmed stopword list file in doc-language:" + configuration.get(Constants.StemmedStopwordListD));

        this.tokenWeight = configuration.getFloat(Constants.TokenWeight, 1.0f);
        this.phraseWeight = configuration.getFloat(Constants.PhraseWeight, 0.0f);
        this.alpha = configuration.getFloat(Constants.Alpha, 1.0f);
        this.scaling = configuration.getBoolean(Constants.Scaling, false);
        this.lexProbThreshold = configuration.getFloat(Constants.LexicalProbThreshold, 0.0f);

        String queryTokenizerData = configuration.get(Constants.QueryTokenizerData);
        String docTokenizerData = configuration.get(Constants.DocTokenizerData);

        this.kBest = configuration.getInt(Constants.KBest, 1);
        LOG.info("K = " + this.kBest);

        this.mtWeight = configuration.getFloat(Constants.MTWeight, 1.0f);
        this.bitextWeight = configuration.getFloat(Constants.BitextWeight, 0.0f);
        this.scfgWeight = configuration.getFloat(Constants.GrammarWeight, 0.0f);
        // These log the raw configured strings (null when unset), not the parsed defaults.
        LOG.info(configuration.get(Constants.MTWeight));
        LOG.info(configuration.get(Constants.BitextWeight));
        LOG.info(configuration.get(Constants.GrammarWeight));

        this.queryLangTokenizer = TokenizerFactory.createTokenizer(fileSystem, configuration, this.queryLang, queryTokenizerData, false, null, null, null);
        this.queryLangTokenizerWithStemming = TokenizerFactory.createTokenizer(fileSystem, configuration, this.queryLang, queryTokenizerData, true, null, configuration.get(Constants.StemmedStopwordListQ), null);
        this.docLangTokenizer = TokenizerFactory.createTokenizer(fileSystem, configuration, this.docLang, docTokenizerData, true, null, configuration.get(Constants.StemmedStopwordListD), null);

        this.unknownWords = Utils.readUnknowns(fileSystem, configuration.get(Constants.UNKFile));
        LOG.info("Unknown words = " + this.unknownWords);
        LOG.info("one2many= " + configuration.getInt(Constants.One2Many, 2));

        if (this.clGenerator == null) {
            this.clGenerator = new ProbabilisticStructuredQueryGenerator();
            this.clGenerator.init(fileSystem, configuration);
        }
        if (this.scfgGenerator == null) {
            this.scfgGenerator = new SCFGQueryGenerator();
            this.scfgGenerator.init(fileSystem, configuration);
        }
    }

    /**
     * Builds the structured query for one input query string.
     *
     * <p>When the weights are exactly {@code mtWeight == 0}, {@code scfgWeight == 0} and
     * {@code bitextWeight == 1}, the work is delegated to the
     * {@code ProbabilisticStructuredQueryGenerator} with {@code "||||"} appended to the
     * original query. Otherwise the 1-best or n-best representation is built depending on
     * {@code kBest}.
     *
     * @param str           raw query input, parsed by
     *                      {@code TranslationFactory.readTranslationsFromNBest}
     * @param fileSystem    file system used to load the grammar
     * @param configuration configuration providing {@code Constants.GrammarPath}
     * @return the structured query; its length argument is the never-assigned
     *         {@code length} field, i.e. 0
     */
    @Override // ivory.sqe.querygenerator.QueryGenerator
    public StructuredQuery parseQuery(String str, FileSystem fileSystem, Configuration configuration) {
        Translation translation = TranslationFactory.readTranslationsFromNBest(str, this.alpha, this.unknownWords, this.queryLangTokenizer, this.queryLangTokenizerWithStemming, this.docLangTokenizer, configuration);
        String originalQuery = translation.getOriginalQuery();
        String grammarPath = configuration.get(Constants.GrammarPath);

        // The grammar is only loaded for sources that actually carry weight. Both loads
        // happen before the delegation check below.
        Map<String, HMapSFW> scfgProbs = null;
        if (this.scfgWeight > 0.0f) {
            scfgProbs = this.scfgGenerator.processGrammar(fileSystem, configuration, grammarPath);
        }
        Set<PairOfStrings> bitextPairs = null;
        if (this.bitextWeight > 0.0f) {
            bitextPairs = this.clGenerator.processGrammar(fileSystem, configuration, grammarPath);
        }

        if (this.mtWeight == 0.0f && this.scfgWeight == 0.0f && this.bitextWeight == 1.0f) {
            return this.clGenerator.parseQuery(originalQuery + "||||", fileSystem, configuration);
        }

        String[] stemmedSourceTokens = this.queryLangTokenizerWithStemming.processContent(originalQuery);
        Map<String, String> stemMapping = translation.getStemMapping();

        JsonObject queryJson = new JsonObject();
        if (this.kBest == 1) {
            queryJson.add("#weight", buildOneBestWeights(translation));
        } else {
            queryJson.add("#combweight", buildNBestWeights(translation, originalQuery, stemmedSourceTokens, stemMapping, scfgProbs, bitextPairs));
        }
        return new StructuredQuery(queryJson, this.length);
    }

    /**
     * Builds the {@code #weight} array for the 1-best case:
     * {@code [tokenWeight, {#combine: tokens}, phraseWeight, {#combine: phrases}]}.
     * The phrase list stays empty unless {@code phraseWeight > 0}.
     */
    private JsonArray buildOneBestWeights(Translation translation) {
        List<String> phrases = new ArrayList<String>();
        if (this.phraseWeight > 0.0f) {
            for (String phrase : translation.getPhraseDist().keySet()) {
                phrases.add(Utils.removeBorderStopWords(this.docLangTokenizer, phrase));
            }
        }
        List<String> tokens = new ArrayList<String>();
        for (String token : translation.getTargetTokens()) {
            tokens.add(token);
        }

        JsonObject phraseJson = new JsonObject();
        phraseJson.add("#combine", Utils.createJsonArray(phrases.toArray(new String[phrases.size()])));
        JsonObject tokenJson = new JsonObject();
        tokenJson.add("#combine", Utils.createJsonArray(tokens.toArray(new String[tokens.size()])));

        JsonArray weights = new JsonArray();
        weights.add(new JsonPrimitive(Float.valueOf(this.tokenWeight)));
        weights.add(tokenJson);
        weights.add(new JsonPrimitive(Float.valueOf(this.phraseWeight)));
        weights.add(phraseJson);
        return weights;
    }

    /**
     * Builds the {@code #combweight} array for the n-best case: one (weight, phrase) pair per
     * entry of the scaled phrase distribution when {@code phraseWeight > 0}, followed by
     * {@code tokenWeight} and the token-based {@code #combine} object when
     * {@code tokenWeight > 0}.
     */
    private JsonArray buildNBestWeights(Translation translation, String originalQuery, String[] stemmedSourceTokens,
            Map<String, String> stemMapping, Map<String, HMapSFW> scfgProbs, Set<PairOfStrings> bitextPairs) {
        JsonObject tokenJson = new JsonObject();
        if (this.tokenWeight > 0.0f) {
            tokenJson.add("#combine", buildTokenClauses(translation, originalQuery, stemmedSourceTokens, stemMapping, scfgProbs, bitextPairs));
        }

        JsonArray weights = new JsonArray();
        if (this.phraseWeight > 0.0f) {
            HMapSFW scaledPhrases = Utils.scaleProbMap(this.lexProbThreshold, this.phraseWeight, translation.getPhraseDist());
            for (String phrase : scaledPhrases.keySet()) {
                weights.add(new JsonPrimitive(Float.valueOf(scaledPhrases.get(phrase))));
                weights.add(new JsonPrimitive(phrase));
            }
        }
        if (this.tokenWeight > 0.0f) {
            weights.add(new JsonPrimitive(Float.valueOf(this.tokenWeight)));
            weights.add(tokenJson);
        }
        return weights;
    }

    /**
     * Builds one {@code #weight} clause per non-stopword source token by gathering the
     * token's translation distributions from every source with a positive weight. Tokens for
     * which no source yields a non-empty distribution contribute nothing.
     */
    private JsonArray buildTokenClauses(Translation translation, String originalQuery, String[] stemmedSourceTokens,
            Map<String, String> stemMapping, Map<String, HMapSFW> scfgProbs, Set<PairOfStrings> bitextPairs) {
        JsonArray clauses = new JsonArray();
        for (String token : stemmedSourceTokens) {
            HMapSFW mtDistribution = translation.getDistributionOf(token);
            if (this.queryLangTokenizerWithStemming.isStopWord(token)) {
                continue;
            }
            LOG.info("Processing " + token);

            List<PairOfFloatMap> sources = new ArrayList<PairOfFloatMap>();
            if (this.bitextWeight > 0.0f) {
                HMapSFW bitextDistribution = this.clGenerator.getTranslations(originalQuery.trim(), token, bitextPairs, stemMapping);
                if (bitextDistribution != null && !bitextDistribution.isEmpty()) {
                    sources.add(new PairOfFloatMap(bitextDistribution, this.bitextWeight));
                }
            }
            if (this.scfgWeight > 0.0f) {
                HMapSFW scfgDistribution = this.scfgGenerator.getTranslations(originalQuery.trim(), token, scfgProbs, stemMapping);
                if (scfgDistribution != null && !scfgDistribution.isEmpty()) {
                    sources.add(new PairOfFloatMap(scfgDistribution, this.scfgWeight));
                }
            }
            if (this.mtWeight > 0.0f && mtDistribution != null && !mtDistribution.isEmpty()) {
                Utils.normalize(mtDistribution);
                sources.add(new PairOfFloatMap(mtDistribution, this.mtWeight));
            }

            // Evaluated before the emptiness check so the lookups happen for every
            // non-stopword token, whether or not a clause is emitted for it.
            float scale = this.scaling ? (1.0f * translation.getSourceTokenCnt().get(token)) / translation.getCount() : 1.0f;
            if (sources.isEmpty()) {
                continue;
            }

            // A single source is only scaled; several sources are combined.
            JsonArray probabilities;
            if (sources.size() == 1) {
                probabilities = Utils.createJsonArrayFromProbabilities(Utils.scaleProbMap(this.lexProbThreshold, scale, sources.get(0).getMap()));
            } else {
                probabilities = Utils.createJsonArrayFromProbabilities(Utils.combineProbMaps(this.lexProbThreshold, scale, sources));
            }
            JsonObject clause = new JsonObject();
            clause.add("#weight", probabilities);
            clauses.add(clause);
        }
        return clauses;
    }
}
