package ivory.sqe.querygenerator;

import com.google.gson.JsonObject;
import ivory.core.tokenize.BigramChineseTokenizer;
import ivory.core.tokenize.GalagoTokenizer;
import ivory.core.tokenize.Tokenizer;
import ivory.core.tokenize.TokenizerFactory;
import ivory.sqe.retrieval.Constants;
import ivory.sqe.retrieval.StructuredQuery;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/sqe/querygenerator/BagOfWordsQueryGenerator.class */
/**
 * Query generator that tokenizes a raw query string and wraps the resulting tokens in a single
 * {@code #combine} JSON node.
 *
 * <p>{@link #init(FileSystem, Configuration)} chooses the tokenizer from the configured document
 * language and must be called before {@link #parseQuery(String)}; otherwise {@code tokenizer} is
 * still {@code null}.
 */
public class BagOfWordsQueryGenerator implements QueryGenerator {
    // Registered under this class: using another generator's class here would misattribute log
    // output and make the Quiet option in init() silence that other class's logger instead.
    private static final Logger LOG = Logger.getLogger(BagOfWordsQueryGenerator.class);

    /** Tokenizer selected by {@link #init(FileSystem, Configuration)}; null until then. */
    Tokenizer tokenizer;

    /**
     * Selects the tokenizer according to the {@code Constants.DocLanguage} setting.
     *
     * <p>For English and Chinese, the tokenizer is created by {@link TokenizerFactory} when the
     * {@code Constants.DocTokenizerData} path exists on {@code fileSystem}; otherwise a
     * {@link GalagoTokenizer} (English) or {@link BigramChineseTokenizer} (Chinese) is used.
     * For German the factory is always used, without checking the path.
     *
     * @param fileSystem file system used to check whether the tokenizer data path exists
     * @param configuration source of the Quiet, DocLanguage and DocTokenizerData settings
     * @throws IOException if the existence check on the file system fails
     * @throws RuntimeException if the document language is unset or not one of the known codes
     */
    @Override // ivory.sqe.querygenerator.QueryGenerator
    public void init(FileSystem fileSystem, Configuration configuration) throws IOException {
        if (configuration.getBoolean(Constants.Quiet, false)) {
            LOG.setLevel(Level.OFF);
        }
        String language = configuration.get(Constants.DocLanguage);
        String tokenizerData = configuration.get(Constants.DocTokenizerData);
        if (language == null) {
            // Fail with an explanatory message instead of a bare NullPointerException below.
            throw new RuntimeException("DocLanguage is not set in the configuration");
        }

        if (language.equals(Constants.English)) {
            if (tokenizerDataExists(fileSystem, tokenizerData)) {
                this.tokenizer = TokenizerFactory.createTokenizer(language, tokenizerData, true);
            } else {
                LOG.info("Tokenizer path " + tokenizerData + " doesn't exist -- using GalagoTokenizer");
                this.tokenizer = new GalagoTokenizer();
            }
        } else if (language.equals(Constants.German)) {
            // No fallback tokenizer for German: the factory is used regardless of the path.
            this.tokenizer = TokenizerFactory.createTokenizer(language, tokenizerData, true);
        } else if (language.equals(Constants.Chinese)) {
            if (tokenizerDataExists(fileSystem, tokenizerData)) {
                this.tokenizer = TokenizerFactory.createTokenizer(language, tokenizerData, true);
            } else {
                LOG.info("Tokenizer path " + tokenizerData + " doesn't exist -- using BigramChineseTokenizer");
                this.tokenizer = new BigramChineseTokenizer();
            }
        } else {
            throw new RuntimeException("DocLanguage code " + language + " not known");
        }
    }

    /**
     * Tokenizes the trimmed query and builds a structured query from the tokens.
     *
     * @param str raw query text; must not be null
     * @return a {@link StructuredQuery} constructed from a JSON object whose {@code "#combine"}
     *     member is the array produced by {@code Utils.createJsonArray} for the tokens, together
     *     with the number of tokens
     */
    @Override // ivory.sqe.querygenerator.QueryGenerator
    public StructuredQuery parseQuery(String str) {
        String[] tokens = this.tokenizer.processContent(str.trim());
        JsonObject queryJson = new JsonObject();
        queryJson.add("#combine", Utils.createJsonArray(tokens));
        return new StructuredQuery(queryJson, tokens.length);
    }

    /**
     * Reports whether the configured tokenizer data path is usable.
     *
     * <p>A null or empty path is treated as nonexistent, because constructing a {@link Path}
     * from such a string throws before the existence check can run.
     */
    private static boolean tokenizerDataExists(FileSystem fileSystem, String tokenizerData)
            throws IOException {
        return tokenizerData != null
                && !tokenizerData.isEmpty()
                && fileSystem.exists(new Path(tokenizerData));
    }
}
