package ivory.ffg.preprocessing;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import edu.umd.cloud9.util.map.HMapII;
import edu.umd.cloud9.util.map.HMapIV;
import ivory.bloomir.util.DocumentUtility;
import ivory.bloomir.util.OptionManager;
import ivory.bloomir.util.QueryUtility;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.index.Posting;
import ivory.core.data.index.PostingsList;
import ivory.core.data.index.PostingsReader;
import ivory.core.data.index.TermPositions;
import ivory.core.data.stat.SpamPercentileScore;
import ivory.ffg.data.CompressedPositionalPostings;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/ffg/preprocessing/GenerateCompressedPositionalPostings.class */
public class GenerateCompressedPositionalPostings {
    private static final Logger LOGGER = Logger.getLogger(GenerateCompressedPositionalPostings.class);

    public static void main(String[] strArr) throws Exception {
        OptionManager optionManager = new OptionManager(GenerateCompressedPositionalPostings.class.getName());
        optionManager.addOption("index", "path", "index root", true);
        optionManager.addOption(OptionManager.OUTPUT_PATH, "path", OptionManager.OUTPUT_PATH, true);
        optionManager.addOption(OptionManager.QUERY_PATH, "path", "XML query", true);
        optionManager.addOption(OptionManager.SPAM_PATH, "path", "spam percentile scores", true);
        try {
            optionManager.parse(strArr);
            String optionValue = optionManager.getOptionValue("index");
            String optionValue2 = optionManager.getOptionValue(OptionManager.OUTPUT_PATH);
            String optionValue3 = optionManager.getOptionValue(OptionManager.SPAM_PATH);
            String optionValue4 = optionManager.getOptionValue(OptionManager.QUERY_PATH);
            FileSystem fileSystem = FileSystem.get(new Configuration());
            RetrievalEnvironment retrievalEnvironment = new RetrievalEnvironment(optionValue, fileSystem);
            retrievalEnvironment.initialize(true);
            DataOutput create = fileSystem.create(new Path(optionValue2));
            HMapIV<int[]> queryToIntegerCode = QueryUtility.queryToIntegerCode(retrievalEnvironment, QueryUtility.loadQueries(optionValue4));
            HashSet newHashSet = Sets.newHashSet();
            HMapII hMapII = new HMapII();
            SpamPercentileScore spamPercentileScore = new SpamPercentileScore();
            spamPercentileScore.initialize(optionValue3, fileSystem);
            int[] spamSortDocids = DocumentUtility.spamSortDocids(spamPercentileScore);
            Posting posting = new Posting();
            ArrayList newArrayList = Lists.newArrayList();
            HashMap newHashMap = Maps.newHashMap();
            Iterator it = queryToIntegerCode.keySet().iterator();
            while (it.hasNext()) {
                int intValue = ((Integer) it.next()).intValue();
                for (int i : (int[]) queryToIntegerCode.get(intValue)) {
                    if (!newHashSet.contains(Integer.valueOf(i))) {
                        newHashSet.add(Integer.valueOf(i));
                        PostingsList postingsList = retrievalEnvironment.getPostingsList(retrievalEnvironment.getTermFromId(i));
                        PostingsReader postingsReader = postingsList.getPostingsReader();
                        newArrayList.clear();
                        newHashMap.clear();
                        int[] iArr = new int[postingsList.getDf()];
                        int i2 = 0;
                        while (postingsReader.nextPosting(posting)) {
                            iArr[i2] = spamSortDocids[posting.getDocno()];
                            newHashMap.put(Integer.valueOf(iArr[i2]), new TermPositions(postingsReader.getPositions(), postingsReader.getTf()));
                            hMapII.put(iArr[i2], retrievalEnvironment.getDocumentLength(posting.getDocno()));
                            i2++;
                        }
                        Arrays.sort(iArr);
                        for (int i3 : iArr) {
                            newArrayList.add(newHashMap.get(Integer.valueOf(i3)));
                        }
                        create.writeInt(i);
                        create.writeInt(postingsList.getDf());
                        CompressedPositionalPostings.newInstance(iArr, newArrayList).write(create);
                    }
                }
                LOGGER.info("Compressed query " + intValue);
            }
            create.writeInt(-1);
            create.writeInt(hMapII.size());
            Iterator it2 = hMapII.keySet().iterator();
            while (it2.hasNext()) {
                int intValue2 = ((Integer) it2.next()).intValue();
                create.writeInt(intValue2);
                create.writeInt(hMapII.get(intValue2));
            }
            create.close();
        } catch (Exception e) {
        }
    }
}
