package ivory.core.index;

import edu.umd.cloud9.io.pair.PairOfIntLong;
import edu.umd.cloud9.util.PowerTool;
import ivory.core.Constants;
import ivory.core.RetrievalEnvironment;
import ivory.core.data.dictionary.PrefixEncodedLexicographicallySortedDictionary;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/index/MergeGlobalStatsAcrossIndexSegments.class */
public class MergeGlobalStatsAcrossIndexSegments extends PowerTool {
    /** Class-wide log4j logger; also used by the nested mapper and reducer. */
    private static final Logger LOG = Logger.getLogger(MergeGlobalStatsAcrossIndexSegments.class);
    /** Configuration keys handed back by {@link #getRequiredParameters()}. */
    public static final String[] RequiredParameters = {Constants.CollectionName, "Ivory.IndexPaths", "Ivory.DfThreshold", "Ivory.DataOutputPath"};

    /* loaded from: input_file:ivory/core/index/MergeGlobalStatsAcrossIndexSegments$MyMapper.class */
    /**
     * Mapper whose input value is the path of one index segment. For that segment it reads
     * the prefix-encoded term file in lock-step with the per-term df and cf files and emits
     * {@code (term, (df, cf))} for every term whose df is strictly greater than the
     * {@code Ivory.DfThreshold} setting.
     */
    private static class MyMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, PairOfIntLong> {
        /** Reused output value; refilled before each collect() call. */
        private static final PairOfIntLong stats = new PairOfIntLong();
        /** Reused output key; refilled for each term read. */
        private static final Text sTerm = new Text();
        /** Terms are emitted only when their df is strictly greater than this value. */
        private int mDfThreshold;

        private MyMapper() {
        }

        /** Reads {@code Ivory.DfThreshold} from the job configuration (default 0). */
        public void configure(JobConf jobConf) {
            this.mDfThreshold = jobConf.getInt("Ivory.DfThreshold", 0);
        }

        /**
         * Processes one index segment.
         *
         * @param longWritable    input key (unused)
         * @param text            path of the index segment to read
         * @param outputCollector receives one {@code (term, (df, cf))} pair per qualifying term
         * @param reporter        unused
         * @throws IOException if any of the three segment files cannot be opened or read
         */
        public void map(LongWritable longWritable, Text text, OutputCollector<Text, PairOfIntLong> outputCollector, Reporter reporter) throws IOException {
            // NOTE(review): this builds a fresh Configuration rather than reusing the JobConf
            // passed to configure(); kept as-is because the intent cannot be confirmed from here.
            FileSystem fileSystem = FileSystem.get(new Configuration());
            LOG.info(text);
            RetrievalEnvironment env = new RetrievalEnvironment(text.toString(), fileSystem);
            Path termsPath = new Path(env.getIndexTermsData());
            Path dfPath = new Path(env.getDfByTermData());
            Path cfPath = new Path(env.getCfByTermData());

            // Nested try/finally so every stream that was opened is closed even when a
            // later open or a read fails part-way through the segment.
            FSDataInputStream termsIn = fileSystem.open(termsPath);
            try {
                FSDataInputStream dfIn = fileSystem.open(dfPath);
                try {
                    FSDataInputStream cfIn = fileSystem.open(cfPath);
                    try {
                        emitTerms(termsIn, dfIn, cfIn, outputCollector);
                    } finally {
                        cfIn.close();
                    }
                } finally {
                    dfIn.close();
                }
            } finally {
                termsIn.close();
            }
        }

        /** Walks the three streams in parallel and collects every term above the df threshold. */
        private void emitTerms(FSDataInputStream termsIn, FSDataInputStream dfIn, FSDataInputStream cfIn,
                OutputCollector<Text, PairOfIntLong> outputCollector) throws IOException {
            // The df and cf files each start with an int header that is not needed here.
            dfIn.readInt();
            cfIn.readInt();
            int numTerms = termsIn.readInt();
            int window = termsIn.readInt();

            String previous = "";
            for (int i = 0; i < numTerms; i++) {
                // Every window-th entry stores the full term; the rest store a suffix only.
                String term = readTerm(termsIn, i % window == 0, previous);
                previous = term;
                sTerm.set(term);

                int df = WritableUtils.readVInt(dfIn);
                // cf is a long and the reducer in this file writes it with writeVLong, so it
                // must be read with readVLong; readVInt rejects values beyond the int range.
                long cf = WritableUtils.readVLong(cfIn);
                if (df > this.mDfThreshold) {
                    stats.set(df, cf);
                    outputCollector.collect(sTerm, stats);
                }
            }
        }

        /**
         * Reads one dictionary entry. A full entry is {@code [length][bytes]}; a
         * prefix-encoded entry is {@code [suffix length][prefix length][suffix bytes]} and is
         * resolved against the previously decoded term. Lengths are single unsigned bytes.
         */
        private static String readTerm(FSDataInputStream in, boolean fullTerm, String previous) throws IOException {
            int byteCount = in.readUnsignedByte();
            if (fullTerm) {
                byte[] termBytes = new byte[byteCount];
                in.readFully(termBytes);
                // NOTE(review): decoding uses the platform default charset, mirroring the
                // getBytes() call on the writing side in this file.
                return new String(termBytes);
            }
            int prefixLength = in.readUnsignedByte();
            byte[] suffixBytes = new byte[byteCount];
            in.readFully(suffixBytes);
            return previous.substring(0, prefixLength) + new String(suffixBytes);
        }
    }

    /* loaded from: input_file:ivory/core/index/MergeGlobalStatsAcrossIndexSegments$MyReducer.class */
    /**
     * Reducer that sums the (df, cf) pairs of each term and writes three side files under
     * {@code Ivory.DataOutputPath}: a prefix-encoded term file ({@code dict.terms}) and the
     * per-term df and cf files ({@code dict.df}, {@code dict.cf}). Nothing is sent to the
     * output collector.
     */
    private static class MyReducer extends MapReduceBase implements Reducer<Text, PairOfIntLong, Text, Text> {
        String termsFile;
        String dfStatsFile;
        String cfStatsFile;
        /** Term count written as the header of all three files; read from the job configuration. */
        int nTerms;
        /** Every window-th term is stored in full; the others are stored as prefix length + suffix. */
        int window;
        FileSystem fileSys;
        FSDataOutputStream termsOut;
        FSDataOutputStream dfStatsOut;
        FSDataOutputStream cfStatsOut;
        /** Number of terms written so far. */
        int curKeyIndex = 0;
        /** Previously written term, used to compute the shared prefix of the next one. */
        String lastKey = "";

        private MyReducer() {
        }

        /**
         * Closes the three output streams. Streams that were never created are skipped and
         * a failure closing one stream does not prevent the others from being closed.
         */
        public void close() throws IOException {
            try {
                super.close();
            } finally {
                try {
                    closeIfOpen(this.termsOut);
                } finally {
                    try {
                        closeIfOpen(this.dfStatsOut);
                    } finally {
                        closeIfOpen(this.cfStatsOut);
                    }
                }
            }
        }

        /** Closes the stream unless it is null (i.e. configure() never got as far as creating it). */
        private static void closeIfOpen(FSDataOutputStream stream) throws IOException {
            if (stream != null) {
                stream.close();
            }
        }

        /**
         * Resolves the output file names, creates the three files (overwriting existing
         * ones) and writes their headers.
         *
         * @throws RuntimeException wrapping the underlying IOException if the file system
         *         cannot be obtained or the files cannot be created
         */
        public void configure(JobConf jobConf) {
            // The two failure modes are handled in separate try blocks: with nested blocks the
            // outer catch would swallow the inner exception and report the wrong message.
            try {
                this.fileSys = FileSystem.get(jobConf);
            } catch (IOException e) {
                throw new RuntimeException("error in fileSys", e);
            }

            String outputDir = jobConf.get("Ivory.DataOutputPath");
            this.termsFile = outputDir + "/dict.terms";
            this.dfStatsFile = outputDir + "/dict.df";
            this.cfStatsFile = outputDir + "/dict.cf";
            this.nTerms = jobConf.getInt("Ivory.IndexNumberOfTerms", 0);
            this.window = 8;
            LOG.info("Ivory.PrefixEncodedTermsFile: " + this.termsFile);
            LOG.info("Ivory.DFStatsFile: " + this.dfStatsFile);
            LOG.info("Ivory.CFStatsFile: " + this.cfStatsFile);
            LOG.info("Ivory.IndexNumberOfTerms: " + this.nTerms);
            LOG.info("Ivory.ForwardIndexWindow: " + this.window);

            try {
                this.termsOut = this.fileSys.create(new Path(this.termsFile), true);
                this.dfStatsOut = this.fileSys.create(new Path(this.dfStatsFile), true);
                this.cfStatsOut = this.fileSys.create(new Path(this.cfStatsFile), true);
                // Headers: term count (+ window size for the term file).
                this.termsOut.writeInt(this.nTerms);
                this.termsOut.writeInt(this.window);
                this.dfStatsOut.writeInt(this.nTerms);
                this.cfStatsOut.writeInt(this.nTerms);
            } catch (IOException e) {
                throw new RuntimeException("error in creating files", e);
            }
        }

        /**
         * Sums df and cf over all values of one term and appends the term and its totals to
         * the three output files.
         *
         * @param text            the term
         * @param it              partial (df, cf) pairs, one per contributing index segment
         * @param outputCollector unused; all output goes to the side files
         * @param reporter        unused
         * @throws IOException      if writing to any output file fails
         * @throws RuntimeException if a length does not fit the single-byte length fields
         */
        public void reduce(Text text, Iterator<PairOfIntLong> it, OutputCollector<Text, Text> outputCollector, Reporter reporter) throws IOException {
            String term = text.toString();
            int df = 0;
            long cf = 0;
            while (it.hasNext()) {
                PairOfIntLong partial = it.next();
                df += partial.getLeftElement();
                cf += partial.getRightElement();
            }
            LOG.info(text + " " + df + " " + cf);

            if (this.curKeyIndex % this.window == 0) {
                // Window start: store the whole term as [length][bytes].
                byte[] termBytes = term.getBytes();
                // The length is stored in one byte that the mapper in this file decodes as
                // unsigned; anything larger would be silently truncated and corrupt the file.
                if (termBytes.length > 255) {
                    throw new RuntimeException("prefix/suffix length overflow");
                }
                this.termsOut.writeByte((byte) termBytes.length);
                this.termsOut.write(termBytes, 0, termBytes.length);
            } else {
                // Inside a window: store [suffix length][prefix length][suffix bytes].
                int prefix = PrefixEncodedLexicographicallySortedDictionary.getPrefix(this.lastKey, term);
                byte[] suffixBytes = term.substring(prefix).getBytes();
                if (prefix > 127 || suffixBytes.length > 127) {
                    throw new RuntimeException("prefix/suffix length overflow");
                }
                this.termsOut.writeByte((byte) suffixBytes.length);
                this.termsOut.writeByte((byte) prefix);
                this.termsOut.write(suffixBytes, 0, suffixBytes.length);
            }
            this.lastKey = term;
            this.curKeyIndex++;
            WritableUtils.writeVInt(this.dfStatsOut, df);
            WritableUtils.writeVLong(this.cfStatsOut, cf);
        }
    }

    /**
     * Returns the configuration keys this tool declares as required.
     *
     * @return the shared {@link #RequiredParameters} array itself, not a copy
     */
    public String[] getRequiredParameters() {
        return MergeGlobalStatsAcrossIndexSegments.RequiredParameters;
    }

    /**
     * Creates the tool around the given configuration, which is passed straight to the
     * {@code PowerTool} superclass constructor.
     *
     * @param configuration configuration the tool will read its parameters from
     */
    public MergeGlobalStatsAcrossIndexSegments(Configuration configuration) {
        super(configuration);
    }

    /**
     * Merges global statistics of several index segments in two MapReduce passes followed by
     * a local aggregation step:
     * <ol>
     *   <li>a counting job (identity reducer) whose REDUCE_INPUT_GROUPS counter gives the
     *       number of distinct terms across all segments;</li>
     *   <li>the output path is deleted, then a merge job writes the merged term/df/cf files,
     *       with the term count passed in as {@code Ivory.IndexNumberOfTerms};</li>
     *   <li>collection length and document count are summed over all segments and written,
     *       together with the average document length, to the output path.</li>
     * </ol>
     *
     * @return always 0
     * @throws IllegalStateException if the term count does not fit in an int
     * @throws Exception if file system access or either job fails
     */
    public int runTool() throws Exception {
        JobConf countConf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);
        FileSystem fileSystem = FileSystem.get(countConf);
        String collectionName = countConf.get(Constants.CollectionName);
        String[] indexPaths = countConf.get("Ivory.IndexPaths").split(",");
        String outputPath = countConf.get("Ivory.DataOutputPath");

        // One segment path per line; NLineInputFormat feeds these lines to the mapper.
        // NOTE(review): the fixed /tmp location is shared by concurrent runs and never removed.
        Path pathsFile = new Path("/tmp/index-paths.txt");
        FSDataOutputStream pathsOut = fileSystem.create(pathsFile, true);
        try {
            for (String indexPath : indexPaths) {
                pathsOut.write((indexPath + "\n").getBytes());
            }
        } finally {
            pathsOut.close();
        }

        // Pass 1: count distinct terms.
        LOG.info("Job: ComputeNumberOfTermsAcrossIndexSegments");
        configureJob(countConf, "ComputeNumberOfTermsAcrossIndexSegments:" + collectionName, pathsFile, IdentityReducer.class);
        long countStart = System.currentTimeMillis();
        RunningJob countJob = JobClient.runJob(countConf);
        LOG.info("Job Finished in " + ((System.currentTimeMillis() - countStart) / 1000.0d) + " seconds");
        long numTerms = countJob.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", 6, "REDUCE_INPUT_GROUPS").getCounter();
        LOG.info("total number of terms in global dictionary = " + numTerms);
        if (numTerms > Integer.MAX_VALUE) {
            // The reducer reads this value with getInt and writes it as an int file header.
            throw new IllegalStateException("number of terms does not fit in an int: " + numTerms);
        }

        fileSystem.delete(new Path(outputPath), true);

        // Pass 2: write the merged dictionary and statistics.
        JobConf mergeConf = new JobConf(getConf(), MergeGlobalStatsAcrossIndexSegments.class);
        LOG.info("Job: MergeGlobalStatsAcrossIndexSegments");
        configureJob(mergeConf, "MergeGlobalStatsAcrossIndexSegments:" + collectionName, pathsFile, MyReducer.class);
        mergeConf.setInt("Ivory.IndexNumberOfTerms", (int) numTerms);
        long mergeStart = System.currentTimeMillis();
        JobClient.runJob(mergeConf);
        LOG.info("Job Finished in " + ((System.currentTimeMillis() - mergeStart) / 1000.0d) + " seconds");

        // Aggregate collection-level statistics over all segments.
        long totalLength = 0;
        int totalDocs = 0;
        for (String indexPath : indexPaths) {
            LOG.info("reading stats for " + indexPath);
            RetrievalEnvironment segmentEnv = new RetrievalEnvironment(indexPath, fileSystem);
            long segmentLength = segmentEnv.readCollectionLength();
            int segmentDocs = segmentEnv.readCollectionDocumentCount();
            LOG.info(" - CollectionLength: " + segmentLength);
            LOG.info(" - CollectionDocumentCount: " + segmentDocs);
            totalLength += segmentLength;
            totalDocs += segmentDocs;
        }
        // Float division: a zero document count yields NaN rather than throwing.
        float averageLength = ((float) totalLength) / totalDocs;
        LOG.info("all index segments: ");
        LOG.info(" - CollectionLength: " + totalLength);
        LOG.info(" - CollectionDocumentCount: " + totalDocs);
        LOG.info(" - AverageDocumentLenght: " + averageLength);

        RetrievalEnvironment outputEnv = new RetrievalEnvironment(outputPath, fileSystem);
        outputEnv.writeCollectionAverageDocumentLength(averageLength);
        outputEnv.writeCollectionLength(totalLength);
        outputEnv.writeCollectionDocumentCount(totalDocs);
        return 0;
    }

    /** Applies the settings shared by both passes; only the job name and reducer differ. */
    private static void configureJob(JobConf conf, String jobName, Path input, Class<? extends Reducer> reducerClass) {
        conf.setJobName(jobName);
        FileInputFormat.addInputPath(conf, input);
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.set("mapred.child.java.opts", "-Xmx2048m");
        conf.setInputFormat(NLineInputFormat.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(PairOfIntLong.class);
        conf.setOutputFormat(NullOutputFormat.class);
        conf.setMapperClass(MyMapper.class);
        conf.setReducerClass(reducerClass);
    }

    /**
     * Command-line entry point. Expects, in order: collection name, output path, df
     * threshold, then one or more index segment paths. The segment paths are joined with
     * commas into {@code Ivory.IndexPaths} before the tool is run.
     *
     * @param strArr command-line arguments as described above
     * @throws Exception if the df threshold is not an integer or the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        Configuration conf = new Configuration();
        if (strArr.length < 4) {
            System.err.println("Usage: [collection-name] [output-path] [df-threshold] [index1] [index2] ...");
            System.exit(-1);
        }

        final String collectionName = strArr[0];
        final String outputPath = strArr[1];
        final int dfThreshold = Integer.parseInt(strArr[2]);

        LOG.info("Merging global statistics across index segments...");
        LOG.info(" CollectionName: " + collectionName);
        LOG.info(" OutputPath: " + outputPath);
        LOG.info(" DfThreshold: " + dfThreshold);
        LOG.info(" IndexPaths: ");

        // Everything from the fourth argument on is an index path.
        StringBuilder joinedPaths = new StringBuilder();
        for (int argIndex = 3; argIndex < strArr.length; argIndex++) {
            String indexPath = strArr[argIndex];
            LOG.info("    Adding" + indexPath);
            if (argIndex > 3) {
                joinedPaths.append(",");
            }
            joinedPaths.append(indexPath);
        }

        conf.set(Constants.CollectionName, collectionName);
        conf.set("Ivory.IndexPaths", joinedPaths.toString());
        conf.set("Ivory.DataOutputPath", outputPath);
        conf.setInt("Ivory.DfThreshold", dfThreshold);

        MergeGlobalStatsAcrossIndexSegments tool = new MergeGlobalStatsAcrossIndexSegments(conf);
        tool.run();
    }
}
