package edu.umd.hooka;

import edu.umd.hooka.CorpusInfo;
import edu.umd.hooka.alignment.IndexedFloatArray;
import java.io.IOException;
import java.rmi.UnexpectedException;
import java.util.Iterator;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount.class */
public class PhraseExtractAndCount {

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$MapClass1.class */
    public static class MapClass1 extends MapReduceBase implements Mapper<IntWritable, PhrasePair, PhrasePair, IntWritable> {
        private static final IntWritable one = new IntWritable(1);

        public void map(IntWritable intWritable, PhrasePair phrasePair, OutputCollector<PhrasePair, IntWritable> outputCollector, Reporter reporter) throws IOException {
            Iterator<PhrasePair> it = phrasePair.extractConsistentPhrasePairs(7).iterator();
            while (it.hasNext()) {
                outputCollector.collect(it.next(), one);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((IntWritable) obj, (PhrasePair) obj2, (OutputCollector<PhrasePair, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$MapClass2.class */
    public static class MapClass2 extends MapReduceBase implements Mapper<PhrasePair, IntWritable, PhrasePair, IntWritable> {
        private static final Phrase empty = new Phrase();

        MapClass2() {
            empty.setLanguage(0);
        }

        public void map(PhrasePair phrasePair, IntWritable intWritable, OutputCollector<PhrasePair, IntWritable> outputCollector, Reporter reporter) throws IOException {
            PhrasePair phrasePair2 = new PhrasePair(phrasePair.getF(), phrasePair.getE());
            Phrase e = phrasePair2.getE();
            phrasePair2.setAlignment(null);
            phrasePair2.setE(empty);
            outputCollector.collect(phrasePair2, intWritable);
            phrasePair2.setF(e);
            outputCollector.collect(phrasePair2, intWritable);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((PhrasePair) obj, (IntWritable) obj2, (OutputCollector<PhrasePair, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$MapClass3.class */
    public static class MapClass3 extends MapReduceBase implements Mapper<PhrasePair, IntWritable, PhrasePair, IntWritable> {
        public void map(PhrasePair phrasePair, IntWritable intWritable, OutputCollector<PhrasePair, IntWritable> outputCollector, Reporter reporter) throws IOException {
            if (phrasePair.getE().size() == 0) {
                outputCollector.collect(phrasePair, intWritable);
            } else {
                outputCollector.collect(phrasePair, intWritable);
                outputCollector.collect(phrasePair.getTranspose(), intWritable);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((PhrasePair) obj, (IntWritable) obj2, (OutputCollector<PhrasePair, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$MapClass4.class */
    public static class MapClass4 extends MapReduceBase implements Mapper<PhrasePair, FloatWritable, PhrasePair, IndexedFloatArray> {
        IndexedFloatArray scores = new IndexedFloatArray(2);

        public void map(PhrasePair phrasePair, FloatWritable floatWritable, OutputCollector<PhrasePair, IndexedFloatArray> outputCollector, Reporter reporter) throws IOException {
            float f = floatWritable.get();
            if (phrasePair.getF().getLanguage() != 0) {
                this.scores.set(0, f);
                this.scores.set(1, 0.0f);
                outputCollector.collect(phrasePair, this.scores);
            } else {
                PhrasePair transpose = phrasePair.getTranspose();
                this.scores.set(0, 0.0f);
                this.scores.set(1, f);
                outputCollector.collect(transpose, this.scores);
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((PhrasePair) obj, (FloatWritable) obj2, (OutputCollector<PhrasePair, IndexedFloatArray>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$Reduce.class */
    public static class Reduce extends MapReduceBase implements Reducer<PhrasePair, IntWritable, PhrasePair, IntWritable> {
        IntWritable res = new IntWritable();

        public void reduce(PhrasePair phrasePair, Iterator<IntWritable> it, OutputCollector<PhrasePair, IntWritable> outputCollector, Reporter reporter) throws IOException {
            int i = 0;
            while (true) {
                int i2 = i;
                if (!it.hasNext()) {
                    this.res.set(i2);
                    outputCollector.collect(phrasePair, this.res);
                    return;
                }
                i = i2 + it.next().get();
            }
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PhrasePair) obj, (Iterator<IntWritable>) it, (OutputCollector<PhrasePair, IntWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$ReducePT.class */
    public static class ReducePT extends MapReduceBase implements Reducer<PhrasePair, IntWritable, PhrasePair, FloatWritable> {
        float marginal = 0.0f;
        int need_to_cover = 0;
        FloatWritable prob = new FloatWritable(0.0f);

        public void reduce(PhrasePair phrasePair, Iterator<IntWritable> it, OutputCollector<PhrasePair, FloatWritable> outputCollector, Reporter reporter) throws IOException {
            if (!it.hasNext()) {
                throw new UnexpectedException("no values for " + phrasePair);
            }
            int i = it.next().get();
            if (this.need_to_cover == 0) {
                if (phrasePair.getE().size() != 0) {
                    throw new UnexpectedException("Expected empty e-side: " + phrasePair);
                }
                this.need_to_cover = i;
                if (i < 1) {
                    throw new UnexpectedException("Bad count: " + i);
                }
                this.marginal = i;
                return;
            }
            if (phrasePair.getE().size() == 0) {
                throw new UnexpectedException("unaccounted for counts: " + this.need_to_cover + " key=" + phrasePair);
            }
            this.prob.set(i / this.marginal);
            outputCollector.collect(phrasePair, this.prob);
            this.need_to_cover -= i;
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PhrasePair) obj, (Iterator<IntWritable>) it, (OutputCollector<PhrasePair, FloatWritable>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:edu/umd/hooka/PhraseExtractAndCount$ReduceSumScores.class */
    public static class ReduceSumScores extends MapReduceBase implements Reducer<PhrasePair, IndexedFloatArray, PhrasePair, IndexedFloatArray> {
        IndexedFloatArray scores = new IndexedFloatArray(2);

        public void reduce(PhrasePair phrasePair, Iterator<IndexedFloatArray> it, OutputCollector<PhrasePair, IndexedFloatArray> outputCollector, Reporter reporter) throws IOException {
            this.scores.clear();
            while (it.hasNext()) {
                this.scores.plusEquals(it.next());
            }
            outputCollector.collect(phrasePair, this.scores);
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((PhrasePair) obj, (Iterator<IndexedFloatArray>) it, (OutputCollector<PhrasePair, IndexedFloatArray>) outputCollector, reporter);
        }
    }

    /**
     * Driver: configures and runs four MapReduce jobs back to back for the
     * {@code ARABIC_500k} corpus.
     *
     * <ol>
     *   <li>{@code MapClass1} + {@code Reduce}: aligned bitext to "ppc.paircount.tmp";</li>
     *   <li>{@code MapClass2} + {@code Reduce}: "ppc.paircount.tmp" to "ppc.marginals.tmp";</li>
     *   <li>{@code MapClass3} + {@code ReducePT}: both temporary outputs to "ppc.ptable.tmp";</li>
     *   <li>{@code MapClass4} + {@code ReduceSumScores}: "ppc.ptable.tmp" to the corpus' local phrase table path.</li>
     * </ol>
     *
     * All three temporary paths and the final output path are deleted before
     * the first job starts.
     *
     * @param strArr command-line arguments; not read
     * @throws IOException if a file-system operation or a job fails
     */
    public static void main(String[] strArr) throws IOException {
        // Every job uses the same number of map and reduce tasks.
        final int taskCount = 38;

        CorpusInfo corpus = CorpusInfo.getCorpus(CorpusInfo.Corpus.ARABIC_500k);
        Path pairCountDir = new Path("ppc.paircount.tmp");
        Path marginalsDir = new Path("ppc.marginals.tmp");
        Path ptableDir = new Path("ppc.ptable.tmp");

        // ---- Job 1: extract phrase pairs and count them ----
        JobConf countJob = new JobConf(PhraseExtractAndCount.class);

        // Remove output left over from a previous run; the file system is resolved from the first job's configuration.
        FileSystem fs = FileSystem.get(countJob);
        fs.delete(pairCountDir);
        fs.delete(marginalsDir);
        fs.delete(ptableDir);
        fs.delete(corpus.getLocalPhraseTable());

        countJob.setJobName("PhraseExtractAndCount");
        countJob.setOutputKeyClass(PhrasePair.class);
        countJob.setOutputValueClass(IntWritable.class);
        countJob.setMapperClass(MapClass1.class);
        countJob.setCombinerClass(Reduce.class);
        countJob.setReducerClass(Reduce.class);
        countJob.setNumMapTasks(taskCount);
        countJob.setNumReduceTasks(taskCount);
        FileInputFormat.setInputPaths(countJob, corpus.getAlignedBitext());
        FileOutputFormat.setOutputPath(countJob, pairCountDir);
        countJob.setInputFormat(SequenceFileInputFormat.class);
        countJob.setOutputFormat(SequenceFileOutputFormat.class);
        JobClient.runJob(countJob);

        // ---- Job 2: marginal counts derived from the pair counts ----
        JobConf marginalsJob = new JobConf(PhraseExtractAndCount.class);
        marginalsJob.setJobName("PhraseExtractAndCount_marginals");
        marginalsJob.setInputFormat(SequenceFileInputFormat.class);
        marginalsJob.setOutputKeyClass(PhrasePair.class);
        marginalsJob.setOutputValueClass(IntWritable.class);
        marginalsJob.setMapperClass(MapClass2.class);
        marginalsJob.setCombinerClass(Reduce.class);
        marginalsJob.setReducerClass(Reduce.class);
        marginalsJob.setNumMapTasks(taskCount);
        marginalsJob.setNumReduceTasks(taskCount);
        FileInputFormat.setInputPaths(marginalsJob, pairCountDir);
        FileOutputFormat.setOutputPath(marginalsJob, marginalsDir);
        marginalsJob.setOutputFormat(SequenceFileOutputFormat.class);
        JobClient.runJob(marginalsJob);

        // ---- Job 3: score pairs against marginals (reads both temporary outputs) ----
        JobConf scoreJob = new JobConf(PhraseExtractAndCount.class);
        scoreJob.setJobName("PhraseExtractAndCount_ptscore");
        scoreJob.setInputFormat(SequenceFileInputFormat.class);
        scoreJob.setOutputFormat(SequenceFileOutputFormat.class);
        scoreJob.setOutputKeyClass(PhrasePair.class);
        // Map output values are ints while the reducer writes floats.
        scoreJob.setMapOutputValueClass(IntWritable.class);
        scoreJob.setOutputValueClass(FloatWritable.class);
        scoreJob.setMapperClass(MapClass3.class);
        scoreJob.setReducerClass(ReducePT.class);
        scoreJob.setNumMapTasks(taskCount);
        scoreJob.setNumReduceTasks(taskCount);
        FileInputFormat.addInputPath(scoreJob, pairCountDir);
        FileInputFormat.addInputPath(scoreJob, marginalsDir);
        FileOutputFormat.setOutputPath(scoreJob, ptableDir);
        JobClient.runJob(scoreJob);

        // ---- Job 4: combine the scores into the final phrase table ----
        // No output format is set for this job, so the JobConf default applies.
        JobConf combineJob = new JobConf(PhraseExtractAndCount.class);
        combineJob.setJobName("PhraseExtractAndCount_ptcombine");
        combineJob.setInputFormat(SequenceFileInputFormat.class);
        combineJob.setOutputKeyClass(PhrasePair.class);
        combineJob.setOutputValueClass(IndexedFloatArray.class);
        combineJob.setMapperClass(MapClass4.class);
        combineJob.setReducerClass(ReduceSumScores.class);
        combineJob.setNumMapTasks(taskCount);
        combineJob.setNumReduceTasks(taskCount);
        FileInputFormat.setInputPaths(combineJob, ptableDir);
        FileOutputFormat.setOutputPath(combineJob, corpus.getLocalPhraseTable());
        JobClient.runJob(combineJob);
    }
}
