package com.datasalt.pangool.tuplemr.mapred.lib.input;

import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVTokenizer;
import java.io.IOException;
import java.io.Serializable;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datasalt/pangool/tuplemr/mapred/lib/input/TupleTextInputFormat.class */
public class TupleTextInputFormat extends FileInputFormat<ITuple, NullWritable> implements Serializable {
    /** Character value 0. Judging by the name, a sentinel for "no quote character"; its consumers are not visible in this file. */
    public static final char NO_QUOTE_CHARACTER = 0;
    /** Character value 0. Judging by the name, a sentinel for "no escape character"; its consumers are not visible in this file. */
    public static final char NO_ESCAPE_CHARACTER = 0;
    /** Character value 0. Judging by the name, a sentinel for "no separator character"; its consumers are not visible in this file. */
    public static final char NO_SEPARATOR_CHARACTER = 0;
    /** Which of the two input flavours this format was constructed for; decides which reader constructor is used. */
    private final InputType type;
    /** Schema handed to the constructor; rejected up front if it contains OBJECT or BYTES fields. */
    private final Schema schema;
    /** Header flag, forwarded unchanged to the record reader. */
    private final boolean hasHeader;
    /** Strict-quotes flag, forwarded to the CSV reader; always false for fixed-width input. */
    private final boolean strictQuotes;
    /** Field separator for CSV input; char 0 for fixed-width input. */
    private final char separatorCharacter;
    /** Quote character for CSV input; char 0 for fixed-width input. */
    private final char quoteCharacter;
    /** Escape character for CSV input; char 0 for fixed-width input. */
    private final char escapeCharacter;
    /** Column selector for CSV input; stays null for fixed-width input. */
    private FieldSelector fieldSelector;
    /** Null-marker string forwarded to the tokenizers; may be null (see {@link #NO_NULL_STRING}). */
    private String nullString;
    /** Flat array of (min, max) position pairs, one pair per schema field; stays null for CSV input. */
    private int[] fixedWidthFieldsPositions;
    /** SLF4J logger; note the nested reader additionally declares its own commons-logging logger. */
    private static final Logger log = LoggerFactory.getLogger(TupleTextInputFormat.class);
    /** A null reference; by its name, meant to be passed as the null-string argument when no null marker is wanted. */
    public static final String NO_NULL_STRING = null;

    /* loaded from: input_file:com/datasalt/pangool/tuplemr/mapred/lib/input/TupleTextInputFormat$FieldSelector.class */
    /**
     * Translates a position into the index stored for that position.
     *
     * <p>When constructed with no indexes the selector is the identity mapping.
     * The index array is stored as given (no copy is made).
     */
    public static class FieldSelector implements Serializable {
        private Integer[] fieldIndexesToSelect;
        /** Selector holding an empty index array, so {@link #select(int)} returns its argument. */
        public static final FieldSelector NONE = new FieldSelector(new Integer[0]);

        /**
         * @param numArr indexes to select, in order; may be empty
         */
        public FieldSelector(Integer... numArr) {
            this.fieldIndexesToSelect = numArr;
        }

        /**
         * @param i position to translate
         * @return {@code i} itself when no indexes were configured, otherwise the
         *         configured index stored at position {@code i}
         */
        public int select(int i) {
            if (this.fieldIndexesToSelect.length == 0) {
                // Nothing configured: identity mapping.
                return i;
            }
            return this.fieldIndexesToSelect[i];
        }
    }

    /* loaded from: input_file:com/datasalt/pangool/tuplemr/mapred/lib/input/TupleTextInputFormat$InputType.class */
    /**
     * The two input flavours this format can be constructed for. Each constructor
     * of the enclosing class (and of the reader) fixes one of these values.
     */
    private enum InputType {
        /** Set by the constructors that take separator, quote and escape characters. */
        CSV,
        /** Set by the constructors that take an array of field positions. */
        FIXED_WIDTH
    }

    /* loaded from: input_file:com/datasalt/pangool/tuplemr/mapred/lib/input/TupleTextInputFormat$TupleTextInputReader.class */
    public static class TupleTextInputReader extends RecordReader<ITuple, NullWritable> {
        /** Commons-logging logger used by {@code processNumber}; {@code initialize} logs through the outer class's SLF4J logger instead. */
        private static final Log LOG = LogFactory.getLog(TupleTextInputReader.class);
        /** Codec factory; null until {@code initialize} creates it. */
        private CompressionCodecFactory compressionCodecs;
        /** Tokenizer chosen by the constructor: nullable-CSV or fixed-width. */
        private CSVTokenizer tokenizer;
        /** CSV strategy built in {@code initialize} from the separator and quote characters. */
        private CSVStrategy csvStrategy;
        /** Input flavour fixed by the constructor. */
        private final InputType type;
        /** Separator character; char 0 for fixed-width input. */
        private final Character separator;
        /** Quote character; char 0 for fixed-width input. */
        private final Character quote;
        /** Whether the input is declared to start with a header line. */
        private final boolean hasHeader;
        /** Column selector; null for fixed-width input. */
        private final FieldSelector fieldSelector;
        /** Not assigned anywhere in the visible code; presumably the line buffer used by nextKeyValue — confirm against the original source. */
        private Text line;
        /** Line reader over the split's stream; null until {@code initialize} runs. */
        private LineReader in;
        /** Read from "mapred.linerecordreader.maxlength" in {@code initialize}; not used by any visible code. */
        private int maxLineLength;
        /** Byte offset where reading starts; adjusted in {@code initialize} after skipping the first line. */
        private long start;
        /** Byte offset where the split ends; Long.MAX_VALUE when a compression codec is in use. */
        private long end;
        /** Current byte position, used by {@code getProgress}; initialised to {@code start}. */
        private long position;
        /** Schema handed to the constructor. */
        private final Schema schema;
        /** Tuple returned as the current key; not assigned anywhere in the visible code. */
        private ITuple tuple;

        /**
         * Creates a reader for CSV input.
         *
         * @param schema        schema stored on the reader
         * @param z             whether the input has a header line
         * @param z2            flag passed as the second argument of the nullable CSV tokenizer
         *                      (the enclosing format passes its strict-quotes setting here)
         * @param ch            separator character
         * @param ch2           quote character
         * @param ch3           escape character handed to the tokenizer; must not be null
         * @param fieldSelector column selector stored on the reader
         * @param str           null-marker string handed to the tokenizer
         */
        public TupleTextInputReader(Schema schema, boolean z, boolean z2, Character ch, Character ch2, Character ch3, FieldSelector fieldSelector, String str) {
            // Split bookkeeping defaults; the real values are set in initialize().
            this.compressionCodecs = null;
            this.position = 0L;
            this.start = 0L;
            this.end = Integer.MAX_VALUE;

            // CSV-specific configuration.
            this.type = InputType.CSV;
            this.schema = schema;
            this.hasHeader = z;
            this.fieldSelector = fieldSelector;
            this.separator = ch;
            this.quote = ch2;
            this.tokenizer = new NullableCSVTokenizer(ch3.charValue(), z2, str);
        }

        /**
         * Creates a reader for fixed-width input. Separator and quote are set to
         * char 0 and no field selector is used.
         *
         * @param schema schema stored on the reader
         * @param iArr   field positions handed to the fixed-width tokenizer
         * @param z      whether the input has a header line
         * @param str    null-marker string handed to the tokenizer
         */
        public TupleTextInputReader(Schema schema, int[] iArr, boolean z, String str) {
            // Split bookkeeping defaults; the real values are set in initialize().
            this.compressionCodecs = null;
            this.position = 0L;
            this.start = 0L;
            this.end = Integer.MAX_VALUE;

            // Fixed-width configuration: no separator, quote or selector apply.
            this.type = InputType.FIXED_WIDTH;
            this.schema = schema;
            this.hasHeader = z;
            this.fieldSelector = null;
            this.separator = Character.valueOf((char) 0);
            this.quote = Character.valueOf((char) 0);
            this.tokenizer = new FixedWidthCSVTokenizer(iArr, str);
        }

        /**
         * Closes the underlying line reader, if one was opened.
         *
         * @throws IOException if closing the reader fails
         */
        public void close() throws IOException {
            if (this.in == null) {
                // initialize() never ran (or failed before opening the stream).
                return;
            }
            this.in.close();
        }

        /**
         * Returns the current key, i.e. the reader's {@code tuple} field.
         *
         * <p>The decompiler renamed this method to {@code m40getCurrentKey} when it
         * merged it with its bridge method, which left the abstract
         * {@code RecordReader.getCurrentKey()} unimplemented. It is restored here
         * under its real name.
         *
         * @return the current tuple; may be null if none has been read yet
         */
        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            return this.tuple;
        }

        /**
         * Decompiler-generated alias of {@link #getCurrentKey()}, retained so any
         * code referring to the renamed method keeps compiling.
         *
         * @return same value as {@link #getCurrentKey()}
         */
        public ITuple m40getCurrentKey() throws IOException, InterruptedException {
            return getCurrentKey();
        }

        /**
         * Returns the current value, which is always the {@code NullWritable}
         * singleton because all data travels in the key.
         *
         * <p>The decompiler renamed this method to {@code m39getCurrentValue} when it
         * merged it with its bridge method, which left the abstract
         * {@code RecordReader.getCurrentValue()} unimplemented. It is restored here
         * under its real name.
         *
         * @return {@code NullWritable.get()}
         */
        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        /**
         * Decompiler-generated alias of {@link #getCurrentValue()}, retained so any
         * code referring to the renamed method keeps compiling.
         *
         * @return same value as {@link #getCurrentValue()}
         */
        public NullWritable m39getCurrentValue() throws IOException, InterruptedException {
            return getCurrentValue();
        }

        /**
         * Reports how far through the split the reader is.
         *
         * @return 0 for an empty split, otherwise the fraction of the
         *         {@code start..end} range covered by {@code position}, capped at 1
         */
        public float getProgress() throws IOException, InterruptedException {
            if (this.start == this.end) {
                // Empty range: avoid dividing by zero.
                return 0.0f;
            }
            float consumed = (float) (this.position - this.start);
            float total = (float) (this.end - this.start);
            return Math.min(1.0f, consumed / total);
        }

        /**
         * Opens the split's file and positions the reader on the first line it owns.
         *
         * <p>The header decision is taken only after {@code start} has been loaded
         * from the split. Previously it was evaluated while {@code start} still held
         * its constructor default of 0, so "is this the first split" was always true.
         *
         * @param inputSplit         the split to read; must be a {@code FileSplit}
         * @param taskAttemptContext supplies the job configuration
         * @throws IOException if the file cannot be opened, sought or read
         */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
            org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
            Configuration configuration = taskAttemptContext.getConfiguration();
            this.maxLineLength = configuration.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

            // Load the split bounds first: the header check below depends on them.
            this.start = fileSplit.getStart();
            this.end = this.start + fileSplit.getLength();

            // Only the split that begins at byte 0 can contain the header line.
            boolean skipFirstLine = this.hasHeader && this.start == 0;
            this.csvStrategy = new CSVStrategy(this.separator.charValue(), this.quote.charValue(), '#', skipFirstLine, true);

            Path path = fileSplit.getPath();
            TupleTextInputFormat.log.info("Initializing input split from path: " + path + " , start: " + this.start + ", end: " + this.end);
            this.compressionCodecs = new CompressionCodecFactory(configuration);
            CompressionCodec codec = this.compressionCodecs.getCodec(path);
            FSDataInputStream open = path.getFileSystem(configuration).open(fileSplit.getPath());
            if (codec != null) {
                // Compressed input is not splittable (see isSplitable), so drop the byte limit.
                this.in = new LineReader(codec.createInputStream(open), configuration);
                this.end = Long.MAX_VALUE;
            } else {
                if (this.start != 0) {
                    // Not the first split: back up one byte and discard the first line read.
                    // NOTE(review): this looks like the usual LineRecordReader convention where
                    // the previous split reads that line — confirm against nextKeyValue.
                    skipFirstLine = true;
                    this.start--;
                    open.seek(this.start);
                }
                this.in = new LineReader(open, configuration);
            }
            if (skipFirstLine) {
                // Advance start by whatever readLine returns for the discarded line.
                this.start += this.in.readLine(new Text(), 0, (int) Math.min(2147483647L, this.end - this.start));
            }
            this.position = this.start;
        }

        /**
         * Placeholder for the record-advancing method.
         *
         * <p>The decompiler could not reconstruct the original body (see the JADX
         * warnings below), so this version unconditionally throws. The real logic
         * must be recovered from the original class file or source before this
         * reader can be used.
         *
         * @return never returns normally in this decompiled form
         * @throws UnsupportedOperationException always
         */
        /* JADX WARN: Failed to find 'out' block for switch in B:25:0x00d9. Please report as an issue. */
        /* JADX WARN: Removed duplicated region for block: B:55:0x0233  */
        /* JADX WARN: Removed duplicated region for block: B:57:0x023f A[RETURN] */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        public boolean nextKeyValue() throws java.io.IOException {
            /*
                Method dump skipped, instructions count: 577
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat.TupleTextInputReader.nextKeyValue():boolean");
        }

        /**
         * Parses {@code str} as a number of the given type and stores it in the tuple.
         *
         * <p>The text is trimmed and a leading '+' is removed first. Empty text, or
         * text that fails to parse, stores {@code null}; a parse failure is also
         * logged as a warning.
         *
         * @param type   numeric field type: INT, LONG, FLOAT or DOUBLE
         * @param iTuple tuple to write into
         * @param i      position within the tuple
         * @param str    raw text of the field; must not be null
         * @throws RuntimeException if {@code type} is not one of the four numeric types
         */
        private static void processNumber(Schema.Field.Type type, ITuple iTuple, int i, String str) {
            final String normalized = TupleTextInputFormat.adaptNumber(str.trim());
            if (normalized.isEmpty()) {
                iTuple.set(i, (Object) null);
                return;
            }
            try {
                final Object parsed;
                switch (type) {
                    case INT:
                        parsed = Integer.valueOf(Integer.parseInt(normalized));
                        break;
                    case LONG:
                        parsed = Long.valueOf(Long.parseLong(normalized));
                        break;
                    case FLOAT:
                        parsed = Float.valueOf(Float.parseFloat(normalized));
                        break;
                    case DOUBLE:
                        parsed = Double.valueOf(Double.parseDouble(normalized));
                        break;
                    default:
                        throw new RuntimeException("Imposible case. You found a bug!");
                }
                iTuple.set(i, parsed);
            } catch (NumberFormatException e) {
                // Malformed numbers are tolerated: warn and store null instead.
                LOG.warn("Invalid number [" + normalized + "]. Using null.");
                iTuple.set(i, (Object) null);
            }
        }
    }

    /**
     * Decides whether a file may be split.
     *
     * @param jobContext supplies the configuration used to look up codecs
     * @param path       file under consideration
     * @return true only when no compression codec is registered for {@code path}
     */
    protected boolean isSplitable(JobContext jobContext, Path path) {
        CompressionCodecFactory codecs = new CompressionCodecFactory(jobContext.getConfiguration());
        CompressionCodec codec = codecs.getCodec(path);
        return codec == null;
    }

    /**
     * Creates an input format for CSV input.
     *
     * @param schema        schema of the tuples; OBJECT and BYTES fields are rejected
     * @param z             whether the input has a header line
     * @param z2            strict-quotes flag
     * @param ch            separator character; must not be null
     * @param ch2           quote character; must not be null
     * @param ch3           escape character; must not be null
     * @param fieldSelector column selector forwarded to the reader
     * @param str           null-marker string forwarded to the reader
     * @throws IllegalArgumentException if the schema contains an unsupported field type
     */
    public TupleTextInputFormat(Schema schema, boolean z, boolean z2, Character ch, Character ch2, Character ch3, FieldSelector fieldSelector, String str) {
        // Validate before storing anything.
        checkSchema(schema);

        this.type = InputType.CSV;
        this.schema = schema;
        this.hasHeader = z;
        this.strictQuotes = z2;
        this.separatorCharacter = ch.charValue();
        this.quoteCharacter = ch2.charValue();
        this.escapeCharacter = ch3.charValue();
        this.fieldSelector = fieldSelector;
        this.nullString = str;
        // Fixed-width positions do not apply to CSV input.
        this.fixedWidthFieldsPositions = null;
    }

    /**
     * Creates an input format for fixed-width input. Separator, quote and escape
     * are set to char 0, strict quotes is off and no field selector is used.
     *
     * @param schema schema of the tuples; OBJECT and BYTES fields are rejected
     * @param iArr   (min, max) position pairs, two entries per schema field; stored without copying
     * @param z      whether the input has a header line
     * @param str    null-marker string forwarded to the reader
     * @throws IllegalArgumentException if the schema contains an unsupported field
     *         type, or {@code iArr} has the wrong length or an inverted range
     */
    public TupleTextInputFormat(Schema schema, int[] iArr, boolean z, String str) {
        // Validate both inputs before storing anything (schema first, then positions).
        checkSchema(schema);
        checkFixedWithFields(schema, iArr);

        this.type = InputType.FIXED_WIDTH;
        this.schema = schema;
        this.hasHeader = z;
        this.nullString = str;
        this.fixedWidthFieldsPositions = iArr;

        // CSV-only settings are neutralised.
        this.strictQuotes = false;
        this.separatorCharacter = (char) 0;
        this.quoteCharacter = (char) 0;
        this.escapeCharacter = (char) 0;
        this.fieldSelector = null;
    }

    /**
     * Validates the fixed-width position array against the schema.
     *
     * @param schema schema whose field count determines the expected array length
     * @param iArr   flat array of (min, max) pairs, one pair per field
     * @throws IllegalArgumentException if the array length is not twice the number
     *         of fields, or if any pair has max smaller than min
     */
    private void checkFixedWithFields(Schema schema, int[] iArr) {
        final int fieldCount = schema.getFields().size();
        final int expectedLength = fieldCount * 2;
        if (iArr.length != expectedLength) {
            throw new IllegalArgumentException("Array with field positions for fixed width fields of incorrect size [" + iArr.length + "]. Expected size [" + expectedLength + "] for the schema [" + schema + "] of size [" + fieldCount + "]");
        }
        // Length is now known to be 2 * fieldCount, so walk it pair by pair.
        for (int pair = 0; pair < fieldCount; pair++) {
            final int min = iArr[2 * pair];
            final int max = iArr[2 * pair + 1];
            if (max < min) {
                throw new IllegalArgumentException("Incorrect field range [" + min + "," + max + "]. max position cannot be smaller than min position.");
            }
        }
    }

    /**
     * Rejects schemas containing field types this format cannot handle.
     *
     * @param schema schema to inspect
     * @throws IllegalArgumentException if any field is of type OBJECT or BYTES
     */
    private void checkSchema(Schema schema) {
        for (Schema.Field field : schema.getFields()) {
            final Schema.Field.Type fieldType = field.getType();
            final boolean supported = !(fieldType.equals(Schema.Field.Type.OBJECT) || fieldType.equals(Schema.Field.Type.BYTES));
            if (supported) {
                continue;
            }
            throw new IllegalArgumentException(getClass().getName() + " doesn't support Pangool types " + Schema.Field.Type.OBJECT + " or " + Schema.Field.Type.BYTES);
        }
    }

    /**
     * Normalises numeric text for parsing: trims surrounding whitespace and drops
     * a single leading '+'.
     *
     * <p>JADX note: the access modifier was changed from private by the decompiler.
     *
     * @param str raw text; must not be null
     * @return the trimmed text without its leading '+', if it had one
     */
    public static String adaptNumber(String str) {
        final String stripped = str.trim();
        if (stripped.startsWith("+")) {
            return stripped.substring(1);
        }
        return stripped;
    }

    /** @return the schema this input format was constructed with */
    public Schema getSchema() {
        return schema;
    }

    /** @return the header flag given at construction */
    public boolean isHasHeader() {
        return hasHeader;
    }

    /** @return the separator character; char 0 for fixed-width input */
    public char getSeparatorCharacter() {
        return separatorCharacter;
    }

    /** @return the quote character; char 0 for fixed-width input */
    public char getQuoteCharacter() {
        return quoteCharacter;
    }

    /** @return the escape character; char 0 for fixed-width input */
    public char getEscapeCharacter() {
        return escapeCharacter;
    }

    /** @return the input flavour (CSV or FIXED_WIDTH) fixed by the constructor used */
    public InputType getType() {
        return type;
    }

    /** @return the strict-quotes flag; always false for fixed-width input */
    public boolean isStrictQuotes() {
        return strictQuotes;
    }

    /** @return the null-marker string given at construction; may be null */
    public String getNullString() {
        return nullString;
    }

    /**
     * @return the internal position array itself (not a copy), or null for CSV input
     */
    public int[] getFixedWidthFieldsPositions() {
        return fixedWidthFieldsPositions;
    }

    /**
     * Builds the record reader matching this format's input flavour.
     *
     * @param inputSplit         not used here; the reader receives it in {@code initialize}
     * @param taskAttemptContext not used here
     * @return a CSV reader when the type is CSV, otherwise a fixed-width reader
     */
    public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        if (this.type == InputType.CSV) {
            Character separator = Character.valueOf(this.separatorCharacter);
            Character quote = Character.valueOf(this.quoteCharacter);
            Character escape = Character.valueOf(this.escapeCharacter);
            return new TupleTextInputReader(this.schema, this.hasHeader, this.strictQuotes, separator, quote, escape, this.fieldSelector, this.nullString);
        }
        return new TupleTextInputReader(this.schema, this.fixedWidthFieldsPositions, this.hasHeader, this.nullString);
    }
}
