package com.marklogic.contentpump;

import com.marklogic.contentpump.utilities.CSVParserFormatter;
import com.marklogic.contentpump.utilities.DelimitedSplit;
import com.marklogic.contentpump.utilities.EncodingUtil;
import com.marklogic.mapreduce.DocumentURIWithSourceInfo;
import com.marklogic.mapreduce.MarkLogicConstants;
import com.marklogic.mapreduce.utilities.TextArrayWritable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/* loaded from: input_file:com/marklogic/contentpump/DelimitedTextInputFormat.class */
/**
 * Input format for delimited text (CSV-style) files.
 *
 * <p>When the {@code ConfigConstants.CONF_SPLIT_INPUT} option is enabled, every
 * file split produced by the parent class is wrapped in a {@link DelimitedSplit}
 * that also carries the first record (header row) of the file it belongs to,
 * and records are read with a {@code SplitDelimitedTextReader}. Otherwise the
 * parent's splits are returned unchanged and a {@code DelimitedTextReader} is
 * used.
 */
public class DelimitedTextInputFormat extends FileAndDirectoryInputFormat<DocumentURIWithSourceInfo, Text> {
    public static final Log LOG = LogFactory.getLog(DelimitedTextInputFormat.class);

    /**
     * Creates the record reader matching the configured split mode.
     *
     * @param inputSplit the split to be read (not inspected here)
     * @param taskAttemptContext supplies the job configuration
     * @return a {@code SplitDelimitedTextReader} when split input is enabled,
     *         otherwise a {@code DelimitedTextReader}
     */
    public RecordReader<DocumentURIWithSourceInfo, Text> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return isSplitInput(taskAttemptContext.getConfiguration()) ? new SplitDelimitedTextReader() : new DelimitedTextReader();
    }

    /**
     * Tells whether splitting of individual delimited files is enabled.
     *
     * @param configuration the job configuration
     * @return the boolean value of {@code CONF_SPLIT_INPUT}, {@code false} when unset
     */
    private boolean isSplitInput(Configuration configuration) {
        return configuration.getBoolean(ConfigConstants.CONF_SPLIT_INPUT, false);
    }

    /**
     * Computes the input splits for the job.
     *
     * <p>If split input is disabled the parent's splits are returned as is. If
     * the parent already produced at least {@code SPLIT_COUNT_LIMIT} splits
     * (a constant defined outside this class), split input is switched off in
     * the job configuration and the parent's splits are returned. Otherwise
     * each split is converted into a {@link DelimitedSplit} carrying the header
     * row of its file.
     *
     * @param jobContext the job whose inputs are being split
     * @return the splits to process
     * @throws IOException if a file cannot be opened or its header cannot be read
     */
    @Override // com.marklogic.contentpump.FileAndDirectoryInputFormat
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        Configuration configuration = jobContext.getConfiguration();
        boolean splitInput = isSplitInput(configuration);
        List<InputSplit> splits = super.getSplits(jobContext);
        if (!splitInput) {
            return splits;
        }
        if (splits.size() >= SPLIT_COUNT_LIMIT) {
            LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
            // Written as a plain boolean so that isSplitInput(), which reads
            // the key with getBoolean(), reliably sees the option as disabled.
            configuration.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, false);
            return splits;
        }
        LOG.info(splits.size() + " DelimitedSplits generated");

        // The delimiter comes from the job configuration and is therefore the
        // same for every file; resolve it once.
        char delimiter = resolveDelimiterChar(configuration);

        List<InputSplit> delimitedSplits = new ArrayList<InputSplit>(splits.size());
        // Header of the most recently opened file. Splits with a non-zero start
        // reuse the header read from the preceding split that starts at offset 0.
        // NOTE(review): this relies on the parent returning the offset-0 split
        // of a file before that file's other splits - confirm against parent.
        Text[] header = new Text[0];
        for (InputSplit split : splits) {
            FileSplit fileSplit = (FileSplit) split;
            Path path = fileSplit.getPath();
            if (fileSplit.getStart() == 0) {
                header = readHeaderRow(path, configuration, delimiter);
            }
            // Each split gets its own array instance, as before.
            delimitedSplits.add(new DelimitedSplit(new TextArrayWritable(header.clone()), path, fileSplit.getStart(), fileSplit.getLength(), fileSplit.getLocations()));
        }
        return delimitedSplits;
    }

    /**
     * Reads the configured field delimiter.
     *
     * @param configuration the job configuration
     * @return the single delimiter character, or {@code '\0'} (after logging an
     *         error) when the configured value is not exactly one character
     */
    private char resolveDelimiterChar(Configuration configuration) {
        String configured = configuration.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
        if (configured.length() == 1) {
            return configured.charAt(0);
        }
        // Report the offending configured value, not the unset char.
        LOG.error("Incorrect delimitor: " + configured + ". Expects single character.");
        return 0;
    }

    /**
     * Reads the first delimited record of a file.
     *
     * @param path the file to open
     * @param configuration the job configuration (file system and encoding)
     * @param delimiter the field delimiter
     * @return the fields of the first record, or an empty array when the file
     *         contains no record
     * @throws IOException if the file cannot be read or the record reports a
     *         size different from the number of values it yields
     */
    private Text[] readHeaderRow(Path path, Configuration configuration, char delimiter) throws IOException {
        FileSystem fileSystem = path.getFileSystem(configuration);
        FSDataInputStream stream = fileSystem.open(path);
        try {
            String encoding = configuration.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, "UTF-8");
            InputStreamReader reader = new InputStreamReader(stream, encoding);
            CSVParser parser = new CSVParser(reader, CSVParserFormatter.getFormat(delimiter, '\"', true, true));
            Iterator<CSVRecord> records = parser.iterator();
            if (!records.hasNext()) {
                // Empty file: do not leak the header of a previously read file.
                return new Text[0];
            }
            CSVRecord firstRecord = records.next();
            int size = firstRecord.size();
            String[] fields = new String[size];
            Iterator<String> values = firstRecord.iterator();
            for (int i = 0; i < size; i++) {
                if (!values.hasNext()) {
                    throw new IOException("Record size doesn't match the real size");
                }
                fields[i] = values.next();
            }
            EncodingUtil.handleBOMUTF8(fields, 0);
            Text[] header = new Text[size];
            for (int i = 0; i < size; i++) {
                header[i] = new Text(fields[i]);
            }
            return header;
        } finally {
            // Closing the underlying stream releases the file handle on every
            // path, including failures while constructing the reader or parser.
            stream.close();
        }
    }
}
