package gobblin.compaction.dataset;

import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import gobblin.compaction.mapreduce.MRCompactor;
import gobblin.configuration.State;
import gobblin.util.DatasetFilterUtils;
import gobblin.util.HadoopUtils;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:gobblin/compaction/dataset/DatasetsFinder.class */
/**
 * Base class for compaction dataset finders.
 *
 * <p>On construction it reads the compaction settings from the supplied {@link State}: the input,
 * destination and temporary output directories, the blacklist / whitelist patterns, the high- and
 * normal-priority patterns and the recompaction flag. It also obtains the {@link FileSystem} to
 * work against. Subclasses implement {@link #findDistinctDatasets()}.
 */
public abstract class DatasetsFinder implements gobblin.dataset.DatasetsFinder<Dataset> {
    private static final Logger log = LoggerFactory.getLogger(DatasetsFinder.class);

    /** Priority returned by {@link #getDatasetPriority(String)} for names matching the high-priority patterns. */
    public static final double HIGH_PRIORITY = 3.0d;
    /** Priority returned by {@link #getDatasetPriority(String)} for names matching the normal-priority patterns. */
    public static final double NORMAL_PRIORITY = 2.0d;
    /** Priority returned by {@link #getDatasetPriority(String)} for all other names. */
    public static final double LOW_PRIORITY = 1.0d;

    /** Separates {@code DATASETS:THRESHOLD} entries in the per-dataset recompact threshold property. */
    public static final char DATASETS_WITH_DIFFERENT_RECOMPACT_THRESHOLDS_SEPARATOR = ';';
    /** Separates the dataset patterns that share one threshold inside a single entry. */
    public static final char DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR = ',';
    /** Separates the dataset patterns from their threshold inside a single entry. */
    public static final char DATASETS_AND_RECOMPACT_THRESHOLD_SEPARATOR = ':';

    /** Threshold returned by {@link #getDatasetRecompactThreshold(String)} when no configured entry matches. */
    private static final double DEFAULT_RECOMPACT_THRESHOLD = 1.0d;

    private static final Splitter THRESHOLD_ENTRY_SPLITTER =
            Splitter.on(DATASETS_WITH_DIFFERENT_RECOMPACT_THRESHOLDS_SEPARATOR).trimResults().omitEmptyStrings();
    private static final Splitter DATASETS_AND_THRESHOLD_SPLITTER =
            Splitter.on(DATASETS_AND_RECOMPACT_THRESHOLD_SEPARATOR).trimResults().omitEmptyStrings();
    private static final Splitter DATASET_PATTERN_SPLITTER =
            Splitter.on(DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR).trimResults().omitEmptyStrings();

    protected final State state;
    protected final Configuration conf;
    protected final FileSystem fs;
    protected final String inputDir;
    protected final String destDir;
    protected final String tmpOutputDir;
    protected final List<Pattern> blacklist;
    protected final List<Pattern> whitelist;
    protected final List<Pattern> highPriority;
    protected final List<Pattern> normalPriority;
    protected final boolean recompactDatasets;

    /**
     * Creates a finder configured from {@code state}.
     *
     * @param state job state holding the compaction properties
     * @throws IllegalArgumentException if the input or destination directory property is missing
     * @throws RuntimeException if the file system cannot be obtained
     */
    public DatasetsFinder(State state) {
        // state and conf MUST be assigned first: every helper below reads this.state, and
        // getFileSystem() additionally reads this.conf. Field initializers would run before this
        // constructor body and therefore see both as null.
        this.state = state;
        this.conf = HadoopUtils.getConfFromState(state);
        this.fs = getFileSystem();
        this.inputDir = getInputDir();
        this.destDir = getDestDir();
        this.tmpOutputDir = getTmpOutputDir();
        this.blacklist = getBlacklist();
        this.whitelist = getWhitelist();
        this.highPriority = getHighPriorityPatterns();
        this.normalPriority = getNormalPriorityPatterns();
        this.recompactDatasets = getRecompactDatasets();
    }

    /**
     * Finds the datasets to process, without duplicates.
     *
     * @return the distinct datasets found
     * @throws IOException if the lookup fails
     */
    public abstract Set<Dataset> findDistinctDatasets() throws IOException;

    /**
     * Returns the result of {@link #findDistinctDatasets()} copied into a new list.
     *
     * @return a new list holding the distinct datasets
     * @throws IOException if the lookup fails
     */
    public List<Dataset> findDatasets() throws IOException {
        return Lists.newArrayList(findDistinctDatasets());
    }

    /**
     * Returns the configured destination directory as a {@link Path}.
     *
     * @return path built from {@link #destDir}
     */
    public Path commonDatasetRoot() {
        return new Path(this.destDir);
    }

    /** Reads the required input directory property; fails if it is absent. */
    private String getInputDir() {
        Preconditions.checkArgument(this.state.contains(MRCompactor.COMPACTION_INPUT_DIR), "Missing required property compaction.input.dir");
        return this.state.getProp(MRCompactor.COMPACTION_INPUT_DIR);
    }

    /** Reads the required destination directory property; fails if it is absent. */
    private String getDestDir() {
        Preconditions.checkArgument(this.state.contains(MRCompactor.COMPACTION_DEST_DIR), "Missing required property compaction.dest.dir");
        return this.state.getProp(MRCompactor.COMPACTION_DEST_DIR);
    }

    /** Reads the temporary output directory property, falling back to the MRCompactor default. */
    private String getTmpOutputDir() {
        return this.state.getProp(MRCompactor.COMPACTION_TMP_DEST_DIR, MRCompactor.DEFAULT_COMPACTION_TMP_DEST_DIR);
    }

    /**
     * Obtains the file system: the one addressed by the configured file system URI when that
     * property is present, otherwise the default one for {@link #conf}.
     */
    private FileSystem getFileSystem() {
        try {
            if (this.state.contains(MRCompactor.COMPACTION_FILE_SYSTEM_URI)) {
                URI fsUri = URI.create(this.state.getProp(MRCompactor.COMPACTION_FILE_SYSTEM_URI));
                return FileSystem.get(fsUri, this.conf);
            }
            return FileSystem.get(this.conf);
        } catch (IOException e) {
            throw new RuntimeException("Failed to get filesystem for datasetsFinder.", e);
        }
    }

    /** Builds the blacklist patterns from the corresponding list property (empty by default). */
    private List<Pattern> getBlacklist() {
        return DatasetFilterUtils.getPatternsFromStrings(this.state.getPropAsList(MRCompactor.COMPACTION_BLACKLIST, ""));
    }

    /** Builds the whitelist patterns from the corresponding list property (empty by default). */
    private List<Pattern> getWhitelist() {
        return DatasetFilterUtils.getPatternsFromStrings(this.state.getPropAsList(MRCompactor.COMPACTION_WHITELIST, ""));
    }

    /** Builds the high-priority patterns from the corresponding list property (empty by default). */
    private List<Pattern> getHighPriorityPatterns() {
        return DatasetFilterUtils.getPatternsFromStrings(this.state.getPropAsList(MRCompactor.COMPACTION_HIGH_PRIORITY_TOPICS, ""));
    }

    /** Builds the normal-priority patterns from the corresponding list property (empty by default). */
    private List<Pattern> getNormalPriorityPatterns() {
        return DatasetFilterUtils.getPatternsFromStrings(this.state.getPropAsList(MRCompactor.COMPACTION_NORMAL_PRIORITY_TOPICS, ""));
    }

    /** Reads the recompact-from-destination-paths flag ({@code false} by default). */
    private boolean getRecompactDatasets() {
        return this.state.getPropAsBoolean(MRCompactor.COMPACTION_RECOMPACT_FROM_DEST_PATHS, false);
    }

    /**
     * Returns the priority of a dataset based on the configured priority patterns.
     *
     * @param datasetName name checked against the high- and then the normal-priority patterns
     * @return {@link #HIGH_PRIORITY}, {@link #NORMAL_PRIORITY} or {@link #LOW_PRIORITY}
     */
    public double getDatasetPriority(String datasetName) {
        if (DatasetFilterUtils.stringInPatterns(datasetName, this.highPriority)) {
            return HIGH_PRIORITY;
        }
        if (DatasetFilterUtils.stringInPatterns(datasetName, this.normalPriority)) {
            return NORMAL_PRIORITY;
        }
        return LOW_PRIORITY;
    }

    /**
     * Parses a value of the form {@code PATTERNS:THRESHOLD;PATTERNS:THRESHOLD;...} into a map from
     * the pattern part to its threshold. Entries that do not have exactly two parts, or whose
     * threshold is not a number, are logged and skipped.
     *
     * @param datasetsAndRecompactThresholds raw property value; may be empty
     * @return thresholds keyed by pattern part, iterating in configuration order
     */
    private Map<String, Double> getDatasetRegexAndRecompactThreshold(String datasetsAndRecompactThresholds) {
        // Insertion-ordered so that, when several entries match a dataset, the first configured wins.
        Map<String, Double> thresholdsByPatterns = Maps.newLinkedHashMap();
        for (String entry : THRESHOLD_ENTRY_SPLITTER.splitToList(datasetsAndRecompactThresholds)) {
            List<String> parts = DATASETS_AND_THRESHOLD_SPLITTER.splitToList(entry);
            if (parts.size() != 2) {
                log.error("Invalid form (DATASET_NAME:THRESHOLD) in compaction.latedata.threshold.for.recompact.per.topic.");
                continue;
            }
            try {
                thresholdsByPatterns.put(parts.get(0), Double.valueOf(Double.parseDouble(parts.get(1))));
            } catch (NumberFormatException e) {
                // Treat an unparsable number like any other malformed entry instead of aborting.
                log.error("Invalid threshold '" + parts.get(1) + "' for '" + parts.get(0) + "'; entry ignored.", e);
            }
        }
        return thresholdsByPatterns;
    }

    /**
     * Returns the recompaction threshold configured for a dataset.
     *
     * @param datasetName name matched against the comma-separated patterns of each configured entry
     * @return the threshold of the first matching entry, or {@code 1.0} when none matches
     */
    public double getDatasetRecompactThreshold(String datasetName) {
        String configured = this.state.getProp(MRCompactor.COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET, "");
        for (Map.Entry<String, Double> entry : getDatasetRegexAndRecompactThreshold(configured).entrySet()) {
            List<Pattern> patterns =
                    DatasetFilterUtils.getPatternsFromStrings(DATASET_PATTERN_SPLITTER.splitToList(entry.getKey()));
            if (DatasetFilterUtils.stringInPatterns(datasetName, patterns)) {
                return entry.getValue().doubleValue();
            }
        }
        return DEFAULT_RECOMPACT_THRESHOLD;
    }
}
