package org.apache.pinot.tools.anonymizer;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.pinot.pql.parsers.Pql2Compiler;
import org.apache.pinot.pql.parsers.pql2.ast.AstNode;
import org.apache.pinot.pql.parsers.pql2.ast.BetweenPredicateAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.BooleanOperatorAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.ComparisonPredicateAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.FunctionCallAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.GroupByAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.IdentifierAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.InPredicateAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.LiteralAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.OrderByAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.OrderByExpressionAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.OutputColumnAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.OutputColumnListAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.PredicateAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.PredicateListAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.PredicateParenthesisGroupAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.SelectAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.StarColumnListAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.StarExpressionAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.StringLiteralAstNode;
import org.apache.pinot.pql.parsers.pql2.ast.WhereAstNode;
import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.ImmutableSegment;
import org.apache.pinot.segment.spi.SegmentMetadata;
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.MetricFieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.TimeFieldSpec;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.utils.ReadMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer.class */
public class PinotDataAndQueryAnonymizer {
    /** Logger for this class; the nested QueryGenerator logs through it as well. */
    private static final Logger LOGGER = LoggerFactory.getLogger(PinotDataAndQueryAnonymizer.class);
    /** Base added to a random float when a derived FLOAT value is generated. */
    private static final float FLOAT_BASE_VALUE = 100.23f;
    /** Base added to a random double when a derived DOUBLE value is generated. */
    private static final double DOUBLE_BASE_VALUE = 1000.2375d;
    /** Name of the column-mapping file kept in the output directory. */
    private static final String COLUMN_MAPPING_FILE_KEY = "columns.mapping";
    /** Separator between the original and the derived column name on each mapping line. */
    private static final String COLUMN_MAPPING_SEPARATOR = ":";
    /** Directory that receives the column mapping, the serialized dictionaries and the Avro files. */
    private final String _outputDir;
    /** Number of Avro files to generate; set to the number of entries found in the segment directory. */
    private int _numFilesToGenerate;
    /** Directory whose entries are treated as segment directories. */
    private final String _segmentDir;
    /** Prefix of each generated Avro file name; the file index is appended to it. */
    private final String _filePrefix;
    /** Holder of the per-column global dictionaries (map based or array based, chosen in the constructor). */
    private final GlobalDictionaries _globalDictionaries;
    /** Original column name to derived column name. */
    private final Map<String, String> _origToDerivedColumnsMap;
    /** Measures the time spent building the global dictionaries. */
    private final Stopwatch _timeToBuildDictionaries = Stopwatch.createUnstarted();
    /** Measures the time spent generating the Avro files. */
    private final Stopwatch _timeToGenerateAvroFiles = Stopwatch.createUnstarted();
    /** Pinot schema taken from the first segment metadata that is read; null until then. */
    private Schema _pinotSchema = null;
    /** Avro schema derived from the Pinot schema; null until the Pinot schema has been read. */
    private org.apache.avro.Schema _avroSchema = null;
    /** Original column name to its field spec. */
    private final Map<String, FieldSpec> _columnToFieldSpecMap;
    /** Columns that get a global dictionary, mapped to the Integer supplied by the caller (logged as "Cardinality"). */
    private final Map<String, Integer> _globalDictionaryColumns;
    /** Columns whose values are copied to the output unchanged. */
    private final Set<String> _columnsNotAnonymized;
    /** Entries of the segment directory as returned by File.list(). */
    private String[] _segmentDirectories;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.pinot.tools.anonymizer.PinotDataAndQueryAnonymizer$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer$1.class */
    /**
     * Decompiler rendering of the synthetic switch-map class for {@code FieldSpec.DataType}.
     *
     * <p>The array is indexed by the enum constant's ordinal and holds the case label (1-6) that
     * the switch over the data type in the enclosing class dispatches on. Entries that are never
     * assigned keep the array default of 0 and therefore match no case label.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // One slot per DataType constant; only the six types below receive a non-zero label.
        static final /* synthetic */ int[] $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType = new int[FieldSpec.DataType.values().length];

        static {
            // Each assignment is guarded on its own, so a constant that cannot be resolved at
            // runtime (NoSuchFieldError) only leaves its own slot unset.
            try {
                // INT -> case 1
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.INT.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                // LONG -> case 2
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.LONG.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                // FLOAT -> case 3
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.FLOAT.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                // DOUBLE -> case 4
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.DOUBLE.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                // STRING -> case 5
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.STRING.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                // BYTES -> case 6
                $SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[FieldSpec.DataType.BYTES.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
        }
    }

    /* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer$FilterColumnExtractor.class */
    /**
     * Collects the names of the columns referenced in the WHERE clauses of a file of PQL queries
     * (one query per line).
     */
    public static class FilterColumnExtractor {
        /**
         * Reads every line of the query file, compiles it and gathers the filter columns.
         *
         * @param str directory containing the query file
         * @param str2 name of the query file inside that directory
         * @return the set of column identifiers found in the WHERE clauses of all queries
         * @throws Exception if the file cannot be read or a query cannot be compiled
         */
        public static Set<String> extractColumnsUsedInFilter(String str, String str2) throws Exception {
            Set<String> filterColumns = new HashSet<>();
            // try-with-resources so the file handle is released even when a query fails to compile.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str + "/" + str2))))) {
                String query;
                while ((query = reader.readLine()) != null) {
                    examineWhereClause(query, filterColumns);
                }
            }
            return filterColumns;
        }

        /** Compiles one query and walks the predicate list of its WHERE clause, if it has one. */
        private static void examineWhereClause(String str, Set<String> set) {
            // Iterate as the general node type and narrow only after the instanceof check;
            // narrowing in the loop header would cast every child unconditionally.
            for (AstNode child : Pql2Compiler.buildAst(str).getChildren()) {
                if (child instanceof WhereAstNode) {
                    parsePredicate((PredicateListAstNode) child.getChildren().get(0), set);
                }
            }
        }

        /**
         * Adds the identifier of every predicate in the list to {@code set}, recursing into
         * parenthesized groups. Only even positions are visited: predicates and boolean
         * operators alternate in the child list, as the matching rewrite code in
         * QueryGenerator relies on.
         */
        private static void parsePredicate(PredicateListAstNode predicateListAstNode, Set<String> set) {
            List<? extends AstNode> children = predicateListAstNode.getChildren();
            int size = children.size();
            for (int i = 0; i < size; i += 2) {
                PredicateAstNode predicate = (PredicateAstNode) children.get(i);
                if (predicate instanceof PredicateParenthesisGroupAstNode) {
                    parsePredicate((PredicateListAstNode) predicate.getChildren().get(0), set);
                } else {
                    set.add(predicate.getIdentifier());
                }
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer$PredicateValueNotFoundException.class */
    /**
     * Signals that a literal used in a query predicate has no entry in the global dictionary of
     * its column, so no derived value can be substituted for it.
     */
    public static class PredicateValueNotFoundException extends Exception {
        /** The original predicate value that could not be looked up. */
        final Object _origValue;

        /**
         * @param obj the original predicate value that was not found; may be null
         */
        PredicateValueNotFoundException(Object obj) {
            // Give the exception a message so logs and stack traces identify the missing value.
            super("No derived value found for original predicate value: " + obj);
            this._origValue = obj;
        }
    }

    /* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer$QueryGenerator.class */
    public static class QueryGenerator {
        /** Name of the file, created in the query directory, that receives the rewritten queries. */
        private static final String GENERATED_QUERIES_FILE_NAME = "queries.generated";
        /** Directory holding the column mapping file and the per-column ".dict" files. */
        String _outputDir;
        /** Directory holding the input query file; the generated query file is written here too. */
        String _queryDir;
        /** Name of the input query file inside the query directory. */
        String _queryFileName;
        /** Table name placed in the FROM clause of every generated query. */
        String _tableName;
        /** Columns whose ".dict" file is loaded; columns that are not anonymized are removed in the constructor. */
        private final Set<String> _globalDictionaryColumns;
        /** Columns whose predicate values are kept as they are. */
        private final Set<String> _columnsNotAnonymized;
        /** Measures the time taken by generateQueries(). */
        private final Stopwatch _generateQueryWatch = Stopwatch.createUnstarted();
        /** Per column: original value to derived value, as read from the column's ".dict" file. */
        private final Map<String, Map<Object, Object>> _origToDerivedValueMap = new HashMap();
        /** Original column name to derived column name, as read from the column mapping file. */
        private final Map<String, String> _origToDerivedColumnsMap = new HashMap();

        /**
         * Creates a generator and loads the column mapping and global dictionaries from disk.
         *
         * @param str output directory holding the column mapping and dictionary files
         * @param str2 directory holding the query file
         * @param str3 name of the query file
         * @param str4 table name used in the generated queries
         * @param set columns with a global dictionary; entries also present in {@code set2} are
         *     removed from this set in place
         * @param set2 columns that are not anonymized
         * @throws Exception if the mapping or dictionary files cannot be read
         */
        public QueryGenerator(String str, String str2, String str3, String str4, Set<String> set, Set<String> set2) throws Exception {
            _outputDir = str;
            _queryDir = str2;
            _queryFileName = str3;
            _tableName = str4;
            _globalDictionaryColumns = set;
            _columnsNotAnonymized = set2;
            // A column that keeps its data needs no dictionary lookup.
            for (String retainedColumn : set2) {
                _globalDictionaryColumns.remove(retainedColumn);
            }
            loadGlobalDictionariesAndColumnMapping();
        }

        /**
         * Rewrites every query of the input query file (one per line) and writes the results to
         * the generated-queries file in the query directory.
         *
         * <p>A query whose predicate value has no dictionary entry is logged and skipped; it is
         * still included in the count that is logged at the end.
         *
         * @throws Exception if a file cannot be read or written, or a query cannot be rewritten
         *     for a reason other than a missing predicate value
         */
        public void generateQueries() throws Exception {
            this._generateQueryWatch.start();
            File queryFile = new File(this._queryDir + "/" + this._queryFileName);
            String generatedFile = this._queryDir + "/" + GENERATED_QUERIES_FILE_NAME;
            int numQueries = 0;
            // try-with-resources closes both files on every exit path, including failures.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(queryFile)));
                    PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(generatedFile)))) {
                String query;
                while ((query = reader.readLine()) != null) {
                    try {
                        generateQuery(query, writer);
                    } catch (PredicateValueNotFoundException e) {
                        PinotDataAndQueryAnonymizer.LOGGER.error("Unable to generate query for original query: {} . exception {}, original predicate not found {}", new Object[]{query, e, e._origValue});
                    }
                    numQueries++;
                }
                // PrintWriter swallows I/O errors; checkError() flushes and reports them.
                Preconditions.checkState(!writer.checkError(), "Failed to write generated queries to %s", generatedFile);
            }
            this._generateQueryWatch.stop();
            PinotDataAndQueryAnonymizer.LOGGER.info("Finished generating {} queries. Time taken {}secs. Please see generated query file in {}", new Object[]{Integer.valueOf(numQueries), Long.valueOf(this._generateQueryWatch.elapsed(TimeUnit.SECONDS)), this._queryDir});
        }

        /**
         * Rewrites a single query with derived column names and derived predicate values.
         *
         * @param str the original query text
         * @param printWriter destination for the rewritten query; may be null to skip writing
         * @return the rewritten query, trimmed
         * @throws Exception if the query uses an unsupported construct or a predicate value has
         *     no derived counterpart
         */
        @VisibleForTesting
        public String generateQuery(String str, PrintWriter printWriter) throws Exception {
            SelectAstNode selectNode = Pql2Compiler.buildAst(str);
            StringBuilder rewritten = new StringBuilder("SELECT ");
            boolean selectListDone = false;
            boolean whereDone = false;

            for (AstNode clause : selectNode.getChildren()) {
                if (clause instanceof OutputColumnListAstNode) {
                    Preconditions.checkState(!selectListDone, "Select list already rewritten");
                    rewritten.append(rewriteSelectList((OutputColumnListAstNode) clause));
                    rewritten.append(" FROM ").append(_tableName).append(" ");
                    selectListDone = true;
                } else if (clause instanceof WhereAstNode) {
                    Preconditions.checkState(selectListDone, "Select list should have been rewritten before rewriting WHERE");
                    Preconditions.checkState(!whereDone, "WHERE already rewritten");
                    rewritten.append(rewriteFilter((WhereAstNode) clause)).append(" ");
                    whereDone = true;
                } else if (clause instanceof GroupByAstNode) {
                    rewritten.append(rewriteGroupBy((GroupByAstNode) clause)).append(" ");
                } else if (clause instanceof StarColumnListAstNode) {
                    // SELECT * needs no column rewriting.
                    rewritten.append("* FROM ").append(_tableName).append(" ");
                    selectListDone = true;
                } else if (clause instanceof OrderByAstNode) {
                    rewritten.append(rewriteOrderBy((OrderByAstNode) clause)).append(" ");
                }
            }

            // LIMIT takes precedence over TOP when both flags are set.
            if (selectNode.isHasLimitClause()) {
                rewritten.append("LIMIT ").append(selectNode.getRecordLimit());
            } else if (selectNode.isHasTopClause()) {
                rewritten.append("TOP ").append(selectNode.getTopN());
            }

            String generatedQuery = rewritten.toString().trim();
            if (printWriter != null) {
                printWriter.println(generatedQuery);
            }
            return generatedQuery;
        }

        /**
         * Builds the GROUP BY clause with every column replaced by its derived name.
         *
         * @param groupByAstNode the GROUP BY node of the original query
         * @return the rewritten clause, e.g. {@code GROUP BY a, b}
         * @throws IllegalStateException if a child of the node is not an identifier
         */
        private String rewriteGroupBy(GroupByAstNode groupByAstNode) {
            StringBuilder sb = new StringBuilder("GROUP BY ");
            boolean first = true;
            // Iterate as AstNode so the precondition runs before the cast and can actually fire.
            for (AstNode child : groupByAstNode.getChildren()) {
                Preconditions.checkState(child instanceof IdentifierAstNode, "Expecting identifier as child node of group by");
                if (!first) {
                    sb.append(", ");
                }
                sb.append(getAnonymousColumnName(((IdentifierAstNode) child).getName()));
                first = false;
            }
            return sb.toString();
        }

        /**
         * Builds the ORDER BY clause with every column replaced by its derived name; the
         * ordering of each expression is kept.
         *
         * @param orderByAstNode the ORDER BY node of the original query
         * @return the rewritten clause, e.g. {@code ORDER BY a asc, b desc}
         * @throws IllegalStateException if a child of the node is not an order-by expression
         */
        private String rewriteOrderBy(OrderByAstNode orderByAstNode) {
            StringBuilder sb = new StringBuilder("ORDER BY ");
            boolean first = true;
            // Iterate as AstNode and narrow explicitly, mirroring rewriteGroupBy.
            for (AstNode child : orderByAstNode.getChildren()) {
                Preconditions.checkState(child instanceof OrderByExpressionAstNode, "Expecting order by expression as child node of order by");
                OrderByExpressionAstNode expression = (OrderByExpressionAstNode) child;
                if (!first) {
                    sb.append(", ");
                }
                sb.append(getAnonymousColumnName(expression.getColumn())).append(" ").append(expression.getOrdering());
                first = false;
            }
            return sb.toString();
        }

        /**
         * Rewrites the select list, replacing column names with their derived names.
         *
         * @param outputColumnListAstNode the output column list of the original query
         * @return the rewritten, comma-separated select list
         * @throws UnsupportedOperationException if a child is not an output column node
         */
        private String rewriteSelectList(OutputColumnListAstNode outputColumnListAstNode) {
            StringBuilder selectList = new StringBuilder();
            List<? extends AstNode> outputColumns = outputColumnListAstNode.getChildren();
            int numColumns = outputColumns.size();
            for (int position = 0; position < numColumns; position++) {
                AstNode outputColumn = outputColumns.get(position);
                if (!(outputColumn instanceof OutputColumnAstNode)) {
                    throw new UnsupportedOperationException("Invalid type of child node for OuptutColumnListAstNode");
                }
                List<? extends AstNode> expression = outputColumn.getChildren();
                Preconditions.checkState(expression.size() == 1, "Invalid number of children for output column ast node");
                // Top-level expressions have no parent, hence the null.
                rewriteSelectListColumn(expression.get(0), selectList, null, position, numColumns);
            }
            return selectList.toString();
        }

        /**
         * Appends one select-list expression to {@code sb}, recursing into function arguments.
         *
         * @param astNode the expression to rewrite: identifier, function call or star
         * @param sb receives the rewritten text
         * @param astNode2 the parent node; a function call when {@code astNode} is an argument,
         *     null for a top-level expression
         * @param i index of the enclosing top-level expression in the select list
         * @param i2 number of top-level expressions in the select list
         * @throws UnsupportedOperationException for any other node type
         */
        private void rewriteSelectListColumn(AstNode astNode, StringBuilder sb, AstNode astNode2, int i, int i2) {
            // ", " follows a top-level expression unless it is the last one; arguments of a
            // function never get it because the function itself joins them with ",".
            boolean needsSeparator = !(astNode2 instanceof FunctionCallAstNode) && i <= i2 - 2;

            if (astNode instanceof IdentifierAstNode) {
                sb.append(getAnonymousColumnName(((IdentifierAstNode) astNode).getName()));
                if (needsSeparator) {
                    sb.append(", ");
                }
            } else if (astNode instanceof FunctionCallAstNode) {
                FunctionCallAstNode function = (FunctionCallAstNode) astNode;
                sb.append(function.getName()).append("(");
                boolean firstArgument = true;
                for (AstNode argument : function.getChildren()) {
                    if (!firstArgument) {
                        sb.append(",");
                    }
                    rewriteSelectListColumn(argument, sb, function, i, i2);
                    firstArgument = false;
                }
                sb.append(needsSeparator ? "), " : ")");
            } else if (astNode instanceof StarExpressionAstNode) {
                sb.append("*");
            } else {
                throw new UnsupportedOperationException("Literals are not supported in output columns");
            }
        }

        /**
         * Builds the WHERE clause from the predicate list that is the first child of the node.
         *
         * @param whereAstNode the WHERE node of the original query
         * @return the rewritten clause, starting with {@code WHERE }
         * @throws Exception if a predicate cannot be rewritten
         */
        private String rewriteFilter(WhereAstNode whereAstNode) throws Exception {
            StringBuilder filter = new StringBuilder("WHERE ");
            PredicateListAstNode predicates = (PredicateListAstNode) whereAstNode.getChildren().get(0);
            parsePredicate(predicates, filter);
            return filter.toString();
        }

        /**
         * Appends the operator joining two predicates: " AND " when the operator's name is AND
         * (ignoring case), " OR " for anything else.
         */
        private void rewriteBooleanOperator(BooleanOperatorAstNode booleanOperatorAstNode, StringBuilder sb) {
            boolean isAnd = "AND".equalsIgnoreCase(booleanOperatorAstNode.name());
            sb.append(isAnd ? " AND " : " OR ");
        }

        /**
         * Appends one rewritten predicate to {@code sb}.
         *
         * <p>Comparison, BETWEEN and [NOT] IN predicates get the derived column name and derived
         * literal values; a parenthesized group is rewritten recursively inside "(" and ")".
         *
         * @param predicateAstNode the predicate to rewrite
         * @param sb receives the rewritten text
         * @throws UnsupportedOperationException for any other predicate type
         * @throws Exception if a literal cannot be rewritten
         */
        private void rewritePredicate(PredicateAstNode predicateAstNode, StringBuilder sb) throws Exception {
            String origColumn = predicateAstNode.getIdentifier();
            String derivedColumn = null;
            if (origColumn != null) {
                derivedColumn = getAnonymousColumnName(origColumn);
            }

            if (predicateAstNode instanceof ComparisonPredicateAstNode) {
                ComparisonPredicateAstNode comparison = (ComparisonPredicateAstNode) predicateAstNode;
                String operand = comparison.getOperand();
                LiteralAstNode value = comparison.getLiteral();
                sb.append(derivedColumn).append(" ").append(operand).append(" ");
                rewriteLiteral(origColumn, value, sb);
            } else if (predicateAstNode instanceof BetweenPredicateAstNode) {
                sb.append(derivedColumn).append(" ").append("BETWEEN ");
                boolean firstBound = true;
                for (AstNode bound : predicateAstNode.getChildren()) {
                    Preconditions.checkState(bound instanceof LiteralAstNode, "Child of BetweenAstNode should be literal");
                    if (!firstBound) {
                        sb.append(" AND ");
                    }
                    rewriteLiteral(origColumn, (LiteralAstNode) bound, sb);
                    firstBound = false;
                }
            } else if (predicateAstNode instanceof InPredicateAstNode) {
                String keyword = ((InPredicateAstNode) predicateAstNode).isNotInClause() ? " NOT IN " : " IN ";
                sb.append(derivedColumn).append(keyword).append("(");
                boolean firstValue = true;
                for (AstNode value : predicateAstNode.getChildren()) {
                    Preconditions.checkState(value instanceof LiteralAstNode, "Child of InAstNode should be literal");
                    if (!firstValue) {
                        sb.append(",");
                    }
                    rewriteLiteral(origColumn, (LiteralAstNode) value, sb);
                    firstValue = false;
                }
                sb.append(")");
            } else if (predicateAstNode instanceof PredicateParenthesisGroupAstNode) {
                sb.append("(");
                parsePredicate((PredicateListAstNode) predicateAstNode.getChildren().get(0), sb);
                sb.append(")");
            } else {
                throw new UnsupportedOperationException("predicate ast node: " + predicateAstNode.getClass() + " not supported");
            }
        }

        /**
         * Rewrites a predicate list into {@code sb}. Children alternate between predicates (even
         * positions) and the boolean operators joining them (odd positions).
         *
         * @throws Exception if a predicate cannot be rewritten
         */
        private void parsePredicate(PredicateListAstNode predicateListAstNode, StringBuilder sb) throws Exception {
            List<? extends AstNode> nodes = predicateListAstNode.getChildren();
            int count = nodes.size();
            for (int position = 0; position < count; position += 2) {
                rewritePredicate((PredicateAstNode) nodes.get(position), sb);
                int operatorPosition = position + 1;
                // The last predicate has no trailing operator.
                if (operatorPosition < count) {
                    rewriteBooleanOperator((BooleanOperatorAstNode) nodes.get(operatorPosition), sb);
                }
            }
        }

        /**
         * Appends the derived value for a literal; string literals are wrapped in double quotes.
         *
         * @param str original name of the column the literal is compared against
         * @param literalAstNode the literal of the original query
         * @param sb receives the rewritten literal
         * @throws Exception if no derived value exists for the literal
         */
        private void rewriteLiteral(String str, LiteralAstNode literalAstNode, StringBuilder sb) throws Exception {
            String origValue = literalAstNode.getValueAsString();
            String derivedValue = (String) getGeneratedValueForOrigValue(str, origValue);
            if (!(literalAstNode instanceof StringLiteralAstNode)) {
                sb.append(derivedValue);
                return;
            }
            sb.append("\"").append(derivedValue).append("\"");
        }

        /**
         * Loads the column-name mapping and, for every global dictionary column, the
         * original-to-derived value mapping from the output directory.
         *
         * <p>The mapping file holds one {@code original:derived} pair per line. A column's
         * {@code <column>.dict} file holds an original value followed by its derived value on the
         * next line, repeated.
         *
         * @throws IllegalStateException if a mapping line does not contain both names
         * @throws Exception if a file cannot be read
         */
        private void loadGlobalDictionariesAndColumnMapping() throws Exception {
            File mappingFile = new File(this._outputDir + "/" + PinotDataAndQueryAnonymizer.COLUMN_MAPPING_FILE_KEY);
            try (BufferedReader mappingReader = new BufferedReader(new InputStreamReader(new FileInputStream(mappingFile)))) {
                String line;
                while ((line = mappingReader.readLine()) != null) {
                    String[] names = line.split(PinotDataAndQueryAnonymizer.COLUMN_MAPPING_SEPARATOR);
                    Preconditions.checkState(names.length >= 2, "Malformed column mapping line: %s", line);
                    this._origToDerivedColumnsMap.put(names[0], names[1]);
                }
            }

            for (String column : this._globalDictionaryColumns) {
                Map<Object, Object> origToDerived = new HashMap<>();
                this._origToDerivedValueMap.put(column, origToDerived);
                File dictionaryFile = new File(this._outputDir + "/" + column + ".dict");
                try (BufferedReader dictionaryReader = new BufferedReader(new InputStreamReader(new FileInputStream(dictionaryFile)))) {
                    String origValue;
                    // Stop at end of file; without this exit the loop would spin forever.
                    while ((origValue = dictionaryReader.readLine()) != null) {
                        // The derived value is on the line right after the original value.
                        origToDerived.put(origValue, dictionaryReader.readLine());
                    }
                }
            }
        }

        /**
         * Looks up the derived value for an original predicate value.
         *
         * @param str original name of the filter column
         * @param obj the original value
         * @return {@code obj} itself when the column is not anonymized, otherwise the mapped
         *     derived value
         * @throws IllegalStateException if the column is neither retained nor a global
         *     dictionary column, or no dictionary was loaded for it
         * @throws PredicateValueNotFoundException if the dictionary has no entry for {@code obj}
         */
        private Object getGeneratedValueForOrigValue(String str, Object obj) throws Exception {
            if (_columnsNotAnonymized.contains(str)) {
                return obj;
            }
            boolean hasDictionary = _globalDictionaryColumns.contains(str);
            if (!hasDictionary) {
                throw new IllegalStateException("Encountered an invalid filter column: " + str);
            }
            Map<Object, Object> origToDerived = _origToDerivedValueMap.get(str);
            Preconditions.checkState(origToDerived != null);
            if (!origToDerived.containsKey(obj)) {
                throw new PredicateValueNotFoundException(obj);
            }
            return origToDerived.get(obj);
        }

        /**
         * Returns the derived name for an original column name.
         *
         * @param str the original column name
         * @return the derived name, or null when the column mapping has no entry for it
         */
        private String getAnonymousColumnName(String str) {
            String derivedName = _origToDerivedColumnsMap.get(str);
            return derivedName;
        }
    }

    /**
     * Creates an anonymizer and logs which columns are retained and which get a global
     * dictionary.
     *
     * @param str directory containing the segment directories
     * @param str2 output directory
     * @param str3 prefix of the generated Avro file names
     * @param map columns that get a global dictionary, with the value logged as cardinality;
     *     entries for columns in {@code set} are removed from this map in place
     * @param set columns whose data is retained as is
     * @param z true for map-based global dictionaries, false for array-based ones
     */
    public PinotDataAndQueryAnonymizer(String str, String str2, String str3, Map<String, Integer> map, Set<String> set, boolean z) {
        _outputDir = str2;
        _segmentDir = str;
        _filePrefix = str3;
        if (z) {
            _globalDictionaries = new MapBasedGlobalDictionaries();
        } else {
            _globalDictionaries = new ArrayBasedGlobalDictionaries();
        }
        _origToDerivedColumnsMap = new HashMap();
        _columnToFieldSpecMap = new HashMap();
        _globalDictionaryColumns = map;
        _columnsNotAnonymized = set;

        // Retained columns never need a global dictionary.
        for (String retainedColumn : set) {
            _globalDictionaryColumns.remove(retainedColumn);
        }

        StringBuilder retainedMessage = new StringBuilder("Columns to retain data for: ");
        for (String retainedColumn : _columnsNotAnonymized) {
            retainedMessage.append(retainedColumn).append(", ");
        }
        LOGGER.info(retainedMessage.toString());

        StringBuilder dictionaryMessage = new StringBuilder("Columns to build global dictionary for: ");
        for (Map.Entry<String, Integer> column : _globalDictionaryColumns.entrySet()) {
            dictionaryMessage.append("Column: ").append(column.getKey());
            dictionaryMessage.append(" Cardinality: ").append(column.getValue()).append(", ");
        }
        LOGGER.info(dictionaryMessage.toString());
    }

    /**
     * Lists the segment directory, then either builds the global dictionaries from every
     * segment and writes them with the column mapping, or, when no column needs a global
     * dictionary, only reads the schema from the first segment and writes the column mapping.
     *
     * @throws IllegalStateException if the segment directory cannot be listed, or it is empty
     *     when the schema has to be read from its first segment
     * @throws Exception if a segment cannot be read or an output file cannot be written
     */
    public void buildGlobalDictionaries() throws Exception {
        // File.list() returns null when the path is not a directory or cannot be read.
        String[] segmentDirectories = new File(this._segmentDir).list();
        Preconditions.checkState(segmentDirectories != null, "Segment directory %s does not exist or cannot be listed", this._segmentDir);
        this._segmentDirectories = segmentDirectories;
        this._numFilesToGenerate = segmentDirectories.length;
        LOGGER.info("Total number of segments: " + this._numFilesToGenerate);

        if (this._globalDictionaryColumns.isEmpty()) {
            LOGGER.info("Set of global dictionary columns is empty. Not building global dictionaries");
            Preconditions.checkState(this._numFilesToGenerate > 0, "No segments found in %s, cannot read the schema", this._segmentDir);
            getSchemaFromFirstSegment(this._segmentDir + "/" + this._segmentDirectories[0]);
            writeColumnMapping();
            return;
        }

        this._timeToBuildDictionaries.start();
        for (String segmentDirectory : this._segmentDirectories) {
            readDictionariesFromSegment(this._segmentDir + "/" + segmentDirectory);
        }
        this._globalDictionaries.sortOriginalValuesInGlobalDictionaries();
        this._globalDictionaries.addDerivedValuesToGlobalDictionaries();
        this._timeToBuildDictionaries.stop();
        writeGlobalDictionariesAndColumnMapping();
        LOGGER.info("Finished building global dictionaries. Time taken: {}secs", Long.valueOf(this._timeToBuildDictionaries.elapsed(TimeUnit.SECONDS)));
    }

    /**
     * Reads the metadata of the given segment and derives the Pinot and Avro schemas from it.
     *
     * @param str path of the segment directory
     * @throws Exception if the segment metadata cannot be read
     */
    private void getSchemaFromFirstSegment(String str) throws Exception {
        LOGGER.info("Reading metadata from segment: " + str);
        File segmentIndexDir = new File(str);
        SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(segmentIndexDir);
        pinotToAvroSchema(segmentMetadata);
    }

    /**
     * Initializes the Pinot schema, the anonymized column names and the Avro schema from the
     * segment metadata. Does nothing once the Pinot schema has been set.
     *
     * @param segmentMetadata metadata of the segment to take the schema from
     */
    private void pinotToAvroSchema(SegmentMetadata segmentMetadata) {
        if (_pinotSchema != null) {
            // Schema already captured from an earlier segment.
            return;
        }
        _pinotSchema = segmentMetadata.getSchema();
        anonymizeColumnNames(_pinotSchema);
        _avroSchema = getAvroSchemaFromPinotSchema(_pinotSchema);
        LOGGER.info("Pinot schema: " + _pinotSchema.toPrettyJsonString());
        LOGGER.info("Avro schema: " + _avroSchema.toString(true));
    }

    /**
     * Adds the dictionary values of every global dictionary column of one segment to the
     * global dictionaries.
     *
     * @param str path of the segment directory
     * @throws UnsupportedOperationException if a global dictionary column has no dictionary
     * @throws Exception if the segment cannot be read
     */
    private void readDictionariesFromSegment(String str) throws Exception {
        File segmentIndexDir = new File(str);
        SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(segmentIndexDir);
        pinotToAvroSchema(segmentMetadata);
        ImmutableSegment segment = ImmutableSegmentLoader.load(segmentIndexDir, ReadMode.mmap);
        try {
            for (Map.Entry<?, ?> entry : segmentMetadata.getColumnMetadataMap().entrySet()) {
                String column = (String) entry.getKey();
                if (!this._globalDictionaryColumns.containsKey(column)) {
                    continue;
                }
                int totalCardinality = this._globalDictionaryColumns.get(column).intValue();
                ColumnMetadata columnMetadata = (ColumnMetadata) entry.getValue();
                Dictionary dictionary = segment.getDictionary(column);
                if (dictionary == null) {
                    throw new UnsupportedOperationException("Data generator currently does not support filter columns without dictionary");
                }
                for (int dictId = 0; dictId < columnMetadata.getCardinality(); dictId++) {
                    this._globalDictionaries.addOrigValueToGlobalDictionary(dictionary.get(dictId), column, columnMetadata, totalCardinality);
                }
            }
        } finally {
            // Release the loaded segment once its values have been copied out; one segment
            // is loaded per directory, so leaving them open accumulates mappings.
            segment.destroy();
        }
    }

    /**
     * Writes the column mapping and the serialized global dictionaries to the output directory
     * and logs how long that took.
     *
     * @throws Exception if a file cannot be written
     */
    private void writeGlobalDictionariesAndColumnMapping() throws Exception {
        Stopwatch writeWatch = Stopwatch.createStarted();
        writeColumnMapping();
        _globalDictionaries.serialize(_outputDir);
        writeWatch.stop();
        long elapsedSeconds = writeWatch.elapsed(TimeUnit.SECONDS);
        LOGGER.info("Finished writing global dictionaries and column name mapping to disk. Time taken: {}secs. Please see the files in {}", Long.valueOf(elapsedSeconds), _outputDir);
    }

    /**
     * Writes the original-to-derived column name mapping to the mapping file in the output
     * directory, one {@code original:derived} pair per line.
     *
     * @throws IllegalStateException if writing to the file failed
     * @throws Exception if the file cannot be opened
     */
    private void writeColumnMapping() throws Exception {
        String mappingFile = this._outputDir + "/" + COLUMN_MAPPING_FILE_KEY;
        // try-with-resources so the file handle is always released.
        try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mappingFile)))) {
            for (Map.Entry<String, String> entry : this._origToDerivedColumnsMap.entrySet()) {
                writer.println(entry.getKey() + COLUMN_MAPPING_SEPARATOR + entry.getValue());
            }
            // PrintWriter swallows I/O errors; checkError() flushes and reports them.
            Preconditions.checkState(!writer.checkError(), "Failed to write column mapping file %s", mappingFile);
        }
    }

    /**
     * Generates one Avro file per segment directory. Every row of a segment is converted with
     * buildAvroRow and appended to the file {@code <outputDir>/<filePrefix><index>}.
     *
     * @throws Exception if a segment cannot be read or an Avro file cannot be written
     */
    public void generateAvroFiles() throws Exception {
        this._timeToGenerateAvroFiles.start();
        int totalRows = 0;
        for (int fileIndex = 0; fileIndex < this._numFilesToGenerate; fileIndex++) {
            String segmentPath = this._segmentDir + "/" + this._segmentDirectories[fileIndex];
            String avroPath = this._outputDir + "/" + this._filePrefix + fileIndex;
            // Both resources are closed on every exit path: the Avro writer first, then the
            // segment reader.
            try (PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader(new File(segmentPath), this._pinotSchema, (List<String>) null);
                    DataFileWriter<GenericData.Record> avroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>(this._avroSchema))) {
                LOGGER.info("Using segment {} to generate Avro file {}", segmentPath, avroPath);
                avroWriter.create(this._avroSchema, new File(avroPath));
                // A single row object is reused for the whole segment.
                GenericRow row = new GenericRow();
                int rowsInFile = 0;
                while (recordReader.hasNext()) {
                    row = recordReader.next(row);
                    buildAvroRow(avroWriter, row);
                    rowsInFile++;
                    row.clear();
                }
                totalRows += rowsInFile;
                LOGGER.info("Generated Avro File {} with {} rows", avroPath, Integer.valueOf(rowsInFile));
            }
        }
        this._timeToGenerateAvroFiles.stop();
        LOGGER.info("Finished generating {} rows across {} avro files. Time taken {}secs", new Object[]{Integer.valueOf(totalRows), Integer.valueOf(this._numFilesToGenerate), Long.valueOf(this._timeToGenerateAvroFiles.elapsed(TimeUnit.SECONDS))});
    }

    /**
     * Converts one Pinot row into an Avro record under the derived column names and appends it
     * to the writer.
     *
     * <p>Per column: a retained column keeps its value, a column without a global dictionary
     * gets a random derived value, and a global dictionary column gets the derived value from
     * its dictionary. Multi-value columns are written as lists; a null multi-value is written
     * as null in every branch.
     *
     * @param dataFileWriter the Avro writer to append to
     * @param genericRow the row read from the segment
     * @throws Exception if the record cannot be built or appended
     */
    private void buildAvroRow(DataFileWriter<GenericData.Record> dataFileWriter, GenericRow genericRow) throws Exception {
        GenericData.Record record = new GenericData.Record(this._avroSchema);
        for (Map.Entry<?, ?> entry : genericRow.getFieldToValueMap().entrySet()) {
            String origColumn = (String) entry.getKey();
            Object origValue = entry.getValue();
            String derivedColumn = this._origToDerivedColumnsMap.get(origColumn);
            FieldSpec fieldSpec = this._columnToFieldSpecMap.get(origColumn);
            boolean singleValue = fieldSpec.isSingleValueField();

            if (this._columnsNotAnonymized.contains(origColumn)) {
                if (singleValue) {
                    record.put(derivedColumn, origValue);
                } else {
                    // Arrays.asList rejects a null array, so pass null through as is.
                    record.put(derivedColumn, origValue == null ? null : Arrays.asList((Object[]) origValue));
                }
            } else if (!this._globalDictionaryColumns.containsKey(origColumn)) {
                Object randomValue = generateRandomDerivedValue(origValue, fieldSpec);
                if (singleValue) {
                    record.put(derivedColumn, randomValue);
                } else {
                    // generateRandomDerivedValue returns null for a null multi-value.
                    record.put(derivedColumn, randomValue == null ? null : Arrays.asList((Object[]) randomValue));
                }
            } else if (singleValue) {
                record.put(derivedColumn, this._globalDictionaries.getDerivedValueForOrigValueSV(origColumn, origValue));
            } else if (origValue == null) {
                record.put(derivedColumn, (Object) null);
            } else {
                record.put(derivedColumn, Arrays.asList(this._globalDictionaries.getDerivedValuesForOrigValuesMV(origColumn, (Object[]) origValue)));
            }
        }
        dataFileWriter.append(record);
    }

    /**
     * Produces a random replacement for a column value.
     *
     * <p>For a single-value field the value is handed straight to
     * {@link #generateDerivedRandomValueHelper}. For a multi-value field the value is treated as an
     * {@code Object[]} and every element is replaced individually, yielding a new array of the same length.
     *
     * @param obj original value; an {@code Object[]} when the field is multi-valued
     * @param fieldSpec field spec supplying the cardinality and the data type of the column
     * @return the random replacement, a new {@code Object[]} for multi-value fields, or {@code null} when a
     *         multi-value input is {@code null}
     */
    private Object generateRandomDerivedValue(Object obj, FieldSpec fieldSpec) {
        if (fieldSpec.isSingleValueField()) {
            return generateDerivedRandomValueHelper(obj, fieldSpec.getDataType());
        }
        if (obj == null) {
            return null;
        }

        Object[] originals = (Object[]) obj;
        Object[] replacements = new Object[originals.length];
        FieldSpec.DataType elementType = fieldSpec.getDataType();
        int position = 0;
        for (Object original : originals) {
            replacements[position++] = generateDerivedRandomValueHelper(original, elementType);
        }
        return replacements;
    }

    /**
     * Generates a single random value for the given data type.
     *
     * <p>The switch goes through the compiler-generated switch map for {@code FieldSpec.DataType}; that
     * map is defined outside this method, so the type named for each case below is inferred from what the
     * case returns (NOTE(review): confirm against the switch map).
     * <ul>
     *   <li>case 1 (presumably INT): a random {@link Integer};</li>
     *   <li>case 2 (presumably LONG): a random {@link Long};</li>
     *   <li>case 3 (presumably FLOAT): {@code FLOAT_BASE_VALUE} plus a random float;</li>
     *   <li>case 4 (presumably DOUBLE): {@code DOUBLE_BASE_VALUE} plus a random double;</li>
     *   <li>case 5 (presumably STRING): a random alphanumeric string of the same length as the original,
     *       except that {@code null}, {@code ""}, {@code " "} and {@code "null"} are returned unchanged;</li>
     *   <li>case 6 (presumably BYTES): a random byte array of the same length as the original, except that
     *       {@code null} and empty arrays are returned unchanged.</li>
     * </ul>
     * The numeric cases do not look at {@code obj}.
     *
     * @param obj original value; cast to {@link String} in case 5 and to {@code byte[]} in case 6
     * @param dataType data type of the column the value belongs to
     * @return the random replacement value
     * @throws IllegalStateException if the data type maps to none of the handled cases
     */
    private Object generateDerivedRandomValueHelper(Object obj, FieldSpec.DataType dataType) {
        // This method runs once per generated value, so reuse the calling thread's generator instead of
        // allocating and seeding a fresh Random on every call.
        Random random = java.util.concurrent.ThreadLocalRandom.current();
        switch (AnonymousClass1.$SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[dataType.ordinal()]) {
            case 1:
                return Integer.valueOf(random.nextInt());
            case 2:
                return Long.valueOf(random.nextLong());
            case 3:
                return Float.valueOf(FLOAT_BASE_VALUE + random.nextFloat());
            case 4:
                return Double.valueOf(DOUBLE_BASE_VALUE + random.nextDouble());
            case 5:
                String original = (String) obj;
                // Missing or placeholder strings are kept verbatim.
                if (original == null || original.isEmpty() || original.equals(" ") || original.equals("null")) {
                    return original;
                }
                return RandomStringUtils.randomAlphanumeric(original.length());
            case 6:
                byte[] originalBytes = (byte[]) obj;
                if (originalBytes == null || originalBytes.length == 0) {
                    return originalBytes;
                }
                byte[] randomBytes = new byte[originalBytes.length];
                random.nextBytes(randomBytes);
                return randomBytes;
            default:
                throw new IllegalStateException("Unexpected data type");
        }
    }

    /**
     * Assigns a derived (anonymized) name to every column of the given schema and records the mapping in
     * {@code _origToDerivedColumnsMap}.
     *
     * <p>A derived name has the form {@code <CATEGORY>_<SV|MV>_COL_<n>}, where the category is
     * {@code DIMENSION}, {@code METRIC}, {@code TIME} or {@code DATE_TIME} depending on the class of the
     * field spec, {@code SV}/{@code MV} reflects whether the field is single-valued, and {@code n} is the
     * zero-based position of the column in the iteration over the schema's field spec map.
     *
     * <p>The category is computed afresh for every column. A field spec of any other class gets an empty
     * category (for example {@code _SV_COL_3}). Previously the variable was shared across iterations, so
     * such a column inherited the previous column's complete prefix and had a second cardinality suffix
     * appended to it.
     *
     * @param schema schema whose columns are renamed
     */
    private void anonymizeColumnNames(Schema schema) {
        int columnIndex = 0;
        for (Map.Entry<?, ?> entry : schema.getFieldSpecMap().entrySet()) {
            String origColumn = (String) entry.getKey();
            FieldSpec fieldSpec = (FieldSpec) entry.getValue();

            // Scoped to this iteration so nothing leaks over from the previous column.
            String category = "";
            if (fieldSpec instanceof DimensionFieldSpec) {
                category = "DIMENSION";
            } else if (fieldSpec instanceof MetricFieldSpec) {
                category = "METRIC";
            } else if (fieldSpec instanceof TimeFieldSpec) {
                category = "TIME";
            } else if (fieldSpec instanceof DateTimeFieldSpec) {
                category = "DATE_TIME";
            }

            String cardinality = fieldSpec.isSingleValueField() ? "_SV" : "_MV";
            this._origToDerivedColumnsMap.put(origColumn, category + cardinality + "_COL_" + columnIndex);
            columnIndex++;
        }
    }

    /**
     * Builds the Avro record schema (record name {@code "record"}) that mirrors the given Pinot schema,
     * using the derived column names from {@code _origToDerivedColumnsMap} as Avro field names.
     *
     * <p>As a side effect every field spec is registered in {@code _columnToFieldSpecMap} under its
     * original column name; this happens before the type is checked, so the entry is present even when the
     * method then fails for that column.
     *
     * <p>Single-value fields become a plain Avro primitive and multi-value fields become an Avro array of
     * that primitive; no field has a default. The switch relies on the compiler-generated switch map for
     * {@code FieldSpec.DataType}: cases 1 to 6 produce int, long, float, double, string and bytes
     * respectively, and case 6 is accepted for single-value fields only.
     *
     * @param schema Pinot schema to translate
     * @return the Avro schema with one field per Pinot field spec
     * @throws UnsupportedOperationException if a field has a data type that is not handled for its
     *         cardinality
     */
    private org.apache.avro.Schema getAvroSchemaFromPinotSchema(Schema schema) {
        SchemaBuilder.FieldAssembler<org.apache.avro.Schema> assembler = SchemaBuilder.record("record").fields();
        for (FieldSpec spec : schema.getAllFieldSpecs()) {
            FieldSpec.DataType type = spec.getDataType();
            String origColumn = spec.getName();
            String derivedColumn = this._origToDerivedColumnsMap.get(origColumn);
            this._columnToFieldSpecMap.put(origColumn, spec);

            boolean singleValue = spec.isSingleValueField();
            switch (AnonymousClass1.$SwitchMap$org$apache$pinot$spi$data$FieldSpec$DataType[type.ordinal()]) {
                case 1:
                    assembler = singleValue
                        ? assembler.name(derivedColumn).type().intType().noDefault()
                        : assembler.name(derivedColumn).type().array().items().intType().noDefault();
                    break;
                case 2:
                    assembler = singleValue
                        ? assembler.name(derivedColumn).type().longType().noDefault()
                        : assembler.name(derivedColumn).type().array().items().longType().noDefault();
                    break;
                case 3:
                    assembler = singleValue
                        ? assembler.name(derivedColumn).type().floatType().noDefault()
                        : assembler.name(derivedColumn).type().array().items().floatType().noDefault();
                    break;
                case 4:
                    assembler = singleValue
                        ? assembler.name(derivedColumn).type().doubleType().noDefault()
                        : assembler.name(derivedColumn).type().array().items().doubleType().noDefault();
                    break;
                case 5:
                    assembler = singleValue
                        ? assembler.name(derivedColumn).type().stringType().noDefault()
                        : assembler.name(derivedColumn).type().array().items().stringType().noDefault();
                    break;
                case 6:
                    // Bytes are only handled for single-value fields.
                    if (!singleValue) {
                        throw new UnsupportedOperationException("Data generator does not support type: " + type);
                    }
                    assembler = assembler.name(derivedColumn).type().bytesType().noDefault();
                    break;
                default:
                    throw new UnsupportedOperationException("Data generator does not support type: " + type);
            }
        }
        return assembler.endRecord();
    }
}
