package com.marklogic.mapreduce;

import com.marklogic.contentpump.DatabaseContentWriter;
import com.marklogic.mapreduce.functions.LexiconFunction;
import com.marklogic.mapreduce.utilities.ForestHost;
import com.marklogic.mapreduce.utilities.InternalUtilities;
import com.marklogic.mapreduce.utilities.RestrictedHostsUtil;
import com.marklogic.xcc.AdhocQuery;
import com.marklogic.xcc.RequestOptions;
import com.marklogic.xcc.ResultItem;
import com.marklogic.xcc.ResultSequence;
import com.marklogic.xcc.Session;
import com.marklogic.xcc.exceptions.RequestException;
import com.marklogic.xcc.exceptions.ServerConnectionException;
import com.marklogic.xcc.exceptions.XccConfigException;
import com.marklogic.xcc.types.ItemType;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.util.ReflectionUtils;

/* loaded from: input_file:com/marklogic/mapreduce/MarkLogicInputFormat.class */
public abstract class MarkLogicInputFormat<KEYIN, VALUEIN> extends InputFormat<KEYIN, VALUEIN> implements MarkLogicConstants {
    /** Logger shared by this input format. */
    public static final Log LOG = LogFactory.getLog(MarkLogicInputFormat.class);
    /** Document selector appended to the split query when the job configuration supplies none. */
    static final String DEFAULT_DOCUMENT_SELECTOR = "fn:collection()";
    /** Empty-sequence text used as the default query argument and as the default custom tail of the split query. */
    static final String DEFAULT_CTS_QUERY = "()";
    /** Configured document selector; assigned by appendDocumentSelector and null when none is configured. */
    String docSelector;
    /** Job configuration; assigned at the start of getSplits and read by the append* helpers. */
    Configuration jobConf = null;
    /** Host name reported by the server that answered the split query (result of xdmp:host-name(xdmp:host())). */
    String localHost = null;
    /** True when the configured execution mode equals MODE_LOCAL; assigned in getSplits. */
    boolean localMode = false;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/marklogic/mapreduce/MarkLogicInputFormat$ForestSplit.class */
    /**
     * Mutable holder for one forest entry parsed from the split query result.
     * Instances are created and filled in by getForestSplits and later turned
     * into MarkLogicInputSplit objects by getSplits.
     */
    public class ForestSplit {
        // Forest identifier: first item of each (id, count, host) triple in the result.
        BigInteger forestId;
        // Host the split is assigned to: third item of the triple, possibly remapped
        // (restricted hosts, or local mode) by getForestSplits.
        String hostName;
        // Record count: second item of the triple.
        long recordCount;
        // Replica forests/hosts looked up by forest id; stays null when none were parsed.
        List<ForestHost> replicas;

        // Only this class and its subclasses are meant to create instances.
        protected ForestSplit() {
        }
    }

    /**
     * Appends XQuery namespace declarations built from the configured
     * path-namespace values to the given buffer.
     *
     * <p>The configured values are consumed alternately: the first of each pair
     * is emitted after {@code declare namespace } and before {@code ="}, the
     * second is emitted followed by {@code ";}. Completed declarations are
     * separated by a newline. Nothing is appended when no values are configured.
     *
     * @param sb buffer receiving the declarations
     */
    private void appendNsBindings(StringBuilder sb) {
        Collection<String> bindings = this.jobConf.getStringCollection(MarkLogicConstants.PATH_NAMESPACE);
        if (bindings == null || bindings.isEmpty()) {
            return;
        }
        boolean startOfDeclaration = true;
        for (Iterator<String> tokens = bindings.iterator(); tokens.hasNext();) {
            String token = tokens.next();
            if (startOfDeclaration) {
                sb.append("declare namespace ").append(token).append("=\"");
            } else {
                sb.append(token).append("\";");
            }
            startOfDeclaration = !startOfDeclaration;
            // A newline only goes between complete declarations, never after the last one.
            if (startOfDeclaration && tokens.hasNext()) {
                sb.append('\n');
            }
        }
    }

    /**
     * Reads the document selector from the job configuration, remembers it in
     * {@link #docSelector} (null when absent) and appends either that value or
     * {@link #DEFAULT_DOCUMENT_SELECTOR} to the buffer.
     *
     * @param sb buffer receiving the selector expression
     */
    private void appendDocumentSelector(StringBuilder sb) {
        String configured = this.jobConf.get(MarkLogicConstants.DOCUMENT_SELECTOR);
        this.docSelector = configured;
        sb.append(configured != null ? configured : DEFAULT_DOCUMENT_SELECTOR);
    }

    /**
     * Appends the query argument of the split query to the buffer.
     *
     * <ul>
     *   <li>When a query filter is configured, it is escaped and wrapped as
     *       {@code "cts:query(xdmp:unquote('...')/*)"}.</li>
     *   <li>Otherwise, when a document selector was configured, the quoted
     *       default query {@code '()'} is appended.</li>
     *   <li>Otherwise the configured lexicon function class, if any, is
     *       instantiated and its lexicon query is appended in single quotes;
     *       without one the quoted default query is appended.</li>
     * </ul>
     *
     * @param sb buffer receiving the query argument
     */
    protected void appendQuery(StringBuilder sb) {
        String filter = this.jobConf.get(MarkLogicConstants.QUERY_FILTER);
        if (filter != null) {
            // '&' is escaped twice on purpose-looking grounds: presumably once for the
            // single-quoted xdmp:unquote argument and once for the enclosing
            // double-quoted string -- NOTE(review): confirm against hadoop:get-splits.
            String escaped = filter
                    .replace("&", "&amp;")
                    .replace("'", "&apos;")
                    .replace("&", "&amp;")
                    .replace("\"", "&quot;");
            sb.append("\"cts:query(xdmp:unquote('").append(escaped).append("')/*)\"");
            return;
        }

        // A lexicon function is only consulted when no document selector is configured.
        LexiconFunction lexicon = null;
        if (this.docSelector == null) {
            Class<? extends LexiconFunction> lexiconClass =
                    this.jobConf.getClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, null, LexiconFunction.class);
            if (lexiconClass != null) {
                lexicon = ReflectionUtils.newInstance(lexiconClass, this.jobConf);
            }
        }

        sb.append("'");
        if (lexicon != null) {
            sb.append(lexicon.getLexiconQuery());
        } else {
            sb.append(DEFAULT_CTS_QUERY);
        }
        sb.append("'");
    }

    /**
     * Appends the {@code "REDACT"} marker item and, when rule collections are
     * given, a call to {@code rdt:rule-validate} over those collections.
     *
     * @param sb     buffer receiving the query fragment
     * @param strArr redaction rule collection names, each emitted as a
     *               double-quoted string; {@code null} emits the marker only
     */
    private void appendRedactionRuleValidateQuery(StringBuilder sb, String[] strArr) {
        sb.append("\"REDACT\"");
        if (strArr == null) {
            return;
        }
        sb.append(",\n").append("rdt:rule-validate((");
        String separator = "";
        for (String collection : strArr) {
            sb.append(separator).append("\"").append(collection).append("\"");
            separator = ", ";
        }
        sb.append("))");
    }

    /**
     * Appends the XQuery fragment that looks up
     * {@code hadoop:get-splits-with-replica} with arity 0 and calls it when it
     * exists, yielding the empty sequence otherwise.
     *
     * @param sb buffer receiving the query fragment
     */
    private void appendReplicaQuery(StringBuilder sb) {
        final String replicaLookup =
                "let $repf := fn:function-lookup("
                + "xs:QName('hadoop:get-splits-with-replica'),0)\n"
                + "return if (exists($repf)) then $repf() else ()\n";
        sb.append(replicaLookup);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Parses the result of the split query into forest splits, validated
     * redaction rule URIs and (optionally) replica information.
     *
     * <p>The result sequence is read in three consecutive sections:
     * <ol>
     *   <li>Repeating triples of forest id, record count and host name. The
     *       section ends at the string item {@code "REDACT"} or when the
     *       sequence is exhausted; any other string in the forest-id position
     *       is an error.</li>
     *   <li>String items collected into {@code list2}, ended by the integer
     *       {@code 0}.</li>
     *   <li>Only when {@code z} is true: per-forest replica groups, each ended
     *       by the integer {@code 0}, the whole section also ended by
     *       {@code 0}.</li>
     * </ol>
     *
     * <p>When host restriction is enabled in the job configuration, forest
     * hosts are registered with a {@link RestrictedHostsUtil} and every split's
     * host is replaced by the host that utility hands out; replicas are then
     * not attached to the splits. In local mode a host equal to
     * {@link #localHost} is replaced by the first configured input host.
     *
     * @param jobContext     job context (not used by this implementation,
     *                       which reads {@link #jobConf})
     * @param resultSequence result of the split query, positioned at the first
     *                       forest triple
     * @param list           receives one entry per forest
     * @param list2          receives the strings of the second section; may be
     *                       {@code null} when none are expected
     * @param strArr         configured input hosts
     * @param z              whether the replica section should be parsed
     * @throws IOException if an item of an unexpected type is encountered
     */
    public void getForestSplits(JobContext jobContext, ResultSequence resultSequence, List<MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit> list, List<String> list2, String[] strArr, boolean z) throws IOException {
        boolean restrictHosts = this.jobConf.getBoolean(MarkLogicConstants.INPUT_RESTRICT_HOSTS, false);
        RestrictedHostsUtil hostsUtil = restrictHosts ? new RestrictedHostsUtil(strArr) : null;

        // Section 1: (forest id, record count, host name) triples.
        int position = 0;
        while (resultSequence.hasNext()) {
            ResultItem item = resultSequence.next();
            int slot = position % 3;
            if (slot == 0) {
                if (ItemType.XS_STRING == item.getItemType()) {
                    if ("REDACT".equals(item.getItem().asString())) {
                        // Marker reached: stop here so the items that follow are not
                        // misread as the record count / host of the last forest.
                        break;
                    }
                    throw new IOException("Unexpected string item from getSplits query result");
                }
                MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit split = new ForestSplit();
                split.forestId = item.getItem().asBigInteger();
                list.add(split);
            } else {
                MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit current = list.get(list.size() - 1);
                if (slot == 1) {
                    current.recordCount = item.getItem().asLong().longValue();
                } else {
                    String host = item.getItem().asString();
                    if (restrictHosts) {
                        hostsUtil.addForestHost(host);
                        current.hostName = host;
                    } else if (this.localMode && host.equals(this.localHost)) {
                        current.hostName = strArr[0];
                    } else {
                        current.hostName = host;
                    }
                }
            }
            position++;
        }

        // With host restriction every split is re-pointed at a host chosen by the utility.
        if (restrictHosts) {
            for (MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit split : list) {
                split.hostName = hostsUtil.getNextHost(split.hostName);
            }
        }

        // Section 2: strings (rule URIs) up to the 0 terminator.
        while (resultSequence.hasNext()) {
            ResultItem item = resultSequence.next();
            if (isEndMarker(item)) {
                // Terminator reached: leave the remaining items for the replica section.
                break;
            }
            if (ItemType.XS_STRING != item.getItemType() || list2 == null) {
                throw new IOException("Unexpected item " + item.getItemType().toString());
            }
            list2.add(item.getItem().asString());
        }

        if (!z) {
            return;
        }

        // Section 3: replica groups keyed by the leading string of each group.
        HashMap<String, List<ForestHost>> replicasByForest = new HashMap<String, List<ForestHost>>();
        while (resultSequence.hasNext()) {
            ResultItem head = resultSequence.next();
            if (isEndMarker(head)) {
                break;
            }
            String forestKey = head.asString();
            if (!resultSequence.hasNext()) {
                break;
            }
            // The second item of a group is consumed but not used.
            resultSequence.next().asString();
            List<ForestHost> replicas = new ArrayList<ForestHost>();
            while (resultSequence.hasNext()) {
                ResultItem replicaItem = resultSequence.next();
                if (isEndMarker(replicaItem)) {
                    break;
                }
                String replicaForest = replicaItem.asString();
                if (resultSequence.hasNext()) {
                    String replicaHost = resultSequence.next().asString();
                    if (this.localMode && replicaHost.equals(this.localHost)) {
                        replicaHost = strArr[0];
                    }
                    replicas.add(new ForestHost(replicaForest, replicaHost));
                }
            }
            replicasByForest.put(forestKey, replicas);
        }

        // Replicas are not attached when hosts are restricted.
        if (restrictHosts) {
            return;
        }
        for (MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit split : list) {
            split.replicas = replicasByForest.get(split.forestId.toString());
        }
    }

    /** Returns true when the item is the integer 0 that terminates a result section. */
    private static boolean isEndMarker(ResultItem item) {
        return ItemType.XS_INTEGER == item.getItemType() && item.getItem().asPrimitiveInt() == 0;
    }

    /**
     * Appends the last expression of the generated split query. This base
     * implementation appends the empty sequence {@code ()}; the method is
     * protected and non-final, so a subclass can supply its own expression.
     *
     * @param sb buffer receiving the expression
     */
    protected void appendCustom(StringBuilder sb) {
        sb.append(DEFAULT_CTS_QUERY);
    }

    /**
     * Computes the input splits for a job by querying MarkLogic for forest
     * information and cutting every forest into ranges of at most the
     * configured maximum split size.
     *
     * <p>In advanced mode the configured split query (and optional query
     * language) is used as is; otherwise the split query is generated from the
     * job configuration. The configured input hosts are tried in order: a
     * connection failure moves on to the next host, the first host that answers
     * ends the search, and any other request or configuration failure is
     * rethrown as an {@link IOException}.
     *
     * <p>The resulting splits are interleaved round-robin, first across the
     * forests of a host and then across hosts.
     *
     * @param jobContext job context supplying the configuration
     * @return the splits; empty when the query reported no forests
     * @throws IOException           if no input host could be reached or the
     *                               split query failed
     * @throws IllegalStateException if no input host is configured or the
     *                               maximum split size is not positive
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
        this.jobConf = jobContext.getConfiguration();
        boolean advancedMode = this.jobConf.get(MarkLogicConstants.INPUT_MODE, MarkLogicConstants.BASIC_MODE).equals(MarkLogicConstants.ADVANCED_MODE);

        String restrictSetting = this.jobConf.getTrimmed(MarkLogicConstants.INPUT_RESTRICT_HOSTS);
        boolean restrictHosts = restrictSetting != null && !restrictSetting.isEmpty() && Boolean.parseBoolean(restrictSetting);
        // When the setting is absent the server is asked whether an x-forwarded-for header is present.
        boolean probeForwardedHeader = restrictSetting == null;
        boolean fetchReplicas = !restrictHosts;

        String[] redactionRules = this.jobConf.getStrings(MarkLogicConstants.REDACTION_RULE_COLLECTION);
        String[] inputHosts = this.jobConf.getStrings(MarkLogicConstants.INPUT_HOST);
        if (inputHosts == null || inputHosts.length == 0) {
            throw new IllegalStateException("mapreduce.marklogic.input.host is not specified.");
        }

        String queryLanguage = null;
        String splitQuery;
        if (advancedMode) {
            queryLanguage = this.jobConf.get(MarkLogicConstants.INPUT_QUERY_LANGUAGE);
            splitQuery = this.jobConf.get(MarkLogicConstants.SPLIT_QUERY);
        } else {
            splitQuery = buildBasicSplitQuery(probeForwardedHeader, fetchReplicas, redactionRules);
        }

        long maxSplitSize = this.jobConf.getLong(MarkLogicConstants.MAX_SPLIT_SIZE, this.jobConf.get(MarkLogicConstants.EXECUTION_MODE, MarkLogicConstants.MODE_DISTRIBUTED).equals(MarkLogicConstants.MODE_DISTRIBUTED) ? MarkLogicConstants.DEFAULT_MAX_SPLIT_SIZE : MarkLogicConstants.DEFAULT_LOCAL_MAX_SPLIT_SIZE);
        if (maxSplitSize <= 0) {
            throw new IllegalStateException("Max split size is required to be positive. It is set to " + maxSplitSize);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Split query: " + splitQuery);
        }
        this.localMode = MarkLogicConstants.MODE_LOCAL.equals(this.jobConf.get(MarkLogicConstants.EXECUTION_MODE));

        List<MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit> forestSplits = new ArrayList<MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit>();
        boolean fetched = false;
        for (int i = 0; i < inputHosts.length && !fetched; i++) {
            Session session = null;
            ResultSequence result = null;
            try {
                session = InternalUtilities.getInputContentSource(this.jobConf, inputHosts[i]).newSession();
                RequestOptions options = new RequestOptions();
                options.setCacheResult(false);

                if (this.localMode && advancedMode) {
                    // A custom split query does not report the server's host name, so ask for it separately.
                    AdhocQuery hostQuery = session.newAdhocQuery("xquery version \"1.0-ml\";xdmp:host-name(xdmp:host())");
                    hostQuery.setOptions(options);
                    ResultSequence hostResult = session.submitRequest(hostQuery);
                    try {
                        if (hostResult.hasNext()) {
                            this.localHost = hostResult.next().asString();
                        }
                    } finally {
                        hostResult.close();
                    }
                }

                AdhocQuery query = session.newAdhocQuery(splitQuery);
                if (queryLanguage != null) {
                    InternalUtilities.checkQueryLanguage(queryLanguage);
                    options.setQueryLanguage(queryLanguage);
                }
                query.setOptions(options);
                result = session.submitRequest(query);

                if (!advancedMode && result.hasNext()) {
                    ResultItem first = result.next();
                    if (probeForwardedHeader) {
                        boolean forwarded = first.asString().equals("true");
                        first = result.next();
                        if (forwarded) {
                            this.jobConf.setBoolean(MarkLogicConstants.INPUT_RESTRICT_HOSTS, true);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("HTTP compliant mode enabled since x-forwarded-for exists");
                            }
                        }
                    }
                    this.localHost = first.asString();
                }

                List<String> ruleUris = redactionRules != null ? new ArrayList<String>() : null;
                getForestSplits(jobContext, result, forestSplits, ruleUris, inputHosts, fetchReplicas);
                LOG.info("Fetched " + forestSplits.size() + " forest splits.");
                // Success: stop trying further hosts.
                fetched = true;
            } catch (ServerConnectionException e) {
                // Handled before RequestException so that a connection failure always
                // falls through to the next configured host.
                LOG.warn("Unable to connect to " + inputHosts[i] + " to query source information");
            } catch (XccConfigException e) {
                LOG.error(e);
                throw new IOException(e);
            } catch (RequestException e) {
                LOG.error(e);
                LOG.error("Query: " + splitQuery);
                throw new IOException(e);
            } finally {
                if (result != null) {
                    result.close();
                }
                if (session != null) {
                    session.close();
                }
            }
        }
        if (!fetched) {
            throw new IOException("Unable to query source information, no usable hostname found");
        }
        if (forestSplits.isEmpty()) {
            return new ArrayList<InputSplit>();
        }

        // Key/value formats get an even chunk size -- presumably so a split never
        // separates a key from its value (NOTE(review): confirm). A size of 1 would
        // become 0 and never make progress, so fall back to the smallest even size.
        long chunkSize = maxSplitSize;
        if ((this instanceof KeyValueInputFormat) && (chunkSize & 1) != 0) {
            chunkSize--;
            if (chunkSize == 0) {
                chunkSize = 2;
            }
        }

        HashMap<String, List<List<InputSplit>>> splitsByHost = new HashMap<String, List<List<InputSplit>>>();
        boolean timestampSet = this.jobConf.get(MarkLogicConstants.INPUT_QUERY_TIMESTAMP) != null;
        for (MarkLogicInputFormat<KEYIN, VALUEIN>.ForestSplit forest : forestSplits) {
            // Empty forests are skipped only when a query timestamp is configured.
            if (forest.recordCount <= 0 && timestampSet) {
                continue;
            }
            List<List<InputSplit>> hostSplits = splitsByHost.get(forest.hostName);
            if (hostSplits == null) {
                hostSplits = new ArrayList<List<InputSplit>>();
                splitsByHost.put(forest.hostName, hostSplits);
            }
            List<InputSplit> perForest = new ArrayList<InputSplit>();
            hostSplits.add(perForest);

            long remaining = forest.recordCount;
            if (remaining <= 0) {
                MarkLogicInputSplit emptySplit = new MarkLogicInputSplit(0L, 0L, forest.forestId, forest.hostName, forest.replicas);
                emptySplit.setLastSplit(true);
                perForest.add(emptySplit);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Added split " + emptySplit);
                }
                continue;
            }
            while (remaining > 0) {
                long start = forest.recordCount - remaining;
                MarkLogicInputSplit split;
                if (remaining < chunkSize) {
                    split = new MarkLogicInputSplit(start, remaining, forest.forestId, forest.hostName, forest.replicas);
                    split.setLastSplit(true);
                    remaining = 0;
                } else {
                    split = new MarkLogicInputSplit(start, chunkSize, forest.forestId, forest.hostName, forest.replicas);
                    remaining -= chunkSize;
                }
                perForest.add(split);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Added split " + split);
                }
            }
        }

        // Interleave forests within each host, then hosts with each other.
        List<List<InputSplit>> perHost = new ArrayList<List<InputSplit>>(splitsByHost.size());
        for (List<List<InputSplit>> hostSplits : splitsByHost.values()) {
            perHost.add(interleave(hostSplits));
        }
        List<InputSplit> splits = interleave(perHost);

        LOG.info("Made " + splits.size() + " split(s).");
        if (LOG.isDebugEnabled()) {
            for (InputSplit split : splits) {
                LOG.debug(split);
            }
        }
        return splits;
    }

    /** Builds the basic-mode split query from the job configuration and the given options. */
    private String buildBasicSplitQuery(boolean probeForwardedHeader, boolean fetchReplicas, String[] redactionRules) {
        StringBuilder query = new StringBuilder();
        query.append(DatabaseContentWriter.XQUERY_VERSION_1_0_ML);
        if (probeForwardedHeader) {
            query.append("fn:exists(xdmp:get-request-header('x-forwarded-for'));\n");
        }
        query.append("import module namespace hadoop = ");
        query.append("\"http://marklogic.com/xdmp/hadoop\" at ");
        query.append("\"/MarkLogic/hadoop.xqy\";\n");
        if (redactionRules != null) {
            query.append("import module namespace rdt = \"http://marklogic.com/xdmp/redaction\" at \"/MarkLogic/redaction.xqy\";\n");
        }
        query.append("xdmp:host-name(xdmp:host()),\n");
        query.append("hadoop:get-splits('");
        appendNsBindings(query);
        query.append("', '");
        appendDocumentSelector(query);
        query.append("',");
        appendQuery(query);
        query.append("),\n");
        appendRedactionRuleValidateQuery(query, redactionRules);
        query.append(",0,");
        if (fetchReplicas) {
            appendReplicaQuery(query);
            query.append(",0,");
        }
        appendCustom(query);
        return query.toString();
    }

    /** Merges the lists round-robin: all first elements in list order, then all second elements, and so on. */
    private static <T> List<T> interleave(List<List<T>> sources) {
        List<T> merged = new ArrayList<T>();
        boolean more = true;
        for (int round = 0; more; round++) {
            more = false;
            for (List<T> source : sources) {
                if (round < source.size()) {
                    merged.add(source.get(round));
                }
                more = more || round + 1 < source.size();
            }
        }
        return merged;
    }
}
