package com.marklogic.mapreduce.examples;

import com.marklogic.cpox.SimpleLogger;
import com.marklogic.cpox.Utilities;
import com.marklogic.tree.NodeKind;
import com.marklogic.xcc.Session;
import info.bliki.wiki.model.WikiModel;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;

/* compiled from: WikiLoader.java */
/* loaded from: input_file:com/marklogic/mapreduce/examples/WikiReader.class */
class WikiReader extends RecordReader<Text, Text> {
    /** Size in bytes of the buffer used when reading the split's own data (64 KiB). */
    static final int BUFFER_SIZE = 65536;
    /** Number of bytes requested per read when reading past the end of the split. */
    static final int READ_AHEAD_SIZE = 2048;
    /** Opening tag searched for to find the first page of the split. */
    static final String BEGIN_PAGE_TAG = "<page>";
    /** Closing tag of a page; the collected text is cut right after it. */
    static final String END_PAGE_TAG = "</page>";
    /** Closing tag of the whole dump; when seen at the end of the split no read-ahead is done. */
    static final String END_DOC_TAG = "</mediawiki>";
    /** Articles returned by {@code WikiModelProcessor.process} in {@code initialize}; unset before that. */
    private List<Article> articles;
    /** Reusable key object; {@code nextKeyValue} fills it from {@code Article.title}. */
    private Text key = new Text();
    /** Reusable value object; {@code nextKeyValue} fills it from {@code Article.pageContent}. */
    private Text value = new Text();
    /** Index of the next article to hand out, i.e. the number of records already returned. */
    private int recordCount = 0;

    /* compiled from: WikiLoader.java */
    /* loaded from: input_file:com/marklogic/mapreduce/examples/WikiReader$WikiModelProcessor.class */
    static class WikiModelProcessor {
        /** Element name whose text content becomes the current {@link #title}. */
        private static final String TITLE = "title";
        /** Element name that opens and closes one article. */
        private static final String PAGE = "page";
        /** Name of the document root; its namespace and xml:lang attribute are remembered. */
        private static final String ROOT = "mediawiki";
        /** Namespace URI used to look up the {@code lang} attribute on the root element. */
        private static final String NS_XML = "http://www.w3.org/XML/1998/namespace";
        /**
         * Document prologue inserted in front of the page text by {@code process(StringBuilder)}
         * so that the pages cut out of a split parse as one complete document.
         * NOTE(review): the xsi:schemaLocation value has no whitespace between the namespace URI
         * and the .xsd URL - confirm whether that is intended.
         */
        private static final String HEADER = "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.4/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.4/http://www.mediawiki.org/xml/export-0.4.xsd\" version=\"0.4\" xml:lang=\"en\"> \n  <siteinfo> \n    <sitename>Wikipedia</sitename> \n    <base>http://en.wikipedia.org/wiki/Main_Page</base> \n    <generator>MediaWiki 1.16alpha-wmf</generator> \n    <case>first-letter</case> \n    <namespaces> \n      <namespace key=\"-2\">Media</namespace> \n      <namespace key=\"-1\">Special</namespace> \n      <namespace key=\"0\" /> \n      <namespace key=\"1\">Talk</namespace> \n      <namespace key=\"2\">User</namespace> \n      <namespace key=\"3\">User talk</namespace> \n      <namespace key=\"4\">Wikipedia</namespace> \n      <namespace key=\"5\">Wikipedia talk</namespace> \n      <namespace key=\"6\">File</namespace> \n      <namespace key=\"7\">File talk</namespace> \n      <namespace key=\"8\">MediaWiki</namespace> \n      <namespace key=\"9\">MediaWiki talk</namespace> \n      <namespace key=\"10\">Template</namespace> \n      <namespace key=\"11\">Template talk</namespace> \n      <namespace key=\"12\">Help</namespace> \n      <namespace key=\"13\">Help talk</namespace> \n      <namespace key=\"14\">Category</namespace> \n      <namespace key=\"15\">Category talk</namespace> \n      <namespace key=\"100\">Portal</namespace> \n      <namespace key=\"101\">Portal talk</namespace> \n    </namespaces> \n  </siteinfo> \n";
        /** Closing root tag appended after the page text by {@code process(StringBuilder)}. */
        private static final String FOOTER = "\n</mediawiki>";
        /** Names of the currently open elements, innermost last. */
        private static LinkedList<String> path;
        /** Markup of the page being assembled; {@code null} while no page is open. */
        private static StringBuilder article;
        /** Trimmed text of the most recently read title element. */
        private static String title;
        /** Parser over the wrapped page text of the current run. */
        private static XmlPullParser xpp;
        /** Logger used for progress, warnings and exceptions. */
        static SimpleLogger logger = SimpleLogger.getSimpleLogger();
        /** Number of rendered fragments rejected by {@code parse}; never reset in this class. */
        private static int errors = 0;
        /** Number of page start tags seen; never reset in this class. */
        private static int pages = 0;
        /** Namespace URI of the root element, copied onto every emitted page element. */
        private static String namespace;
        /** Value of the root element's xml:lang attribute; also prefixes the article URI. */
        private static String language;
        /** Factory that creates both the document parser and the validation parsers. */
        private static XmlPullParserFactory factory;
        /** Parser used by {@code parse} to check rendered markup. */
        private static XmlPullParser parser;
        /** Closed by {@code processEndDocument} when non-null; never assigned in this class. */
        private static Session session;
        /** Articles collected by {@code processEndElement} and returned by {@code process(StringBuilder)}. */
        private static List<Article> articles;

        /** Does nothing; every member of this class is static. */
        WikiModelProcessor() {
        }

        /**
         * Parses the pages cut out of one input split and returns them as articles.
         *
         * <p>The given text is wrapped in {@link #HEADER} and {@link #FOOTER} (the builder is
         * modified in place) so that it forms a complete document, and is then walked with a
         * pull parser. Any exception raised while parsing is logged, not rethrown; the articles
         * completed before the failure are still returned.
         *
         * <p>The page and error counters in the final log line are cumulative over all calls.
         *
         * @param sb text of zero or more complete page elements; modified by this call
         * @return the articles found in this call, never {@code null}
         */
        public static List<Article> process(StringBuilder sb) {
            // Start from a fresh list: the field is static, so without this a second split
            // handled in the same JVM would get the first split's articles returned again.
            articles = new ArrayList<>();
            sb.insert(0, HEADER);
            sb.append(FOOTER);
            try {
                // No class names are supplied, so the factory falls back to its default lookup.
                factory = XmlPullParserFactory.newInstance(null, null);
                factory.setNamespaceAware(true);
                xpp = factory.newPullParser();
                xpp.setInput(new StringReader(sb.toString()));
                xpp.setFeature("http://xmlpull.org/v1/doc/features.html#process-namespaces", true);
                logger.configureLogger(new Properties());
                process();
            } catch (Exception e) {
                // Best effort: keep whatever was parsed before the failure.
                logger.logException(e);
            }
            logger.info("finished " + pages + " pages with " + errors + " errors");
            return articles;
        }

        /**
         * Drives the pull parser over the whole document, dispatching each event to the
         * matching handler until the end of the document is reached.
         *
         * <p>Text inside {@code comment} and {@code text} elements is passed through
         * {@code parse}; all other text is XML-escaped. Text outside a page is ignored.
         *
         * @throws XmlPullParserException if the document is not well-formed
         * @throws IOException if the element structure is inconsistent or an unexpected
         *         event type is reported
         */
        private static void process() throws XmlPullParserException, IOException {
            path = new LinkedList<>();
            article = null;
            title = null;
            logger.info("starting loop");
            while (true) {
                int event = xpp.next();
                switch (event) {
                    case XmlPullParser.END_DOCUMENT:
                        processEndDocument();
                        return;
                    case XmlPullParser.START_TAG:
                        processStartElement(xpp.getName());
                        break;
                    case XmlPullParser.END_TAG:
                        processEndElement(xpp.getName());
                        break;
                    case XmlPullParser.TEXT:
                        if (null != article) {
                            String container = path.getLast();
                            if ("comment".equals(container) || "text".equals(container)) {
                                String rendered = parse(xpp.getText());
                                // A null result means "nothing to add"; appending it would
                                // put the literal text "null" into the page.
                                if (null != rendered) {
                                    article.append(rendered);
                                }
                            } else {
                                article.append(Utilities.escapeXml(xpp.getText()));
                            }
                        }
                        break;
                    default:
                        throw new IOException("unexpected event: " + event + " at " + xpp.getPositionDescription());
                }
            }
        }

        /**
         * Renders a piece of wiki text through bliki's {@code WikiModel} and returns the
         * rendered markup if it is well-formed XML.
         *
         * <p>The rendered markup is wrapped in a dummy element and run through a pull parser
         * purely as a check. If the check fails, a warning is logged, the error counter is
         * incremented and the XML-escaped original text is returned instead.
         *
         * @param str raw wiki text, may be {@code null}
         * @return the rendered markup, the escaped input if the markup is not usable, or the
         *         empty string when there is nothing to render; never {@code null}
         * @throws IOException if the checking parser reports an unexpected event type or
         *         fails to read its input
         */
        private static String parse(String str) throws IOException {
            // The caller appends the result to a StringBuilder, where null would show up as
            // the text "null"; use the empty string for "nothing to add".
            if (null == str || "".equals(str.trim())) {
                return "";
            }
            String rendered = new WikiModel("${image}", "${title}").render(str);
            if (null == rendered) {
                return "";
            }
            if ("".equals(rendered.trim())) {
                return rendered;
            }
            try {
                parser = factory.newPullParser();
                parser.setInput(new StringReader("<dummy>" + rendered + "</dummy>"));
                parser.setFeature("http://xmlpull.org/v1/doc/features.html#process-namespaces", true);
                while (true) {
                    try {
                        int event = parser.next();
                        switch (event) {
                            case XmlPullParser.END_DOCUMENT:
                                return rendered;
                            case XmlPullParser.START_TAG:
                            case XmlPullParser.END_TAG:
                                // Results are unused; the accessors are called only so that
                                // any failure they raise is caught here.
                                parser.getName();
                                parser.getNamespace();
                                parser.getText();
                                break;
                            case XmlPullParser.TEXT: {
                                String text = parser.getText();
                                if (null != text) {
                                    for (char c : text.toCharArray()) {
                                        // Control characters other than tab, LF and CR are rejected.
                                        if ('\t' != c && '\n' != c && '\r' != c && c <= 31) {
                                            throw new XmlPullParserException("bad codepoint value: " + ((int) c), parser, null);
                                        }
                                    }
                                }
                                break;
                            }
                            default:
                                throw new IOException("unexpected event: " + event + " at " + parser.getPositionDescription());
                        }
                    } catch (ArrayIndexOutOfBoundsException e) {
                        // Reported the same way as any other parse failure.
                        throw new XmlPullParserException(e.getMessage(), parser, null);
                    }
                }
            } catch (XmlPullParserException e2) {
                logger.warning(title + ": " + e2.getMessage());
                errors++;
                return Utilities.escapeXml(str);
            }
        }

        /**
         * Handles an end tag: checks that it closes the innermost open element, copies the
         * tag into the current page and, when the tag closes a page, stores the finished
         * article under the URI {@code <language>wiki/<title>}.
         *
         * @param str name of the element being closed
         * @throws IOException if the tag does not match the innermost open element
         */
        private static void processEndElement(String str) throws IOException {
            String open = path.getLast();
            if (!open.equals(str)) {
                throw new IOException("found " + str + " expected " + open + "; " + title + "; " + article);
            }
            path.removeLast();
            if (null == article) {
                // Outside a page there is nothing to copy.
                return;
            }
            // NOTE(review): relies on the parser returning the raw tag text from getText()
            // on an END_TAG event - confirm for the parser implementation in use.
            article.append(xpp.getText());
            if (PAGE.equals(str)) {
                if (articles == null) {
                    articles = new ArrayList<>();
                }
                articles.add(new Article(language + "wiki/" + title, article));
                article = null;
            }
        }

        /**
         * Handles a start tag. The root element supplies the namespace and language, a page
         * element opens a new article, and any other element is copied into the open article.
         * A title element is consumed whole: its trimmed text is remembered, written escaped
         * into the article, and its end tag is processed right away.
         *
         * @param str name of the element being opened
         * @throws IOException if a page starts while another one is still open
         * @throws XmlPullParserException if reading the title text fails
         */
        private static void processStartElement(String str) throws IOException, XmlPullParserException {
            path.add(str);
            if (ROOT.equals(str)) {
                namespace = xpp.getNamespace();
                language = xpp.getAttributeValue(NS_XML, "lang");
            } else if (PAGE.equals(str)) {
                if (article != null) {
                    throw new IOException("article not null at start of page");
                }
                // Re-declare namespace and language on each page so it stands on its own.
                StringBuilder opening = new StringBuilder("<page");
                if (namespace != null) {
                    opening.append(" xmlns=\"").append(namespace).append("\"");
                }
                if (language != null) {
                    opening.append(" xml:lang=\"").append(language).append("\"");
                }
                opening.append(">");
                article = opening;
                pages++;
            } else {
                boolean insidePage = article != null;
                if (insidePage && !xpp.isEmptyElementTag()) {
                    article.append(xpp.getText());
                }
                if (TITLE.equals(str)) {
                    // nextText() leaves the parser on the title's end tag.
                    title = xpp.nextText().trim();
                    article.append(Utilities.escapeXml(title));
                    processEndElement(str);
                }
            }
        }

        /**
         * Verifies that the document ended cleanly, with no element and no article still
         * open, and closes the session if one is set.
         *
         * @throws IOException if an element or an article is still open
         */
        private static void processEndDocument() throws IOException {
            if (!path.isEmpty()) {
                StringBuilder problem = new StringBuilder("document end before end tag (");
                problem.append(path.size()).append(") ");
                problem.append(path.getLast()).append(" ");
                problem.append(xpp.getPositionDescription());
                throw new IOException(problem.toString());
            }
            if (article != null) {
                StringBuilder problem = new StringBuilder("article not null at end of document: ");
                problem.append(title).append("; ");
                problem.append(article.toString()).append("; ");
                problem.append(xpp.getPositionDescription());
                throw new IOException(problem.toString());
            }
            if (session == null) {
                return;
            }
            session.close();
        }
    }

    /**
     * Does nothing: the input stream opened in {@code initialize} is already closed there,
     * so this reader holds no open resource.
     */
    public void close() throws IOException {
    }

    /**
     * Returns the key of the current record, as set by the last successful
     * {@code nextKeyValue} call.
     *
     * <p>This is the {@code RecordReader} method under its real name; the decompiler had
     * renamed it, which left the abstract method unimplemented.
     *
     * @return the reusable key object
     */
    public Text getCurrentKey() throws IOException, InterruptedException {
        return this.key;
    }

    /**
     * Decompiler-generated name of {@link #getCurrentKey()}, kept so that existing
     * references keep working.
     *
     * @return the reusable key object
     */
    public Text m117getCurrentKey() throws IOException, InterruptedException {
        return getCurrentKey();
    }

    /**
     * Returns the value of the current record, as set by the last successful
     * {@code nextKeyValue} call.
     *
     * <p>This is the {@code RecordReader} method under its real name; the decompiler had
     * renamed it, which left the abstract method unimplemented.
     *
     * @return the reusable value object
     */
    public Text getCurrentValue() throws IOException, InterruptedException {
        return this.value;
    }

    /**
     * Decompiler-generated name of {@link #getCurrentValue()}, kept so that existing
     * references keep working.
     *
     * @return the reusable value object
     */
    public Text m116getCurrentValue() throws IOException, InterruptedException {
        return getCurrentValue();
    }

    /**
     * Reports how much of the split has been handed out.
     *
     * @return the fraction of articles already returned, between 0 and 1; 0 when there are
     *         no articles
     */
    public float getProgress() throws IOException, InterruptedException {
        if (this.articles == null || this.articles.isEmpty()) {
            return 0.0f;
        }
        // Divide as float: integer division would report 0 until the very last record.
        return (float) this.recordCount / this.articles.size();
    }

    /**
     * Reads the pages that belong to the given split and converts them into articles.
     *
     * <p>The split's bytes are read from its start offset, everything before the first
     * {@code <page>} is discarded, and the text is cut after a closing {@code </page>}.
     * A page whose start lies in this split but whose end does not is completed by reading
     * past the end of the split.
     *
     * @param inputSplit the split to read; must be a {@code FileSplit}
     * @param taskAttemptContext supplies the configuration used to open the file
     * @throws IOException if the file cannot be opened or read
     */
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        FileSplit split = (FileSplit) inputSplit;
        Path file = split.getPath();
        StringBuilder pages = new StringBuilder();
        // try-with-resources: the stream is closed even if a read fails part-way.
        try (FSDataInputStream in = file.getFileSystem(taskAttemptContext.getConfiguration()).open(file)) {
            in.seek(split.getStart());
            readPages(in, split.getLength(), pages);
        }
        this.articles = WikiModelProcessor.process(pages);
    }

    /**
     * Collects into {@code pages} the text of all pages that start within the next
     * {@code splitLength} bytes of {@code in}.
     *
     * <p>Tags are searched for in the accumulated text, not per chunk, so a tag that
     * straddles two reads is still found.
     * NOTE: a {@code <page>} tag that itself straddles the end of the split is not detected.
     *
     * @param in stream positioned at the start of the split
     * @param splitLength number of bytes that belong to the split
     * @param pages receives the page text; left empty when no page starts in the split
     * @throws IOException if reading fails
     */
    private static void readPages(FSDataInputStream in, long splitLength, StringBuilder pages) throws IOException {
        // Assumes the dump is UTF-8. One decoder is used for the whole split so that a
        // character split across two reads is decoded correctly.
        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // The extra 4 bytes hold an incomplete character carried over from the previous read.
        ByteBuffer bytes = ByteBuffer.allocate(BUFFER_SIZE + 4);
        CharBuffer chars = CharBuffer.allocate(BUFFER_SIZE + 4);
        long bytesRead = 0;
        boolean started = false;
        boolean eof = false;

        // Phase 1: the split's own bytes.
        while (bytesRead < splitLength) {
            int wanted = (int) Math.min(splitLength - bytesRead, BUFFER_SIZE);
            int read = readChunk(in, wanted, bytes, chars, decoder, pages);
            if (read == -1) {
                System.out.println("Unexpected EOF: bytesTotal=" + splitLength + "bytesRead=" + bytesRead);
                eof = true;
                break;
            }
            bytesRead += read;
            if (!started) {
                int begin = pages.indexOf(BEGIN_PAGE_TAG);
                if (begin >= 0) {
                    pages.delete(0, begin);
                    started = true;
                } else {
                    // Keep only a possible partial start tag at the very end.
                    int keep = Math.min(pages.length(), BEGIN_PAGE_TAG.length() - 1);
                    pages.delete(0, pages.length() - keep);
                }
            }
        }
        if (!started) {
            pages.setLength(0);
            return;
        }

        // Phase 2: decide where the text ends.
        int lastEnd = pages.lastIndexOf(END_PAGE_TAG);
        int cut = lastEnd < 0 ? -1 : lastEnd + END_PAGE_TAG.length();
        boolean docEnd = pages.indexOf(END_DOC_TAG, Math.max(0, pages.length() - BUFFER_SIZE)) >= 0;
        if (cut >= 0 && (docEnd || isBlankFrom(pages, cut))) {
            // Nothing is pending after the last complete page: either the dump ends here
            // or only whitespace follows, so the next page belongs to the next split.
            pages.setLength(cut);
            System.out.println("Found end of doc.");
            return;
        }
        if (eof) {
            return;
        }

        // Phase 3: a page is still open, so read past the split until it closes.
        int searchFrom = Math.max(0, pages.length() - (END_PAGE_TAG.length() - 1));
        while (true) {
            int read = readChunk(in, READ_AHEAD_SIZE, bytes, chars, decoder, pages);
            if (read == -1) {
                System.out.println("Unexpected EOF: bytesTotal=" + splitLength + "bytesRead=" + bytesRead);
                // Show the tail of what was collected to help diagnose the truncated input.
                System.out.println(pages.substring(Math.max(0, pages.length() - READ_AHEAD_SIZE)));
                return;
            }
            bytesRead += read;
            int end = pages.indexOf(END_PAGE_TAG, searchFrom);
            if (end >= 0) {
                pages.setLength(end + END_PAGE_TAG.length());
                return;
            }
            searchFrom = Math.max(0, pages.length() - (END_PAGE_TAG.length() - 1));
        }
    }

    /**
     * Reads at most {@code max} bytes, decodes them and appends the resulting characters
     * to {@code out}. Bytes of an incomplete trailing character stay in {@code bytes} and
     * are decoded together with the next read.
     *
     * @return the number of bytes read, or -1 at end of stream
     */
    private static int readChunk(FSDataInputStream in, int max, ByteBuffer bytes, CharBuffer chars,
            CharsetDecoder decoder, StringBuilder out) throws IOException {
        int room = Math.min(max, bytes.remaining());
        int read = in.read(bytes.array(), bytes.position(), room);
        if (read <= 0) {
            return read;
        }
        bytes.position(bytes.position() + read);
        bytes.flip();
        // chars is as large as bytes and UTF-8 yields at most one char per byte,
        // so the output buffer cannot overflow.
        decoder.decode(bytes, chars, false);
        bytes.compact();
        chars.flip();
        out.append(chars);
        chars.clear();
        return read;
    }

    /** Returns whether {@code text} holds only whitespace from index {@code from} to its end. */
    private static boolean isBlankFrom(CharSequence text, int from) {
        for (int i = from; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Advances to the next article, loading its title into the key and its page content
     * into the value.
     *
     * @return {@code true} if a record was loaded, {@code false} when no articles remain
     */
    public boolean nextKeyValue() throws IOException, InterruptedException {
        boolean hasNext = this.articles != null && this.recordCount < this.articles.size();
        if (hasNext) {
            Article current = this.articles.get(this.recordCount);
            this.key.set(current.title);
            this.value.set(current.pageContent.toString());
            // Advance only after both holders were filled.
            this.recordCount++;
        }
        return hasNext;
    }
}
