package wiki.xsx.core.pdf.doc;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

/* loaded from: input_file:wiki/xsx/core/pdf/doc/XEasyPdfDocumentExtractor.class */
public class XEasyPdfDocumentExtractor {
    private final PDDocument document;
    private final XEasyPdfDocument pdfDocument;
    private final SimpleExtractor simpleExtractor;
    private final RegionExtractor regionExtractor;
    private static final Pattern TABLE_PATTERN = Pattern.compile("(\\S[^\\n\\r]+)");

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:wiki/xsx/core/pdf/doc/XEasyPdfDocumentExtractor$RegionExtractor.class */
    public static class RegionExtractor extends PDFTextStripper {
        private final Map<String, ArrayList<List<TextPosition>>> regionCharacterList = new HashMap();
        private final Map<String, StringWriter> regionText = new HashMap(256);
        private final Map<String, Rectangle> regionArea = new HashMap(32);

        RegionExtractor() throws IOException {
            super.setSortByPosition(true);
        }

        void addRegion(String str, Rectangle rectangle) {
            this.regionArea.put(str, rectangle);
        }

        Map<String, String> extract(PDPage pDPage) {
            return extract(pDPage, " ");
        }

        Map<String, String> extract(PDPage pDPage, String str) {
            HashMap hashMap;
            if (this.regionArea.isEmpty()) {
                hashMap = new HashMap(0);
            } else {
                Set<String> keySet = this.regionArea.keySet();
                hashMap = new HashMap(keySet.size());
                for (String str2 : keySet) {
                    setStartPage(getCurrentPageNo());
                    setEndPage(getCurrentPageNo());
                    setWordSeparator(str);
                    ArrayList<List<TextPosition>> arrayList = new ArrayList<>(256);
                    arrayList.add(new ArrayList(256));
                    this.regionCharacterList.put(str2, arrayList);
                    this.regionText.put(str2, new StringWriter());
                }
                if (pDPage.hasContents()) {
                    processPage(pDPage);
                }
                for (String str3 : keySet) {
                    hashMap.put(str3, this.regionText.get(str3).toString());
                }
            }
            return hashMap;
        }

        protected void processTextPosition(TextPosition textPosition) {
            for (Map.Entry<String, Rectangle> entry : this.regionArea.entrySet()) {
                if (entry.getValue().contains(textPosition.getX(), textPosition.getY())) {
                    this.charactersByArticle = this.regionCharacterList.get(entry.getKey());
                    super.processTextPosition(textPosition);
                }
            }
        }

        protected void writePage() throws IOException {
            for (String str : this.regionArea.keySet()) {
                this.charactersByArticle = this.regionCharacterList.get(str);
                this.output = this.regionText.get(str);
                super.writePage();
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:wiki/xsx/core/pdf/doc/XEasyPdfDocumentExtractor$SimpleExtractor.class */
    /**
     * Text stripper bound to one document that appends whole-page text, or regex matches
     * found in that text, to a caller-supplied list.
     *
     * <p>NOTE(review): {@code getText} declares {@code IOException} in PDFBox while the
     * {@code extract} methods have no {@code throws} clause; the pre-decompilation source
     * presumably suppressed the checked exception — confirm before compiling.
     */
    public static class SimpleExtractor extends PDFTextStripper {
        private final PDDocument document;

        /**
         * @param pDDocument document every extraction call reads from
         * @throws IOException if the superclass constructor fails
         */
        public SimpleExtractor(PDDocument pDDocument) throws IOException {
            this.document = pDDocument;
        }

        /**
         * Extracts text from the given pages, or from the whole document when no page is given.
         *
         * @param list receives the extracted text (one entry per extraction, or one per regex match)
         * @param str  regular expression to search for; {@code null} or blank means "take all text"
         * @param iArr zero-based page indices; {@code null} or empty selects every page
         */
        void extract(List<String> list, String str, int... iArr) {
            if (iArr == null || iArr.length <= 0) {
                // The page window is instance state: restore the full range so an earlier
                // page-restricted call does not silently narrow this "all pages" call.
                setStartPage(1);
                setEndPage(Integer.MAX_VALUE);
                extract(list, str);
                return;
            }
            for (int pageIndex : iArr) {
                // The stripper's page numbers are one-based.
                int pageNumber = pageIndex + 1;
                setStartPage(pageNumber);
                setEndPage(pageNumber);
                extract(list, str);
            }
        }

        /**
         * Runs the stripper over the currently configured page window.
         *
         * @param list receives the full text, or every match of {@code str} within it
         * @param str  regular expression; {@code null} or blank appends the full text instead
         */
        private void extract(List<String> list, String str) {
            String text = getText(this.document);
            if (str == null || str.trim().isEmpty()) {
                list.add(text);
                return;
            }
            Matcher matcher = Pattern.compile(str).matcher(text);
            while (matcher.find()) {
                list.add(matcher.group());
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates an extractor for the given wrapper document.
     *
     * <p>The PDFBox document is obtained once through {@code build()} and shared by the
     * simple text extractor; a separate region extractor is created alongside it.
     *
     * @param xEasyPdfDocument wrapper document handed back by {@link #finish()}
     */
    public XEasyPdfDocumentExtractor(XEasyPdfDocument xEasyPdfDocument) {
        PDDocument built = xEasyPdfDocument.build();
        this.pdfDocument = xEasyPdfDocument;
        this.document = built;
        this.simpleExtractor = new SimpleExtractor(built);
        this.regionExtractor = new RegionExtractor();
    }

    /**
     * Registers a named region on the shared region extractor.
     *
     * @param str       region name
     * @param rectangle area covered by the region
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor addRegion(String str, Rectangle rectangle) {
        RegionExtractor target = this.regionExtractor;
        target.addRegion(str, rectangle);
        return this;
    }

    /**
     * Extracts plain text without a regular-expression filter.
     *
     * @param list receives the extracted text
     * @param iArr zero-based page indices
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractText(List<String> list, int... iArr) {
        // A null pattern selects the unfiltered path of the three-argument overload.
        return extractText(list, (String) null, iArr);
    }

    /**
     * Extracts text, optionally keeping only the matches of a regular expression.
     *
     * @param list receives the extracted text or the individual matches
     * @param str  regular expression, passed through to the simple extractor
     * @param iArr zero-based page indices
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractText(List<String> list, String str, int... iArr) {
        SimpleExtractor extractor = this.simpleExtractor;
        extractor.extract(list, str, iArr);
        return this;
    }

    /**
     * Extracts the text of all registered regions, page by page.
     *
     * <p>One map (region name to text) is appended per processed page; pages that yield an
     * empty map are skipped.
     *
     * @param list receives one region-to-text map per page
     * @param iArr zero-based page indices; {@code null} or empty processes every page
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractTextByRegions(List<Map<String, String>> list, int... iArr) {
        if (iArr == null || iArr.length == 0) {
            // Iterate over ALL pages, including the last one (indices 0 .. pageCount - 1).
            int pageCount = this.document.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                addText(list, pageIndex);
            }
            return this;
        }
        for (int pageIndex : iArr) {
            addText(list, pageIndex);
        }
        return this;
    }

    /**
     * Extracts a simple table from a page, using a square region sized from the page's media box.
     *
     * @param list receives one list of cell strings per table row
     * @param i    zero-based page index
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractTextForSimpleTable(List<List<String>> list, int i) {
        PDRectangle pageBox = this.document.getPage(i).getMediaBox();
        int paddedWidth = ((int) pageBox.getWidth()) + 1;
        int paddedHeight = ((int) pageBox.getHeight()) + 1;
        // Square whose side is the larger padded page dimension.
        int side = paddedWidth >= paddedHeight ? paddedWidth : paddedHeight;
        Rectangle wholePage = new Rectangle(side, side);
        return extractTextByRegionsForSimpleTable(list, wholePage, i);
    }

    /**
     * Extracts a simple table from one rectangular region of a page.
     *
     * <p>The region text is produced with a marker string as word separator; every line
     * matched by {@code TABLE_PATTERN} becomes one row, split into cells on that marker.
     *
     * @param list      receives one fixed-size list of cell strings per row
     * @param rectangle region to read
     * @param i         zero-based page index
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractTextByRegionsForSimpleTable(List<List<String>> list, Rectangle rectangle, int i) {
        // Single definition of the marker used both to join words and to split cells.
        final String separator = "X-EasyPdf-Separator";
        final String regionName = "table";

        // A dedicated extractor keeps the caller-registered regions untouched.
        RegionExtractor tableExtractor = new RegionExtractor();
        tableExtractor.addRegion(regionName, rectangle);
        String tableText = tableExtractor.extract(this.document.getPage(i), separator).get(regionName);
        if (tableText == null || tableText.isEmpty()) {
            return this;
        }

        Matcher rows = TABLE_PATTERN.matcher(tableText);
        while (rows.find()) {
            list.add(Arrays.asList(rows.group().split(separator)));
        }
        return this;
    }

    /**
     * Collects the images referenced by the resources of every page.
     *
     * @param list receives the images
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractImage(List<BufferedImage> list) {
        for (PDPage page : this.document.getPages()) {
            PDResources pageResources = page.getResources();
            addImage(list, pageResources);
        }
        return this;
    }

    /**
     * Collects the images referenced by the resources of the selected pages.
     *
     * <p>Nothing is extracted when no index is supplied; negative indices are ignored.
     *
     * @param list receives the images
     * @param iArr zero-based page indices
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractImage(List<BufferedImage> list, int... iArr) {
        if (iArr == null || iArr.length <= 0) {
            return this;
        }
        PDPageTree pageTree = this.document.getPages();
        for (int pageIndex : iArr) {
            if (pageIndex < 0) {
                continue;
            }
            addImage(list, pageTree.get(pageIndex).getResources());
        }
        return this;
    }

    /**
     * Copies the form fields returned by the document's AcroForm into the given map.
     *
     * <p>Keys are fully qualified field names, values are the fields' string values.
     * The map is left untouched when the document has no AcroForm.
     *
     * @param map receives field name to field value
     * @return this extractor, for chaining
     */
    public XEasyPdfDocumentExtractor extractForm(Map<String, String> map) {
        PDAcroForm form = this.document.getDocumentCatalog().getAcroForm();
        if (form == null) {
            return this;
        }
        List<PDField> fields = form.getFields();
        for (PDField field : fields) {
            String fieldName = field.getFullyQualifiedName();
            String fieldValue = field.getValueAsString();
            map.put(fieldName, fieldValue);
        }
        return this;
    }

    /**
     * Ends the extraction chain.
     *
     * @return the wrapper document this extractor was created with
     */
    public XEasyPdfDocument finish() {
        XEasyPdfDocument owner = this.pdfDocument;
        return owner;
    }

    /**
     * Runs the shared region extractor on one page and appends the result when it is non-empty.
     *
     * @param list receives the region-to-text map of the page
     * @param i    zero-based page index
     */
    private void addText(List<Map<String, String>> list, int i) {
        PDPage page = this.document.getPage(i);
        Map<String, String> regionTexts = this.regionExtractor.extract(page);
        if (!regionTexts.isEmpty()) {
            list.add(regionTexts);
        }
    }

    /**
     * Recursively collects every image XObject reachable from a resource dictionary.
     *
     * <p>Image XObjects are decoded and appended; form XObjects are descended into through
     * their own resources. Other XObject kinds are ignored.
     *
     * <p>NOTE(review): {@code getXObject} and {@code getImage} declare {@code IOException}
     * in PDFBox while this method has no {@code throws} clause; the pre-decompilation source
     * presumably suppressed the checked exception — confirm before compiling.
     *
     * @param list        receives the decoded images
     * @param pDResources resources to scan; {@code null} (no resources present) is a no-op
     */
    private void addImage(List<BufferedImage> list, PDResources pDResources) {
        // Pages and form XObjects without a resource dictionary report null resources.
        if (pDResources == null) {
            return;
        }
        for (COSName xObjectName : pDResources.getXObjectNames()) {
            // Held as Object: the XObject may be an image, a form, or something else entirely.
            Object xObject = pDResources.getXObject(xObjectName);
            if (xObject instanceof PDImage) {
                list.add(((PDImage) xObject).getImage());
            } else if (xObject instanceof PDFormXObject) {
                addImage(list, ((PDFormXObject) xObject).getResources());
            }
        }
    }
}
