package io.annot8.components.documents.processors;

import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.ProcessingException;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.common.data.content.Row;
import io.annot8.common.data.content.Table;
import io.annot8.components.documents.data.ExtractionWithProperties;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.imageio.ImageIO;
import org.odftoolkit.odfdom.doc.OdfTextDocument;
import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.incubator.doc.draw.OdfDrawFrame;
import org.odftoolkit.odfdom.incubator.meta.OdfMetaDocumentStatistic;
import org.odftoolkit.odfdom.incubator.meta.OdfOfficeMeta;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

@ComponentDescription("Extracts image and text from Open Document Text (*.odt) files")
@ComponentTags({"documents", "opendocument", "odt", "extractor", "text", "images", "metadata", "tables"})
@ComponentName("Open Document Text (ODT) Extractor")
@SettingsClass(DocumentExtractorSettings.class)
/* loaded from: input_file:io/annot8/components/documents/processors/OdtExtractor.class */
public class OdtExtractor extends AbstractDocumentExtractorDescriptor<Processor, DocumentExtractorSettings> {

    /* loaded from: input_file:io/annot8/components/documents/processors/OdtExtractor$OdtTable.class */
    public static class OdtTable implements Table {
        private final List<Row> rows;
        private final List<String> columnNames;

        /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
        /* JADX WARN: Code restructure failed: missing block: B:43:0x0170, code lost:
        
            switch(r18) {
                case 0: goto L43;
                case 1: goto L44;
                case 2: goto L55;
                case 3: goto L56;
                case 4: goto L57;
                case 5: goto L58;
                case 6: goto L59;
                default: goto L60;
            };
         */
        /* JADX WARN: Code restructure failed: missing block: B:44:0x019c, code lost:
        
            r0.add(r0.getBooleanValue());
         */
        /* JADX WARN: Code restructure failed: missing block: B:46:0x0278, code lost:
        
            r15 = r15 + 1;
         */
        /* JADX WARN: Code restructure failed: missing block: B:47:0x01ac, code lost:
        
            r19 = "";
         */
        /* JADX WARN: Code restructure failed: missing block: B:48:0x01b5, code lost:
        
            if (r0.getCurrencyCode() == null) goto L49;
         */
        /* JADX WARN: Code restructure failed: missing block: B:50:0x01c0, code lost:
        
            if (r0.getCurrencyCode().isBlank() != false) goto L49;
         */
        /* JADX WARN: Code restructure failed: missing block: B:51:0x01c3, code lost:
        
            r19 = r0.getCurrencyCode() + " ";
         */
        /* JADX WARN: Code restructure failed: missing block: B:52:0x01ec, code lost:
        
            r0.add(r19 + r0.getCurrencyValue());
         */
        /* JADX WARN: Code restructure failed: missing block: B:55:0x01d7, code lost:
        
            if (r0.getCurrencySymbol() == null) goto L54;
         */
        /* JADX WARN: Code restructure failed: missing block: B:57:0x01e2, code lost:
        
            if (r0.getCurrencySymbol().isBlank() != false) goto L54;
         */
        /* JADX WARN: Code restructure failed: missing block: B:58:0x01e5, code lost:
        
            r19 = r0.getCurrencySymbol();
         */
        /* JADX WARN: Code restructure failed: missing block: B:59:0x0203, code lost:
        
            r0.add(r0.getDateValue().toInstant().atZone(java.time.ZoneId.systemDefault()).toLocalDate());
         */
        /* JADX WARN: Code restructure failed: missing block: B:61:0x021f, code lost:
        
            r0.add(r0.getDoubleValue());
         */
        /* JADX WARN: Code restructure failed: missing block: B:63:0x022f, code lost:
        
            r0.add(r0.getPercentageValue());
         */
        /* JADX WARN: Code restructure failed: missing block: B:65:0x023f, code lost:
        
            r0.add(r0.getStringValue());
         */
        /* JADX WARN: Code restructure failed: missing block: B:67:0x024f, code lost:
        
            r0.add(r0.getDateValue().toInstant().atZone(java.time.ZoneId.systemDefault()).toLocalTime());
         */
        /* JADX WARN: Code restructure failed: missing block: B:69:0x026b, code lost:
        
            r0.add(r0.getDisplayText());
         */
        /* JADX WARN: Removed duplicated region for block: B:19:0x00a6  */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Intended to build the table view ({@code rows} and {@code columnNames}) from an ODF table.
         *
         * <p>NOTE(review): the decompiler could not reconstruct this constructor. The body below is a
         * placeholder that unconditionally throws, so neither {@code rows} nor {@code columnNames} is
         * ever populated by this code. The JADX fragments preserved in the comments preceding this
         * constructor suggest the real implementation converts each cell according to its value type
         * (boolean, currency, date, ...) - confirm against the original source before using this class.
         *
         * @param r8 the ODF table to read (not used by this placeholder body)
         * @throws UnsupportedOperationException always, because the method body was not decompiled
         */
        public OdtTable(org.odftoolkit.odfdom.doc.table.OdfTable r8) {
            /*
                Method dump skipped, instructions count: 681
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: io.annot8.components.documents.processors.OdtExtractor.OdtTable.<init>(org.odftoolkit.odfdom.doc.table.OdfTable):void");
        }

        /**
         * Reports the number of columns in this table.
         *
         * @return the size of the column-name list
         */
        public int getColumnCount() {
            return columnNames.size();
        }

        /**
         * Reports the number of rows held by this table.
         *
         * @return the size of the row list
         */
        public int getRowCount() {
            return rows.size();
        }

        /**
         * Exposes the column names of this table.
         *
         * @return an {@link Optional} wrapping the column-name list held by this table
         */
        public Optional<List<String>> getColumnNames() {
            final List<String> names = columnNames;
            return Optional.of(names);
        }

        /**
         * Streams the rows of this table.
         *
         * @return a new sequential stream over the row list
         */
        public Stream<Row> getRows() {
            return rows.stream();
        }
    }

    /* loaded from: input_file:io/annot8/components/documents/processors/OdtExtractor$Processor.class */
    public static class Processor extends AbstractDocumentExtractorProcessor<OdfTextDocument, DocumentExtractorSettings> {
        private final Map<String, OdfTextDocument> cache;

        /**
         * Creates a processor that starts with an empty document cache.
         *
         * @param context passed through to the superclass constructor
         * @param documentExtractorSettings passed through to the superclass constructor
         */
        public Processor(Context context, DocumentExtractorSettings documentExtractorSettings) {
            super(context, documentExtractorSettings);
            cache = new HashMap<>();
        }

        /** Discards every document held in the cache. */
        @Override
        public void reset() {
            cache.clear();
        }

        /**
         * States whether this processor supports metadata extraction.
         *
         * @return always {@code true}
         */
        @Override
        public boolean isMetadataSupported() {
            return true;
        }

        /**
         * States whether this processor supports text extraction.
         *
         * @return always {@code true}
         */
        @Override
        public boolean isTextSupported() {
            return true;
        }

        /**
         * States whether this processor supports image extraction.
         *
         * @return always {@code true}
         */
        @Override
        public boolean isImagesSupported() {
            return true;
        }

        /**
         * States whether this processor supports table extraction.
         *
         * @return always {@code true}
         */
        @Override
        public boolean isTablesSupported() {
            return true;
        }

        /**
         * Decides whether a file can be handled by trying to load it as an ODT document.
         *
         * <p>A successfully loaded document is stored in the cache under the content id so that
         * {@code extractDocument} can return it without loading it again.
         *
         * @param fileContent the file-backed content to probe
         * @return {@code true} if the file loaded as an ODT document, {@code false} if loading threw
         */
        @Override
        public boolean acceptFile(FileContent fileContent) {
            boolean accepted;
            try {
                String contentId = fileContent.getId();
                File source = (File) fileContent.getData();
                cache.put(contentId, OdfTextDocument.loadDocument(source));
                accepted = true;
            } catch (Exception failure) {
                // Any load failure just means "not an ODT"; record the reason at debug level.
                log().debug("FileContent {} not accepted due to: {}", fileContent.getId(), failure.getMessage());
                accepted = false;
            }
            return accepted;
        }

        /**
         * Decides whether stream content can be handled by trying to load it as an ODT document.
         *
         * <p>A successfully loaded document is stored in the cache under the content id so that
         * {@code extractDocument} can return it without loading it again.
         *
         * @param inputStreamContent the stream-backed content to probe
         * @return {@code true} if the stream loaded as an ODT document, {@code false} if loading threw
         */
        @Override
        public boolean acceptInputStream(InputStreamContent inputStreamContent) {
            boolean accepted;
            try {
                String contentId = inputStreamContent.getId();
                InputStream source = (InputStream) inputStreamContent.getData();
                cache.put(contentId, OdfTextDocument.loadDocument(source));
                accepted = true;
            } catch (Exception failure) {
                // Any load failure just means "not an ODT"; record the reason at debug level.
                log().debug("InputStreamContent {} not accepted due to: {}", inputStreamContent.getId(), failure.getMessage());
                accepted = false;
            }
            return accepted;
        }

        /* JADX WARN: Can't rename method to resolve collision */
        /**
         * Returns the ODT document for the given file content.
         *
         * <p>The cache is consulted first; only when no entry exists for the content id is the file
         * loaded again.
         *
         * @param fileContent the file-backed content to read
         * @return the cached document, or a freshly loaded one
         * @throws IOException if the file has to be loaded and loading fails
         */
        @Override
        public OdfTextDocument extractDocument(FileContent fileContent) throws IOException {
            String contentId = fileContent.getId();
            if (!cache.containsKey(contentId)) {
                try {
                    return OdfTextDocument.loadDocument((File) fileContent.getData());
                } catch (Exception cause) {
                    throw new IOException("Unable to read ODT document", cause);
                }
            }
            return cache.get(contentId);
        }

        /* JADX WARN: Can't rename method to resolve collision */
        /**
         * Returns the ODT document for the given stream content.
         *
         * <p>The cache is consulted first; only when no entry exists for the content id is the stream
         * loaded again.
         *
         * @param inputStreamContent the stream-backed content to read
         * @return the cached document, or a freshly loaded one
         * @throws IOException if the stream has to be loaded and loading fails
         */
        @Override
        public OdfTextDocument extractDocument(InputStreamContent inputStreamContent) throws IOException {
            String contentId = inputStreamContent.getId();
            if (!cache.containsKey(contentId)) {
                try {
                    return OdfTextDocument.loadDocument((InputStream) inputStreamContent.getData());
                } catch (Exception cause) {
                    throw new IOException("Unable to read ODT document", cause);
                }
            }
            return cache.get(contentId);
        }

        /**
         * Collects the document-level metadata of an ODT document into a property map.
         *
         * <p>Covers the standard office metadata (creator, dates, title, ...), the template link when
         * one is present, user-defined fields (stored under {@code "custom." + name}) and the document
         * statistics (page count, word count, ...).
         *
         * <p>The editing duration and the statistics block are optional in a document's metadata, so
         * both are null-checked here instead of being dereferenced blindly: a missing editing duration
         * is stored as {@code null} and missing statistics are simply left out of the map.
         *
         * @param odfTextDocument the document to read metadata from
         * @return a mutable map of property name to value; individual values may be {@code null}
         */
        @Override
        public Map<String, Object> extractMetadata(OdfTextDocument odfTextDocument) {
            Map<String, Object> metadata = new HashMap<>();
            OdfOfficeMeta officeMetadata = odfTextDocument.getOfficeMetadata();

            metadata.put(DocumentProperties.CREATION_DATE, toTemporal(officeMetadata.getCreationDate()));
            metadata.put(DocumentProperties.CREATOR, officeMetadata.getCreator());
            metadata.put(DocumentProperties.LAST_MODIFIED_DATE, toTemporal(officeMetadata.getDate()));
            metadata.put("description", officeMetadata.getDescription());
            metadata.put(DocumentProperties.REVISION, officeMetadata.getEditingCycles());

            // The editing duration may be absent; avoid a NullPointerException on getValue().
            var editingDuration = officeMetadata.getEditingDuration();
            metadata.put(
                    DocumentProperties.EDITING_DURATION,
                    editingDuration == null ? null : editingDuration.getValue());

            metadata.put(DocumentProperties.GENERATOR, officeMetadata.getGenerator());
            metadata.put(DocumentProperties.INITIAL_CREATOR, officeMetadata.getInitialCreator());
            metadata.put(DocumentProperties.KEYWORDS, officeMetadata.getKeywords());
            metadata.put("language", officeMetadata.getLanguage());
            metadata.put(DocumentProperties.LAST_PRINTED_DATE, officeMetadata.getPrintDate());
            metadata.put(DocumentProperties.LAST_PRINTED_BY, officeMetadata.getPrintedBy());
            metadata.put(DocumentProperties.SUBJECT, officeMetadata.getSubject());
            metadata.put("title", officeMetadata.getTitle());

            if (officeMetadata.getTemplate() != null && officeMetadata.getTemplate().getMetaTemplateElement() != null) {
                metadata.put(DocumentProperties.TEMPLATE, officeMetadata.getTemplate().getMetaTemplateElement().getXlinkHrefAttribute());
            }

            var customNames = officeMetadata.getUserDefinedDataNames();
            if (customNames != null) {
                for (String customName : customNames) {
                    metadata.put("custom." + customName, officeMetadata.getUserDefinedDataValue(customName));
                }
            }

            // Statistics are optional too; skip them entirely when the document has none.
            OdfMetaDocumentStatistic documentStatistic = officeMetadata.getDocumentStatistic();
            if (documentStatistic != null) {
                metadata.put(DocumentProperties.CELL_COUNT, documentStatistic.getCellCount());
                metadata.put(DocumentProperties.CHARACTER_COUNT, documentStatistic.getCharacterCount());
                metadata.put(DocumentProperties.DRAW_COUNT, documentStatistic.getDrawCount());
                metadata.put(DocumentProperties.FRAME_COUNT, documentStatistic.getFrameCount());
                metadata.put(DocumentProperties.IMAGE_COUNT, documentStatistic.getImageCount());
                metadata.put(DocumentProperties.NW_CHARACTER_COUNT, documentStatistic.getNonWhitespaceCharacterCount());
                metadata.put(DocumentProperties.OBJECT_COUNT, documentStatistic.getObjectCount());
                metadata.put(DocumentProperties.OLE_OBJECT_COUNT, documentStatistic.getOleObjectCount());
                metadata.put(DocumentProperties.PAGE_COUNT, documentStatistic.getPageCount());
                metadata.put(DocumentProperties.PARAGRAPH_COUNT, documentStatistic.getParagraphCount());
                metadata.put(DocumentProperties.ROW_COUNT, documentStatistic.getRowCount());
                metadata.put(DocumentProperties.SENTENCE_COUNT, documentStatistic.getSentenceCount());
                metadata.put(DocumentProperties.SYLLABLE_COUNT, documentStatistic.getSyllableCount());
                metadata.put(DocumentProperties.TABLE_COUNT, documentStatistic.getTableCount());
                metadata.put(DocumentProperties.WORD_COUNT, documentStatistic.getWordCount());
            }
            return metadata;
        }

        /**
         * Extracts the document text as a single string: header text, then body text, then footer
         * text, in that order.
         *
         * <p>The three stages run one after another, each with its own error handler, so a failure is
         * reported with the message belonging to the stage that actually failed rather than being
         * re-wrapped by the handler of an enclosing stage.
         *
         * @param odfTextDocument the document to read text from
         * @return a single-element list holding the extracted text
         * @throws ProcessingException if the styles DOM or the content root cannot be read
         */
        @Override
        public Collection<ExtractionWithProperties<String>> extractText(OdfTextDocument odfTextDocument) {
            StringBuilder text = new StringBuilder();

            // Headers live in the styles DOM, not in the content.
            try {
                recurseNodes(odfTextDocument.getStylesDom().getElementsByTagName("style:header"), text);
            } catch (Exception e) {
                throw new ProcessingException("Unable to get Styles DOM from ODT", e);
            }

            try {
                recurseNodes(odfTextDocument.getContentRoot().getChildNodes(), text);
            } catch (Exception e) {
                throw new ProcessingException("Unable to get Content Root from ODT", e);
            }

            // Footers live in the styles DOM as well.
            try {
                recurseNodes(odfTextDocument.getStylesDom().getElementsByTagName("style:footer"), text);
            } catch (Exception e) {
                throw new ProcessingException("Unable to get Styles DOM from ODT", e);
            }

            return List.of(new ExtractionWithProperties<>(text.toString()));
        }

        /**
         * Walks a node list depth-first and appends its textual content to the given builder.
         *
         * <ul>
         *   <li>{@code text:s} contributes a single space
         *       (NOTE(review): any repeat count on the element is not consulted - confirm whether
         *       that is intended);</li>
         *   <li>{@code text:list-item} contributes one tab per enclosing {@code text:list} ancestor,
         *       followed by {@code "* "};</li>
         *   <li>text nodes contribute their text content;</li>
         *   <li>other nodes with children are recursed into, and {@code text:p} / {@code text:h}
         *       are followed by a newline.</li>
         * </ul>
         *
         * @param nodeList the nodes to visit, in order
         * @param sb the builder receiving the extracted text
         */
        private void recurseNodes(NodeList nodeList, StringBuilder sb) {
            final int count = nodeList.getLength();
            for (int index = 0; index < count; index++) {
                Node current = nodeList.item(index);
                String name = current.getNodeName();

                if ("text:s".equals(name)) {
                    sb.append(" ");
                } else if ("text:list-item".equals(name)) {
                    sb.append("\t".repeat(countEnclosingLists(current))).append("* ");
                }

                if (current.getNodeType() == Node.TEXT_NODE) {
                    sb.append(current.getTextContent());
                    continue;
                }
                if (!current.hasChildNodes()) {
                    continue;
                }

                recurseNodes(current.getChildNodes(), sb);
                boolean endsLine = "text:p".equals(name) || "text:h".equals(name);
                if (endsLine) {
                    sb.append("\n");
                }
            }
        }

        /** Counts the {@code text:list} elements among the ancestors of the given node. */
        private static int countEnclosingLists(Node start) {
            int depth = 0;
            for (Node ancestor = start.getParentNode(); ancestor != null; ancestor = ancestor.getParentNode()) {
                if ("text:list".equals(ancestor.getNodeName())) {
                    depth++;
                }
            }
            return depth;
        }

        /**
         * Extracts every image referenced from a {@code draw:frame} in the document content.
         *
         * <p>Each {@code draw:image} inside a frame is read from the document package via its
         * {@code xlink:href}. An image that cannot be decoded, or whose read fails with an
         * {@link IOException}, is logged and skipped; a failure to read image metadata is logged and
         * the image is still returned without that metadata.
         *
         * <p>Every returned extraction carries the properties {@code name} (the frame's
         * {@code draw:name}), {@code index} (1-based running image number across the document),
         * {@code title} and {@code description} (from the frame's {@code svg:title} /
         * {@code svg:desc}, or {@code null} when absent), plus whatever image metadata was readable.
         *
         * @param odfTextDocument the document to extract images from
         * @return the successfully decoded images with their properties, in document order
         * @throws ProcessingException if the content cannot be traversed
         */
        @Override
        public Collection<ExtractionWithProperties<BufferedImage>> extractImages(OdfTextDocument odfTextDocument) {
            List<ExtractionWithProperties<BufferedImage>> extracted = new ArrayList<>();
            int imageNumber = 0;
            try {
                NodeList frames = odfTextDocument.getContentRoot().getElementsByTagName("draw:frame");
                for (int frameIndex = 0; frameIndex < frames.getLength(); frameIndex++) {
                    OdfDrawFrame frame = (OdfDrawFrame) frames.item(frameIndex);
                    String frameName = frame.getAttribute("draw:name");
                    NodeList images = frame.getElementsByTagName("draw:image");
                    for (int imageIndex = 0; imageIndex < images.getLength(); imageIndex++) {
                        imageNumber++;
                        // Index with the inner loop variable: using the frame index here selected the
                        // wrong image, or null, for any frame after the first.
                        // (Element is fully qualified so the file's import list stays untouched.)
                        String href = ((org.w3c.dom.Element) images.item(imageIndex)).getAttribute("xlink:href");
                        try {
                            BufferedImage image;
                            // ImageIO.read does not close the stream it is given.
                            try (InputStream imageStream = odfTextDocument.getPackage().getInputStream(href)) {
                                image = ImageIO.read(imageStream);
                            }
                            if (image == null) {
                                log().warn("Null image {} extracted from document", imageNumber);
                                continue;
                            }

                            Map<String, Object> properties = new HashMap<>();
                            // Metadata is read from a second, fresh stream over the same package entry.
                            try (InputStream metadataStream = odfTextDocument.getPackage().getInputStream(href)) {
                                properties.putAll(toMap(ImageMetadataReader.readMetadata(metadataStream)));
                            } catch (ImageProcessingException | IOException e) {
                                log().warn("Unable to extract metadata from image {}", imageNumber, e);
                            }
                            // Set after the metadata so these keys win over same-named metadata entries.
                            properties.put("name", frameName);
                            properties.put("index", imageNumber);
                            properties.put("title", getValueOfFirstElement(frame.getElementsByTagName("svg:title")));
                            properties.put("description", getValueOfFirstElement(frame.getElementsByTagName("svg:desc")));
                            extracted.add(new ExtractionWithProperties<>(image, properties));
                        } catch (IOException e) {
                            log().warn("Unable to extract image {} from document", imageNumber, e);
                        }
                    }
                }
                return extracted;
            } catch (Exception e) {
                throw new ProcessingException("Unable to get Content Root from ODT", e);
            }
        }

        /**
         * Converts every table in the document's table list into an extraction.
         *
         * @param odfTextDocument the document whose tables are extracted
         * @return one extraction per table, in the order of the document's table list
         * @throws ProcessingException declared by the overridden method
         */
        @Override
        public Collection<ExtractionWithProperties<Table>> extractTables(OdfTextDocument odfTextDocument) throws ProcessingException {
            List<ExtractionWithProperties<Table>> tables = new ArrayList<>();
            for (OdfTable odfTable : odfTextDocument.getTableList()) {
                tables.add(transformTable(odfTable));
            }
            return tables;
        }

        /**
         * Wraps an ODF table as an extraction, attaching the table name as the {@code "name"} property
         * when the name is neither {@code null} nor blank.
         *
         * @param odfTable the ODF table to wrap
         * @return the table together with its properties
         */
        private static ExtractionWithProperties<Table> transformTable(OdfTable odfTable) {
            Map<String, Object> properties = new HashMap<>();
            String name = odfTable.getTableName();
            boolean named = name != null && !name.isBlank();
            if (named) {
                properties.put("name", name);
            }
            return new ExtractionWithProperties<>(new OdtTable(odfTable), properties);
        }

        /**
         * Returns the stripped text content of the first node in the list.
         *
         * @param nodeList the nodes to look at
         * @return the first node's text content with surrounding whitespace removed, or {@code null}
         *     when the list is empty
         */
        private String getValueOfFirstElement(NodeList nodeList) {
            boolean empty = nodeList.getLength() == 0;
            return empty ? null : nodeList.item(0).getTextContent().strip();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates the processor that performs the ODT extraction.
     *
     * @param context handed to the new processor
     * @param documentExtractorSettings handed to the new processor
     * @return a new {@link Processor}
     */
    public Processor createComponent(Context context, DocumentExtractorSettings documentExtractorSettings) {
        Processor processor = new Processor(context, documentExtractorSettings);
        return processor;
    }
}
