package io.annot8.components.documents.processors;

import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.ProcessingException;
import io.annot8.common.data.content.DefaultRow;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.common.data.content.Row;
import io.annot8.common.data.content.Table;
import io.annot8.common.utils.java.ConversionUtils;
import io.annot8.components.documents.data.ExtractionWithProperties;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.imageio.ImageIO;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFTable;

@ComponentDescription("Extracts image and text from Word Document (*.docx) files")
@ComponentTags({"documents", "word", "docx", "extractor", "text", "images", "metadata", "tables"})
@ComponentName("Word Document (DOCX) Extractor")
@SettingsClass(DocumentExtractorSettings.class)
/* loaded from: input_file:io/annot8/components/documents/processors/DocxExtractor.class */
public class DocxExtractor extends AbstractDocumentExtractorDescriptor<Processor, DocumentExtractorSettings> {

    /**
     * {@link Table} built from a Word table. The first row of the Word table supplies the column
     * names; every following row becomes one data {@link Row}. All rows are read eagerly in the
     * constructor, so the instance does not hold on to the originating {@link XWPFTable}.
     */
    public static class DocxTable implements Table {
        private final List<Row> rows;
        private final List<String> columnNames;

        /**
         * Reads all cells of {@code xWPFTable}, converting each cell's text with
         * {@link ConversionUtils#parseString}.
         *
         * @param xWPFTable the Word table to copy; a table without rows yields an empty table with
         *     no column names
         */
        public DocxTable(XWPFTable xWPFTable) {
            int totalRows = xWPFTable.getNumberOfRows();
            // Row 0 is the header, so there are at most totalRows - 1 data rows. Clamp at zero:
            // ArrayList rejects a negative initial capacity, which a row-less table would request.
            List<Row> dataRows = new ArrayList<>(Math.max(0, totalRows - 1));
            List<String> header = Collections.emptyList();
            for (int i = 0; i < totalRows; i++) {
                List<Object> cells = xWPFTable.getRow(i).getTableCells().stream()
                        .map(cell -> cell.getText())
                        .map(ConversionUtils::parseString)
                        .collect(Collectors.toList());
                if (i == 0) {
                    // Header names are the string form of the parsed header cells.
                    header = Collections.unmodifiableList(
                            cells.stream().map(Object::toString).collect(Collectors.toList()));
                } else {
                    // Data row indices are zero-based and do not count the header row.
                    dataRows.add(new DefaultRow(i - 1, header, cells));
                }
            }
            this.rows = Collections.unmodifiableList(dataRows);
            this.columnNames = header;
        }

        /** @return the number of column names taken from the header row */
        public int getColumnCount() {
            return this.columnNames.size();
        }

        /** @return the number of data rows, i.e. excluding the header row */
        public int getRowCount() {
            return this.rows.size();
        }

        /** @return the column names; always present, but empty when the table had no rows */
        public Optional<List<String>> getColumnNames() {
            return Optional.of(this.columnNames);
        }

        /** @return a fresh stream over the data rows in document order */
        public Stream<Row> getRows() {
            return this.rows.stream();
        }
    }

    /* loaded from: input_file:io/annot8/components/documents/processors/DocxExtractor$Processor.class */
    public static class Processor extends AbstractDocumentExtractorProcessor<XWPFDocument, DocumentExtractorSettings> {
        /**
         * Documents parsed by {@code acceptFile}/{@code acceptInputStream}, keyed by content ID, so
         * that the matching {@code extractDocument} call can reuse them; emptied by {@link #reset()}.
         */
        private final Map<String, XWPFDocument> cache = new HashMap<>();

        /**
         * Creates a processor with an empty document cache.
         *
         * @param context passed through to the superclass constructor
         * @param documentExtractorSettings passed through to the superclass constructor
         */
        public Processor(Context context, DocumentExtractorSettings documentExtractorSettings) {
            super(context, documentExtractorSettings);
        }

        /**
         * Drops every document cached by {@code acceptFile}/{@code acceptInputStream}.
         *
         * <p>NOTE(review): the cached {@link XWPFDocument} instances are discarded without calling
         * {@code close()} — confirm whether they need to be closed here.
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public void reset() {
            this.cache.clear();
        }

        /**
         * Reports that this processor can extract document metadata.
         *
         * @return always {@code true}
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean isMetadataSupported() {
            return true;
        }

        /**
         * Reports that this processor can extract document text.
         *
         * @return always {@code true}
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean isTextSupported() {
            return true;
        }

        /**
         * Reports that this processor can extract embedded images.
         *
         * @return always {@code true}
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean isImagesSupported() {
            return true;
        }

        /**
         * Reports that this processor can extract tables.
         *
         * @return always {@code true}
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean isTablesSupported() {
            return true;
        }

        /**
         * Tests whether the file can be parsed as a DOCX document. On success the parsed document is
         * cached under the content ID so that {@code extractDocument} does not have to parse it again.
         *
         * @param fileContent content whose data is the file to probe
         * @return {@code true} if an {@link XWPFDocument} could be built from the file;
         *     {@code false} if anything went wrong (the reason is logged at debug level)
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean acceptFile(FileContent fileContent) {
            // This method opens the stream, so it must also close it — on success and on failure —
            // otherwise every probed file leaks a file descriptor. The document is expected to be
            // fully loaded by the XWPFDocument constructor, so closing afterwards is safe.
            try (InputStream in = new FileInputStream(fileContent.getData())) {
                this.cache.put(fileContent.getId(), new XWPFDocument(in));
                return true;
            } catch (Exception e) {
                // Broad catch is deliberate: any failure simply means "not a document we handle".
                log().debug("FileContent {} not accepted due to: {}", fileContent.getId(), e.getMessage());
                return false;
            }
        }

        /**
         * Tests whether the stream content can be parsed as a DOCX document. On success the parsed
         * document is cached under the content ID so that {@code extractDocument} can reuse it.
         *
         * @param inputStreamContent content whose data is the stream to probe
         * @return {@code true} if an {@link XWPFDocument} could be built from the stream;
         *     {@code false} if anything went wrong (the reason is logged at debug level)
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public boolean acceptInputStream(InputStreamContent inputStreamContent) {
            // Close the stream obtained here once parsing has finished or failed; the parsed
            // document is expected to be fully loaded and not to read from the stream afterwards.
            try (InputStream in = inputStreamContent.getData()) {
                this.cache.put(inputStreamContent.getId(), new XWPFDocument(in));
                return true;
            } catch (Exception e) {
                // Broad catch is deliberate: any failure simply means "not a document we handle".
                log().debug("InputStreamContent {} not accepted due to: {}", inputStreamContent.getId(), e.getMessage());
                return false;
            }
        }

        /**
         * Returns the document for the given file content, preferring the instance cached by
         * {@code acceptFile} and parsing the file only when nothing is cached for its ID.
         *
         * @param fileContent content whose data is the DOCX file
         * @return the cached or newly parsed document
         * @throws IOException if the file cannot be opened or parsed
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public XWPFDocument extractDocument(FileContent fileContent) throws IOException {
            // The cache never stores null values, so a single lookup is enough.
            XWPFDocument cached = this.cache.get(fileContent.getId());
            if (cached != null) {
                return cached;
            }
            // Close the file stream opened here once the document has been built, so that a cache
            // miss does not leak a file descriptor.
            try (InputStream in = new FileInputStream(fileContent.getData())) {
                return new XWPFDocument(in);
            }
        }

        /**
         * Returns the document for the given stream content, preferring the instance cached by
         * {@code acceptInputStream} and parsing the stream only when nothing is cached for its ID.
         *
         * @param inputStreamContent content whose data is the DOCX stream
         * @return the cached or newly parsed document
         * @throws IOException if the stream cannot be read or parsed
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public XWPFDocument extractDocument(InputStreamContent inputStreamContent) throws IOException {
            // The cache never stores null values, so a single lookup is enough.
            XWPFDocument cached = this.cache.get(inputStreamContent.getId());
            if (cached != null) {
                return cached;
            }
            // Close the stream obtained here once the document has been built.
            try (InputStream in = inputStreamContent.getData()) {
                return new XWPFDocument(in);
            }
        }

        /**
         * Collects the core and extended properties of the document into a map keyed by property
         * name. Date properties are converted with {@code toTemporal}. Entries whose value equals
         * the integer {@code -1} are removed before returning; {@code null} values are left in.
         *
         * @param xWPFDocument the document to read properties from
         * @return a new mutable map of property name to value
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public Map<String, Object> extractMetadata(XWPFDocument xWPFDocument) {
            POIXMLProperties properties = xWPFDocument.getProperties();
            POIXMLProperties.CoreProperties core = properties.getCoreProperties();
            POIXMLProperties.ExtendedProperties extended = properties.getExtendedProperties();

            Map<String, Object> metadata = new HashMap<>();

            // Core properties.
            metadata.put(DocumentProperties.CATEGORY, core.getCategory());
            metadata.put(DocumentProperties.CONTENT_STATUS, core.getContentStatus());
            metadata.put(DocumentProperties.CONTENT_TYPE, core.getContentType());
            metadata.put(DocumentProperties.CREATION_DATE, toTemporal(core.getCreated()));
            metadata.put(DocumentProperties.CREATOR, core.getCreator());
            metadata.put("description", core.getDescription());
            metadata.put("identifier", core.getIdentifier());
            metadata.put(DocumentProperties.KEYWORDS, core.getKeywords());
            metadata.put(DocumentProperties.LAST_MODIFIED_BY, core.getLastModifiedByUser());
            metadata.put(DocumentProperties.LAST_PRINTED_DATE, toTemporal(core.getLastPrinted()));
            metadata.put(DocumentProperties.LAST_MODIFIED_DATE, toTemporal(core.getModified()));
            metadata.put(DocumentProperties.REVISION, core.getRevision());
            metadata.put(DocumentProperties.SUBJECT, core.getSubject());
            metadata.put("title", core.getTitle());

            // Extended (application) properties.
            metadata.put(DocumentProperties.APPLICATION, extended.getApplication());
            metadata.put(DocumentProperties.APPLICATION_VERSION, extended.getAppVersion());
            metadata.put(DocumentProperties.NW_CHARACTER_COUNT, extended.getCharacters());
            metadata.put(DocumentProperties.CHARACTER_COUNT, extended.getCharactersWithSpaces());
            metadata.put(DocumentProperties.COMPANY, extended.getCompany());
            metadata.put(DocumentProperties.HIDDEN_SLIDE_COUNT, extended.getHiddenSlides());
            metadata.put(DocumentProperties.HYPERLINK_BASE, extended.getHyperlinkBase());
            metadata.put(DocumentProperties.LINE_COUNT, extended.getLines());
            metadata.put(DocumentProperties.MANAGER, extended.getManager());
            metadata.put(DocumentProperties.MULTIMEDIA_CLIP_COUNT, extended.getMMClips());
            metadata.put(DocumentProperties.NOTE_COUNT, extended.getNotes());
            metadata.put(DocumentProperties.PAGE_COUNT, extended.getPages());
            metadata.put(DocumentProperties.PARAGRAPH_COUNT, extended.getParagraphs());
            metadata.put(DocumentProperties.PRESENTATION_FORMAT, extended.getPresentationFormat());
            metadata.put(DocumentProperties.SLIDE_COUNT, extended.getSlides());
            metadata.put(DocumentProperties.TEMPLATE, extended.getTemplate());
            metadata.put(DocumentProperties.EDITING_DURATION, extended.getTotalTime());
            metadata.put(DocumentProperties.WORD_COUNT, extended.getWords());

            // Drop entries whose value is the integer -1.
            // NOTE(review): presumably -1 is POI's marker for an unset numeric property — confirm.
            metadata.values().removeIf(Integer.valueOf(-1)::equals);
            return metadata;
        }

        /**
         * Extracts the text of the whole document as a single extraction.
         *
         * @param xWPFDocument the document to read
         * @return an immutable one-element collection holding the text produced by
         *     {@link XWPFWordExtractor#getText()}
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public Collection<ExtractionWithProperties<String>> extractText(XWPFDocument xWPFDocument) {
            // NOTE(review): the extractor is intentionally not closed here; closing a POI text
            // extractor may also close the underlying document, which the other extract* methods
            // still read from — confirm before wrapping this in try-with-resources.
            XWPFWordExtractor extractor = new XWPFWordExtractor(xWPFDocument);
            String text = extractor.getText();
            ExtractionWithProperties<String> extraction = new ExtractionWithProperties<>(text);
            return List.of(extraction);
        }

        /**
         * Decodes every picture embedded in the document. Each returned extraction carries the
         * image metadata that could be read, plus {@code "index"} (1-based position among all
         * pictures of the document, counting those that were skipped) and {@code "name"} (the
         * picture's file name).
         *
         * <p>Pictures that cannot be decoded are logged and skipped; a failure to read a picture's
         * metadata is logged but the picture itself is still returned.
         *
         * @param xWPFDocument the document to read pictures from
         * @return the decoded images in the order the document reports them
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public Collection<ExtractionWithProperties<BufferedImage>> extractImages(XWPFDocument xWPFDocument) {
            List<ExtractionWithProperties<BufferedImage>> images = new ArrayList<>();
            int index = 0;
            for (XWPFPictureData picture : xWPFDocument.getAllPictures()) {
                index++;

                // Load the picture bytes once and reuse them for decoding and metadata reading,
                // instead of asking the picture for its data twice.
                byte[] bytes;
                BufferedImage image;
                try {
                    bytes = picture.getData();
                    image = ImageIO.read(new ByteArrayInputStream(bytes));
                } catch (IOException e) {
                    log().warn("Unable to extract image {} from document", index, e);
                    continue;
                }
                if (image == null) {
                    // ImageIO.read returns null when no registered reader understands the data.
                    log().warn("Null image {} extracted from document", index);
                    continue;
                }

                Map<String, Object> properties = new HashMap<>();
                try {
                    properties.putAll(toMap(ImageMetadataReader.readMetadata(new ByteArrayInputStream(bytes))));
                } catch (ImageProcessingException | IOException e) {
                    // Metadata is best-effort: keep the image even when its metadata is unreadable.
                    log().warn("Unable to extract metadata from image {}", index, e);
                }
                // Added after the metadata so these two keys win over any same-named metadata entry.
                properties.put("index", index);
                properties.put("name", picture.getFileName());
                images.add(new ExtractionWithProperties<>(image, properties));
            }
            return images;
        }

        /**
         * Converts every table returned by {@link XWPFDocument#getTables()} into a {@link Table}
         * extraction, preserving order.
         *
         * @param xWPFDocument the document to read tables from
         * @return one extraction per table; empty if the document reports no tables
         * @throws ProcessingException declared by the overridden method; not thrown directly here
         */
        @Override // io.annot8.components.documents.processors.AbstractDocumentExtractorProcessor
        public Collection<ExtractionWithProperties<Table>> extractTables(XWPFDocument xWPFDocument) throws ProcessingException {
            List<ExtractionWithProperties<Table>> tables = new ArrayList<>();
            for (XWPFTable wordTable : xWPFDocument.getTables()) {
                tables.add(transformTable(wordTable));
            }
            return tables;
        }

        /**
         * Wraps a Word table in a {@link DocxTable} and returns it as an extraction without
         * additional properties.
         *
         * @param xWPFTable the Word table to convert
         * @return the extraction holding the converted table
         */
        private static ExtractionWithProperties<Table> transformTable(XWPFTable xWPFTable) {
            Table converted = new DocxTable(xWPFTable);
            return new ExtractionWithProperties<>(converted);
        }
    }

    /**
     * Creates the {@link Processor} that performs the DOCX extraction.
     *
     * <p>NOTE(review): the decompiler reported that this method's access modifier was changed
     * from {@code protected}; confirm the intended visibility against the original source.
     *
     * @param context passed through to the processor
     * @param documentExtractorSettings passed through to the processor
     * @return a new processor instance
     */
    public Processor createComponent(Context context, DocumentExtractorSettings documentExtractorSettings) {
        Processor processor = new Processor(context, documentExtractorSettings);
        return processor;
    }
}
