package io.annot8.components.documents.processors;

import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.components.responses.ProcessorResponse;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessor;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.common.data.content.TableContent;
import io.annot8.components.documents.data.WorksheetTable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.SheetVisibility;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.javatuples.Pair;

@ComponentDescription("Extracts content from Excel (*.xls and *.xlsx) files into a table")
@ComponentTags({"documents", "excel", "xls", "xlsx", "extractor", "metadata"})
@ComponentName("Excel (XLS and XLSX) Extractor")
@SettingsClass(Settings.class)
/* loaded from: input_file:io/annot8/components/documents/processors/ExcelExtractor.class */
public class ExcelExtractor extends AbstractProcessorDescriptor<Processor, Settings> {

    /* loaded from: input_file:io/annot8/components/documents/processors/ExcelExtractor$Processor.class */
    /**
     * Opens the file and input-stream contents of an item as Excel workbooks and saves one
     * {@link TableContent} per worksheet that is not listed in {@link Settings#getSkipSheets()}.
     *
     * <p>NOTE(review): {@link Settings#getExtensions()} is not consulted anywhere in this class, so
     * every {@link FileContent} and {@link InputStreamContent} is attempted regardless of its
     * extension - confirm whether filtering is supposed to happen here.
     *
     * <p>NOTE(review): workbooks opened here are never closed. The created {@link WorksheetTable}
     * is handed the live {@link Sheet}, so confirm that closing is safe before adding it.
     */
    public static class Processor extends AbstractProcessor {
        private final Settings settings;

        /**
         * Creates a processor driven by the given settings.
         *
         * @param settings extraction options read while processing each item
         */
        public Processor(Settings settings) {
            this.settings = settings;
        }

        /**
         * Extracts tables from every file content, then every input-stream content, of the item.
         * Contents that cannot be opened as a workbook are logged and left untouched.
         *
         * @param item the item whose contents are read and to which table contents are added
         * @return {@code ProcessorResponse.ok()} once all source contents have been attempted
         */
        public ProcessorResponse process(Item item) {
            // Snapshot the sources before doing any work: extraction adds table content to, and
            // may remove source content from, this same item, and that must not happen while the
            // item's own content streams are still being traversed.
            List<FileContent> files = item.getContents(FileContent.class).collect(Collectors.toList());
            List<InputStreamContent> streams =
                item.getContents(InputStreamContent.class).collect(Collectors.toList());

            for (FileContent file : files) {
                extractWorkbook(item, mapToWorkbook(file));
            }
            for (InputStreamContent stream : streams) {
                extractWorkbook(item, mapToWorkbook(stream));
            }
            return ProcessorResponse.ok();
        }

        /**
         * Opens a file content as a workbook.
         *
         * @param fileContent content whose data is the spreadsheet file
         * @return the workbook paired with the content id, or {@code null} if it could not be opened
         */
        public Pair<Workbook, String> mapToWorkbook(FileContent fileContent) {
            try {
                return new Pair<>(WorkbookFactory.create(fileContent.getData()), fileContent.getId());
            } catch (IOException | RuntimeException e) {
                // POI signals some unreadable inputs (e.g. encrypted or empty workbooks) with
                // unchecked exceptions; skip those like any other unreadable file instead of
                // aborting the whole item.
                log().warn("Unable to process file {}", fileContent.getData().getAbsolutePath(), e);
                return null;
            }
        }

        /**
         * Opens an input-stream content as a workbook.
         *
         * @param inputStreamContent content whose data is the spreadsheet stream
         * @return the workbook paired with the content id, or {@code null} if it could not be opened
         */
        public Pair<Workbook, String> mapToWorkbook(InputStreamContent inputStreamContent) {
            try {
                return new Pair<>(WorkbookFactory.create(inputStreamContent.getData()), inputStreamContent.getId());
            } catch (IOException | RuntimeException e) {
                // Same best-effort policy as the file variant: log and skip.
                log().warn("Unable to process InputStream {}", inputStreamContent.getId(), e);
                return null;
            }
        }

        /**
         * Processes one opened workbook and, if configured, removes the content it came from.
         *
         * @param item the item being processed
         * @param opened workbook and source content id, or {@code null} when opening failed
         */
        private void extractWorkbook(Item item, Pair<Workbook, String> opened) {
            if (opened == null) {
                return;
            }
            String sourceId = opened.getValue1();
            processWorkbook(item, opened.getValue0(), sourceId);
            if (this.settings.isRemoveSourceContent()) {
                item.removeContent(sourceId);
            }
        }

        /**
         * Records the spreadsheet version as the item's "version" property and creates a table for
         * each sheet whose name is not in the skip list.
         *
         * @param item the item receiving the property and the tables
         * @param workbook the workbook to walk
         * @param parentId id of the content the workbook was read from
         */
        private void processWorkbook(Item item, Workbook workbook, String parentId) {
            item.getProperties().set("version", workbook.getSpreadsheetVersion().name());
            for (int index = 0; index < workbook.getNumberOfSheets(); index++) {
                String sheetName = workbook.getSheetName(index);
                if (this.settings.getSkipSheets().contains(sheetName)) {
                    log().info("Skipping sheet {}", sheetName);
                    continue;
                }
                boolean active = index == workbook.getActiveSheetIndex();
                // Any visibility other than VISIBLE is reported as not visible.
                boolean visible = workbook.getSheetVisibility(index) == SheetVisibility.VISIBLE;
                processSheet(item, workbook.getSheetAt(index), index, active, visible, parentId);
            }
        }

        /**
         * Saves a single sheet as a {@link TableContent} described by the sheet name, with the
         * properties "page", "active", "visible" and, when a parent id is given, "parent".
         *
         * @param item the item on which the content is created
         * @param sheet the worksheet wrapped by the table
         * @param i index of the sheet within its workbook
         * @param z whether this is the workbook's active sheet
         * @param z2 whether the sheet is visible
         * @param str id of the source content, may be {@code null}
         */
        private void processSheet(Item item, Sheet sheet, int i, boolean z, boolean z2, String str) {
            item.createContent(TableContent.class)
                .withData(new WorksheetTable(sheet, this.settings.isFirstRowHeader(), this.settings.getSkipRows()))
                .withDescription(sheet.getSheetName())
                .withProperty("page", i)
                .withProperty("active", z)
                .withProperty("visible", z2)
                .withPropertyIfPresent("parent", Optional.ofNullable(str))
                .save();
        }
    }

    /* loaded from: input_file:io/annot8/components/documents/processors/ExcelExtractor$Settings.class */
    /**
     * Configuration for the Excel extractor: accepted file extensions, whether source content is
     * removed after processing, header and row-skipping options, and sheets to ignore.
     */
    public static class Settings implements io.annot8.api.settings.Settings {
        private List<String> extensions = List.of("xls", "xlsx");
        private boolean removeSourceContent = true;
        private boolean firstRowHeader = true;
        private int skipRows = 0;
        private List<String> skipSheets = Collections.emptyList();

        /**
         * Checks that both list-valued settings are present.
         *
         * @return {@code true} when neither the extensions nor the skip-sheets list is {@code null}
         */
        public boolean validate() {
            return this.extensions != null && this.skipSheets != null;
        }

        @Description("The list of file extensions on which this processor will act (case insensitive). If empty, then the processor will act on all files.")
        public List<String> getExtensions() {
            return this.extensions;
        }

        /**
         * Stores a lower-cased copy of the given extensions.
         *
         * @param list the extensions to accept; {@code null} is stored as-is so that
         *     {@link #validate()} reports the settings as invalid rather than this setter throwing
         */
        public void setExtensions(List<String> list) {
            if (list == null) {
                this.extensions = null;
                return;
            }
            // Locale.ROOT keeps the normalisation independent of the JVM's default locale.
            this.extensions = list.stream()
                .map(extension -> extension.toLowerCase(Locale.ROOT))
                .collect(Collectors.toList());
        }

        @Description(value = "Should the source Content be removed after successful processing?", defaultValue = "true")
        public boolean isRemoveSourceContent() {
            return this.removeSourceContent;
        }

        /**
         * @param z whether the source content should be removed once it has been processed
         */
        public void setRemoveSourceContent(boolean z) {
            this.removeSourceContent = z;
        }

        @Description(value = "Is the first row of the spreadsheet a header row, to be used for column names?", defaultValue = "true")
        public boolean isFirstRowHeader() {
            return this.firstRowHeader;
        }

        /**
         * @param z whether the first row of each sheet is a header row
         */
        public void setFirstRowHeader(boolean z) {
            this.firstRowHeader = z;
        }

        @Description(value = "The number of rows to skip (prior to reading the header, if firstRowHeader is true)", defaultValue = "0")
        public int getSkipRows() {
            return this.skipRows;
        }

        /**
         * @param i the number of leading rows to skip in each sheet
         */
        public void setSkipRows(int i) {
            this.skipRows = i;
        }

        @Description("The name of any spreadsheets within a workbook which should be skipped")
        public List<String> getSkipSheets() {
            return this.skipSheets;
        }

        /**
         * @param list names of the sheets to skip; the list is stored without copying
         */
        public void setSkipSheets(List<String> list) {
            this.skipSheets = list;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the processor for this descriptor.
     *
     * @param context the component context; not used by this factory method
     * @param settings the settings handed to the new processor
     * @return a new {@link Processor} configured with {@code settings}
     */
    public Processor createComponent(Context context, Settings settings) {
        final Processor processor = new Processor(settings);
        return processor;
    }

    /**
     * Describes what this component reads, creates and deletes.
     *
     * <p>It processes {@link FileContent} and {@link InputStreamContent} and creates
     * {@link TableContent}. When source removal is enabled the processor removes whichever source
     * content it extracted from, so both source content types are declared as deleted.
     *
     * @return the capabilities for the current settings
     */
    public Capabilities capabilities() {
        SimpleCapabilities.Builder builder = new SimpleCapabilities.Builder()
            .withProcessesContent(FileContent.class)
            .withProcessesContent(InputStreamContent.class)
            .withCreatesContent(TableContent.class);
        if (getSettings().isRemoveSourceContent()) {
            // The processor removes the source of every workbook it handles, whether that source
            // was a file or an input stream.
            builder = builder
                .withDeletesContent(FileContent.class)
                .withDeletesContent(InputStreamContent.class);
        }
        return builder.build();
    }
}
