package io.annot8.components.documents.processors;

import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.Tag;
import io.annot8.api.components.responses.ProcessorResponse;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.exceptions.ProcessingException;
import io.annot8.api.properties.Properties;
import io.annot8.common.components.AbstractProcessor;
import io.annot8.common.data.content.FileContent;
import io.annot8.common.data.content.Image;
import io.annot8.common.data.content.InputStreamContent;
import io.annot8.common.data.content.Table;
import io.annot8.common.data.content.TableContent;
import io.annot8.common.data.content.Text;
import io.annot8.components.documents.data.ExtractionWithProperties;
import io.annot8.components.documents.processors.DocumentExtractorSettings;
import java.awt.image.BufferedImage;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

/* loaded from: input_file:io/annot8/components/documents/processors/AbstractDocumentExtractorProcessor.class */
public abstract class AbstractDocumentExtractorProcessor<T, S extends DocumentExtractorSettings> extends AbstractProcessor {
    // Context handed to the constructor; it is stored here but not read anywhere in this class.
    private final Context context;
    // Extraction settings. The constructor switches off any extract* flag that the
    // concrete extractor reports as unsupported, so this object may be modified after it is passed in.
    protected final S settings;
    // Same "/" value that normaliseMetadataName and toMap use when composing metadata keys.
    protected static final String METADATA_SEPARATOR = "/";

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:io/annot8/components/documents/processors/AbstractDocumentExtractorProcessor$InMemoryProperties.class */
    /**
     * Simple map-backed {@link Properties} holding the properties attached to an extraction.
     *
     * <p>Entries whose value is {@code null} or an empty {@link String} are left out. The supplied
     * map is copied rather than wrapped, so the caller's map is never modified and may be
     * unmodifiable; a {@code null} map is treated as empty.
     */
    public static class InMemoryProperties implements Properties {
        private final Map<String, Object> map;

        /**
         * Creates the properties from a snapshot of {@code map}.
         *
         * @param map source entries; may be {@code null} or unmodifiable
         */
        public InMemoryProperties(Map<String, Object> map) {
            // LinkedHashMap keeps the iteration order of the source map.
            this.map = new LinkedHashMap<>();
            if (map == null) {
                return;
            }
            for (Map.Entry<String, Object> entry : map.entrySet()) {
                Object value = entry.getValue();
                boolean emptyString = value instanceof String && ((String) value).isEmpty();
                if (value != null && !emptyString) {
                    this.map.put(entry.getKey(), value);
                }
            }
        }

        /**
         * Returns the retained properties.
         *
         * @return the backing map of non-null, non-empty-string values
         */
        public Map<String, Object> getAll() {
            return this.map;
        }
    }

    /**
     * Stores the context and settings, then reconciles the settings with what the concrete
     * extractor supports: for each of metadata, text, images and tables, if extraction is
     * requested but not supported, a warning is logged and the corresponding flag on the
     * settings object is turned off.
     *
     * @param context the context, kept as-is
     * @param s the extraction settings; may be modified by this constructor
     */
    public AbstractDocumentExtractorProcessor(Context context, S s) {
        this.context = context;
        this.settings = s;

        // The capability check runs first; the setting is only consulted when unsupported.
        if (!isMetadataSupported() && s.isExtractMetadata()) {
            log().warn("This extractor does not support extraction of metadata");
            s.setExtractMetadata(false);
        }

        if (!isTextSupported() && s.isExtractText()) {
            log().warn("This extractor does not support extraction of text");
            s.setExtractText(false);
        }

        if (!isImagesSupported() && s.isExtractImages()) {
            log().warn("This extractor does not support extraction of images");
            s.setExtractImages(false);
        }

        if (!isTablesSupported() && s.isExtractTables()) {
            log().warn("This extractor does not support extraction of tables");
            s.setExtractTables(false);
        }
    }

    /**
     * Extracts content from every accepted {@link FileContent} and {@link InputStreamContent}
     * on the item.
     *
     * <p>{@link #reset()} is called first. Each content accepted by {@link #acceptFile} or
     * {@link #acceptInputStream} is turned into a document, passed through the enabled
     * extraction steps, and the document is closed if it is {@link Closeable}. When the
     * settings ask for the original to be discarded, the source content is then removed
     * from the item. Failures never abort processing of the remaining content; they are collected.
     *
     * @param item the item whose content is processed
     * @return {@code ProcessorResponse.ok()} when nothing failed, otherwise a processing-error
     *     response carrying every collected exception
     */
    public ProcessorResponse process(Item item) {
        reset();
        List<Exception> errors = new ArrayList<>();

        item.getContents(FileContent.class).filter(this::acceptFile).forEach(fileContent -> {
            log().info("Extracting content from File Content {} ({})", fileContent.getId(), ((File) fileContent.getData()).getPath());
            try {
                T document = extractDocument(fileContent);
                extractAndClose(item, fileContent.getId(), document, errors);
                if (this.settings.isDiscardOriginal()) {
                    item.removeContent(fileContent);
                }
            } catch (Exception e) {
                errors.add(e);
            }
        });

        item.getContents(InputStreamContent.class).filter(this::acceptInputStream).forEach(inputStreamContent -> {
            log().info("Extracting content from InputStream Content {}", inputStreamContent.getId());
            try {
                T document = extractDocument(inputStreamContent);
                extractAndClose(item, inputStreamContent.getId(), document, errors);
                if (this.settings.isDiscardOriginal()) {
                    item.removeContent(inputStreamContent);
                }
            } catch (Exception e) {
                errors.add(e);
            }
        });

        return errors.isEmpty() ? ProcessorResponse.ok() : ProcessorResponse.processingError(errors);
    }

    /**
     * Runs the extraction steps for one document, recording their failures in {@code errors},
     * and always closes the document afterwards, even if extraction throws unexpectedly.
     */
    private void extractAndClose(Item item, String contentId, T document, List<Exception> errors) {
        try {
            errors.addAll(extract(item, contentId, document));
        } finally {
            if (document instanceof Closeable) {
                try {
                    ((Closeable) document).close();
                } catch (IOException e) {
                    // Closing is best-effort: a failure here must not fail the item, but should be visible.
                    log().warn("Unable to close document extracted from content {}", contentId, e);
                }
            }
        }
    }

    /**
     * Runs each enabled extraction step (metadata, text, images, tables) against a document
     * and stores the results on the item.
     *
     * <p>Metadata entries become item properties; entries with a {@code null} or empty-string
     * value are skipped. The map returned by {@link #extractMetadata} is only read, never
     * modified, so it may be unmodifiable. Text, images and tables each become new content on
     * the item, described as extracted from {@code contentId} and carrying a {@code "parent"}
     * property set to {@code contentId}; empty text extractions are skipped.
     *
     * <p>Each step is isolated: an exception in one step is recorded and the next step still runs.
     *
     * @param item the item receiving properties and new content
     * @param contentId id of the content the document was created from
     * @param document the document to extract from
     * @return the exceptions raised by the individual steps; empty when all succeeded
     */
    private List<Exception> extract(Item item, String contentId, T document) {
        List<Exception> errors = new ArrayList<>();

        if (this.settings.isExtractMetadata()) {
            try {
                for (Map.Entry<String, Object> entry : extractMetadata(document).entrySet()) {
                    Object value = entry.getValue();
                    boolean emptyString = value instanceof String && ((String) value).isEmpty();
                    if (value != null && !emptyString) {
                        item.getProperties().set(entry.getKey(), value);
                    }
                }
            } catch (Exception e) {
                errors.add(e);
            }
        }

        if (this.settings.isExtractText()) {
            try {
                for (ExtractionWithProperties<String> extraction : extractText(document)) {
                    if (extraction.getExtractedValue().isEmpty()) {
                        continue;
                    }
                    ((Content.Builder) ((Content.Builder) item.createContent(Text.class)
                            .withDescription("Text extracted from " + contentId)
                            .withData(extraction.getExtractedValue())
                            .withProperties(new InMemoryProperties(extraction.getProperties())))
                            .withProperty("parent", contentId))
                            .save();
                }
            } catch (Exception e) {
                errors.add(e);
            }
        }

        if (this.settings.isExtractImages()) {
            try {
                for (ExtractionWithProperties<BufferedImage> extraction : extractImages(document)) {
                    ((Content.Builder) ((Content.Builder) item.createContent(Image.class)
                            .withDescription("Image extracted from " + contentId)
                            .withData(extraction.getExtractedValue())
                            .withProperties(new InMemoryProperties(extraction.getProperties())))
                            .withProperty("parent", contentId))
                            .save();
                }
            } catch (Exception e) {
                errors.add(e);
            }
        }

        if (this.settings.isExtractTables()) {
            try {
                for (ExtractionWithProperties<Table> extraction : extractTables(document)) {
                    ((Content.Builder) ((Content.Builder) item.createContent(TableContent.class)
                            .withDescription("Table extracted from " + contentId)
                            .withData(extraction.getExtractedValue())
                            .withProperties(new InMemoryProperties(extraction.getProperties())))
                            .withProperty("parent", contentId))
                            .save();
                }
            } catch (Exception e) {
                errors.add(e);
            }
        }

        return errors;
    }

    /**
     * Hook invoked at the start of every {@code process(Item)} call, before any content is
     * examined. Does nothing by default.
     */
    public void reset() {
    }

    /**
     * Whether this extractor can produce metadata. Called from the constructor; when this is
     * false and the settings request metadata, a warning is logged and the setting is turned off.
     */
    public abstract boolean isMetadataSupported();

    /**
     * Whether this extractor can produce text. Called from the constructor; when this is
     * false and the settings request text, a warning is logged and the setting is turned off.
     */
    public abstract boolean isTextSupported();

    /**
     * Whether this extractor can produce images. Called from the constructor; when this is
     * false and the settings request images, a warning is logged and the setting is turned off.
     */
    public abstract boolean isImagesSupported();

    /**
     * Whether this extractor can produce tables. Called from the constructor; when this is
     * false and the settings request tables, a warning is logged and the setting is turned off.
     */
    public abstract boolean isTablesSupported();

    /** Filter applied to each FileContent on an item; only accepted content is extracted. */
    public abstract boolean acceptFile(FileContent fileContent);

    /** Filter applied to each InputStreamContent on an item; only accepted content is extracted. */
    public abstract boolean acceptInputStream(InputStreamContent inputStreamContent);

    /**
     * Creates the document object that is handed to the extract* methods for an accepted
     * FileContent. If the returned object implements Closeable, process closes it after extraction.
     */
    public abstract T extractDocument(FileContent fileContent) throws IOException;

    /**
     * Creates the document object that is handed to the extract* methods for an accepted
     * InputStreamContent. If the returned object implements Closeable, process closes it after extraction.
     */
    public abstract T extractDocument(InputStreamContent inputStreamContent) throws IOException;

    /**
     * Returns metadata for the document as key/value pairs. Called only when metadata
     * extraction is enabled; values that are null or empty strings are not copied to the item.
     */
    public abstract Map<String, Object> extractMetadata(T t) throws ProcessingException;

    /**
     * Returns the text extracted from the document. Called only when text extraction is
     * enabled; extractions with an empty value are skipped, the rest become Text content.
     */
    public abstract Collection<ExtractionWithProperties<String>> extractText(T t) throws ProcessingException;

    /**
     * Returns the images extracted from the document. Called only when image extraction is
     * enabled; each extraction becomes Image content on the item.
     */
    public abstract Collection<ExtractionWithProperties<BufferedImage>> extractImages(T t) throws ProcessingException;

    /**
     * Returns the tables extracted from the document. Called only when table extraction is
     * enabled; each extraction becomes TableContent on the item.
     */
    public abstract Collection<ExtractionWithProperties<Table>> extractTables(T t) throws ProcessingException;

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Converts a legacy {@link Date} to a {@link TemporalAccessor}.
     *
     * @param date the date to convert; may be {@code null}
     * @return the date's {@code Instant}, or {@code null} when {@code date} is {@code null}
     */
    public static TemporalAccessor toTemporal(Date date) {
        return date == null ? null : date.toInstant();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Converts a legacy {@link Calendar} to a {@link TemporalAccessor}.
     *
     * @param calendar the calendar to convert; may be {@code null}
     * @return {@code null} for a {@code null} calendar; a {@code ZonedDateTime} in the
     *     calendar's zone when it reports a time zone; otherwise the plain {@code Instant}
     */
    public static TemporalAccessor toTemporal(Calendar calendar) {
        if (calendar == null) {
            return null;
        }
        Instant moment = calendar.toInstant();
        if (calendar.getTimeZone() != null) {
            return ZonedDateTime.ofInstant(moment, calendar.getTimeZone().toZoneId());
        }
        return moment;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Flattens every tag of every directory in the metadata into a single map.
     *
     * <p>Each tag contributes two entries: its normalised name mapped to the tag's
     * description, and the same name suffixed with {@code "/raw"} mapped to the object the
     * directory holds for that tag type. Later tags with the same normalised name overwrite
     * earlier ones.
     *
     * @param metadata the metadata to flatten
     * @return a new mutable map of the collected entries; values may be {@code null}
     */
    public static Map<String, Object> toMap(Metadata metadata) {
        Map<String, Object> flattened = new HashMap<>();
        for (Directory dir : metadata.getDirectories()) {
            for (Tag tag : dir.getTags()) {
                String key = normaliseMetadataName(tag);
                flattened.put(key, tag.getDescription());
                flattened.put(key + "/raw", dir.getObject(tag.getTagType()));
            }
        }
        return flattened;
    }

    /**
     * Builds the metadata key for a tag: the camel-cased directory name, a {@code "/"}, then
     * the camel-cased tag name.
     *
     * @param tag the tag to name
     * @return the composed key
     */
    protected static String normaliseMetadataName(Tag tag) {
        String directoryPart = camelCaseString(tag.getDirectoryName());
        String tagPart = camelCaseString(tag.getTagName());
        return directoryPart + "/" + tagPart;
    }

    /**
     * Converts a space-separated name to lower camel case: the input is lower-cased, spaces
     * are dropped, and the first character following a space (or run of spaces) is upper-cased.
     *
     * <p>Case conversion uses {@link Locale#ROOT} so the generated keys are the same on every
     * machine; with the default locale a Turkish JVM would, for example, turn "Image" into
     * "ımage".
     *
     * @param str the name to convert; must not be {@code null}
     * @return the camel-cased name
     */
    private static String camelCaseString(String str) {
        StringBuilder sb = new StringBuilder();
        boolean upperNext = false;
        for (char c : str.toLowerCase(Locale.ROOT).toCharArray()) {
            if (c == ' ') {
                upperNext = true;
            } else if (upperNext) {
                sb.append(String.valueOf(c).toUpperCase(Locale.ROOT));
                upperNext = false;
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }
}
