package uk.nhs.ciao.docs.parser;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import uk.nhs.ciao.docs.parser.extractor.PropertiesExtractor;
import uk.nhs.ciao.docs.parser.xml.SAXContentToDOMHandler;

/* loaded from: input_file:uk/nhs/ciao/docs/parser/TikaDocumentParser.class */
/**
 * {@link DocumentParser} which hands the input stream to a Tika {@link Parser},
 * collects the parser's SAX output into a DOM {@link Document} through a
 * {@link SAXContentToDOMHandler}, and then passes that document to a
 * {@link PropertiesExtractor} to obtain the resulting properties.
 * <p>
 * After extraction, the {@code name}/{@code content} pairs of every
 * {@code meta} element in the document are merged into the properties under
 * the {@code "metadata"} key.
 * <p>
 * NOTE(review): one handler instance is reused (and cleared) for every call and
 * this class performs no synchronisation, so an instance should presumably not
 * be shared between threads - confirm against the handler implementation.
 */
public class TikaDocumentParser implements DocumentParser {
    /** Key in the extracted properties under which metadata entries are stored. */
    private static final String METADATA_KEY = "metadata";

    private final Parser parser;
    private final PropertiesExtractor<Document> propertiesExtractor;
    private final SAXContentToDOMHandler handler;

    /**
     * Creates a document parser backed by the given Tika parser and extractor.
     *
     * @param parser the Tika parser used to read incoming documents; must not be null
     * @param propertiesExtractor extracts the properties from the parsed DOM; must not be null
     * @throws NullPointerException if either argument is null
     * @throws ParserConfigurationException if the DOM builder used by the handler cannot be created
     */
    public TikaDocumentParser(Parser parser, PropertiesExtractor<Document> propertiesExtractor) throws ParserConfigurationException {
        // Validate the arguments before doing any other work.
        this.parser = Preconditions.checkNotNull(parser);
        this.propertiesExtractor = Preconditions.checkNotNull(propertiesExtractor);
        this.handler = createHandler();
    }

    /**
     * Parses the stream into a DOM, extracts its properties and merges in the
     * document's {@code meta} element values.
     *
     * @param inputStream the document content to parse
     * @return the map returned by the properties extractor (returned as-is, including
     *         {@code null}, when the extractor yields no map), with metadata merged in
     * @throws UnsupportedDocumentTypeException declared by the interface; not thrown
     *         directly by this method
     * @throws IOException if reading fails, or if the Tika parser reports a SAX or Tika error
     */
    @Override // uk.nhs.ciao.docs.parser.DocumentParser
    public Map<String, Object> parseDocument(InputStream inputStream) throws UnsupportedDocumentTypeException, IOException {
        final Document document = parseToDom(inputStream);
        final Map<String, Object> properties = this.propertiesExtractor.extractProperties(document);
        addTikaMetadataProperties(document, properties);
        return properties;
    }

    /**
     * Runs the Tika parser over the stream and returns the document held by the handler.
     * SAX and Tika failures are rethrown as {@link IOException} with the original as cause.
     */
    private Document parseToDom(InputStream inputStream) throws IOException {
        try {
            this.parser.parse(inputStream, this.handler, new Metadata(), new ParseContext());
            // The document reference is obtained before the finally block clears the handler.
            return this.handler.getDocument();
        } catch (SAXException e) {
            throw new IOException(e);
        } catch (TikaException e) {
            throw new IOException(e);
        } finally {
            // Always reset the shared handler so the next call starts clean, even after a failure.
            this.handler.clear();
        }
    }

    /**
     * Merges the {@code name}/{@code content} attributes of all {@code meta}
     * elements in the document into the {@code "metadata"} entry of the properties.
     * <p>
     * Entries already present under {@code "metadata"} are kept and come first.
     * A name that occurs more than once ends up mapped to a list of its values.
     * Nothing is stored when there is no metadata at all, and a {@code null}
     * properties map is ignored.
     *
     * @param document the parsed document to read {@code meta} elements from
     * @param properties the extracted properties to update in place; may be null
     * @throws ClassCastException if an existing {@code "metadata"} value is not a map
     */
    private void addTikaMetadataProperties(Document document, Map<String, Object> properties) {
        if (properties == null) {
            return;
        }

        final Map<String, Object> metadata = Maps.newLinkedHashMap();

        // Assumes any existing value is a String-keyed map - the cast cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        final Map<String, Object> existing = (Map<String, Object>) properties.get(METADATA_KEY);
        if (existing != null) {
            // A key that is present but mapped to null is treated the same as an absent key.
            metadata.putAll(existing);
        }

        final NodeList metaElements = document.getElementsByTagName("meta");
        for (int i = 0; i < metaElements.getLength(); i++) {
            final Element meta = (Element) metaElements.item(i);
            final String name = meta.getAttribute("name");
            final String content = meta.getAttribute("content");
            if (Strings.isNullOrEmpty(name) || Strings.isNullOrEmpty(content)) {
                // Skip meta elements that lack either a name or a value.
                continue;
            }
            addMetadataValue(metadata, name, content);
        }

        if (!metadata.isEmpty()) {
            properties.put(METADATA_KEY, metadata);
        }
    }

    /**
     * Adds a value under the given name: stored directly when the name is new,
     * appended when the name already maps to a list, and otherwise combined
     * with the existing single value into a new list.
     */
    private static void addMetadataValue(Map<String, Object> metadata, String name, String value) {
        final Object current = metadata.get(name);
        if (current == null) {
            metadata.put(name, value);
        } else if (current instanceof List) {
            @SuppressWarnings("unchecked")
            final List<Object> values = (List<Object>) current;
            values.add(value);
        } else {
            metadata.put(name, Lists.<Object>newArrayList(current, value));
        }
    }

    /**
     * Creates the SAX-to-DOM handler on top of a default JAXP document builder.
     * The meaning of the boolean flag is defined by {@link SAXContentToDOMHandler}.
     */
    private static SAXContentToDOMHandler createHandler() throws ParserConfigurationException {
        return new SAXContentToDOMHandler(DocumentBuilderFactory.newInstance().newDocumentBuilder(), true);
    }
}
