package org.corpus_tools.pepper.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import org.corpus_tools.pepper.common.CorpusDesc;
import org.corpus_tools.pepper.common.FormatDesc;
import org.corpus_tools.pepper.common.PepperUtil;
import org.corpus_tools.pepper.exceptions.WorkflowException;
import org.corpus_tools.pepper.modules.PepperImporter;
import org.corpus_tools.pepper.modules.PepperModule;
import org.corpus_tools.pepper.modules.exceptions.PepperModuleException;
import org.corpus_tools.salt.SALT_TYPE;
import org.corpus_tools.salt.common.SCorpus;
import org.corpus_tools.salt.common.SCorpusGraph;
import org.corpus_tools.salt.graph.Identifier;
import org.custommonkey.xmlunit.XMLConstants;
import org.eclipse.emf.common.util.URI;
import org.xml.sax.ext.DefaultHandler2;

/* loaded from: input_file:org/corpus_tools/pepper/impl/PepperImporterImpl.class */
/**
 * Base implementation of {@link PepperImporter}.
 *
 * <p>Besides delegating format bookkeeping to the module fingerprint, this class derives a corpus
 * structure from the file system: starting at the corpus path of the {@link CorpusDesc}, every
 * file and folder is classified via {@link #setTypeOfResource(URI)} as corpus, document or
 * "ignore", and the matching Salt element is created in the corpus graph. The resource each
 * element was created from is recorded in {@link #getIdentifier2ResourceTable()}.
 */
public abstract class PepperImporterImpl extends PepperModuleImpl implements PepperImporter {
    /** Description of the corpus to import; created lazily by {@link #getCorpusDesc()}. */
    protected CorpusDesc corpusDesc;
    /** Identifier of a created corpus/document mapped to the resource it was created from. */
    private Map<Identifier, URI> sElementId2ResourceTable;
    /** File endings (or special ending markers) classified as documents. */
    private Collection<String> sDocumentEndings;
    /** File endings (or special ending markers) classified as corpora. */
    private Collection<String> sCorpusEndings;
    /** Names of files and folders that are skipped during the structure import. */
    private Collection<String> importIgnoreList;
    /** Optional resolver used by {@link #sampleFileContent(URI, String...)}; may be {@code null}. */
    private CorpusPathResolver corpusPathResolver;

    /** Creates an importer with the placeholder name {@code "MyImporter"}. */
    protected PepperImporterImpl() {
        this("MyImporter");
    }

    /**
     * Creates an importer with the passed name. All lazily created members start out as
     * {@code null}.
     *
     * @param str name handed to the super constructor
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public PepperImporterImpl(String str) {
        super(str);
    }

    /**
     * Returns the formats registered in this module's fingerprint.
     *
     * @return the list held by the fingerprint
     */
    @Override
    public List<FormatDesc> getSupportedFormats() {
        return getFingerprint().getSupportedFormats();
    }

    /**
     * Registers a supported format in this module's fingerprint.
     *
     * @param str first argument passed through to the fingerprint (presumably the format name)
     * @param str2 second argument passed through to the fingerprint (presumably the format version)
     * @param uri third argument passed through to the fingerprint (presumably a format reference)
     * @return the format description returned by the fingerprint
     */
    @Override
    public FormatDesc addSupportedFormat(String str, String str2, URI uri) {
        return getFingerprint().addSupportedFormat(str, str2, uri);
    }

    /**
     * Returns the corpus description, creating an empty one on first access.
     *
     * @return the corpus description; never {@code null} in this implementation
     */
    @Override
    public CorpusDesc getCorpusDesc() {
        if (this.corpusDesc == null) {
            this.corpusDesc = new CorpusDesc();
        }
        return this.corpusDesc;
    }

    /**
     * Replaces the corpus description.
     *
     * @param corpusDesc the new description; {@code null} makes the next
     *     {@link #getCorpusDesc()} call create a fresh one
     */
    @Override
    public void setCorpusDesc(CorpusDesc corpusDesc) {
        this.corpusDesc = corpusDesc;
    }

    /**
     * Returns the table mapping identifiers of created corpora and documents to their resource,
     * creating it on first access.
     *
     * @return the live, mutable table
     */
    @Override
    public synchronized Map<Identifier, URI> getIdentifier2ResourceTable() {
        if (this.sElementId2ResourceTable == null) {
            this.sElementId2ResourceTable = new Hashtable<Identifier, URI>();
        }
        return this.sElementId2ResourceTable;
    }

    /**
     * Imports the corpus structure found below the corpus path into the passed corpus graph.
     *
     * <p>A trailing path separator on the corpus path is removed before the traversal starts.
     * The outcome is only logged: a debug listing of every imported resource and warnings when
     * nothing, or no document, was found.
     *
     * @param sCorpusGraph the graph to fill; it is set as this module's corpus graph
     * @throws PepperModuleException if the corpus graph, the corpus description or the corpus
     *     path is missing, or if the corpus path is not a file URI
     */
    @Override
    public void importCorpusStructure(SCorpusGraph sCorpusGraph) throws PepperModuleException {
        setCorpusGraph(sCorpusGraph);
        if (getCorpusGraph() == null) {
            throw new PepperModuleException(this, "Cannot start with importing corpus, because salt project isn't set.");
        }
        if (getCorpusDesc() == null) {
            throw new PepperModuleException(this, "Cannot start with importing corpus, because no corpus definition to import is given.");
        }
        if (getCorpusDesc().getCorpusPath() == null) {
            throw new PepperModuleException(this, "Cannot start with importing corpus, because the path of given corpus definition is null.");
        }
        if (!getCorpusDesc().getCorpusPath().isFile()) {
            throw new PepperModuleException(this, "Cannot start with importing corpus, because the given corpus path does not locate a file.");
        }

        // A trailing separator produces an empty last segment; drop it so that lastSegment()
        // yields the actual folder/file name during the traversal.
        String corpusPath = getCorpusDesc().getCorpusPath().toFileString();
        if (corpusPath.endsWith("/") || corpusPath.endsWith("\\")) {
            getCorpusDesc().setCorpusPath(getCorpusDesc().getCorpusPath().trimSegments(1));
        }

        Boolean containsDocuments = importCorpusStructureRec(getCorpusDesc().getCorpusPath(), null);

        if (this.logger.isDebugEnabled() && getIdentifier2ResourceTable().size() > 0) {
            StringBuilder listing = new StringBuilder();
            listing.append("[");
            listing.append(getName());
            listing.append("]");
            listing.append(" import corpora and documents: \n");
            for (URI resource : getIdentifier2ResourceTable().values()) {
                listing.append("\t");
                listing.append(resource);
                listing.append("\n");
            }
            this.logger.debug(listing.toString());
        }
        if (getIdentifier2ResourceTable().isEmpty()) {
            this.logger.warn("[{}] No corpora and documents found to import in '{}'. ", getName(), getCorpusDesc().getCorpusPath());
        }
        if (!containsDocuments.booleanValue()) {
            this.logger.warn("[{}] No documents found to import in '{}'. ", getName(), getCorpusDesc().getCorpusPath());
        }
    }

    /**
     * Recursively creates corpora and documents for the passed resource and everything below it.
     *
     * <p>If the corpus graph has no name yet, it is named after the last segment of the first
     * resource visited. Resources whose last segment is {@code null} or contained in
     * {@link #getIgnoreEndings()}, and resources for which {@link #setTypeOfResource(URI)}
     * returns {@code null}, are skipped.
     *
     * @param uri the file or folder to inspect
     * @param sCorpus the parent corpus, or {@code null} for the root of the traversal; a document
     *     found without a parent gets a corpus created for it
     * @return {@code true} if at least one document was created for this resource or below it
     * @throws PepperModuleException if the canonical path of a child file cannot be determined
     */
    protected Boolean importCorpusStructureRec(URI uri, SCorpus sCorpus) {
        if (getCorpusGraph().getName() == null || getCorpusGraph().getName().isEmpty()) {
            getCorpusGraph().setName(uri.lastSegment());
        }
        if (uri.lastSegment() == null || getIgnoreEndings().contains(uri.lastSegment())) {
            return Boolean.FALSE;
        }
        SALT_TYPE typeOfResource = setTypeOfResource(uri);
        if (typeOfResource == null) {
            return Boolean.FALSE;
        }

        boolean containsDocuments = false;
        File resource = new File(uri.toFileString());
        if (SALT_TYPE.SCORPUS.equals(typeOfResource)) {
            SCorpus corpus = getCorpusGraph().createCorpus(sCorpus, uri.lastSegment());
            getIdentifier2ResourceTable().put(corpus.getIdentifier(), uri);
            File[] children = resource.isDirectory() ? resource.listFiles() : null;
            if (children != null) {
                for (File child : children) {
                    URI childUri;
                    try {
                        childUri = URI.createFileURI(child.getCanonicalPath());
                    } catch (IOException e) {
                        throw new PepperModuleException("Cannot import corpus structure, because cannot create a URI out of file '" + child + "'. ", e);
                    }
                    // Recurse BEFORE combining the flags: folding the call into
                    // "containsDocuments || recurse()" would short-circuit and skip every
                    // sibling after the first one that contained a document.
                    boolean childContainsDocuments = importCorpusStructureRec(childUri, corpus).booleanValue();
                    containsDocuments = containsDocuments || childContainsDocuments;
                }
            }
        } else if (SALT_TYPE.SDOCUMENT.equals(typeOfResource)) {
            containsDocuments = true;
            SCorpus parent = sCorpus;
            if (parent == null) {
                // A document without an enclosing corpus gets one named after the resource.
                parent = getCorpusGraph().createCorpus((SCorpus) null, nameWithoutExtension(uri));
                getIdentifier2ResourceTable().put(parent.getIdentifier(), uri);
            }
            // Folders acting as documents keep their full name; files lose their extension.
            String documentName = resource.isDirectory() ? uri.lastSegment() : nameWithoutExtension(uri);
            getIdentifier2ResourceTable().put(getCorpusGraph().createDocument(parent, documentName).getIdentifier(), uri);
        }
        return Boolean.valueOf(containsDocuments);
    }

    /**
     * Returns the last segment of the passed URI without its trailing file extension. Only the
     * trailing ".ext" is removed; other occurrences of the same text inside the name are kept.
     */
    private static String nameWithoutExtension(URI uri) {
        String name = uri.lastSegment();
        String extension = uri.fileExtension();
        if (name == null || extension == null) {
            return name;
        }
        String suffix = "." + extension;
        return name.endsWith(suffix) ? name.substring(0, name.length() - suffix.length()) : name;
    }

    /**
     * Verifies that a corpus path is set and exists on disk, then delegates to the super
     * implementation.
     *
     * @throws WorkflowException if no corpus path is set or the path does not exist
     * @throws PepperModuleException declared for the delegated {@code super.start()} call
     */
    @Override
    public void start() throws PepperModuleException {
        if (getCorpusDesc().getCorpusPath() == null) {
            throw new WorkflowException("[" + getName() + "] Cannot import corpus-structure, because no corpus path was given. ");
        }
        File corpusFile = new File(getCorpusDesc().getCorpusPath().toFileString());
        if (!corpusFile.exists()) {
            throw new WorkflowException("[" + getName() + "] Cannot import corpus-structure, because the given corpus path '" + corpusFile.getAbsolutePath() + "' does not exist. ");
        }
        super.start();
    }

    /**
     * Returns the endings classified as documents, creating an empty collection on first access.
     *
     * @return the live, mutable collection
     */
    @Override
    public synchronized Collection<String> getDocumentEndings() {
        if (this.sDocumentEndings == null) {
            this.sDocumentEndings = new HashSet<String>();
        }
        return this.sDocumentEndings;
    }

    /**
     * Returns the endings classified as corpora. On first access the collection is created and
     * pre-filled with {@link PepperModule#ENDING_FOLDER}.
     *
     * @return the live, mutable collection
     */
    @Override
    public synchronized Collection<String> getCorpusEndings() {
        if (this.sCorpusEndings == null) {
            this.sCorpusEndings = new HashSet<String>();
            this.sCorpusEndings.add(PepperModule.ENDING_FOLDER);
        }
        return this.sCorpusEndings;
    }

    /**
     * Classifies the passed resource as corpus or document. Despite its name, this method only
     * computes and returns the type; it stores nothing.
     *
     * <ul>
     *   <li>Non-directory: hidden files are ignored. Otherwise the resource is a document if the
     *       document endings contain {@link PepperModule#ENDING_ALL_FILES} or the file
     *       extension, else a corpus if the corpus endings contain the file extension.</li>
     *   <li>Directory with sub-directories that are not ignored: a corpus if the corpus endings
     *       contain {@link PepperModule#ENDING_FOLDER}.</li>
     *   <li>Leaf directory: a document if the document endings contain
     *       {@link PepperModule#ENDING_LEAF_FOLDER}, else a corpus if the corpus endings contain
     *       {@link PepperModule#ENDING_FOLDER} or {@link PepperModule#ENDING_LEAF_FOLDER}.</li>
     * </ul>
     *
     * @param uri the resource to classify
     * @return {@link SALT_TYPE#SCORPUS}, {@link SALT_TYPE#SDOCUMENT}, or {@code null} if the
     *     resource is not to be imported
     */
    @Override
    public SALT_TYPE setTypeOfResource(URI uri) {
        File resource = new File(uri.toFileString());

        if (!resource.isDirectory()) {
            String fileExtension = uri.fileExtension();
            if (resource.isHidden()) {
                return null;
            }
            if (getDocumentEndings().contains(PepperModule.ENDING_ALL_FILES) || getDocumentEndings().contains(fileExtension)) {
                return SALT_TYPE.SDOCUMENT;
            }
            return getCorpusEndings().contains(fileExtension) ? SALT_TYPE.SCORPUS : null;
        }

        if (!isLeafFolder(resource)) {
            return getCorpusEndings().contains(PepperModule.ENDING_FOLDER) ? SALT_TYPE.SCORPUS : null;
        }

        // Leaf folder: the document interpretation wins over the corpus interpretation.
        if (getDocumentEndings().contains(PepperModule.ENDING_LEAF_FOLDER)) {
            return SALT_TYPE.SDOCUMENT;
        }
        if (getCorpusEndings().contains(PepperModule.ENDING_FOLDER) || getCorpusEndings().contains(PepperModule.ENDING_LEAF_FOLDER)) {
            return SALT_TYPE.SCORPUS;
        }
        return null;
    }

    /**
     * Tells whether the passed file is a directory without any sub-directory that is not
     * ignored. A directory whose content cannot be listed counts as a leaf folder.
     */
    private boolean isLeafFolder(File file) {
        if (!file.isDirectory()) {
            return false;
        }
        File[] children = file.listFiles();
        if (children == null) {
            return true;
        }
        for (File child : children) {
            if (child.isDirectory() && !getIgnoreEndings().contains(child.getName())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the names that are skipped during the structure import. On first access the
     * collection is created and pre-filled with {@code ".svn"}.
     *
     * @return the live, mutable collection
     */
    @Override
    public synchronized Collection<String> getIgnoreEndings() {
        if (this.importIgnoreList == null) {
            this.importIgnoreList = new HashSet<String>();
            this.importIgnoreList.add(".svn");
        }
        return this.importIgnoreList;
    }

    /**
     * Convenience wrapper around {@link PepperUtil#readXMLResource(DefaultHandler2, URI)}.
     *
     * @param defaultHandler2 the handler passed through to the utility
     * @param uri the resource passed through to the utility
     */
    protected void readXMLResource(DefaultHandler2 defaultHandler2, URI uri) {
        PepperUtil.readXMLResource(defaultHandler2, uri);
    }

    /**
     * Always returns {@code null} in this base implementation; subclasses may override it.
     *
     * @param uri the corpus path to rate (unused here)
     * @return {@code null}
     */
    @Override
    public Double isImportable(URI uri) {
        return null;
    }

    /**
     * Sets the resolver used by {@link #sampleFileContent(URI, String...)}.
     *
     * @param corpusPathResolver the resolver to use; {@code null} makes the sampling create a
     *     fresh resolver per call
     */
    public void setCorpusPathResolver(CorpusPathResolver corpusPathResolver) {
        this.corpusPathResolver = corpusPathResolver;
    }

    /**
     * Samples file content via the configured {@link CorpusPathResolver}, or via a resolver
     * created for the passed URI when none is configured. A resolver created here is not stored.
     *
     * @param uri corpus path used only when no resolver has been set
     * @param strArr arguments passed through to the resolver (presumably file endings — confirm
     *     against {@code CorpusPathResolver})
     * @return whatever the resolver returns
     * @throws PepperModuleException if a resolver cannot be created for the passed URI
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public Collection<String> sampleFileContent(URI uri, String... strArr) {
        CorpusPathResolver resolver = this.corpusPathResolver;
        if (resolver == null) {
            try {
                resolver = new CorpusPathResolver(uri);
            } catch (FileNotFoundException e) {
                throw new PepperModuleException("Cannot sample files for isImportable. ", e);
            }
        }
        return resolver.sampleFileContent(strArr);
    }
}
