package de.julielab.jcore.reader.ct;

import de.julielab.jcore.reader.ct.ctmodel.ClinicalTrial;
import de.julielab.jcore.types.Keyword;
import de.julielab.jcore.types.MeshHeading;
import de.julielab.jcore.types.ct.ArmGroupDescription;
import de.julielab.jcore.types.ct.BriefTitle;
import de.julielab.jcore.types.ct.Condition;
import de.julielab.jcore.types.ct.Description;
import de.julielab.jcore.types.ct.Exclusion;
import de.julielab.jcore.types.ct.Header;
import de.julielab.jcore.types.ct.Inclusion;
import de.julielab.jcore.types.ct.InterventionName;
import de.julielab.jcore.types.ct.InterventionType;
import de.julielab.jcore.types.ct.OfficialTitle;
import de.julielab.jcore.types.ct.OutcomeDescription;
import de.julielab.jcore.types.ct.OutcomeMeasure;
import de.julielab.jcore.types.ct.StudyDesignInfo;
import de.julielab.jcore.types.ct.Summary;
import de.julielab.jcore.types.pubmed.ManualDescriptor;
import de.julielab.jcore.utility.JCoReTools;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Collection reader that converts ClinicalTrials.gov XML files into CASes.
 *
 * <p>On initialization the reader walks {@link #PARAM_INPUT_DIR} recursively and collects every
 * regular file (optionally restricted to the names given via {@link #PARAM_FILES}). Each call to
 * {@link #getNext(JCas)} then parses one file, adds a {@link Header} and, if MeSH tags or keywords
 * are present, a {@link ManualDescriptor}, and builds the document text by concatenating the
 * individual trial fields, one per line, each covered by an annotation of the matching type.
 */
@ResourceMetaData(name = "JCoRe Clinical Trials Reader", description = "This component reads the XML format provided by ClinicalTrials.gov. To this end, the JCoRe type system contains a number of types specifically created for this kind of document. Note that the CAS text created by this reader might be confusing without checking the corresponding annotations. This is due to the fact that the CT XML contains multiple enumerations which are not very well reflected in plain text. Also, enumerations with subitems, such as the outcomes, are not displayed in the expected groups of items. Instead, each item type is displayed separately. This could be changed, if necessary. Since all items are correctly annotated by their category, this might not even be an issue, depending on the downstream tasks.")
/* loaded from: input_file:de/julielab/jcore/reader/ct/ClinicalTrialsReader.class */
public class ClinicalTrialsReader extends JCasCollectionReader_ImplBase {
    public static final String PARAM_INPUT_DIR = "InputDirectory";
    public static final String PARAM_FILES = "FileNames";
    private static final Logger log = LoggerFactory.getLogger(ClinicalTrialsReader.class);

    @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "The root directory that contains the clinical trials. The reader supports a subdirectory structure where the actual XML documents are located on deeper levels.")
    private File inputDirectory;

    @ConfigurationParameter(name = PARAM_FILES, mandatory = false, description = "For debugging: Restrict the documents read to the given document file names.")
    private String[] fileNames;

    /** Iterator over all files selected for reading; set up in {@link #initialize(UimaContext)}. */
    private Iterator<File> files;

    /** Number of files found by {@link #readFiles(File)}; used for progress reporting. */
    private int totalFiles;

    /** Number of files already handed to {@link #getNext(JCas)}, whether parsed successfully or not. */
    private int completedFiles;

    /**
     * Reads the configuration parameters and collects the files to be read.
     *
     * @param uimaContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if the input directory cannot be traversed
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.inputDirectory = new File((String) uimaContext.getConfigParameterValue(PARAM_INPUT_DIR));
        this.fileNames = (String[]) uimaContext.getConfigParameterValue(PARAM_FILES);
        // Log the configuration before touching the file system so it is visible even on failure.
        log.info("{}: {}", PARAM_INPUT_DIR, this.inputDirectory);
        log.info("{}: {}", PARAM_FILES, this.fileNames);
        try {
            this.files = readFiles(this.inputDirectory);
        } catch (IOException e) {
            log.error("Could not read clinical trials files", e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Recursively collects all regular files below the given directory. If {@code fileNames} is
     * non-empty, only files whose simple name is contained in it are kept.
     *
     * @param file the root directory to traverse
     * @return an iterator over the collected files
     * @throws IOException if the directory tree cannot be traversed
     */
    private Iterator<File> readFiles(File file) throws IOException {
        final Set<String> allowedNames = (this.fileNames != null && this.fileNames.length > 0)
                ? new HashSet<>(Arrays.asList(this.fileNames))
                : null;
        List<File> collected;
        // Files.walk holds open directory handles; the stream must be closed explicitly.
        try (Stream<Path> walk = Files.walk(file.toPath())) {
            collected = walk
                    .filter(Files::isRegularFile)
                    .map(Path::toFile)
                    .filter(candidate -> allowedNames == null || allowedNames.contains(candidate.getName()))
                    .collect(Collectors.toList());
        } catch (UncheckedIOException e) {
            // I/O errors during lazy traversal are wrapped by the stream; unwrap them so the
            // caller's IOException handling applies.
            throw e.getCause();
        }
        this.totalFiles = collected.size();
        this.completedFiles = 0;
        return collected.iterator();
    }

    /**
     * Fills the given CAS with the contents of the next clinical trial file. Does nothing if no
     * files are left. If the file cannot be processed, the error is logged and the CAS is left
     * without document text; the reader then continues with the next file on the next call.
     *
     * @param jCas the CAS to populate
     */
    public void getNext(JCas jCas) {
        if (!this.files.hasNext()) {
            return;
        }
        File trialFile = this.files.next();
        this.completedFiles++;
        try {
            // Parse first: this is done before any annotation is created in the CAS.
            ClinicalTrial trial = ClinicalTrial.fromXml(trialFile.getAbsolutePath());
            addHeader(jCas, trial);
            addManualDescriptor(jCas, trial);
            jCas.setDocumentText(buildAnnotatedText(jCas, trial));
        } catch (Exception e) {
            // Best effort: a single broken file must not abort the whole collection.
            // JVM Errors are deliberately not caught here.
            log.error("Exception occurred when reading file {}", trialFile, e);
        }
    }

    /**
     * Builds the document text for a trial and adds one annotation per appended field.
     *
     * @param jCas  the CAS receiving the annotations
     * @param trial the parsed clinical trial
     * @return the complete document text
     */
    private String buildAnnotatedText(JCas jCas, ClinicalTrial trial) {
        StringBuilder text = new StringBuilder();
        addAnnotatedText(text, trial.brief_title, new BriefTitle(jCas));
        addAnnotatedText(text, trial.official_title, new OfficialTitle(jCas));
        addAnnotatedText(text, trial.summary, new Summary(jCas));
        addAnnotatedText(text, trial.description, new Description(jCas));

        // Measures and descriptions are parallel lists, but a description may be missing.
        int numOutcomeDescriptions = trial.outcomeDescriptions.size();
        for (int i = 0; i < trial.outcomeMeasures.size(); i++) {
            addAnnotatedText(text, trial.outcomeMeasures.get(i), new OutcomeMeasure(jCas));
            if (i < numOutcomeDescriptions) {
                addAnnotatedText(text, trial.outcomeDescriptions.get(i), new OutcomeDescription(jCas));
            }
        }

        for (String condition : trial.conditions) {
            addAnnotatedText(text, condition, new Condition(jCas));
        }

        // Same guard as for the outcomes: do not fail the whole document when an intervention
        // type has no corresponding name.
        int numInterventionNames = trial.interventionNames.size();
        for (int i = 0; i < trial.interventionTypes.size(); i++) {
            addAnnotatedText(text, trial.interventionTypes.get(i), new InterventionType(jCas));
            if (i < numInterventionNames) {
                addAnnotatedText(text, trial.interventionNames.get(i), new InterventionName(jCas));
            }
        }

        for (String armGroupDescription : trial.armGroupDescriptions) {
            addAnnotatedText(text, armGroupDescription, new ArmGroupDescription(jCas));
        }

        addAnnotatedText(text, trial.inclusion, new Inclusion(jCas));
        addAnnotatedText(text, trial.exclusion, new Exclusion(jCas));
        return text.toString();
    }

    /**
     * Adds a {@link ManualDescriptor} holding the trial's MeSH tags and keywords to the CAS.
     * Nothing is added when the trial has neither.
     *
     * @param jCas          the CAS receiving the descriptor
     * @param clinicalTrial the parsed clinical trial
     */
    private void addManualDescriptor(JCas jCas, ClinicalTrial clinicalTrial) {
        if (clinicalTrial.meshTags.isEmpty() && clinicalTrial.keywords.isEmpty()) {
            return;
        }
        ManualDescriptor descriptor = new ManualDescriptor(jCas);
        for (String meshTag : clinicalTrial.meshTags) {
            MeshHeading heading = new MeshHeading(jCas);
            heading.setDescriptorName(meshTag);
            descriptor.setMeSHList(JCoReTools.addToFSArray(descriptor.getMeSHList(), heading, 1));
        }
        for (String keywordName : clinicalTrial.keywords) {
            Keyword keyword = new Keyword(jCas);
            keyword.setName(keywordName);
            descriptor.setKeywordList(JCoReTools.addToFSArray(descriptor.getKeywordList(), keyword, 1));
        }
        descriptor.addToIndexes();
    }

    /**
     * Appends the given text followed by a line separator to the document text and makes the
     * annotation cover exactly the appended text (without the separator).
     *
     * @param sb         the document text built so far
     * @param str        the text to append; {@code null} is treated as empty text so that the
     *                   literal string "null" never ends up in the document
     * @param annotation the annotation to position over the appended text and add to the indexes
     * @return the given annotation
     */
    private Annotation addAnnotatedText(StringBuilder sb, String str, Annotation annotation) {
        annotation.setBegin(sb.length());
        if (str != null) {
            sb.append(str);
        }
        annotation.setEnd(sb.length());
        annotation.addToIndexes();
        sb.append(System.lineSeparator());
        return annotation;
    }

    /**
     * Adds a {@link Header} with the trial's meta data (ID, study type, age limits, study design
     * and, if available, gender) to the CAS.
     *
     * @param jCas          the CAS receiving the header
     * @param clinicalTrial the parsed clinical trial
     */
    private void addHeader(JCas jCas, ClinicalTrial clinicalTrial) {
        StudyDesignInfo designInfo = new StudyDesignInfo(jCas);
        designInfo.setInterventionModel(clinicalTrial.interventionModel);
        designInfo.setPrimaryPurpose(clinicalTrial.primaryPurpose);

        Header header = new Header(jCas);
        header.setDocId(clinicalTrial.id);
        header.setStudyType(clinicalTrial.studyType);
        header.setMinimumAge(clinicalTrial.minAge);
        header.setMaximumAge(clinicalTrial.maxAge);
        header.setStudyDesignInfo(designInfo);
        if (clinicalTrial.sex != null) {
            StringArray genders = new StringArray(jCas, 0);
            for (String gender : clinicalTrial.sex) {
                genders = JCoReTools.addToStringArray(genders, gender);
            }
            header.setGender(genders);
        }
        header.addToIndexes();
    }

    /** Nothing to release: the directory stream is already closed after initialization. */
    public void close() {
    }

    /**
     * Reports how many of the collected files have been handed out so far.
     *
     * @return a single-element array with the processed and total file counts
     */
    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.completedFiles, this.totalFiles, Progress.ENTITIES)};
    }

    /**
     * @return {@code true} if there is at least one more file to read
     */
    public boolean hasNext() {
        return this.files.hasNext();
    }
}
