package de.julielab.geneexpbase.data;

import de.julielab.geneexpbase.genemodel.GeneMention;
import java.util.EnumSet;
import java.util.List;

/* loaded from: input_file:de/julielab/geneexpbase/data/DocumentSourceFileRegistry.class */
/**
 * Static catalogue of {@link DocumentSourceFiles} configurations, one factory method per corpus
 * (or corpus variant).
 *
 * <p>Every factory returns a freshly created, independently mutable {@link DocumentSourceFiles}
 * instance; variants are derived by calling a base factory and overriding individual settings.
 *
 * <p>The setters are deliberately invoked in a fixed order (MeSH/substances paths, then the base
 * path, then the paths given without the corpora root). NOTE(review): whether
 * {@link DocumentSourceFiles} depends on that order is not visible from this class, so the order
 * is kept exactly as it was.
 */
public class DocumentSourceFileRegistry {

    /**
     * Gene types accepted for the ProGene-style taggers. The spelling
     * {@code "protein_familiy_or_group"} is kept verbatim because it is a runtime value.
     * NOTE(review): presumably it matches the label used in the annotation data - confirm before
     * "correcting" it.
     */
    private static final List<String> PROTEIN_GENE_TYPES =
            List.of("Gene", "protein", "protein_complex", "protein_enum", "protein_familiy_or_group");

    /** Sub-directory prefix used by most corpora for their pre-computed annotation files. */
    private static final String ANNOTATED_DIR = "annotated/";

    /**
     * Configuration for the DECA species corpus (version 0.2) using gold gene mentions.
     *
     * @return a new configuration named {@code "decaSpeciesCorpus"}
     */
    public static DocumentSourceFiles decaSpeciesCorpus() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("decaSpeciesCorpus");
        files.setHasGeneIds(false);
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GOLD));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/species_corpus_0.2/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/species_corpus_0.2/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_0.2");
        files.setPredictedGenesPath("gold.taxlist");
        applySpeciesCorpusLayout(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Configuration for the custom species corpus (GNormPlus + BC2 train) using gold gene
     * mentions.
     *
     * <p>NOTE(review): this shares its name with
     * {@link #speciesCorpusCustomGnormPlusBC2TrainFlairGenes()}; left unchanged because it is
     * unclear whether the shared name is intentional.
     *
     * @return a new configuration named {@code "speciesCorpusCustomGnormPlusBC2Train"}
     */
    public static DocumentSourceFiles speciesCorpusCustomGnormPlusBC2TrainGoldGenes() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("speciesCorpusCustomGnormPlusBC2Train");
        files.setHasGeneIds(false);
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GOLD));
        files.setAllowedGeneTypes(List.of("Gene"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_custom_data");
        files.setPredictedGenesPath("gold.taxlist");
        applySpeciesCorpusLayout(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Configuration for the custom species corpus (GNormPlus + BC2 train) using predicted gene
     * mentions from the Flair-based tagger pipeline instead of gold mentions.
     *
     * <p>NOTE(review): this shares its name with
     * {@link #speciesCorpusCustomGnormPlusBC2TrainGoldGenes()}; left unchanged because it is
     * unclear whether the shared name is intentional.
     *
     * @return a new configuration named {@code "speciesCorpusCustomGnormPlusBC2Train"}
     */
    public static DocumentSourceFiles speciesCorpusCustomGnormPlusBC2TrainFlairGenes() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("speciesCorpusCustomGnormPlusBC2Train");
        files.setHasGeneIds(false);
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(List.of("Gene"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setSpeciesCorpus(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/species_corpus_custom_data");
        files.setPredictedGenesPath("annotated/genes.tsv.gz");
        applySpeciesCorpusLayout(files);
        files.setGoldGeneList("gold.taxlist");
        return files;
    }

    /**
     * Configuration for the GNormPlus edition of the BC2 gene normalization training set.
     *
     * @return a new configuration named {@code "gnpBc2gnTrainFlPgCvGaz"}
     */
    public static DocumentSourceFiles gnpBc2gnTrain() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTrainFlPgCvGaz");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2train");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("bc2train.genelist");
        return files;
    }

    /**
     * Configuration for the GNormPlus edition of the BC2 gene normalization test set.
     *
     * @return a new configuration named {@code "gnpBc2gnTestFlPgCvGaz"}
     */
    public static DocumentSourceFiles gnpBc2gnTest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTestFlPgCvGaz");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("bc2test.genelist");
        return files;
    }

    /**
     * Configuration for the GNormPlus BC2 test set whose annotations live directly in the
     * {@code annotated_progene_gnp_entities} directory (no {@code annotated/} sub-directory) and
     * which uses the {@code FLAIR_JPG_GNP_ENTITIES} tagger with GNormPlus entity types.
     *
     * @return a new configuration named {@code "gnpBc2gnTestFlPgCvGazGnpEntities"}
     */
    public static DocumentSourceFiles gnpBc2gnTestFlairProGeneGnpEntities() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpBc2gnTestFlPgCvGazGnpEntities");
        files.setTaggersToUse(EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_GNP_ENTITIES,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER));
        files.setAllowedGeneTypes(List.of("Gene", "FamilyName"));
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test/annotated_progene_gnp_entities");
        applyFullAnnotationLayout(files, "");
        files.setGoldGeneList("../bc2test.genelist");
        return files;
    }

    /**
     * Variant of {@link #gnpBc2gnTestFlairProGeneGnpEntities()} that reads from the
     * {@code annotated_progene_gnp_entities_consis_longgaz} directory.
     *
     * <p>All file paths are set again after the base path is switched, exactly as before.
     * NOTE(review): whether re-applying them is required depends on how
     * {@link DocumentSourceFiles} handles its paths, which is not visible here.
     *
     * @return a new configuration named
     *         {@code "gnpBc2gnTestFlPgCvGazGnpEntitiesConsistencyLongerGaz"}
     */
    public static DocumentSourceFiles gnpBc2gnTestFlairProGeneGnpEntitiesConsistencyLongerGazetteer() {
        DocumentSourceFiles files = gnpBc2gnTestFlairProGeneGnpEntities();
        files.setName("gnpBc2gnTestFlPgCvGazGnpEntitiesConsistencyLongerGaz");
        files.setBasePath("../built-resources/corpora/gnormplus_data/bc2test/annotated_progene_gnp_entities_consis_longgaz");
        applyFullAnnotationLayout(files, "");
        files.setGoldGeneList("../bc2test.genelist");
        return files;
    }

    /**
     * Configuration for the original BC2 gene normalization test set.
     *
     * @return a new configuration named {@code "bc2gntest"}
     */
    public static DocumentSourceFiles bc2gntest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc2gntest");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/test/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/test/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/bc2_data/test");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("test.genelist");
        return files;
    }

    /**
     * Configuration for the GNormPlus edition of the NLM-IAT corpus. No MeSH or substances paths
     * are set for this corpus.
     *
     * @return a new configuration named {@code "gnpNlmIat"}
     */
    public static DocumentSourceFiles gnpNlmIat() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("gnpNlmIat");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/gnormplus_data/nlmiat");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmiat.genelist");
        return files;
    }

    /**
     * Configuration for BC3 training set 1; flagged as not completely annotated.
     *
     * @return a new configuration named {@code "bc3Trainset1"}
     */
    public static DocumentSourceFiles bc3Trainset1() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Trainset1");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/trainset1");
        applyBc3AnnotationLayout(files);
        files.setGoldGeneList("trainset1.genelist");
        return files;
    }

    /**
     * Configuration for the 50-document BC3 test set; flagged as not completely annotated.
     *
     * @return a new configuration named {@code "bc3Test50"}
     */
    public static DocumentSourceFiles bc3Test50() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Test50");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/test50");
        applyBc3AnnotationLayout(files);
        files.setGoldGeneList("test50.genelist");
        return files;
    }

    /**
     * Variant of {@link #bc3Trainset1()} with document-level label inference enabled and the
     * corpus flagged as completely annotated.
     *
     * <p>NOTE(review): the name is not overridden, so this variant is still called
     * {@code "bc3Trainset1"}; left unchanged because it is unclear whether that is intended.
     *
     * @return a new configuration named {@code "bc3Trainset1"}
     */
    public static DocumentSourceFiles bc3Trainset1InferredMentionIds() {
        DocumentSourceFiles files = bc3Trainset1();
        files.setInferDocumentLevelLabelsToMentions(true);
        files.setCompletelyAnnotated(true);
        return files;
    }

    /**
     * Configuration for BC3 training set 2, with document-level label inference enabled and the
     * corpus flagged as not completely annotated.
     *
     * @return a new configuration named {@code "bc3Trainset2"}
     */
    public static DocumentSourceFiles bc3Trainset2() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("bc3Trainset2");
        files.setTaggersToUse(flairAndGazetteerTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        files.setCompletelyAnnotated(false);
        files.setBasePath("../built-resources/corpora/bc3_data/trainset2");
        applyBc3AnnotationLayout(files);
        files.setGoldGeneList("trainset2.genelist");
        return files;
    }

    /**
     * Species-corpus variant of {@link #bc3Trainset1()} that uses {@code trainset1.taxlist} as
     * the gold list.
     *
     * <p>NOTE(review): {@code hasGeneIds} is {@code true} here but {@code false} in
     * {@link #bc3Trainset2Species()}; confirm which value is intended.
     *
     * @return a new configuration named {@code "bc3Trainset1Species"}
     */
    public static DocumentSourceFiles bc3Trainset1Species() {
        DocumentSourceFiles files = bc3Trainset1();
        files.setName("bc3Trainset1Species");
        files.setHasGeneIds(true);
        files.setGoldGeneList("trainset1.taxlist");
        files.setSpeciesCorpus(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        return files;
    }

    /**
     * Species-corpus variant of {@link #bc3Trainset2()} that uses {@code trainset2.taxlist} as
     * the gold list.
     *
     * @return a new configuration named {@code "bc3Trainset2Species"}
     */
    public static DocumentSourceFiles bc3Trainset2Species() {
        DocumentSourceFiles files = bc3Trainset2();
        files.setName("bc3Trainset2Species");
        files.setHasGeneIds(false);
        files.setGoldGeneList("trainset2.taxlist");
        files.setSpeciesCorpus(true);
        files.setInferDocumentLevelLabelsToMentions(true);
        return files;
    }

    /**
     * Variant of {@link #gnpBc2gnTrain()} that takes its gene mentions from GNormPlus output
     * instead of the Flair-based pipeline.
     *
     * @return a new configuration named {@code "gnpBc2gnTrainGnpPred"}
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPred() {
        DocumentSourceFiles files = gnpBc2gnTrain();
        files.setName("gnpBc2gnTrainGnpPred");
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setPredictedGenesPath("bc2gntrain.genormplusoutput.genelist");
        return files;
    }

    /**
     * Variant of {@link #gnpBc2gnTrain()} that reads GNormPlus gene, species and sentence output
     * (the {@code *_after_sr} / {@code *_from_sr} files) and disables species filtering and
     * reference-species addition.
     *
     * @return a new configuration named {@code "gnpBc2gnTrainGnpPredSpeciesAfterSr"}
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPredSpeciesAfterSr() {
        DocumentSourceFiles files = gnpBc2gnTrain();
        files.setName("gnpBc2gnTrainGnpPredSpeciesAfterSr");
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setAllowedGeneTypes(List.of("Gene", "FamilyName", "DomainMotif", "Cell"));
        files.setFilterSpecies(false);
        files.setAddReferenceSpecies(false);
        files.setPredictedGenesPath("bc2gntrain.genormplusoutput_after_sr.taxlist");
        files.setSpeciesPath("bc2gntrain.genormplusoutput.taxlist");
        files.setSentencesPath("bc2gntrain.genormplusoutput_sentences_from_sr.tsv");
        return files;
    }

    /**
     * Variant of {@link #gnpBc2gnTrainGnpPred()} that additionally takes species mentions from
     * GNormPlus output and disables species filtering and reference-species addition.
     *
     * @return a new configuration named {@code "gnpBc2gnTrainGnpPredGnpSpecies"}
     */
    public static DocumentSourceFiles gnpBc2gnTrainGnpPredGnpSpecies() {
        DocumentSourceFiles files = gnpBc2gnTrainGnpPred();
        files.setName("gnpBc2gnTrainGnpPredGnpSpecies");
        files.setTaggersToUse(EnumSet.of(GeneMention.GeneTagger.GNORM_PLUS));
        files.setSpeciesPath("bc2gntrain.genormplusoutput.taxlist");
        files.setFilterSpecies(false);
        files.setAddReferenceSpecies(false);
        return files;
    }

    /**
     * Configuration for the NLM-Gene training split.
     *
     * @return a new configuration named {@code "nlmGeneTrain"}
     */
    public static DocumentSourceFiles nlmGeneTrain() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("nlmGeneTrain");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/nlm_gene/train");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmgenetrain.genelist");
        return files;
    }

    /**
     * Configuration for the NLM-Gene test split.
     *
     * <p>The name used to be {@code "nlmGeneTrain"} (copied from {@link #nlmGeneTrain()}), which
     * made the train and test configurations indistinguishable by name; it is now
     * {@code "nlmGeneTest"}.
     *
     * <p>NOTE(review): unlike the sibling corpora this one uses the plain {@code FLAIR} tagger
     * rather than {@code FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR}; kept as is.
     *
     * @return a new configuration named {@code "nlmGeneTest"}
     */
    public static DocumentSourceFiles nlmGeneTest() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("nlmGeneTest");
        files.setTaggersToUse(EnumSet.of(
                GeneMention.GeneTagger.FLAIR,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER));
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setBasePath("../built-resources/corpora/nlm_gene/test");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("nlmgenetest.nofamilies.genelist");
        return files;
    }

    /**
     * Configuration for the small GNormPlus/BC2 data set used by unit tests.
     *
     * @return a new configuration named {@code "unitTests"}
     */
    public static DocumentSourceFiles unitTests() {
        DocumentSourceFiles files = new DocumentSourceFiles();
        files.setName("unitTests");
        files.setTaggersToUse(flairPipelineTaggers());
        files.setAllowedGeneTypes(PROTEIN_GENE_TYPES);
        files.setFilterSpecies(true);
        files.setAddReferenceSpecies(true);
        files.setCompletelyAnnotated(true);
        files.setMeshPath("../built-resources/corpora/bc2_data/train/mesh.tsv.gz");
        files.setSubstancesPath("../built-resources/corpora/bc2_data/train/substances.tsv.gz");
        files.setBasePath("../built-resources/corpora/gnormplus_bc2_unittest_data");
        applyFullAnnotationLayout(files, ANNOTATED_DIR);
        files.setGoldGeneList("unitTests.genelist");
        return files;
    }

    /**
     * Builds the tagger set shared by most corpora: the Flair model plus the consistency,
     * expansion and gazetteer taggers. A new set is created on every call so that callers never
     * share a mutable {@link EnumSet}.
     */
    private static EnumSet<GeneMention.GeneTagger> flairPipelineTaggers() {
        return EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR,
                GeneMention.GeneTagger.CONSISTENCY_TAGGER,
                GeneMention.GeneTagger.EXPANSION_TAGGER,
                GeneMention.GeneTagger.GAZETTEER);
    }

    /** Builds the reduced tagger set used by the BC3 corpora: Flair model and gazetteer only. */
    private static EnumSet<GeneMention.GeneTagger> flairAndGazetteerTaggers() {
        return EnumSet.of(
                GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR,
                GeneMention.GeneTagger.GAZETTEER);
    }

    /**
     * Sets the six annotation paths used by the species corpora (sentences, chunks, POS, species,
     * acronyms, document text), all below {@code annotated/}. The predicted-genes path differs
     * per corpus and must be set by the caller beforehand.
     */
    private static void applySpeciesCorpusLayout(DocumentSourceFiles files) {
        files.setSentencesPath("annotated/annotations.tsv.gz");
        files.setChunksPath("annotated/annotations.tsv.gz");
        files.setPosPath("annotated/annotations.tsv.gz");
        files.setSpeciesPath("annotated/annotations.tsv.gz");
        files.setAcronymsPath("annotated/acronyms.tsv.gz");
        files.setDocTextPath("annotated/text");
    }

    /**
     * Sets the eight annotation paths used by the BC3 corpora, all below {@code annotated/}.
     * Compared to the full layout this omits ontology mentions, coreferences and appositions.
     */
    private static void applyBc3AnnotationLayout(DocumentSourceFiles files) {
        files.setPredictedGenesPath("annotated/genes.tsv.gz");
        files.setSentencesPath("annotated/annotations.tsv.gz");
        files.setChunksPath("annotated/annotations.tsv.gz");
        files.setNonGenePhrasesPath("annotated/annotations.tsv.gz");
        files.setPosPath("annotated/annotations.tsv.gz");
        files.setSpeciesPath("annotated/annotations.tsv.gz");
        files.setAcronymsPath("annotated/acronyms.tsv.gz");
        files.setDocTextPath("annotated/text");
    }

    /**
     * Sets the complete set of eleven annotation paths in the established order.
     *
     * @param files the configuration to update
     * @param dir   prefix prepended verbatim to every file name: {@code "annotated/"} for corpora
     *              that keep annotations in a sub-directory, or the empty string when they sit
     *              directly next to the configured base path
     */
    private static void applyFullAnnotationLayout(DocumentSourceFiles files, String dir) {
        String annotations = dir + "annotations.tsv.gz";
        files.setPredictedGenesPath(dir + "genes.tsv.gz");
        files.setSentencesPath(annotations);
        files.setChunksPath(annotations);
        files.setNonGenePhrasesPath(annotations);
        files.setPosPath(annotations);
        files.setOntologyMentionsPath(annotations);
        files.setSpeciesPath(annotations);
        files.setAcronymsPath(dir + "acronyms.tsv.gz");
        files.setCorefPath(dir + "coreferences.tsv.gz");
        files.setAppositionsPath(dir + "appositions.tsv.gz");
        files.setDocTextPath(dir + "text");
    }
}
