package de.julielab.genemapper.resources;

import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import de.julielab.geneexpbase.data.DocumentLoader;
import de.julielab.geneexpbase.data.DocumentLoadingException;
import de.julielab.geneexpbase.data.DocumentSourceFileRegistry;
import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.ioc.ServicesShutdownHub;
import de.julielab.genemapper.Configuration;
import de.julielab.genemapper.GeneMapper;
import de.julielab.genemapper.classification.TransformerDisambiguationDataUtils;
import de.julielab.genemapper.ioc.GeneMappingModule;
import de.julielab.genemapper.utils.GeneMapperException;
import de.julielab.genemapper.utils.GeneMapperInitializationException;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/julielab/genemapper/resources/TransformerDisambiguationDataWriter.class */
/**
 * Command-line entry point and helper for writing transformer disambiguation data.
 *
 * <p>Documents are loaded through a {@link DocumentLoader} and handed to
 * {@link TransformerDisambiguationDataUtils#writeData}. Optionally, a split file
 * names the documents that are written to a separate "-dev" file instead of the
 * main output file.
 */
public class TransformerDisambiguationDataWriter {
    private static final Logger log = LoggerFactory.getLogger(TransformerDisambiguationDataWriter.class);

    /** Split label (second column of the split file) that marks a development document. */
    private static final String DEV_LABEL = "dev";

    /** Suffix that replaces the output file's extension to form the development file name. */
    private static final String DEV_FILE_SUFFIX = "-dev.tsv";

    /**
     * Builds the injector from {@code configurations/genemapper_transformer_data.properties},
     * writes the data for the {@code gnpBc2gnTest} document source into a fixed output file
     * in the working directory (no dev split) and shuts the services down.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) throws IOException, GeneMapperInitializationException, ExecutionException, GeneMapperException, DocumentLoadingException {
        Injector injector = Guice.createInjector(new GeneMappingModule(new Configuration(new File("configurations/genemapper_transformer_data.properties"))));
        try {
            File outputFile = new File("transformerDisambiguationData-bc2test-v23-"
                    + "goldTax" + "-" + "onlyApproxMatches" + "-" + "includeFpMentions" + ".tsv");
            createDisambiguationData(
                    DocumentSourceFileRegistry.gnpBc2gnTest(),
                    injector.getInstance(DocumentLoader.class),
                    injector.getInstance(GeneMapper.class),
                    outputFile,
                    null);
        } finally {
            // Shut the services down even when data creation fails.
            injector.getInstance(ServicesShutdownHub.class).shutdown();
        }
        log.info("Data creation complete.");
    }

    /**
     * Loads all documents of {@code documentSourceFiles} and writes them via
     * {@link TransformerDisambiguationDataUtils#writeData}.
     *
     * <p>When {@code file2} is {@code null}, all documents go to {@code file}. Otherwise
     * {@code file2} is read as UTF-8 text with whitespace-separated columns: the first
     * column is a document ID, the second a split label. Documents whose ID is labelled
     * {@code dev} are written to a sibling of {@code file} whose extension is replaced by
     * {@code -dev.tsv}; all other documents are written to {@code file}.
     *
     * @param documentSourceFiles the corpus to load
     * @param documentLoader      loader used to obtain the documents
     * @param geneMapper          passed through to the data writer
     * @param file                main output file
     * @param file2               optional split file; may be {@code null}
     */
    public static void createDisambiguationData(DocumentSourceFiles documentSourceFiles, DocumentLoader documentLoader, GeneMapper geneMapper, File file, File file2) throws IOException, ExecutionException, DocumentLoadingException, GeneMapperException {
        // NOTE(review): the document element type is not among this file's imports, so the
        // list and the streams derived from it stay raw as before; parameterize them once
        // the type is imported.
        // The documents are materialized once because two streams may be derived from them.
        List documents = (List) documentLoader.getDocuments(documentSourceFiles).collect(Collectors.toList());
        if (file2 == null) {
            TransformerDisambiguationDataUtils.writeData(geneMapper, file, documents.stream());
            return;
        }

        Set<String> devIds = readDevDocumentIds(file2);
        File devFile = deriveDevFile(file);
        log.info("Got {} dev docs from {} that will be omitted from the training data and written to {}.", devIds.size(), file2, devFile);

        Stream trainingDocuments = documents.stream().filter(document -> !devIds.contains(document.getId()));
        Stream devDocuments = documents.stream().filter(document -> devIds.contains(document.getId()));

        log.info("Writing transformer training data for corpus {} to {}", documentSourceFiles.getName(), file);
        TransformerDisambiguationDataUtils.writeData(geneMapper, file, trainingDocuments);
        TransformerDisambiguationDataUtils.writeData(geneMapper, devFile, devDocuments);
    }

    /**
     * Reads the IDs labelled {@code dev} from the split file. Blank lines and lines with
     * fewer than two columns are skipped; leading whitespace does not shift the columns.
     */
    private static Set<String> readDevDocumentIds(File splitFile) throws IOException {
        List<String> lines = FileUtils.readLines(splitFile, StandardCharsets.UTF_8);
        log.info("Read {} document IDs from {}", lines.size(), splitFile);
        return lines.stream()
                .map(String::trim)
                .filter(line -> !line.isEmpty())
                .map(line -> line.split("\\s+"))
                .filter(columns -> columns.length >= 2 && DEV_LABEL.equals(columns[1]))
                .map(columns -> columns[0])
                .collect(Collectors.toSet());
    }

    /**
     * Derives the dev output file next to {@code outputFile}: the part of the file name
     * from its last dot on is replaced by {@code -dev.tsv}. Only the file name is inspected,
     * so dots in parent directories are ignored and a name without a dot just gets the suffix.
     */
    private static File deriveDevFile(File outputFile) {
        File absoluteFile = outputFile.getAbsoluteFile();
        String fileName = absoluteFile.getName();
        int extensionStart = fileName.lastIndexOf('.');
        String baseName = extensionStart >= 0 ? fileName.substring(0, extensionStart) : fileName;
        return new File(absoluteFile.getParentFile(), baseName + DEV_FILE_SUFFIX);
    }
}
