package uk.ac.man.entitytagger.generate;

import dk.brics.automaton.Automaton;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.logging.Logger;
import martin.common.ArgParser;
import martin.common.Loggers;
import martin.common.Misc;
import martin.common.Pair;
import martin.common.SQL;
import martin.common.StreamIterator;
import martin.common.Tuple;
import martin.common.compthreads.IteratorBasedMaster;
import uk.ac.man.entitytagger.Mention;
import uk.ac.man.entitytagger.entities.species.GenerateDictionary;

/* loaded from: input_file:uk/ac/man/entitytagger/generate/GenerateMatchers.class */
public class GenerateMatchers {
    /**
     * Loads pairs of strings from a tab-separated text file.
     *
     * <p>Empty lines and lines starting with {@code #} are skipped. Every other
     * line is split on tab characters and its first two fields are stored as a
     * {@link Pair}. Progress is printed to standard output.
     *
     * <p>Any failure (unreadable file, malformed line) is printed to standard
     * error and terminates the JVM with exit code {@code -1}.
     *
     * @param file the file to read
     * @return the pairs in file order
     */
    public ArrayList<Pair<String>> loadExtraSynonyms(File file) {
        System.out.print("Loading " + file.getAbsolutePath() + "...");
        ArrayList<Pair<String>> synonyms = new ArrayList<>();
        // try-with-resources: the reader is closed even if reading or parsing fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }
                String[] fields = line.split("\t");
                if (fields.length < 2) {
                    // Fail with a message that names the offending line instead of a
                    // bare ArrayIndexOutOfBoundsException.
                    throw new IllegalArgumentException("Malformed line in " + file.getAbsolutePath()
                            + " (expected two tab-separated fields): '" + line + "'");
                }
                synonyms.add(new Pair<>(fields[0], fields[1]));
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        System.out.println(" done, loaded " + synonyms.size() + " synonyms.");
        return synonyms;
    }

    /**
     * Copies the values of a dictionary map into a new list.
     *
     * @param hashMap the dictionary whose values are copied; must not be null
     * @return a new list holding the map's values in the map's iteration order
     */
    private static ArrayList<DictionaryEntry> hashToList(HashMap<String, DictionaryEntry> hashMap) {
        return new ArrayList<>(hashMap.values());
    }

    /**
     * Optionally (re)creates the variant table and returns a prepared INSERT
     * statement for it.
     *
     * <p>When {@code z} is true the table named {@code str} is dropped if it
     * exists and created again with the columns {@code id}, {@code id_entity},
     * {@code name} and {@code tag}. In all cases an
     * {@code INSERT ... (id_entity,name,tag) VALUES (?,?,?)} statement is
     * prepared on the connection and returned.
     *
     * <p>The table name is concatenated into the SQL text because identifiers
     * cannot be bound as statement parameters; callers must only pass trusted
     * table names.
     *
     * <p>Any failure is printed to standard error and terminates the JVM with
     * exit code {@code -1}.
     *
     * @param connection the database connection to use
     * @param str the name of the variant table
     * @param z whether to drop and re-create the table first
     * @return the prepared INSERT statement (null only on the unreachable path
     *         after {@code System.exit})
     */
    public static PreparedStatement initVariantTable(Connection connection, String str, boolean z) {
        String dropSql = "DROP TABLE IF EXISTS `" + str + "`";
        String createSql = "CREATE TABLE  `" + str + "` (`id` int(10) unsigned NOT NULL auto_increment,`id_entity` varchar(128) NOT NULL,`name` TEXT NOT NULL,`tag` varchar(4096) default NULL,PRIMARY KEY  (`id`),KEY `Index_2` (`tag`)) ENGINE=MyISAM DEFAULT CHARSET=latin1;";
        try {
            if (z) {
                // One statement for both DDL commands, closed as soon as they have run.
                try (java.sql.Statement ddl = connection.createStatement()) {
                    ddl.execute(dropSql);
                    ddl.execute(createSql);
                }
            }
            // Prepared inside the try so an SQLException is reported like every other failure here.
            return connection.prepareStatement("INSERT INTO " + str + " (id_entity,name,tag) VALUES (?,?,?)");
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Depending on the options present, this either builds a dictionary
     * ({@code --species}, {@code --genus} or {@code --regexp}; a later option in
     * that order overrides an earlier one) or loads previously stored automatons
     * ({@code --loadArray}). A built dictionary can be listed
     * ({@code --listKeys}), inspected ({@code --examine}, {@code --examineGraph}),
     * converted to variants ({@code --convertToVariants},
     * {@code --convertToVariantsDB}) and compiled to automatons when
     * {@code --storeArray} or {@code --storeRArray} is given. Automatons can then
     * be post-processed ({@code --multiJoin}, {@code --minimize}) and stored.
     *
     * <p>If no automatons were generated or loaded, the post-processing and
     * storing steps are skipped with a warning instead of failing with a
     * {@link NullPointerException}.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        ArgParser argParser = new ArgParser(strArr);
        if (strArr.length == 0 || argParser.containsKey("help")) {
            System.out.println("Usage (default values in capitals): generate.jar [--taxonomy <taxonomy data file> [--examine <species>] [--report <report interval>]]|[--loadArray <file>]|[--loadRArray <file>] [--multiJoin <num>] [--minimize] [--storeArray <file>] [--storeRArray <file>] [--threads <number of threads>]");
            System.exit(0);
        }
        Logger logger = Loggers.getDefaultLogger(argParser);
        int numThreads = argParser.containsKey("threads") ? argParser.getInt("threads").intValue() : 1;
        int reportInterval = argParser.getInt("report", -1);
        Tuple<ArrayList<Automaton>, Boolean> automatons = null;

        if (argParser.containsKey("species") || argParser.containsKey("genus") || argParser.containsKey("regexp")) {
            logger.info("Using additional synonym files: " + argParser.containsKey("extraSynonymFiles") + "\n");
            File[] extraSynonymFiles = argParser.getFiles("extraSynonymFiles");
            boolean includeLineNumbers = argParser.containsKey("includeLineNumbers");
            logger.info("Including line numbers: " + includeLineNumbers + "\n");

            // Dictionary sources; each later option replaces the result of the previous one.
            HashMap<String, DictionaryEntry> dictionary = argParser.containsKey("species") ? GenerateDictionary.generateSpeciesDictionary(argParser.getFile("species"), extraSynonymFiles, includeLineNumbers, logger, reportInterval) : null;
            if (argParser.containsKey("genus")) {
                dictionary = GenerateDictionary.generateGenusDictionary(argParser.getFile("genus"), extraSynonymFiles, includeLineNumbers, logger);
            }
            if (argParser.containsKey("regexp")) {
                dictionary = loadRegexp(argParser.getFile("regexp"), includeLineNumbers, logger);
            }

            if (argParser.containsKey("listKeys") && dictionary != null) {
                for (String key : dictionary.keySet()) {
                    System.out.println("'" + key + "'");
                }
            }

            if (argParser.containsKey("examine") && dictionary != null) {
                for (String key : argParser.gets("examine")) {
                    if (dictionary.containsKey(key)) {
                        System.out.println(key + ": " + dictionary.get(key).getRegexp());
                    } else {
                        System.out.println(key + ": <does not exist in dictionary>");
                    }
                }
            }

            if (argParser.containsKey("examineGraph") && dictionary != null) {
                ArrayList<DictionaryEntry> selected = new ArrayList<>();
                for (String key : argParser.gets("examineGraph")) {
                    if (dictionary.containsKey(key)) {
                        selected.add(dictionary.get(key));
                    } else {
                        System.out.println(key + ": <does not exist in dictionary>");
                    }
                }
                for (Automaton automaton : new GenerateAutomatons().toAutomatons(selected, numThreads, -1, argParser.containsKey("ignoreCase"), logger)) {
                    System.out.println(automaton.toDot());
                }
            }

            if (argParser.containsKey("convertToVariants") || argParser.containsKey("convertToVariantsDB")) {
                File variantFile = argParser.getFile("convertToVariants");
                PreparedStatement variantInsert = argParser.containsKey("convertToVariantsDB") ? initVariantTable(SQL.connectMySQL(argParser, logger, "dictionaries"), argParser.get("convertToVariantsDB"), true) : null;
                convertToVariants(dictionary, variantFile, variantInsert, numThreads, logger, reportInterval);
            }

            if (argParser.containsKey("storeArray") || argParser.containsKey("storeRArray")) {
                if (dictionary != null) {
                    logger.info("%t: Escaping dictionary ID regular expressions...");
                    dictionary = escapeIDs(dictionary);
                    logger.info(" done.\n");
                }
                boolean ignoreCase = argParser.containsKey("ignoreCase");
                // NOTE(review): this reads --report without a default, unlike reportInterval
                // above - confirm ArgParser.getInt(String) copes with the option being absent.
                automatons = new Tuple<>(new GenerateAutomatons().toAutomatons(hashToList(dictionary), numThreads, argParser.getInt("report"), ignoreCase, logger), Boolean.valueOf(ignoreCase));
            }
        } else if (argParser.containsKey("loadArray")) {
            logger.info("%t: Loading array...");
            automatons = GenerateAutomatons.loadArray(argParser.getFile("loadArray"));
            logger.info(" done. Loaded " + automatons.getA().size() + " automatons from file " + argParser.get("loadArray") + ".\n");
        }

        boolean postProcess = argParser.containsKey("multiJoin") || argParser.containsKey("minimize");
        boolean storeArray = argParser.containsKey("storeArray");
        boolean storeRArray = argParser.containsKey("storeRArray");

        if (automatons == null) {
            // Nothing was generated or loaded: every remaining step needs automatons, so
            // skip them all rather than dereferencing null.
            if (postProcess || storeArray || storeRArray) {
                logger.warning("No automatons were generated or loaded; skipping post-processing and storing.\n");
            }
            return;
        }

        if (postProcess) {
            automatons = new Tuple<>(new GenerateAutomatons().process(automatons.getA(), argParser.getInt("multiJoin", 1), argParser.containsKey("minimize"), true, numThreads, logger), automatons.getB());
        }
        if (storeArray) {
            logger.info("%t: Storing...");
            GenerateAutomatons.storeArray(argParser.getFile("storeArray"), automatons.getA(), automatons.getB().booleanValue());
            logger.info(" done. Stored " + automatons.getA().size() + " arrays to file " + argParser.get("storeArray") + ".\n");
        }
        if (storeRArray) {
            GenerateAutomatons.storeRArray(automatons.getA(), automatons.getB().booleanValue(), argParser.containsKey("tableize"), argParser.getFile("storeRArray"), logger);
        }
    }

    /**
     * Converts dictionary entries to their variants and writes them to a file
     * and/or a database table.
     *
     * <p>The work is produced by an {@link IteratorBasedMaster} over a
     * {@code ConvertToVariantsProblemIterator} for {@code hashMap}; each result
     * is a {@link Tuple} of a {@link DictionaryEntry} and its variants.
     * <ul>
     *   <li>If {@code file} is non-null, one line per entry is written:
     *       id, tab, the variants joined with {@code Mention.COMMENT_SEPARATOR},
     *       tab, and the entry's comment if it has one.</li>
     *   <li>If {@code preparedStatement} is non-null, one row per variant is
     *       batched (entity id, variant, comment or SQL NULL) and the batch is
     *       executed once per entry.</li>
     * </ul>
     *
     * <p>Any failure is printed to standard error and terminates the JVM with
     * exit code {@code -1}. The prepared statement is not closed here.
     *
     * @param hashMap the dictionary to convert
     * @param file the output file, or null for no file output
     * @param preparedStatement the INSERT statement to use, or null for no database output
     * @param i passed to {@link IteratorBasedMaster} as its second constructor
     *          argument ({@code main} passes the {@code --threads} value)
     * @param logger receives progress messages
     * @param i2 progress is logged every {@code i2} entries; {@code -1} disables
     *           counting and {@code 0} disables logging
     */
    private static void convertToVariants(HashMap<String, DictionaryEntry> hashMap, File file, PreparedStatement preparedStatement, int i, Logger logger, int i2) {
        IteratorBasedMaster master = new IteratorBasedMaster(new ConvertToVariantsProblemIterator(hashMap), i);
        master.startThread();
        // A null resource is permitted in try-with-resources and is simply not closed;
        // a real writer is closed on both the normal and the failure path.
        try (BufferedWriter writer = file != null ? new BufferedWriter(new FileWriter(file)) : null) {
            int converted = 0;
            Iterator<?> results = master.iterator();
            while (results.hasNext()) {
                Tuple<?, ?> result = (Tuple<?, ?>) results.next();
                if (writer != null) {
                    DictionaryEntry entry = (DictionaryEntry) result.getA();
                    String line = entry.getId() + "\t" + Misc.unsplit((Collection) result.getB(), Mention.COMMENT_SEPARATOR) + "\t";
                    if (entry.getComment() != null) {
                        line = line + entry.getComment();
                    }
                    writer.write(line + "\n");
                }
                if (preparedStatement != null) {
                    Set<String> variants = (Set) result.getB();
                    DictionaryEntry entry = (DictionaryEntry) result.getA();
                    for (String variant : variants) {
                        preparedStatement.setString(1, entry.getId());
                        preparedStatement.setString(2, variant);
                        if (entry.getComment() != null) {
                            preparedStatement.setString(3, entry.getComment());
                        } else {
                            // java.sql.Types.NULL has the value 0, the constant the original passed.
                            preparedStatement.setNull(3, java.sql.Types.NULL);
                        }
                        preparedStatement.addBatch();
                    }
                    preparedStatement.executeBatch();
                }
                if (i2 != -1) {
                    converted++;
                    // Guard against a report interval of 0, which would divide by zero.
                    if (i2 != 0 && converted % i2 == 0) {
                        logger.info("%t: Converted " + converted + " dictionary entries to variants.\n");
                    }
                }
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Escapes the id of every dictionary entry with {@link #escapeRegexp(String)}.
     *
     * <p>The entries are modified in place; the returned map is a new
     * {@link HashMap} with the same keys pointing to those same entry objects.
     *
     * @param hashMap the dictionary whose entry ids are escaped
     * @return a new map with the same key-to-entry mappings
     */
    private static HashMap<String, DictionaryEntry> escapeIDs(HashMap<String, DictionaryEntry> hashMap) {
        HashMap<String, DictionaryEntry> escaped = new HashMap<>();
        for (java.util.Map.Entry<String, DictionaryEntry> mapping : hashMap.entrySet()) {
            DictionaryEntry entry = mapping.getValue();
            String escapedId = escapeRegexp(entry.getId());
            entry.setId(escapedId);
            escaped.put(mapping.getKey(), entry);
        }
        return escaped;
    }

    /**
     * Builds a dictionary from tab-separated lines of the form
     * {@code id<TAB>pattern}.
     *
     * <p>Lines are read through a {@link StreamIterator} over {@code file}, or
     * over standard input when {@code file} is null. The second field of each
     * line is added as a pattern to the entry for its id; lines that share an
     * id share one entry. When {@code z} is true, each id is suffixed with
     * {@code Mention.COMMENT_SEPARATOR} and a counter that starts at 0 and
     * increases by one per line, so every line gets its own entry.
     *
     * @param file the file to read, or null to read standard input
     * @param z whether to append the per-line counter to each id
     * @param logger receives progress messages
     * @return the dictionary, keyed by (possibly suffixed) id
     */
    private static HashMap<String, DictionaryEntry> loadRegexp(File file, boolean z, Logger logger) {
        logger.info(file != null
                ? "%t: Loading regular expressions from file " + file.getAbsolutePath() + "..."
                : "%t: Loading regular expressions from STDIN...");

        HashMap<String, DictionaryEntry> dictionary = new HashMap<>();
        StreamIterator source = file != null ? new StreamIterator(file, true) : new StreamIterator(System.in, true);
        Iterator<String> lines = source.iterator();
        int counter = 0;
        while (lines.hasNext()) {
            String[] fields = lines.next().split("\t");
            String id = z ? fields[0] + Mention.COMMENT_SEPARATOR + counter++ : fields[0];

            DictionaryEntry entry = dictionary.get(id);
            boolean created = entry == null;
            if (created) {
                entry = new DictionaryEntry(id);
            }
            entry.addPattern(fields[1]);
            // A new entry is registered only after its first pattern was added.
            if (created) {
                dictionary.put(id, entry);
            }
        }
        logger.info(" done.\n");
        return dictionary;
    }

    /**
     * Escapes characters that have a special meaning in regular expressions by
     * prefixing each of them with a backslash.
     *
     * <p>The escaped characters are
     * {@code \ ( ) [ ] { } < > * . + ? & ^ $ ~ # @ ' "} and
     * {@code Mention.COMMENT_SEPARATOR}, which is replaced with {@code \|}.
     *
     * <p>{@code >} is escaped to {@code \>}. It was previously rewritten to
     * {@code \<}, which turned every {@code >} in the input into an escaped
     * {@code <}.
     *
     * @param str the text to escape; must not be null
     * @return the escaped text
     */
    public static String escapeRegexp(String str) {
        // Backslashes go first so the backslashes added by later steps are not doubled.
        return str.replace("\\", "\\\\")
                .replace("(", "\\(")
                .replace(")", "\\)")
                .replace("[", "\\[")
                .replace("]", "\\]")
                .replace("{", "\\{")
                .replace("}", "\\}")
                .replace("<", "\\<")
                .replace(">", "\\>")
                .replace("*", "\\*")
                .replace(".", "\\.")
                .replace("+", "\\+")
                .replace("?", "\\?")
                .replace("&", "\\&")
                .replace(Mention.COMMENT_SEPARATOR, "\\|")
                .replace("^", "\\^")
                .replace("$", "\\$")
                .replace("~", "\\~")
                .replace("#", "\\#")
                .replace("@", "\\@")
                .replace("'", "\\'")
                .replace("\"", "\\\"");
    }
}
