package uk.ac.man.entitytagger.entities.species;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.StringReader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import martin.common.ArgParser;
import martin.common.Loggers;
import martin.common.Misc;
import martin.common.Pair;
import martin.common.StreamIterator;
import martin.common.xml.XPath;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import uk.ac.man.entitytagger.Mention;
import uk.ac.man.entitytagger.generate.DictionaryEntry;
import uk.ac.man.entitytagger.generate.GenerateMatchers;

/* loaded from: input_file:uk/ac/man/entitytagger/entities/species/GenerateDictionary.class */
public class GenerateDictionary {
    /**
     * Address of the Catalogue of Life 2009 annual-checklist search endpoint.
     * NOTE(review): nothing in this class reads this constant; getAcceptedName
     * spells out the same address in its own string literal.
     */
    private static final String COL_WEBSERVICE_URL = "http://webservice.catalogueoflife.org/annual-checklist/2009/search.php";

    /**
     * Command-line entry point.
     *
     * <p>Reads the options {@code extraSynonyms}, {@code includeLineNumbers}, {@code report},
     * {@code inSpecies}, {@code outRegexp} and {@code outNames}. Nothing is generated unless
     * {@code inSpecies} is present. With {@code outRegexp} the species dictionary is written as
     * regular expressions; with {@code outNames} the expanded name variants are written together
     * with the comments stored in the species dictionary.
     *
     * @param strArr raw command-line arguments, handed to {@link ArgParser}
     */
    public static void main(String[] strArr) {
        ArgParser argParser = new ArgParser(strArr);
        Logger defaultLogger = Loggers.getDefaultLogger(argParser);
        File[] files = argParser.getFiles("extraSynonyms");
        boolean containsKey = argParser.containsKey("includeLineNumbers");
        defaultLogger.info("%t: includeLineNumbers = " + containsKey + "\n");
        int i = argParser.getInt("report", -1);

        if (!argParser.containsKey("inSpecies")) {
            return;
        }
        File file = argParser.getFile("inSpecies");
        boolean writeRegexp = argParser.containsKey("outRegexp");
        boolean writeNames = argParser.containsKey("outNames");
        if (!writeRegexp && !writeNames) {
            return;
        }

        // Both outputs need the species dictionary. Build it once: generating it re-reads the
        // whole input file and, with includeLineNumbers, issues one web-service request per
        // scientific name, so doing it twice doubles the run time for no benefit.
        HashMap<String, DictionaryEntry> dictionary = generateSpeciesDictionary(file, files, containsKey, defaultLogger, i);

        if (writeRegexp) {
            save(argParser.getFile("outRegexp"), dictionary, defaultLogger);
        }
        if (writeNames) {
            saveNames(argParser.getFile("outNames"), generateSpeciesNames(file, files, containsKey, defaultLogger), dictionary, defaultLogger);
        }
    }

    /**
     * Writes one line per identifier in {@code hashMap} to {@code file}, in the form
     * {@code id <TAB> variants <TAB> comment}. The variants are joined with
     * {@link Mention#COMMENT_SEPARATOR}; the comment column is left empty when
     * {@code hashMap2} has no entry for the identifier or the entry has no comment.
     *
     * <p>Any exception is printed to standard error and terminates the JVM with status -1.
     *
     * @param file     output file (overwritten)
     * @param hashMap  identifier to list of name variants
     * @param hashMap2 identifier to dictionary entry, consulted only for its comment
     * @param logger   receives progress messages
     */
    private static void saveNames(File file, HashMap<String, List<String>> hashMap, HashMap<String, DictionaryEntry> hashMap2, Logger logger) {
        logger.info("%t: Saving to file " + file.getAbsolutePath() + "...\n");
        // try-with-resources guarantees the writer is closed even if a write fails part-way.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            for (String id : hashMap.keySet()) {
                DictionaryEntry entry = hashMap2.get(id);
                boolean hasComment = entry != null && entry.getComment() != null;
                writer.write(id + "\t" + Misc.implode(hashMap.get(id).toArray(new String[0]), Mention.COMMENT_SEPARATOR) + "\t" + (hasComment ? entry.getComment() : "") + "\n");
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        // Only reached on success: the catch block above exits the JVM.
        logger.info("%t: Done.\n");
    }

    /**
     * Builds the map from identifier to expanded name variants.
     *
     * <p>Each line of every extra-synonym file is split on tabs: column 0 is the identifier and
     * column 1 a {@code |}-separated list of names, each added with the type "common name".
     * Each line of the species file is split on commas and used only if it has exactly four
     * fields: field 0 is the taxonomy id, field 1 the name and field 3 the name type. Names whose
     * type contains "acronym", that start with one of {@code [ ( { ?}, or that contain
     * {@code @} are ignored.
     *
     * <p>NOTE(review): generateSpeciesDictionary additionally rejects names starting with a quote
     * character and strips a leading ". "; this method does neither. Confirm whether that
     * difference is intended.
     *
     * @param file    species file
     * @param fileArr extra-synonym files
     * @param z       when true, the zero-based line number is appended to each species identifier
     * @param logger  receives progress messages
     * @return identifier to list of name variants
     */
    private static HashMap<String, List<String>> generateSpeciesNames(File file, File[] fileArr, boolean z, Logger logger) {
        logger.info("%t: Generating species name variants...\n");
        HashMap<String, List<String>> hashMap = new HashMap<>();

        for (File synonymFile : fileArr) {
            Iterator<String> synonymLines = new StreamIterator(synonymFile).iterator();
            while (synonymLines.hasNext()) {
                String[] columns = synonymLines.next().split("\\t");
                if (columns.length < 2) {
                    // Blank or malformed line: there is no name column to read.
                    continue;
                }
                for (String name : columns[1].split("\\|")) {
                    // Empty tokens (e.g. from "a||b") have no first character to re-case.
                    if (!name.trim().isEmpty()) {
                        addName(hashMap, columns[0], name, "common name");
                    }
                }
            }
        }

        int lineNumber = 0;
        Iterator<String> speciesLines = new StreamIterator(file).iterator();
        while (speciesLines.hasNext()) {
            String[] fields = speciesLines.next().split(",");
            if (fields.length == 4) {
                String id = "species:ncbi:" + fields[0];
                if (z) {
                    id = id + Mention.COMMENT_SEPARATOR + lineNumber;
                }
                String name = fields[1];
                String type = fields[3];
                boolean usable = !type.contains("acronym")
                        && !name.matches("([\\[\\(\\{\\?].*)")
                        && !name.contains("@")
                        && !name.trim().isEmpty();
                if (usable) {
                    addName(hashMap, id, name, type);
                }
            }
            // Every line counts, including the ones that were skipped.
            lineNumber++;
        }
        logger.info("%t: Done.\n");
        return hashMap;
    }

    /**
     * Expands one name into its spelling variants and appends them to the list stored under
     * {@code str}, creating the list if the identifier is new.
     *
     * <ul>
     *   <li>Type "scientific name", or a type containing "synonym" or "anamorph", with more than
     *       one word (and, for exactly two words, a second word of at least four characters):
     *       lower- and upper-case first letter, plus both with the first word abbreviated to its
     *       initial followed by a full stop.</li>
     *   <li>Type containing "common name" or "include", where the name does not already end in
     *       "s" or "family": lower- and upper-case first letter, each also with a trailing
     *       "s".</li>
     *   <li>Everything else: lower- and upper-case first letter only.</li>
     * </ul>
     *
     * <p>Leading spaces are removed first. A name that is empty afterwards is ignored, because
     * there is no first character to re-case.
     *
     * @param hashMap identifier to list of variants; modified in place
     * @param str     identifier the variants belong to
     * @param str2    the name to expand
     * @param str3    the name type
     */
    private static void addName(HashMap<String, List<String>> hashMap, String str, String str2, String str3) {
        while (str2.startsWith(" ")) {
            str2 = str2.substring(1);
        }
        if (str2.isEmpty()) {
            return;
        }

        String[] words = str2.split(" ");
        char first = str2.charAt(0);
        String rest = str2.substring(1);
        String lowerForm = Character.toLowerCase(first) + rest;
        String upperForm = Character.toUpperCase(first) + rest;

        List<String> variants = new ArrayList<>();
        variants.add(lowerForm);
        variants.add(upperForm);

        boolean latinType = str3.equals("scientific name") || str3.contains("synonym") || str3.contains("anamorph");
        boolean abbreviable = latinType && words.length > 1 && (words.length != 2 || words[1].length() >= 4);
        if (abbreviable) {
            // Keep everything from the first space onwards, so "Homo sapiens" yields "H. sapiens".
            String tail = str2.substring(str2.indexOf(" "));
            variants.add(Character.toLowerCase(first) + "." + tail);
            variants.add(Character.toUpperCase(first) + "." + tail);
        } else if ((str3.contains("common name") || str3.contains("include")) && !str2.endsWith("s") && !str2.endsWith("family")) {
            variants.add(lowerForm + "s");
            variants.add(upperForm + "s");
        }

        List<String> existing = hashMap.get(str);
        if (existing != null) {
            existing.addAll(variants);
        } else {
            hashMap.put(str, variants);
        }
    }

    /**
     * Looks a name up in the Catalogue of Life 2009 annual-checklist web service and returns the
     * text of the {@code results/result/accepted_name/name} node of the XML response.
     *
     * <p>Names containing {@code <} or {@code >} are not looked up. Any exception (network, XML
     * parsing, ...) is printed to standard error and terminates the JVM with status -1.
     *
     * @param str the name to look up
     * @return the accepted name, or {@code null} if the name was not looked up or the response
     *         has no such node
     */
    private static String getAcceptedName(String str) {
        if (str.contains("<") || str.contains(">")) {
            return null;
        }
        try {
            // Encode the whole value, not just spaces: characters such as '&', '#', '%' or
            // non-ASCII letters would otherwise corrupt the query string. Spaces still become
            // '+', so requests for plain names are unchanged.
            String encodedName = URLEncoder.encode(str, "UTF-8");
            URL request = new URL("http://webservice.catalogueoflife.org/annual-checklist/2009/search.php?name=" + encodedName + "&format=xml&response=terse");
            String response = Misc.downloadURL(request);
            Node node = XPath.getNode("results/result/accepted_name/name", DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(new StringReader(response))));
            if (node == null) {
                return null;
            }
            return node.getTextContent();
        } catch (Exception e) {
            System.err.println(e.toString());
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    /**
     * Writes the {@code toString()} form of every dictionary entry to {@code file}, one entry
     * per line.
     *
     * <p>Any exception is printed to standard error and terminates the JVM with status -1.
     *
     * @param file    output file (overwritten)
     * @param hashMap identifier to dictionary entry; only the values are written
     * @param logger  receives a progress message
     */
    private static void save(File file, HashMap<String, DictionaryEntry> hashMap, Logger logger) {
        try {
            logger.info("Writing regular expressions to file " + file.getAbsolutePath() + "...\n");
            // try-with-resources guarantees the writer is closed even if a write fails part-way.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
                for (DictionaryEntry entry : hashMap.values()) {
                    writer.write(entry.toString() + "\n");
                }
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Turns one genus name into a regular-expression pattern and adds it to
     * {@code dictionaryEntry}.
     *
     * <p>Leading spaces are removed, quote characters are dropped, and the characters
     * {@code ( ) . < > { } [ ]} are backslash-escaped. The first letter is matched in either
     * case. For name types containing "common name" or "include", an optional trailing "s" is
     * allowed unless the name already ends in "s" or "family".
     *
     * @param dictionaryEntry entry that receives the pattern
     * @param str             the name
     * @param str2            the name type
     * @return the number of spellings the pattern is counted as: 2 with the optional plural,
     *         1 otherwise, and 0 if the name has no usable first word and nothing was added
     */
    private static int addGenusName(DictionaryEntry dictionaryEntry, String str, String str2) {
        while (str.startsWith(" ")) {
            str = str.substring(1);
        }
        String escaped = str
                .replace("\"", "")
                .replace("'", "")
                .replace("(", "\\(")
                .replace(")", "\\)")
                .replace(".", "\\.")
                .replace("<", "\\<")
                .replace(">", "\\>")
                .replace("{", "\\{")
                .replace("}", "\\}")
                .replace("[", "\\[")
                .replace("]", "\\]");
        String[] words = escaped.split(" ");
        if (words.length == 0 || words[0].isEmpty()) {
            // Nothing left to match on (empty name, or only quotes before the first space).
            return 0;
        }

        char first = words[0].charAt(0);
        StringBuilder pattern = new StringBuilder("(" + Character.toUpperCase(first) + Mention.COMMENT_SEPARATOR + Character.toLowerCase(first) + ")" + words[0].substring(1));
        for (int k = 1; k < words.length; k++) {
            pattern.append(" ").append(words[k]);
        }

        int variants = 1;
        boolean pluralisable = (str2.contains("common name") || str2.contains("include")) && !escaped.endsWith("s") && !escaped.endsWith("family");
        if (pluralisable) {
            pattern.append("s?");
            variants = 2;
        }
        dictionaryEntry.addPattern(pattern.toString());
        return variants;
    }

    /**
     * Merges the pairs that {@link GenerateMatchers#loadExtraSynonyms} reads from {@code file}
     * into the dictionary: the first element of each pair is the identifier, the second is
     * added to that identifier's entry as a pattern. Missing entries are created on demand.
     *
     * @param hashMap identifier to dictionary entry; modified in place
     * @param file    extra-synonym file
     */
    private static void addSynonyms(HashMap<String, DictionaryEntry> hashMap, File file) {
        GenerateMatchers loader = new GenerateMatchers();
        for (Iterator<Pair<String>> pairs = loader.loadExtraSynonyms(file).iterator(); pairs.hasNext(); ) {
            Pair<String> synonym = pairs.next();
            String id = synonym.getX();
            if (!hashMap.containsKey(id)) {
                hashMap.put(id, new DictionaryEntry(id));
            }
            hashMap.get(id).addPattern(synonym.getY());
        }
    }

    /**
     * Builds the genus dictionary from a taxonomy names file plus extra-synonym files.
     *
     * <p>In each input line, any {@code <...>} span that contains a comma is first collapsed to
     * the literal {@code <...>}; the line is then split on {@code TAB | TAB}. Only lines with
     * exactly four fields are used: field 0 is the taxonomy id, field 1 the name and field 3 the
     * name type. Names whose type contains "acronym", that start with one of {@code [ ( { ?},
     * or that contain {@code @} are ignored.
     *
     * <p>Any exception while reading is printed to standard error and terminates the JVM with
     * status -1.
     *
     * @param file    taxonomy names file
     * @param fileArr extra-synonym files, merged in after the taxonomy file
     * @param z       when true, the zero-based line number is appended to each identifier
     * @param logger  receives progress messages
     * @return identifier to dictionary entry
     */
    public static HashMap<String, DictionaryEntry> generateGenusDictionary(File file, File[] fileArr, boolean z, Logger logger) {
        logger.info("Loading NCBI taxonomy data... ");
        int variantCount = 0;
        HashMap<String, DictionaryEntry> hashMap = new HashMap<>();
        int lineNumber = 0;
        // try-with-resources guarantees the reader is closed even if a line fails to process.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.replaceAll("<.*?,.*?>", "<...>").split("\t\\|\t");
                String id = "genus:ncbi:" + fields[0];
                if (z) {
                    id = id + Mention.COMMENT_SEPARATOR + lineNumber;
                    lineNumber++;
                }
                if (fields.length != 4) {
                    continue;
                }
                String name = fields[1];
                String type = fields[3];
                if (type.contains("acronym") || name.matches("([\\[\\(\\{\\?].*)") || name.contains("@")) {
                    continue;
                }
                if (!hashMap.containsKey(id)) {
                    hashMap.put(id, new DictionaryEntry(id));
                }
                variantCount += addGenusName(hashMap.get(id), name, type);
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        for (File synonymFile : fileArr) {
            addSynonyms(hashMap, synonymFile);
        }
        logger.info("Done, loaded " + hashMap.size() + " genus and " + variantCount + " name variants.\n");
        return hashMap;
    }

    /**
     * Builds the species dictionary from a comma-separated species file plus extra-synonym
     * files.
     *
     * <p>Only lines with exactly four comma-separated fields are used: field 0 is the taxonomy
     * id, field 1 the name and field 3 the name type. Names whose type contains "acronym", that
     * start with one of {@code [ ( { ? " '}, or that contain {@code @} are ignored. A leading
     * ". " is stripped from the name.
     *
     * <p>When {@code z} is true, entries additionally get a comment: the name type itself for
     * "misnomer", "misspelling" and "in-part", or the accepted name returned by
     * getAcceptedName for "scientific name" (one web-service request per such name).
     *
     * <p>Any exception while reading is printed to standard error and terminates the JVM with
     * status -1.
     *
     * @param file    species file
     * @param fileArr extra-synonym files, merged in after the species file
     * @param z       when true, the zero-based line number is appended to each identifier and
     *                comments are filled in
     * @param logger  receives progress messages
     * @param i       progress is logged every {@code i} lines; -1 or 0 disables progress logging
     * @return identifier to dictionary entry
     */
    public static HashMap<String, DictionaryEntry> generateSpeciesDictionary(File file, File[] fileArr, boolean z, Logger logger, int i) {
        logger.info("Loading NCBI taxonomy data... ");
        int variantCount = 0;
        HashMap<String, DictionaryEntry> hashMap = new HashMap<>();
        // try-with-resources guarantees the reader is closed even if a line fails to process.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(",");
                String id = "species:ncbi:" + fields[0];
                if (z) {
                    id = id + Mention.COMMENT_SEPARATOR + lineNumber;
                }
                lineNumber++;

                if (fields.length == 4) {
                    String type = fields[3];
                    boolean usable = !type.contains("acronym")
                            && !fields[1].matches("([\\[\\(\\{\\?\"'].*)")
                            && !fields[1].contains("@");
                    if (usable) {
                        String name = fields[1];
                        if (name.startsWith(". ")) {
                            name = name.substring(2);
                        }
                        if (!hashMap.containsKey(id)) {
                            hashMap.put(id, new DictionaryEntry(id));
                        }
                        variantCount += addName(hashMap.get(id), name, type);
                        if (z) {
                            if (type.equals("misnomer") || type.equals("misspelling") || type.equals("in-part")) {
                                hashMap.get(id).setComment(type);
                            }
                            if (type.equals("scientific name")) {
                                String acceptedName = getAcceptedName(name);
                                if (acceptedName != null) {
                                    hashMap.get(id).setComment("accepted: \"" + acceptedName + "\"");
                                }
                            }
                        }
                    }
                }

                // i == 0 must be excluded as well: "lineNumber % 0" throws ArithmeticException,
                // which the catch block below would turn into a JVM exit.
                if (i != -1 && i != 0 && lineNumber % i == 0) {
                    logger.info("%t: generateSpeciesDictionary: processed " + lineNumber + " lines.\n");
                }
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
        }
        for (File synonymFile : fileArr) {
            addSynonyms(hashMap, synonymFile);
        }
        logger.info("Done, loaded " + hashMap.size() + " species and " + variantCount + " name variants.\n");
        return hashMap;
    }

    /**
     * Turns one species name into a regular-expression pattern and adds it to
     * {@code dictionaryEntry}.
     *
     * <p>Leading spaces are removed and the name is passed through
     * {@link GenerateMatchers#escapeRegexp}. The first letter is always matched in either case.
     * For type "scientific name", or a type containing "synonym" or "anamorph", with more than
     * one word (and, for exactly two words, a second word of at least four characters), the
     * rest of a multi-letter first word may alternatively be written as a full stop with an
     * optional space. For name types containing "common name" or "include", an optional
     * trailing "s" is allowed unless the name already ends in "s" or "family".
     *
     * @param dictionaryEntry entry that receives the pattern
     * @param str             the name
     * @param str2            the name type
     * @return the number of spellings the pattern is counted as: 4 with the abbreviated first
     *         word, 2 with the optional plural (which takes precedence), 1 otherwise, and 0 if
     *         the name has no usable first word and nothing was added
     */
    private static int addName(DictionaryEntry dictionaryEntry, String str, String str2) {
        while (str.startsWith(" ")) {
            str = str.substring(1);
        }
        String escaped = GenerateMatchers.escapeRegexp(str);
        String[] words = escaped.split(" ");
        if (words.length == 0 || words[0].isEmpty()) {
            // Empty name: there is no first character to build the case group from.
            return 0;
        }

        char first = words[0].charAt(0);
        StringBuilder pattern = new StringBuilder("(" + Character.toUpperCase(first) + Mention.COMMENT_SEPARATOR + Character.toLowerCase(first) + ")");

        boolean latinType = str2.equals("scientific name") || str2.contains("synonym") || str2.contains("anamorph");
        boolean abbreviable = latinType && words.length > 1 && !(words.length == 2 && words[1].length() < 4);

        int variants = 1;
        // True when the first-word group already ends with its own separator, so no extra space
        // may be inserted before the second word.
        boolean separatorInGroup = false;
        if (!abbreviable) {
            pattern.append(words[0].substring(1));
        } else if (words[0].length() > 1) {
            pattern.append("(\\. ?|").append(words[0].substring(1)).append(" )");
            separatorInGroup = true;
            variants = 4;
        }
        // Otherwise the first word is a single letter and the case group alone represents it.

        for (int k = 1; k < words.length; k++) {
            if (!separatorInGroup || k > 1) {
                pattern.append(" ");
            }
            pattern.append(words[k]);
        }

        boolean pluralisable = (str2.contains("common name") || str2.contains("include")) && !escaped.endsWith("s") && !escaped.endsWith("family");
        if (pluralisable) {
            pattern.append("s?");
            variants = 2;
        }
        dictionaryEntry.addPattern(pattern.toString());
        return variants;
    }
}
