package uk.ac.man.entitytagger.matching.matchers;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import martin.common.CacheMap;
import martin.common.Function;
import martin.common.Misc;
import martin.common.Pair;
import martin.common.Sizeable;
import martin.common.StreamIterator;
import uk.ac.man.documentparser.dataholders.Document;
import uk.ac.man.entitytagger.Mention;
import uk.ac.man.entitytagger.matching.Matcher;

/* loaded from: input_file:uk/ac/man/entitytagger/matching/matchers/VariantDictionaryMatcher.class */
public class VariantDictionaryMatcher extends Matcher implements Sizeable {
    private String[] terms;
    private String[][] termToIdsMap;
    private Connection conn;
    private String[] tableNames;
    private String tag;
    private final Pattern tokenizationPattern;
    private boolean ignoreCase;
    private long size;

    public VariantDictionaryMatcher(String[][] strArr, String[] strArr2, boolean z) {
        this.terms = null;
        this.termToIdsMap = (String[][]) null;
        this.tokenizationPattern = Pattern.compile("\\b");
        this.size = -1L;
        this.termToIdsMap = strArr;
        this.terms = strArr2;
        this.ignoreCase = z;
    }

    @Override // uk.ac.man.entitytagger.matching.Matcher
    public int size() {
        return this.terms.length;
    }

    public VariantDictionaryMatcher(Connection connection, String[] strArr, String str, boolean z) {
        this.terms = null;
        this.termToIdsMap = (String[][]) null;
        this.tokenizationPattern = Pattern.compile("\\b");
        this.size = -1L;
        this.ignoreCase = z;
        this.conn = connection;
        this.tableNames = strArr;
        this.tag = str;
    }

    public static VariantDictionaryMatcher load(File file, boolean z) {
        try {
            return load(new FileInputStream(file), z);
        } catch (FileNotFoundException e) {
            System.err.println("Could not find file " + file.getAbsolutePath() + ".");
            System.exit(0);
            return null;
        }
    }

    /* JADX WARN: Type inference failed for: r0v14, types: [java.lang.String[], java.lang.String[][]] */
    public static VariantDictionaryMatcher load(InputStream inputStream, boolean z) {
        Map<String, Set<String>> loadStream = loadStream(inputStream, z);
        String[] strArr = new String[loadStream.size()];
        int i = 0;
        Iterator<String> it = loadStream.keySet().iterator();
        while (it.hasNext()) {
            int i2 = i;
            i++;
            strArr[i2] = it.next();
        }
        Arrays.sort(strArr);
        ?? r0 = new String[strArr.length];
        for (int i3 = 0; i3 < strArr.length; i3++) {
            r0[i3] = (String[]) loadStream.get(strArr[i3]).toArray(new String[0]);
        }
        return new VariantDictionaryMatcher(r0, strArr, z);
    }

    /* JADX WARN: Type inference failed for: r0v14, types: [java.lang.String[], java.lang.String[][]] */
    private void init() {
        Map<String, Set<String>> loadFromDB = loadFromDB();
        String[] strArr = new String[loadFromDB.size()];
        int i = 0;
        Iterator<String> it = loadFromDB.keySet().iterator();
        while (it.hasNext()) {
            int i2 = i;
            i++;
            strArr[i2] = it.next();
        }
        Arrays.sort(strArr);
        ?? r0 = new String[strArr.length];
        for (int i3 = 0; i3 < strArr.length; i3++) {
            r0[i3] = (String[]) loadFromDB.get(strArr[i3]).toArray(new String[0]);
        }
        this.termToIdsMap = r0;
        this.terms = strArr;
    }

    public static Map<String, Matcher> loadSeparatedFromDB(Connection connection, String[] strArr, boolean z) {
        try {
            Statement createStatement = connection.createStatement();
            HashMap hashMap = new HashMap();
            for (String str : strArr) {
                ResultSet executeQuery = createStatement.executeQuery("SELECT DISTINCT(tag) FROM " + str);
                while (executeQuery.next()) {
                    String string = executeQuery.getString(1);
                    if (!hashMap.containsKey(string)) {
                        hashMap.put(string, new VariantDictionaryMatcher(connection, strArr, string, z));
                    }
                }
            }
            connection.close();
            return hashMap;
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    public static CacheMap<String, VariantDictionaryMatcher> loadSeparatedFromDBCached(final Connection connection, final String[] strArr, final boolean z, long j, Logger logger) {
        return new CacheMap<>(j, new Function<VariantDictionaryMatcher>() { // from class: uk.ac.man.entitytagger.matching.matchers.VariantDictionaryMatcher.1
            /* JADX WARN: Can't rename method to resolve collision */
            @Override // martin.common.Function
            public VariantDictionaryMatcher function(Object[] objArr) {
                return new VariantDictionaryMatcher(connection, strArr, (String) objArr[0], z);
            }
        }, logger);
    }

    /* JADX WARN: Type inference failed for: r0v34, types: [java.lang.String[], java.lang.String[][]] */
    public static Map<String, Matcher> loadSeparated(File[] fileArr, boolean z) {
        HashMap hashMap = new HashMap();
        for (File file : fileArr) {
            Map<String, Map<String, Set<String>>> loadFileSeparated = loadFileSeparated(file, z);
            for (String str : loadFileSeparated.keySet()) {
                Map<String, Set<String>> map = loadFileSeparated.get(str);
                String[] strArr = new String[map.size()];
                int i = 0;
                Iterator<String> it = map.keySet().iterator();
                while (it.hasNext()) {
                    int i2 = i;
                    i++;
                    strArr[i2] = it.next();
                }
                Arrays.sort(strArr);
                ?? r0 = new String[strArr.length];
                for (int i3 = 0; i3 < strArr.length; i3++) {
                    r0[i3] = (String[]) map.get(strArr[i3]).toArray(new String[0]);
                }
                hashMap.put(str, new VariantDictionaryMatcher(r0, strArr, z));
            }
        }
        return hashMap;
    }

    private static Map<String, Map<String, Set<String>>> loadFileSeparated(File file, boolean z) {
        HashMap hashMap = new HashMap();
        Iterator<String> it = new StreamIterator(file, true).iterator();
        while (it.hasNext()) {
            String[] split = it.next().split("\t");
            if (split.length < 3) {
                throw new IllegalStateException("The input file need three columns when calling loadFileSeparated");
            }
            if (z) {
                split[1] = split[1].toLowerCase();
            }
            if (!hashMap.containsKey(split[2])) {
                hashMap.put(split[2], new HashMap());
            }
            Map map = (Map) hashMap.get(split[2]);
            for (String str : split[1].split("\\|")) {
                if (!hashMap.containsKey(str)) {
                    map.put(str, new HashSet());
                }
                ((Set) map.get(str)).add(split[0]);
            }
        }
        return hashMap;
    }

    private static Map<String, Set<String>> loadStream(InputStream inputStream, boolean z) {
        HashMap hashMap = new HashMap();
        StreamIterator streamIterator = new StreamIterator(inputStream, true);
        Pattern compile = Pattern.compile("\t");
        Pattern compile2 = Pattern.compile("\\|");
        Iterator<String> it = streamIterator.iterator();
        while (it.hasNext()) {
            String[] split = compile.split(it.next());
            if (z) {
                split[1] = split[1].toLowerCase();
            }
            for (String str : compile2.split(split[1])) {
                if (!hashMap.containsKey(str)) {
                    hashMap.put(str, new HashSet());
                }
                ((Set) hashMap.get(str)).add(split[0]);
            }
        }
        return hashMap;
    }

    private Map<String, Set<String>> loadFromDB() {
        try {
            if (this.tag != null) {
                System.out.println("Loading variantMatcher from " + Misc.implode(this.tableNames, ", ") + " (" + this.tag + ")... ");
            } else {
                System.out.println("Loading variantMatcher from " + Misc.implode(this.tableNames, ", ") + "... ");
            }
            HashMap hashMap = new HashMap();
            for (String str : this.tableNames) {
                ResultSet executeQuery = this.tag != null ? this.conn.createStatement().executeQuery("SELECT id_entity, name FROM " + str + " WHERE tag = '" + this.tag + "'") : this.conn.createStatement().executeQuery("SELECT id_entity, name FROM " + str);
                while (executeQuery.next()) {
                    String string = executeQuery.getString(1);
                    String string2 = executeQuery.getString(2);
                    if (this.ignoreCase) {
                        string2 = string2.toLowerCase();
                    }
                    if (!hashMap.containsKey(string2)) {
                        hashMap.put(string2, new HashSet());
                    }
                    ((Set) hashMap.get(string2)).add(string);
                }
            }
            return hashMap;
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace();
            System.exit(-1);
            return null;
        }
    }

    @Override // uk.ac.man.entitytagger.matching.Matcher
    public List<Mention> match(String str, Document document) {
        if (this.terms == null || this.termToIdsMap == null) {
            init();
        }
        ArrayList arrayList = new ArrayList();
        String lowerCase = this.ignoreCase ? str.toLowerCase() : str;
        String id = document != null ? document.getID() : null;
        java.util.regex.Matcher matcher = this.tokenizationPattern.matcher(lowerCase);
        ArrayList arrayList2 = new ArrayList();
        int i = -1;
        while (true) {
            int i2 = i;
            if (!matcher.find()) {
                break;
            }
            if (i2 != -1 && Character.isLetterOrDigit(lowerCase.charAt(i2))) {
                arrayList2.add(new Pair<>(Integer.valueOf(i2), Integer.valueOf(matcher.start())));
            }
            i = matcher.start();
        }
        for (int i3 = 0; i3 < arrayList2.size(); i3++) {
            Pair<Integer> pair = arrayList2.get(i3);
            Iterator<Integer> it = getMatchIds(arrayList2, i3, lowerCase).iterator();
            while (it.hasNext()) {
                int intValue = it.next().intValue();
                String substring = this.ignoreCase ? str.substring(pair.getX().intValue(), pair.getX().intValue() + this.terms[intValue].length()) : this.terms[intValue];
                Mention mention = new Mention((String[]) this.termToIdsMap[intValue].clone(), pair.getX().intValue(), pair.getX().intValue() + substring.length(), substring);
                mention.setDocid(id);
                arrayList.add(mention);
            }
        }
        return arrayList;
    }

    private List<Integer> getMatchIds(List<Pair<Integer>> list, int i, String str) {
        Pair<Integer> pair = list.get(i);
        LinkedList linkedList = new LinkedList();
        int i2 = 0;
        do {
            String substring = str.substring(pair.getX().intValue(), list.get(i + i2).getY().intValue());
            int binarySearch = Arrays.binarySearch(this.terms, substring);
            if (binarySearch < 0) {
                if ((-binarySearch) - 1 < this.terms.length) {
                    if (!this.terms[(-binarySearch) - 1].startsWith(substring)) {
                        break;
                    }
                } else {
                    break;
                }
            } else {
                linkedList.add(Integer.valueOf(binarySearch));
            }
            i2++;
        } while (i + i2 < list.size());
        return linkedList;
    }

    @Override // martin.common.Sizeable
    public long sizeof() {
        if (this.terms == null || this.termToIdsMap == null) {
            init();
        }
        if (this.size != -1) {
            return this.size;
        }
        long j = 0;
        for (int i = 0; i < this.terms.length; i++) {
            j += r0[i].length() * 2;
        }
        for (int i2 = 0; i2 < this.termToIdsMap.length; i2++) {
            for (int i3 = 0; i3 < this.termToIdsMap[i2].length; i3++) {
                j += this.termToIdsMap[i2][i3].length() * 2;
            }
        }
        this.size = j;
        return j;
    }
}
