package de.julielab.jcore.ae.lingpipegazetteer.utils;

import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.ibm.icu.text.Transliterator;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/* loaded from: input_file:de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.class */
/**
 * Normalizes strings for chunking and remembers, for every normalized string,
 * how character offsets in the normalized text map back to offsets in the
 * original text.
 *
 * <p>Two normalizations are offered: {@link #normalizeString(String)} removes a
 * fixed set of punctuation characters, and
 * {@link #normalizeString(String, TokenizerFactory, Transliterator)} works on
 * tokens, removes a genitive {@code 's} that is followed by whitespace and
 * optionally transliterates every remaining token.
 */
public class StringNormalizerForChunking {
    /** Characters that {@link #normalizeString(String)} removes from its input. */
    private static final Set<Character> CHARS_TO_DELETE = new HashSet<>();

    static {
        // Besides ASCII punctuation this contains the section sign (U+00A7)
        // and the acute accent (U+00B4).
        String deletable = "-+,.:;?!*\u00a7$%&/\\()<>[]='`\u00b4\"#";
        for (int k = 0; k < deletable.length(); k++) {
            CHARS_TO_DELETE.add(deletable.charAt(k));
        }
    }

    /** Normalization modes. Not referenced by any code in this class. */
    public enum Mode {
        DELETE,
        REPLACE
    }

    /**
     * A normalized string together with the mapping from offsets in the
     * normalized text to offsets in the text it was created from.
     */
    public static class NormalizedString {
        /** The normalized text. */
        public String string;
        /** Normalized offset -> original offset; also caches derived offsets. */
        private Map<Integer, Integer> offsetMap = new HashMap<>();
        /** Sorted snapshot of the recorded normalized offsets, built lazily on the first derivation. */
        private TreeSet<Integer> normalizedOffsetSet;

        /**
         * @return the live (mutable) map from normalized offsets to original offsets
         */
        public Map<Integer, Integer> getOffsetMap() {
            return this.offsetMap;
        }

        /**
         * Translates an offset in the normalized string into the corresponding
         * offset in the original string.
         *
         * <p>If no mapping was recorded for the offset, it is derived from the
         * closest recorded offset at or before it and then stored in the offset
         * map so that later lookups are direct.
         *
         * @param i an offset in the normalized string
         * @return the corresponding offset in the original string
         */
        public Integer getOriginalOffset(int i) {
            Integer original = this.offsetMap.get(i);
            if (original == null) {
                original = deriveOriginalOffset(i);
                this.offsetMap.put(i, original);
            }
            return original;
        }

        /**
         * Derives the original offset for a normalized offset without a recorded
         * mapping by applying the shift of the closest recorded offset that is
         * not greater than {@code normalizedOffset}.
         *
         * @param normalizedOffset an offset in the normalized string
         * @return the shifted offset, or {@code normalizedOffset} itself when no
         *         recorded offset precedes it
         */
        private Integer deriveOriginalOffset(int normalizedOffset) {
            if (this.normalizedOffsetSet == null) {
                this.normalizedOffsetSet = new TreeSet<>(this.offsetMap.keySet());
            }
            Integer anchor = this.normalizedOffsetSet.floor(normalizedOffset);
            // floor() yields null when nothing was recorded at or before the
            // requested offset (e.g. empty input); there is no known shift then.
            Integer anchorOriginal = anchor == null ? null : this.offsetMap.get(anchor);
            if (anchorOriginal == null) {
                return normalizedOffset;
            }
            return normalizedOffset + (anchorOriginal - anchor);
        }
    }

    /**
     * Removes every punctuation character contained in the internal deletion
     * set from {@code str}.
     *
     * <p>For each character that is kept, the offset map of the result maps its
     * position in the normalized string to its position in {@code str}. Deleted
     * characters get no entry of their own, so the first kept character is
     * mapped correctly even when {@code str} starts with deleted characters.
     *
     * @param str the text to normalize; must not be {@code null}
     * @return the normalized text with its offset mapping
     */
    public static NormalizedString normalizeString(String str) {
        NormalizedString normalized = new NormalizedString();
        StringBuilder kept = new StringBuilder(str.length());
        for (int originalOffset = 0; originalOffset < str.length(); originalOffset++) {
            char c = str.charAt(originalOffset);
            if (CHARS_TO_DELETE.contains(c)) {
                continue;
            }
            // kept.length() is the index the character is about to receive.
            normalized.offsetMap.put(kept.length(), originalOffset);
            kept.append(c);
        }
        normalized.string = kept.toString();
        return normalized;
    }

    /**
     * Normalizes {@code str} token by token.
     *
     * <p>An apostrophe token directly followed by an {@code s} token is treated
     * as a genitive and removed when whitespace follows the {@code s}. In every
     * other case the held-back tokens are written out unchanged, in their
     * original order, as soon as the next regular token arrives. Regular tokens
     * are passed through {@code transliterator} when one is given. Start and end
     * of every emitted token are recorded in the offset map of the result.
     *
     * <p>Tokens that are still held back when the input ends are not emitted.
     *
     * @param str the text to normalize; must not be {@code null}
     * @param tokenizerFactory creates the tokenizer that splits {@code str}
     * @param transliterator applied to every regular token; may be {@code null}
     * @return the normalized text with its offset mapping
     */
    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory, Transliterator transliterator) {
        NormalizedString normalized = new NormalizedString();
        char[] chars = str.toCharArray();
        Tokenizer tokenizer = tokenizerFactory.tokenizer(chars, 0, chars.length);
        StringBuilder sb = new StringBuilder();
        // Apostrophe / "s" tokens held back until it is clear whether they form a genitive.
        Deque<String> pending = new ArrayDeque<>();
        // Offsets of the held-back tokens; only committed if the tokens are kept.
        Map<Integer, Integer> pendingOffsets = new HashMap<>();

        sb.append(tokenizer.nextWhitespace());
        normalized.offsetMap.put(0, 0);

        String token;
        while ((token = tokenizer.nextToken()) != null) {
            if (token.equals("'")) {
                recordTokenOffsets(pendingOffsets, sb.length() + sumOfStack(pending), token.length(), tokenizer);
                // addLast keeps insertion order so that flushing reproduces the original text.
                pending.addLast(token + tokenizer.nextWhitespace());
            } else if (token.equals("s") && pending.size() == 1) {
                recordTokenOffsets(pendingOffsets, sb.length() + sumOfStack(pending), token.length(), tokenizer);
                pending.addLast(token);
                String whitespace = tokenizer.nextWhitespace();
                if (whitespace.length() > 0) {
                    // Genitive 's followed by whitespace: drop it, keep only the whitespace.
                    sb.append(whitespace);
                    pending.clear();
                    pendingOffsets.clear();
                }
            } else {
                if (!pending.isEmpty()) {
                    // Not a genitive after all: emit the held-back tokens and commit their offsets.
                    for (String held : pending) {
                        sb.append(held);
                    }
                    pending.clear();
                    normalized.offsetMap.putAll(pendingOffsets);
                    pendingOffsets.clear();
                }
                if (transliterator != null) {
                    token = transliterator.transform(token);
                }
                int normalizedStart = sb.length();
                sb.append(token);
                recordTokenOffsets(normalized.offsetMap, normalizedStart, token.length(), tokenizer);
                sb.append(tokenizer.nextWhitespace());
            }
        }
        normalized.string = sb.toString();
        return normalized;
    }

    /**
     * Maps the normalized start and end offset of a token to the start and end
     * position of the token most recently returned by {@code tokenizer}.
     */
    private static void recordTokenOffsets(Map<Integer, Integer> offsets, int normalizedStart, int normalizedLength, Tokenizer tokenizer) {
        offsets.put(normalizedStart, tokenizer.lastTokenStartPosition());
        offsets.put(normalizedStart + normalizedLength, tokenizer.lastTokenEndPosition());
    }

    /**
     * @return the total number of characters of all strings in {@code deque}
     */
    private static int sumOfStack(Deque<String> deque) {
        int total = 0;
        for (String element : deque) {
            total += element.length();
        }
        return total;
    }

    /**
     * Normalizes {@code str} token by token without transliteration.
     *
     * @param str the text to normalize; must not be {@code null}
     * @param tokenizerFactory creates the tokenizer that splits {@code str}
     * @return the normalized text with its offset mapping
     * @see #normalizeString(String, TokenizerFactory, Transliterator)
     */
    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
        return normalizeString(str, tokenizerFactory, null);
    }
}
