package ivory.core.tokenize;

import com.google.common.collect.Maps;
import edu.umd.cloud9.collection.Indexable;
import edu.umd.cloud9.util.array.ArrayListOfInts;
import ivory.core.data.dictionary.Dictionary;
import ivory.core.data.document.TermDocVector;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.log4j.Logger;

/* loaded from: input_file:ivory/core/tokenize/DocumentProcessingUtils.class */
/**
 * Static helpers for turning a document into per-term position lists and for
 * converting a string-keyed term vector into an integer-keyed one.
 */
public class DocumentProcessingUtils {
    private static final Logger LOG = Logger.getLogger(DocumentProcessingUtils.class);

    /**
     * Term-frequency cut-off used by {@link #parseDocument}: a term whose number of
     * recorded positions is greater than or equal to this value is dropped from the
     * result. Kept public and mutable for backward compatibility with existing callers.
     */
    public static short TF_CUT = Short.MAX_VALUE;

    /** Tokens of this length or longer are ignored by {@link #parseDocument}. */
    private static final int MAX_TERM_LENGTH = Byte.MAX_VALUE;

    /**
     * Converts a term document vector into a sorted map keyed by term id.
     *
     * <p>Every term produced by the vector's reader is looked up in {@code dictionary};
     * terms for which the dictionary returns an id that is not strictly positive are
     * skipped (presumably terms absent from the dictionary -- confirm against
     * {@code Dictionary#getId}).
     *
     * @param termDocVector the vector whose terms and positions are read
     * @param dictionary the dictionary used to map each term to its integer id
     * @return a map from term id to the positions array reported by the reader,
     *         ordered by term id
     * @throws RuntimeException if reading the vector fails with an {@link IOException};
     *         the original exception is attached as the cause
     */
    public static SortedMap<Integer, int[]> integerizeTermDocVector(TermDocVector termDocVector, Dictionary dictionary) {
        SortedMap<Integer, int[]> positionsById = Maps.newTreeMap();
        try {
            TermDocVector.Reader reader = termDocVector.getReader();
            while (reader.hasMoreTerms()) {
                int id = dictionary.getId(reader.nextTerm());
                if (id > 0) {
                    positionsById.put(Integer.valueOf(id), reader.getPositions());
                }
            }
            return positionsById;
        } catch (IOException e) {
            // Chain the cause so the underlying I/O failure and its stack trace are not lost.
            throw new RuntimeException("Error getting TermDocVectorReader: " + e.getMessage(), e);
        }
    }

    /**
     * Tokenizes a document and collects, for each term, the positions at which it occurs.
     *
     * <p>Positions are one-based indexes into the token array returned by
     * {@code tokenizer.processContent(...)}. Empty tokens and tokens whose length is
     * {@value #MAX_TERM_LENGTH} or more are skipped, but they still occupy a position.
     * Terms whose position count reaches {@link #TF_CUT} are logged and removed.
     *
     * <p>If at least one term remains, an extra entry with the empty string as key is
     * added; its single value is the sum of the position counts of all retained terms.
     *
     * @param indexable the document to process
     * @param tokenizer the tokenizer applied to the document content
     * @return a mutable map from term to its position list; empty if no term was retained
     */
    public static Map<String, ArrayListOfInts> parseDocument(Indexable indexable, Tokenizer tokenizer) {
        Map<String, ArrayListOfInts> positions = Maps.newHashMap();
        String[] terms = tokenizer.processContent(indexable.getContent());
        for (int i = 0; i < terms.length; i++) {
            String term = terms[i];
            if (term.length() == 0 || term.length() >= MAX_TERM_LENGTH) {
                continue;
            }
            // No null values are ever stored, so a null lookup means "first occurrence".
            ArrayListOfInts termPositions = positions.get(term);
            if (termPositions == null) {
                termPositions = new ArrayListOfInts();
                positions.put(term, termPositions);
            }
            // Token positions are numbered starting from one.
            termPositions.add(i + 1);
        }

        int docLength = 0;
        Iterator<Map.Entry<String, ArrayListOfInts>> it = positions.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, ArrayListOfInts> entry = it.next();
            ArrayListOfInts termPositions = entry.getValue();
            if (termPositions.size() >= TF_CUT) {
                LOG.warn("Error: tf of " + entry.getValue() + " will overflow max short value. docno=" + indexable.getDocid() + ", term=" + entry.getKey());
                // Remove through the iterator to avoid a ConcurrentModificationException.
                it.remove();
            } else {
                termPositions.trimToSize();
                docLength += termPositions.size();
            }
        }

        if (positions.size() == 0) {
            return positions;
        }
        // The empty-string key carries the total number of retained positions.
        positions.put("", new ArrayListOfInts(new int[]{docLength}));
        return positions;
    }
}
