package de.gwdg.metadataqa.api.uniqueness;

import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import com.jayway.jsonpath.spi.json.JsonProvider;
import de.gwdg.metadataqa.api.calculator.language.Multilinguality;
import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.util.Converter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:de/gwdg/metadataqa/api/uniqueness/TfIdfExtractor.class */
/**
 * Extracts TF-IDF scores from a JSON term-vector response.
 *
 * <p>For every index field of the configured {@link Schema} the extractor reads the
 * terms listed under {@code $.termVectors.['<recordId>'].<indexField>} and reports the
 * sum and the average of their {@code "tf-idf"} values. Optionally the individual
 * terms are collected as well and exposed through {@link #getTermsCollection()}.
 *
 * <p>The collected terms are instance state that is replaced by every call to
 * {@code extract}.
 */
public class TfIdfExtractor {
    private static final JsonProvider JSON_PROVIDER = Configuration.defaultConfiguration().jsonProvider();
    private final Schema schema;
    private Map<String, List<TfIdf>> termsCollection;

    /**
     * @param schema the schema whose index fields are looked up in the term-vector response
     */
    public TfIdfExtractor(Schema schema) {
        this.schema = schema;
    }

    /**
     * Extracts the TF-IDF sums and averages without collecting the individual terms.
     *
     * @param jsonString the JSON document containing a {@code termVectors} object
     * @param recordId the key of the entry to read under {@code termVectors}
     * @return a counter holding {@code "<label>:sum"} and {@code "<label>:avg"} per index field
     */
    public FieldCounter<Double> extract(String jsonString, String recordId) {
        return extract(jsonString, recordId, false);
    }

    /**
     * Extracts the TF-IDF sums and averages for every index field of the schema.
     *
     * <p>A field that is missing from the response, or that has no terms, gets a sum of
     * {@code 0.0} and an average of {@code Multilinguality.NORMALIZED_LOW}.
     *
     * @param jsonString the JSON document containing a {@code termVectors} object
     * @param recordId the key of the entry to read under {@code termVectors}
     * @param doCollectTerms if {@code true}, the individual terms of each field are stored
     *     and can be retrieved with {@link #getTermsCollection()}
     * @return a counter holding {@code "<label>:sum"} and {@code "<label>:avg"} per index field
     */
    public FieldCounter<Double> extract(String jsonString, String recordId, boolean doCollectTerms) {
        FieldCounter<Double> results = new FieldCounter<>();
        this.termsCollection = new LinkedHashMap<>();

        Object document = JSON_PROVIDER.parse(jsonString);
        String jsonPath = String.format("$.termVectors.['%s']", recordId);
        Map<?, ?> termVectors = (Map<?, ?>) JsonPath.read(document, jsonPath, new Predicate[0]);

        for (DataElement dataElement : this.schema.getIndexFields()) {
            // Keep a direct reference to the list instead of looking it up again later:
            // the map is keyed by the element's path, which may differ from its label,
            // so a lookup by label could return null.
            List<TfIdf> collectedTerms = null;
            if (doCollectTerms) {
                collectedTerms = new ArrayList<>();
                this.termsCollection.put(dataElement.getPath(), collectedTerms);
            }

            Object fieldTerms = termVectors.get(dataElement.getIndexField());
            double sum = 0.0d;
            int count = 0;
            if (termVectors.containsKey(dataElement.getIndexField())) {
                for (Map.Entry<?, ?> entry : ((Map<?, ?>) fieldTerms).entrySet()) {
                    double tfIdf = readTerm(entry, collectedTerms);
                    sum += tfIdf;
                    count++;
                }
            }

            double average = count > Multilinguality.NORMALIZED_LOW
                ? sum / count
                : Multilinguality.NORMALIZED_LOW;
            results.put(dataElement.getLabel() + ":sum", Double.valueOf(sum));
            results.put(dataElement.getLabel() + ":avg", Double.valueOf(average));
        }
        return results;
    }

    /**
     * Reads the {@code "tf-idf"} value of a single term entry and, when a target list is
     * given, records the term together with its {@code "tf"} and {@code "df"} values.
     */
    private static double readTerm(Map.Entry<?, ?> entry, List<TfIdf> collectedTerms) {
        String term = (String) entry.getKey();
        Map<?, ?> termInfo = (Map<?, ?>) entry.getValue();
        double tfIdf = Converter.asDouble(termInfo.get("tf-idf")).doubleValue();
        if (collectedTerms != null) {
            int tf = Converter.asInteger(termInfo.get("tf")).intValue();
            int df = Converter.asInteger(termInfo.get("df")).intValue();
            collectedTerms.add(new TfIdf(term, tf, df, tfIdf));
        }
        return tfIdf;
    }

    /**
     * Returns the terms collected by the most recent {@code extract} call, keyed by the
     * data element's path.
     *
     * @return the collected terms; empty if the last extraction did not collect terms,
     *     {@code null} if {@code extract} has not been called yet
     */
    public Map<String, List<TfIdf>> getTermsCollection() {
        return this.termsCollection;
    }
}
