package de.gwdg.metadataqa.api.calculator;

import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.interfaces.Calculator;
import de.gwdg.metadataqa.api.interfaces.MetricResult;
import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.problemcatalog.FieldCounterBasedResult;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.uniqueness.SolrClient;
import de.gwdg.metadataqa.api.uniqueness.SolrConfiguration;
import de.gwdg.metadataqa.api.uniqueness.TfIdf;
import de.gwdg.metadataqa.api.uniqueness.TfIdfExtractor;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:de/gwdg/metadataqa/api/calculator/TfIdfCalculator.class */
/**
 * Calculator that fetches a record's term statistics from Solr and turns them
 * into per-field TF-IDF scores via {@link TfIdfExtractor}.
 *
 * <p>The Solr response is obtained either through an injected {@link SolrClient}
 * (see {@link #setSolrClient(SolrClient)}) or, when no client is set, by a plain
 * HTTP GET against the URL built from the {@link SolrConfiguration}
 * (see {@link #setSolrConfiguration(SolrConfiguration)}).
 *
 * <p>Instances keep the terms collection of the most recently measured record,
 * so the result of {@link #getTermsCollection()} changes with every call to
 * {@link #measure(Selector)}.
 */
public class TfIdfCalculator implements Calculator, Serializable {
    /** Name under which this calculator reports its results. */
    public static final String CALCULATOR_NAME = "uniqueness";

    /** Value passed as the HTTP client's response-buffer warning limit. */
    private static final int MEGABYTE = 1048576;

    /**
     * Query part of the Solr request. The single {@code %s} placeholder is
     * filled with the record identifier; the quotes around it are replaced by
     * {@code %22} after formatting.
     */
    private static final String SOLR_SEARCH_PARAMS = "tvrh/?q=id:\"%s\"&version=2.2&indent=on&qt=tvrh&tv=true&tv.all=true&f.includes.tv.tf=true&tv.fl=dc_title_txt,dc_description_txt,dcterms_alternative_txt&wt=json&json.nl=map&rows=1000&fl=id";

    private static final Logger LOGGER = Logger.getLogger(TfIdfCalculator.class.getCanonicalName());
    private static final HttpClient HTTP_CLIENT = new HttpClient();

    private SolrConfiguration solrConfiguration;
    /** Lazily built URL template; still contains the {@code %s} record-id placeholder. */
    private String solrSearchPath;
    /** Terms collection produced by the extractor for the last measured record. */
    private Map<String, List<TfIdf>> termsCollection;
    private boolean termCollectionEnabled = false;
    private Schema schema;
    private SolrClient solrClient;

    /** Creates a calculator without a schema. */
    public TfIdfCalculator() {
    }

    /**
     * Creates a calculator bound to the given schema.
     *
     * @param schema the schema handed to the {@link TfIdfExtractor} and used to build the header
     */
    public TfIdfCalculator(Schema schema) {
        this.schema = schema;
    }

    /**
     * @return {@link #CALCULATOR_NAME}
     */
    @Override // de.gwdg.metadataqa.api.interfaces.Calculator
    public String getCalculatorName() {
        return CALCULATOR_NAME;
    }

    /**
     * Retrieves the Solr response for the selector's record and extracts the
     * TF-IDF values from it.
     *
     * <p>A leading {@code /} is stripped from the record identifier before it is
     * used in the query. As a side effect the terms collection returned by
     * {@link #getTermsCollection()} is replaced with the one of this record.
     *
     * @param selector provides the record identifier
     * @return a single-element list holding the extracted field counter wrapped
     *         in a {@link FieldCounterBasedResult}
     */
    @Override // de.gwdg.metadataqa.api.interfaces.Calculator
    public List<MetricResult> measure(Selector selector) {
        String recordId = selector.getRecordId();
        if (recordId.startsWith("/")) {
            recordId = recordId.substring(1);
        }

        String tfIdfResponse;
        if (this.solrClient != null) {
            String query = String.format(SOLR_SEARCH_PARAMS, recordId).replace("\"", "%22");
            tfIdfResponse = this.solrClient.getTfIdfResponse(query, recordId);
        } else {
            tfIdfResponse = getSolrResponse(recordId);
        }

        TfIdfExtractor tfIdfExtractor = new TfIdfExtractor(this.schema);
        FieldCounter<Double> extract = tfIdfExtractor.extract(tfIdfResponse, recordId, this.termCollectionEnabled);
        this.termsCollection = tfIdfExtractor.getTermsCollection();
        return List.of(new FieldCounterBasedResult(getCalculatorName(), extract));
    }

    /**
     * Issues an HTTP GET against the configured Solr URL for the given record.
     *
     * <p>A non-200 status is logged but the body is still read. Protocol and
     * transport errors are logged and result in {@code null}.
     *
     * @param recordId the record identifier inserted into the query
     * @return the response body decoded as UTF-8, or {@code null} if the request
     *         failed or the response carried no body
     */
    private String getSolrResponse(String recordId) {
        String url = String.format(getSolrSearchPath(), recordId).replace("\"", "%22");
        GetMethod method = new GetMethod(url);
        HttpMethodParams params = new HttpMethodParams();
        params.setIntParameter("http.method.response.buffer.warnlimit", MEGABYTE);
        method.setParams(params);

        try {
            if (HTTP_CLIENT.executeMethod(method) != 200) {
                LOGGER.severe("Method failed: " + method.getStatusLine());
            }
            var body = method.getResponseBodyAsStream();
            if (body == null) {
                // No response body available; treat it like the other failure paths.
                LOGGER.severe("Method failed: empty response body for " + url);
                return null;
            }
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            IOUtils.copy(body, buffer);
            return buffer.toString(StandardCharsets.UTF_8);
        } catch (HttpException e) {
            // HttpException is a subclass of IOException, so it has to be caught first.
            LOGGER.severe("Fatal protocol violation: " + e.getMessage());
            return null;
        } catch (IOException e) {
            LOGGER.severe("Fatal transport error: " + e.getMessage());
            return null;
        } finally {
            // Always hand the connection back, whatever happened above.
            method.releaseConnection();
        }
    }

    /**
     * @return the terms collection of the most recently measured record, or
     *         {@code null} if nothing has been measured yet
     */
    public Map<String, List<TfIdf>> getTermsCollection() {
        return this.termsCollection;
    }

    /**
     * Sets the flag that is passed to {@link TfIdfExtractor} on each measurement.
     *
     * @param z {@code true} to enable term collection
     */
    public void enableTermCollection(boolean z) {
        this.termCollectionEnabled = z;
    }

    /**
     * @return whether term collection is enabled
     */
    public boolean isTermCollectionEnabled() {
        return this.termCollectionEnabled;
    }

    /**
     * Builds the header: for every index field of the schema two entries,
     * {@code <label>:sum} followed by {@code <label>:avg}.
     *
     * @return the header names in schema index-field order
     */
    @Override // de.gwdg.metadataqa.api.interfaces.Calculator
    public List<String> getHeader() {
        List<String> headers = new ArrayList<>();
        for (DataElement dataElement : this.schema.getIndexFields()) {
            headers.add(dataElement.getLabel() + ":sum");
            headers.add(dataElement.getLabel() + ":avg");
        }
        return headers;
    }

    /**
     * @param solrConfiguration host, port and path used to build the Solr URL
     *        when no {@link SolrClient} is set
     */
    public void setSolrConfiguration(SolrConfiguration solrConfiguration) {
        this.solrConfiguration = solrConfiguration;
    }

    /**
     * Returns the Solr URL template, building and caching it on first use from
     * the configured host, port and path.
     *
     * <p>The query parameters are inserted as a format argument, so the
     * {@code %s} placeholder for the record identifier is still present in the
     * returned string. Requires a configuration to have been set via
     * {@link #setSolrConfiguration(SolrConfiguration)}.
     *
     * @return the URL template with one {@code %s} placeholder
     */
    public String getSolrSearchPath() {
        if (this.solrSearchPath == null) {
            this.solrSearchPath = String.format(
                "http://%s:%s/%s/%s",
                this.solrConfiguration.getSolrHost(),
                this.solrConfiguration.getSolrPort(),
                this.solrConfiguration.getSolrPath(),
                SOLR_SEARCH_PARAMS);
        }
        return this.solrSearchPath;
    }

    /**
     * @param solrClient client used to fetch the Solr response; when set, it is
     *        used instead of the direct HTTP request
     */
    public void setSolrClient(SolrClient solrClient) {
        this.solrClient = solrClient;
    }
}
