package gate.learning;

import gate.Utils;
import gate.util.BomStrippingInputStreamReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/* loaded from: input_file:gate/learning/NLPFeaturesList.class */
/**
 * In-memory list of NLP features, with helpers to load it from and store it
 * to a plain-text file.
 *
 * <p>The on-disk format written by {@link #writeListIntoFile} and read by
 * {@link #loadFromFile} is a header line {@code totalNumDocs=<n>} followed by
 * one line per feature of the form {@code <feature> <index> <count>}, the
 * three fields being separated by single spaces.
 */
public class NLPFeaturesList {
    /**
     * Feature name to feature index. Indices are assigned by
     * {@link #addFeaturesFromDoc} in order of first appearance, starting
     * from 1 for an empty list.
     */
    public Map<String, Long> featuresList;
    /**
     * Feature name to the number of times the feature has been seen by
     * {@link #addFeaturesFromDoc} (every occurrence is counted).
     */
    public Map<String, Long> idfFeatures;
    /** Running total of {@code numInstances} over all documents added so far. */
    int totalNumDocs = 0;
    /** Marker that separates an n-gram feature from its trailing suffix. */
    public static final String SYMBOLNGARM = "<>";

    /** Matches a token that ends in a bracketed number such as {@code foo[-1]}; compiled once. */
    private static final Pattern POSITION_SUFFIX = Pattern.compile(".+\\[[-0-9]+\\]$");

    /** Message reported (at most once) when the feature limit is reached. */
    private static final String TOO_MANY_FEATURES_MESSAGE =
            "There are more NLP features from the training docuements than the pre-defined maximal number900000";

    /** Creates an empty feature list. */
    public NLPFeaturesList() {
        this.featuresList = new HashMap<>();
        this.idfFeatures = new HashMap<>();
    }

    /**
     * Loads the feature list from {@code str} inside directory {@code file},
     * adding the entries to the current maps.
     *
     * <p>If the file does not exist nothing is loaded (a note is printed when
     * the verbosity level is above 0). Blank lines are skipped. An I/O failure
     * is reported on {@code System.err} and loading stops; entries read before
     * the failure are kept.
     *
     * @param file directory containing the feature list file
     * @param str  name of the feature list file
     * @param str2 name of the character encoding used to read the file
     * @throws NumberFormatException if the header or a feature line contains
     *         a value that is not a valid number
     * @throws ArrayIndexOutOfBoundsException if a non-blank feature line has
     *         fewer than three space-separated fields
     */
    public void loadFromFile(File file, String str, String str2) {
        File listFile = new File(file, str);
        if (!listFile.exists()) {
            if (LogService.minVerbosityLevel > 0) {
                System.out.println("No feature list file in initialisation phrase.");
            }
            return;
        }
        // The raw stream is its own resource so that it is closed even when
        // constructing the reader fails (e.g. unsupported encoding name).
        try (FileInputStream in = new FileInputStream(listFile);
                BomStrippingInputStreamReader reader = new BomStrippingInputStreamReader(in, str2)) {
            String header = reader.readLine();
            if (header != null) {
                // Header looks like "totalNumDocs=<n>": take what follows the last '='.
                this.totalNumDocs = Integer.parseInt(header.substring(header.lastIndexOf('=') + 1).trim());
            }
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // Tolerate blank (typically trailing) lines instead of failing on them.
                    continue;
                }
                String[] fields = line.split(" ");
                // Parse both numbers before touching the maps so that a bad
                // line never leaves a feature with an index but no count.
                Long index = Long.valueOf(fields[1]);
                Long count = Long.valueOf(fields[2]);
                this.featuresList.put(fields[0], index);
                this.idfFeatures.put(fields[0], count);
            }
        } catch (IOException e) {
            System.err.println("Could not read the feature list file " + listFile + ": " + e);
        }
    }

    /**
     * Writes the feature list to {@code str} inside directory {@code file}:
     * the {@code totalNumDocs} header followed by one line per feature, with
     * the features in natural (sorted) string order.
     *
     * <p>An I/O failure is reported on {@code System.err}; it is not rethrown.
     *
     * @param file directory in which the file is created
     * @param str  name of the file to write
     * @param str2 name of the character encoding used to write the file
     */
    public void writeListIntoFile(File file, String str, String str2) {
        File listFile = new File(file, str);
        if (LogService.minVerbosityLevel > 1) {
            System.out.println("Lengh of List = " + this.featuresList.size());
        }
        try (FileOutputStream out = new FileOutputStream(listFile);
                PrintWriter writer = new PrintWriter(new OutputStreamWriter(out, str2))) {
            writer.println("totalNumDocs=" + this.totalNumDocs);
            ArrayList<String> names = new ArrayList<>(this.featuresList.keySet());
            Collections.sort(names);
            for (String name : names) {
                writer.println(name + " " + this.featuresList.get(name) + " " + this.idfFeatures.get(name));
            }
            // PrintWriter never throws on write errors; ask it explicitly.
            if (writer.checkError()) {
                System.err.println("Errors occurred while writing the feature list file " + listFile);
            }
        } catch (IOException e) {
            System.err.println("Could not write the feature list file " + listFile + ": " + e);
        }
    }

    /**
     * Adds the features of every instance of a document to the list.
     *
     * <p>Each non-null entry of {@code featuresInLine} is split on
     * {@code ConstantParameters.ITEMSEPARATOR}. For every non-empty token:
     * <ol>
     *   <li>a trailing bracketed number ({@code "[...]"}) is removed;</li>
     *   <li>if the token contains {@link #SYMBOLNGARM}, everything from the
     *       last occurrence of that marker on is removed; otherwise, unless
     *       the token equals {@code ConstantParameters.NAMENONFEATURE} or has
     *       an {@code 'N'} at index
     *       {@code ConstantParameters.ITEMSEPREPLACEMENT.length()}, everything
     *       from the last {@code ITEMSEPREPLACEMENT} on is removed;</li>
     *   <li>a resulting name equal to {@code NAMENONFEATURE} is ignored; a
     *       known name has its count incremented; a new name receives the
     *       next index and a count of 1.</li>
     * </ol>
     *
     * <p>When the number of features has reached
     * {@code ConstantParameters.MAXIMUMFEATURES} the method reports this and
     * returns immediately; in that case {@code totalNumDocs} is not updated
     * for this document. Otherwise {@code totalNumDocs} grows by the
     * document's {@code numInstances}.
     *
     * <p>NOTE(review): step 2 assumes well-formed tokens; a token shorter than
     * {@code ITEMSEPREPLACEMENT.length() + 1} characters, or one without an
     * {@code ITEMSEPREPLACEMENT}, makes the string operations throw
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param nLPFeaturesOfDoc the document whose features are added
     */
    public void addFeaturesFromDoc(NLPFeaturesOfDoc nLPFeaturesOfDoc) {
        long numFeatures = this.featuresList.size();
        for (int i = 0; i < nLPFeaturesOfDoc.numInstances; i++) {
            if (nLPFeaturesOfDoc.featuresInLine[i] == null) {
                continue;
            }
            String[] tokens = nLPFeaturesOfDoc.featuresInLine[i].toString().trim()
                    .split(ConstantParameters.ITEMSEPARATOR);
            for (String token : tokens) {
                String name = token;
                if (POSITION_SUFFIX.matcher(name).matches()) {
                    // Drop the trailing "[n]" part.
                    name = name.substring(0, name.lastIndexOf('['));
                }
                if (name.equals("")) {
                    continue;
                }
                if (name.contains(SYMBOLNGARM)) {
                    name = name.substring(0, name.lastIndexOf(SYMBOLNGARM));
                } else if (!name.equals(ConstantParameters.NAMENONFEATURE)
                        && name.charAt(ConstantParameters.ITEMSEPREPLACEMENT.length()) != 'N') {
                    name = name.substring(0, name.lastIndexOf(ConstantParameters.ITEMSEPREPLACEMENT));
                }
                if (name.equals(ConstantParameters.NAMENONFEATURE)) {
                    continue;
                }
                if (numFeatures >= ConstantParameters.MAXIMUMFEATURES) {
                    if (!Utils.isLoggedOnce(TOO_MANY_FEATURES_MESSAGE)) {
                        System.out.println(TOO_MANY_FEATURES_MESSAGE);
                    }
                    return;
                }
                if (this.featuresList.containsKey(name)) {
                    // Treat a missing count as 0 rather than failing with a
                    // NullPointerException if the two public maps are out of step.
                    Long seen = this.idfFeatures.get(name);
                    this.idfFeatures.put(name, seen == null ? 1L : seen.longValue() + 1);
                } else {
                    numFeatures++;
                    this.featuresList.put(name, Long.valueOf(numFeatures));
                    this.idfFeatures.put(name, 1L);
                }
            }
        }
        this.totalNumDocs += nLPFeaturesOfDoc.numInstances;
    }

    /**
     * Removes all features and their counts. {@code totalNumDocs} is left
     * unchanged.
     */
    public void clearAllData() {
        this.featuresList.clear();
        this.idfFeatures.clear();
    }

    /**
     * Writes the n-gram features and their counts to {@code str} inside
     * directory {@code file}, encoded as UTF-8.
     *
     * <p>The file starts with a {@code ##} comment line naming {@code i} and
     * {@code totalNumDocs}. Only features whose name contains
     * {@link #SYMBOLNGARM} are written, one per line, as the part of the name
     * between the first {@code '_'} found from index 1 on and the last
     * {@code SYMBOLNGARM}, followed by a space and the feature's count. The
     * line order is the index order produced by
     * {@code LightWeightLearningApi.sortFloatAscIndex} on the counts.
     *
     * <p>An I/O failure is reported on {@code System.err}; it is not rethrown.
     *
     * @param file directory in which the file is created
     * @param str  name of the file to write
     * @param i    the n of the n-grams, used only in the header line
     */
    public void writeToLM(File file, String str, int i) {
        File lmFile = new File(file, str);
        if (LogService.minVerbosityLevel > 1) {
            System.out.println("Lengh of List = " + this.featuresList.size());
        }
        try (FileOutputStream out = new FileOutputStream(lmFile);
                PrintWriter writer = new PrintWriter(new OutputStreamWriter(out, "UTF-8"))) {
            writer.println("## The following " + i + "-gram were obtained from " + this.totalNumDocs
                    + " documents or examples");
            ArrayList<String> names = new ArrayList<>(this.featuresList.keySet());
            int count = names.size();
            // The sort helper works on floats; they are used as sort keys only.
            float[] sortKeys = new float[count];
            for (int k = 0; k < count; k++) {
                sortKeys[k] = this.idfFeatures.get(names.get(k)).floatValue();
            }
            int[] order = new int[count];
            LightWeightLearningApi.sortFloatAscIndex(sortKeys, order, count, count);
            for (int k = 0; k < count; k++) {
                String name = names.get(order[k]);
                if (name.contains(SYMBOLNGARM)) {
                    // Print the exact stored count instead of a float-rounded copy of it.
                    writer.println(name.substring(name.indexOf("_", 1) + 1, name.lastIndexOf(SYMBOLNGARM))
                            + " " + this.idfFeatures.get(name));
                }
            }
            // PrintWriter never throws on write errors; ask it explicitly.
            if (writer.checkError()) {
                System.err.println("Errors occurred while writing the language model file " + lmFile);
            }
        } catch (IOException e) {
            System.err.println("Could not write the language model file " + lmFile + ": " + e);
        }
    }
}
