package org.apache.flink.ml.feature.regextokenizer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.flink.ml.api.Transformer;
import org.apache.flink.ml.param.Param;
import org.apache.flink.ml.util.ParamUtils;
import org.apache.flink.ml.util.ReadWriteUtils;
import org.apache.flink.table.api.Expressions;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.expressions.Expression;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.util.Preconditions;

/* loaded from: input_file:org/apache/flink/ml/feature/regextokenizer/RegexTokenizer.class */
/**
 * A {@link Transformer} that tokenizes a string column with a regular expression and appends the
 * tokens as a new array-of-strings column.
 *
 * <p>The input column, output column, pattern, gaps flag, lowercase flag and minimum token length
 * are all read from the parameters exposed through {@link RegexTokenizerParams}.
 */
public class RegexTokenizer implements Transformer<RegexTokenizer>, RegexTokenizerParams<RegexTokenizer> {
    /** Parameter values of this stage, keyed by their parameter definition. */
    private final Map<Param<?>, Object> paramMap = new HashMap<>();

    /** Scalar function that performs the actual regex tokenization of a single string value. */
    public static class RegexTokenizerUdf extends ScalarFunction {
        // The regex is the same for every row of a given transform() call, so the compiled form
        // is cached instead of being rebuilt per row. The fields are transient because Pattern
        // caching is purely an optimisation and must not travel with the serialized function.
        // NOTE(review): assumes a function instance is invoked from one thread at a time, as is
        // usual for Flink operators - confirm if this UDF is ever shared across threads.
        private transient String cachedRegex;
        private transient Pattern cachedPattern;

        /**
         * Splits {@code input} into tokens.
         *
         * @param input the text to tokenize; may be {@code null}
         * @param pattern the regular expression to apply
         * @param gaps if {@code true}, {@code pattern} matches the separators between tokens; if
         *     {@code false}, {@code pattern} matches the tokens themselves
         * @param toLowercase whether to lowercase the text before tokenizing
         * @param minTokenLength tokens shorter than this many characters are dropped
         * @return the tokens in order of appearance, or {@code null} when {@code input} is
         *     {@code null}
         * @throws java.util.regex.PatternSyntaxException if {@code pattern} is not a valid regex
         */
        public String[] eval(String input, String pattern, Boolean gaps, boolean toLowercase, int minTokenLength) {
            // Resolve the pattern first so that an invalid regex is reported even for null rows.
            Pattern regex = compiledPattern(pattern);
            if (input == null) {
                // A null cell yields a null result instead of failing the job with an NPE.
                return null;
            }
            // Locale.ROOT keeps the result independent of the default locale of the JVM that
            // happens to run this task (e.g. the Turkish dotless-i mapping).
            String text = toLowercase ? input.toLowerCase(Locale.ROOT) : input;

            List<String> tokens = new ArrayList<>();
            if (gaps) {
                for (String candidate : regex.split(text)) {
                    if (candidate.length() >= minTokenLength) {
                        tokens.add(candidate);
                    }
                }
            } else {
                Matcher matcher = regex.matcher(text);
                while (matcher.find()) {
                    String candidate = matcher.group();
                    if (candidate.length() >= minTokenLength) {
                        tokens.add(candidate);
                    }
                }
            }
            return tokens.toArray(new String[0]);
        }

        /** Returns the compiled form of {@code regex}, recompiling only when the regex changes. */
        private Pattern compiledPattern(String regex) {
            if (cachedPattern == null || !regex.equals(cachedRegex)) {
                cachedPattern = Pattern.compile(regex);
                cachedRegex = regex;
            }
            return cachedPattern;
        }
    }

    /** Creates a tokenizer whose parameter map is initialized via {@link ParamUtils}. */
    public RegexTokenizer() {
        ParamUtils.initializeMapWithDefaultValues(this.paramMap, this);
    }

    /**
     * Appends the token column to the single input table.
     *
     * @param tableArr exactly one input table
     * @return a one-element array holding the input table with the output column added
     * @throws IllegalArgumentException if the number of input tables is not one
     */
    public Table[] transform(Table... tableArr) {
        Preconditions.checkArgument(
                tableArr.length == 1,
                "RegexTokenizer expects exactly one input table, but got %s.",
                tableArr.length);

        Expression tokenColumn =
                Expressions.call(
                                RegexTokenizerUdf.class,
                                Expressions.$(getInputCol()),
                                getPattern(),
                                getGaps(),
                                getToLowercase(),
                                getMinTokenLength())
                        .as(getOutputCol());
        return new Table[] {tableArr[0].addColumns(tokenColumn)};
    }

    /**
     * Saves the metadata of this stage under the given path.
     *
     * @param str the path to save to
     * @throws IOException if {@link ReadWriteUtils#saveMetadata} fails
     */
    public void save(String str) throws IOException {
        ReadWriteUtils.saveMetadata(this, str);
    }

    /** Returns the live parameter map backing this stage (not a copy). */
    public Map<Param<?>, Object> getParamMap() {
        return this.paramMap;
    }

    /**
     * Loads a tokenizer from the given path.
     *
     * @param streamTableEnvironment unused by this implementation; presumably kept because the
     *     loading machinery expects this signature - verify against the caller
     * @param str the path a tokenizer was previously saved to
     * @return the loaded tokenizer
     * @throws IOException if {@link ReadWriteUtils#loadStageParam} fails
     */
    public static RegexTokenizer load(StreamTableEnvironment streamTableEnvironment, String str) throws IOException {
        return ReadWriteUtils.loadStageParam(str);
    }
}
