package org.jsoar.kernel.exploration;

import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Map;
import org.jsoar.kernel.Agent;
import org.jsoar.kernel.exploration.ExplorationParameter;
import org.jsoar.kernel.learning.rl.ReinforcementLearning;
import org.jsoar.kernel.learning.rl.ReinforcementLearningParams;
import org.jsoar.kernel.memory.Preference;
import org.jsoar.kernel.memory.PreferenceType;
import org.jsoar.kernel.memory.Slot;
import org.jsoar.kernel.symbols.DoubleSymbolImpl;
import org.jsoar.kernel.symbols.SymbolImpl;
import org.jsoar.kernel.tracing.Trace;
import org.jsoar.util.adaptables.Adaptables;

/* loaded from: input_file:org/jsoar/kernel/exploration/Exploration.class */
/**
 * Holds an agent's exploration settings (selection policy, numeric-indifferent
 * combination mode and the named numeric parameters) and chooses one preference
 * from a linked list of indifferent candidates according to those settings.
 *
 * <p>Candidate lists are walked through {@code Preference.next_candidate}; the
 * per-slot preference lists are walked through {@code Preference.next}.
 */
public class Exploration {
    /** Name of the parameter read by epsilon-greedy selection. */
    private static final String EPSILON = "epsilon";
    /** Name of the parameter read by Boltzmann selection. */
    private static final String TEMPERATURE = "temperature";

    private final Agent context;
    /** Resolved in {@link #initialize()}; {@code null} until then. */
    private ReinforcementLearning rl;
    private Policy userSelectMode = Policy.USER_SELECT_SOFTMAX;
    private boolean autoUpdate = false;
    private NumericIndifferentMode numeric_indifferent_mode = NumericIndifferentMode.NUMERIC_INDIFFERENT_MODE_SUM;
    /** Registered parameters, keyed by parameter name. */
    private final Map<String, ExplorationParameter> parameters = new HashMap<String, ExplorationParameter>();

    /** How several indifferent preferences for the same candidate are combined. */
    public enum NumericIndifferentMode {
        NUMERIC_INDIFFERENT_MODE_AVG("avg"),
        NUMERIC_INDIFFERENT_MODE_SUM("sum");

        private final String modeName;

        NumericIndifferentMode(String str) {
            this.modeName = str;
        }

        /** @return the textual name of this mode ("avg" or "sum") */
        public String getModeName() {
            return this.modeName;
        }

        /**
         * Looks a mode up by its textual name (exact, case-sensitive match).
         *
         * @param str mode name to search for
         * @return the matching mode, or {@code null} when no mode has that name
         */
        public static NumericIndifferentMode findNumericIndifferentMode(String str) {
            for (NumericIndifferentMode mode : values()) {
                if (mode.modeName.equals(str)) {
                    return mode;
                }
            }
            return null;
        }
    }

    /** Selection policies available to {@link #exploration_choose_according_to_policy}. */
    public enum Policy {
        USER_SELECT_BOLTZMANN("boltzmann"),
        USER_SELECT_E_GREEDY("epsilon-greedy"),
        USER_SELECT_FIRST("first"),
        USER_SELECT_LAST("last"),
        USER_SELECT_RANDOM("random-uniform"),
        USER_SELECT_SOFTMAX("softmax");

        private final String policyName;

        Policy(String str) {
            this.policyName = str;
        }

        /** @return the textual name of this policy */
        public String getPolicyName() {
            return this.policyName;
        }

        /**
         * Looks a policy up by its textual name (exact, case-sensitive match).
         *
         * @param str policy name to search for
         * @return the matching policy, or {@code null} when no policy has that name
         */
        public static Policy findPolicy(String str) {
            for (Policy candidate : values()) {
                if (candidate.policyName.equals(str)) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Creates the exploration state for an agent and registers the two built-in
     * parameters: "epsilon" (initial value 0.1) and "temperature" (initial value 25.0).
     *
     * @param agent the owning agent
     */
    public Exploration(Agent agent) {
        this.context = agent;
        exploration_add_parameter(0.1d, new ExplorationValidateEpsilon(), EPSILON);
        exploration_add_parameter(25.0d, new ExplorationValidateTemperature(), TEMPERATURE);
    }

    /** Resolves the agent's {@link ReinforcementLearning} component via {@code Adaptables.adapt}. */
    public void initialize() {
        this.rl = (ReinforcementLearning) Adaptables.adapt(this.context, ReinforcementLearning.class);
    }

    /**
     * Sets the selection policy by name.
     *
     * @param str policy name as accepted by {@link Policy#findPolicy(String)}
     * @return {@code true} if the name was recognised and the policy changed
     */
    public boolean exploration_set_policy(String str) {
        // An unknown name yields null, which the enum overload rejects with false.
        return exploration_set_policy(Policy.findPolicy(str));
    }

    /**
     * Sets the selection policy.
     *
     * @param policy new policy; {@code null} is rejected
     * @return {@code true} if the policy was stored, {@code false} for {@code null}
     */
    public boolean exploration_set_policy(Policy policy) {
        if (policy == null) {
            return false;
        }
        this.userSelectMode = policy;
        return true;
    }

    /** @return the current selection policy */
    public Policy exploration_get_policy() {
        return this.userSelectMode;
    }

    /**
     * Sets the numeric-indifferent mode by name.
     *
     * @param str mode name as accepted by
     *            {@link NumericIndifferentMode#findNumericIndifferentMode(String)}
     * @return {@code true} if the name was recognised and the mode changed
     */
    public boolean exploration_set_numeric_indifferent_mode(String str) {
        // An unknown name yields null, which the enum overload rejects with false.
        return exploration_set_numeric_indifferent_mode(NumericIndifferentMode.findNumericIndifferentMode(str));
    }

    /**
     * Sets the numeric-indifferent mode.
     *
     * @param numericIndifferentMode new mode; {@code null} is rejected
     * @return {@code true} if the mode was stored, {@code false} for {@code null}
     */
    public boolean exploration_set_numeric_indifferent_mode(NumericIndifferentMode numericIndifferentMode) {
        if (numericIndifferentMode == null) {
            return false;
        }
        this.numeric_indifferent_mode = numericIndifferentMode;
        return true;
    }

    /** @return the current numeric-indifferent mode */
    public NumericIndifferentMode exploration_get_numeric_indifferent_mode() {
        return this.numeric_indifferent_mode;
    }

    /**
     * Creates a parameter and registers it under {@code str}, replacing any
     * parameter previously registered with that name. The new parameter starts
     * with the exponential reduction policy, an exponential rate of 1.0 and a
     * linear rate of 0.0.
     *
     * @param d initial value
     * @param explorationValueFunction validator stored as the parameter's {@code val_func}
     * @param str parameter name (also the map key)
     * @return the newly created parameter
     */
    public ExplorationParameter exploration_add_parameter(double d, ExplorationValueFunction explorationValueFunction, String str) {
        ExplorationParameter created = new ExplorationParameter();
        created.value = d;
        created.name = str;
        created.reduction_policy = ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_EXPONENTIAL;
        created.val_func = explorationValueFunction;
        created.rates.put(ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_EXPONENTIAL, Double.valueOf(1.0d));
        created.rates.put(ExplorationParameter.ReductionPolicy.EXPLORATION_REDUCTION_LINEAR, Double.valueOf(0.0d));
        this.parameters.put(str, created);
        return created;
    }

    /**
     * @param str parameter name
     * @return the parameter's current value, or 0.0 when no such parameter exists
     */
    public double exploration_get_parameter_value(String str) {
        ExplorationParameter param = this.parameters.get(str);
        return param != null ? param.value : 0.0d;
    }

    /**
     * @param str parameter name
     * @return {@code true} if a parameter with that name is registered
     */
    public boolean exploration_valid_parameter(String str) {
        return this.parameters.get(str) != null;
    }

    /**
     * Checks a value against the named parameter's validator.
     *
     * @param str parameter name
     * @param d value to check
     * @return the validator's verdict, or {@code false} when the parameter is unknown
     */
    public boolean exploration_valid_parameter_value(String str, double d) {
        return exploration_valid_parameter_value(this.parameters.get(str), d);
    }

    /**
     * Checks a value against a parameter's validator.
     *
     * @param explorationParameter parameter to validate against; may be {@code null}
     * @param d value to check
     * @return the validator's verdict, or {@code false} for a {@code null} parameter
     */
    boolean exploration_valid_parameter_value(ExplorationParameter explorationParameter, double d) {
        if (explorationParameter == null) {
            return false;
        }
        return explorationParameter.val_func.call(d);
    }

    /**
     * Assigns a value to the named parameter. The value is not run through the
     * parameter's validator here.
     *
     * @param str parameter name
     * @param d new value
     * @return {@code true} if the parameter exists and was updated
     */
    public boolean exploration_set_parameter_value(String str, double d) {
        return exploration_set_parameter_value(this.parameters.get(str), d);
    }

    /**
     * Assigns a value to a parameter. The value is not run through the
     * parameter's validator here.
     *
     * @param explorationParameter parameter to update; may be {@code null}
     * @param d new value
     * @return {@code true} if the parameter was updated, {@code false} for {@code null}
     */
    boolean exploration_set_parameter_value(ExplorationParameter explorationParameter, double d) {
        if (explorationParameter == null) {
            return false;
        }
        explorationParameter.value = d;
        return true;
    }

    /** @return whether {@link #exploration_update_parameters()} is currently active */
    public boolean exploration_get_auto_update() {
        return this.autoUpdate;
    }

    /**
     * Enables or disables automatic parameter updates.
     *
     * @param z new setting
     * @return always {@code true}
     */
    public boolean exploration_set_auto_update(boolean z) {
        this.autoUpdate = z;
        return true;
    }

    /** Calls {@code update()} on every registered parameter, but only when auto-update is on. */
    public void exploration_update_parameters() {
        if (!exploration_get_auto_update()) {
            return;
        }
        for (ExplorationParameter param : this.parameters.values()) {
            param.update();
        }
    }

    /**
     * @param str parameter name
     * @return the parameter's reduction policy, or {@code null} when the parameter is unknown
     */
    public ExplorationParameter.ReductionPolicy exploration_get_reduction_policy(String str) {
        return exploration_get_reduction_policy(this.parameters.get(str));
    }

    /**
     * @param explorationParameter parameter to inspect; may be {@code null}
     * @return the parameter's reduction policy, or {@code null} for a {@code null} parameter
     */
    ExplorationParameter.ReductionPolicy exploration_get_reduction_policy(ExplorationParameter explorationParameter) {
        return explorationParameter != null ? explorationParameter.reduction_policy : null;
    }

    /**
     * @param str parameter name
     * @param str2 reduction policy name
     * @return {@code true} if both the parameter and the reduction policy name are known
     */
    public boolean exploration_valid_reduction_policy(String str, String str2) {
        return this.parameters.get(str) != null
                && ExplorationParameter.ReductionPolicy.findPolicy(str2) != null;
    }

    /**
     * Sets the reduction policy of the named parameter.
     *
     * @param str parameter name
     * @param str2 reduction policy name
     * @return {@code true} if both names were recognised and the policy was stored
     */
    public boolean exploration_set_reduction_policy(String str, String str2) {
        ExplorationParameter param = this.parameters.get(str);
        if (param == null) {
            return false;
        }
        ExplorationParameter.ReductionPolicy reduction = ExplorationParameter.ReductionPolicy.findPolicy(str2);
        if (reduction == null) {
            return false;
        }
        param.reduction_policy = reduction;
        return true;
    }

    /**
     * @param str parameter name
     * @param str2 reduction policy name
     * @return the parameter's rate for that policy, or 0.0 when either name is unknown
     */
    public double exploration_get_reduction_rate(String str, String str2) {
        ExplorationParameter param = this.parameters.get(str);
        if (param == null) {
            return 0.0d;
        }
        ExplorationParameter.ReductionPolicy reduction = ExplorationParameter.ReductionPolicy.findPolicy(str2);
        if (reduction == null) {
            return 0.0d;
        }
        return param.getReductionRate(reduction);
    }

    /**
     * Sets the named parameter's rate for a reduction policy.
     *
     * @param str parameter name
     * @param str2 reduction policy name
     * @param d new rate
     * @return {@code false} when either name is unknown, otherwise the result of
     *         {@code ExplorationParameter.setReductionRate}
     */
    public boolean exploration_set_reduction_rate(String str, String str2, double d) {
        ExplorationParameter param = this.parameters.get(str);
        if (param == null) {
            return false;
        }
        ExplorationParameter.ReductionPolicy reduction = ExplorationParameter.ReductionPolicy.findPolicy(str2);
        if (reduction == null) {
            return false;
        }
        return param.setReductionRate(reduction, d);
    }

    /**
     * Recomputes the numeric value of every candidate, picks one according to
     * the current policy and, when reinforcement learning is enabled, performs
     * the RL update for the slot.
     *
     * @param slot slot whose indifferent preferences supply the candidate values
     * @param preference head of the candidate list; must not be {@code null}
     * @return the chosen candidate (may be {@code null} if the selection routine
     *         for the active policy returns {@code null})
     */
    public Preference exploration_choose_according_to_policy(Slot slot, Preference preference) {
        final Policy policy = exploration_get_policy();

        // Refresh numeric_value / rl_contribution on every candidate (default value 0).
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            exploration_compute_value_of_candidate(cand, slot, 0.0d);
        }

        final boolean rlEnabled = this.rl.rl_enabled();
        final ReinforcementLearningParams.LearningPolicy learningPolicy = rlEnabled
                ? (ReinforcementLearningParams.LearningPolicy) this.context.getProperties().get(ReinforcementLearningParams.LEARNING_POLICY)
                : ReinforcementLearningParams.LearningPolicy.q;

        // For Q-learning the update uses the maximum candidate value, not the chosen one.
        double topValue = preference.numeric_value;
        boolean topRlContribution = preference.rl_contribution;
        if (rlEnabled && learningPolicy == ReinforcementLearningParams.LearningPolicy.q) {
            for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
                if (cand.numeric_value > topValue) {
                    topValue = cand.numeric_value;
                    topRlContribution = cand.rl_contribution;
                }
            }
        }

        Preference chosen = null;
        switch (policy) {
            case USER_SELECT_FIRST:
                chosen = preference;
                break;
            case USER_SELECT_LAST:
                chosen = preference;
                while (chosen.next_candidate != null) {
                    chosen = chosen.next_candidate;
                }
                // Without this break control fell through into USER_SELECT_RANDOM
                // and the "last" choice was replaced by a random one.
                break;
            case USER_SELECT_RANDOM:
                chosen = exploration_randomly_select(preference);
                break;
            case USER_SELECT_SOFTMAX:
                chosen = exploration_probabilistically_select(preference);
                break;
            case USER_SELECT_E_GREEDY:
                chosen = exploration_epsilon_greedy_select(preference);
                break;
            case USER_SELECT_BOLTZMANN:
                chosen = exploration_boltzmann_select(preference);
                break;
            default:
                break;
        }

        if (rlEnabled) {
            this.rl.rl_tabulate_reward_values();
            if (learningPolicy == ReinforcementLearningParams.LearningPolicy.sarsa) {
                this.rl.rl_perform_update(chosen.numeric_value, chosen.rl_contribution, slot.id);
            } else if (learningPolicy == ReinforcementLearningParams.LearningPolicy.q) {
                this.rl.rl_perform_update(topValue, topRlContribution, slot.id);
                // The chosen candidate is not a maximum-value one: clear via rl_watkins_clear.
                if (chosen.numeric_value != topValue) {
                    ReinforcementLearning.rl_watkins_clear(slot.id);
                }
            }
        }
        return chosen;
    }

    /** Picks one candidate uniformly at random using the agent's random source. */
    private Preference exploration_randomly_select(Preference preference) {
        int index = this.context.getRandom().nextInt(Preference.countCandidates(preference));
        return Preference.getCandidate(preference, index);
    }

    /**
     * Picks a candidate with probability proportional to its numeric value.
     * Candidates whose value is not strictly positive are ignored; if the
     * positive values sum to zero the choice is uniformly random instead.
     *
     * @return the selected candidate, or {@code null} if the walk ends without a hit
     */
    private Preference exploration_probabilistically_select(Preference preference) {
        double positiveTotal = 0.0d;
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > 0.0d) {
                positiveTotal += cand.numeric_value;
            }
        }
        if (positiveTotal == 0.0d) {
            return exploration_randomly_select(preference);
        }

        double threshold = this.context.getRandom().nextDouble() * positiveTotal;
        double running = 0.0d;
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > 0.0d) {
                running += cand.numeric_value;
                if (threshold <= running) {
                    return cand;
                }
            }
        }
        return null;
    }

    /**
     * Picks a candidate with probability proportional to
     * {@code exp((value - maxValue) / temperature)}, where {@code temperature}
     * is the current value of the "temperature" parameter. Subtracting the
     * maximum keeps every exponent at or below zero.
     *
     * @param preference head of the candidate list; must not be {@code null}
     * @return the selected candidate, or {@code null} if the walk ends without a hit
     */
    Preference exploration_boltzmann_select(Preference preference) {
        final double temperature = exploration_get_parameter_value(TEMPERATURE);

        double maxValue = preference.numeric_value;
        for (Preference cand = preference.next_candidate; cand != null; cand = cand.next_candidate) {
            if (maxValue < cand.numeric_value) {
                maxValue = cand.numeric_value;
            }
        }

        // One weight per candidate, stored in candidate-list order.
        double weightTotal = 0.0d;
        LinkedList<Double> weights = new LinkedList<Double>();
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            double weight = Math.exp((cand.numeric_value - maxValue) / temperature);
            weights.add(Double.valueOf(weight));
            weightTotal += weight;
        }

        Trace trace = this.context.getTrace();
        if (trace.isEnabled(Trace.Category.INDIFFERENT)) {
            Iterator<Double> weightIt = weights.iterator();
            for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
                double probability = weightIt.next().doubleValue() / weightTotal;
                trace.print("\n Candidate %s:  ", cand.value);
                trace.print("Value (Sum) = %f, (Prob) = %f", Double.valueOf(cand.numeric_value), Double.valueOf(probability));
            }
        }

        double threshold = this.context.getRandom().nextDouble() * weightTotal;
        double running = 0.0d;
        Iterator<Double> weightIt = weights.iterator();
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            running += weightIt.next().doubleValue();
            if (running >= threshold) {
                return cand;
            }
        }
        return null;
    }

    /**
     * With probability equal to the "epsilon" parameter picks a uniformly
     * random candidate; otherwise picks a candidate with the highest value.
     * Candidate values are traced first when the INDIFFERENT category is enabled.
     */
    private Preference exploration_epsilon_greedy_select(Preference preference) {
        final double epsilon = exploration_get_parameter_value(EPSILON);

        Trace trace = this.context.getTrace();
        if (trace.isEnabled(Trace.Category.INDIFFERENT)) {
            for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
                trace.print("\n Candidate %s:  Value (Sum) = %f", cand.value, Double.valueOf(cand.numeric_value));
            }
        }

        if (this.context.getRandom().nextDouble() < epsilon) {
            return exploration_randomly_select(preference);
        }
        return exploration_get_highest_q_value_pref(preference);
    }

    /**
     * Returns a candidate with the highest numeric value; when several
     * candidates tie for the maximum one of them is chosen uniformly at random.
     */
    private Preference exploration_get_highest_q_value_pref(Preference preference) {
        Preference best = preference;
        double topValue = preference.numeric_value;
        int tieCount = 0;
        for (Preference cand = preference; cand != null; cand = cand.next_candidate) {
            if (cand.numeric_value > topValue) {
                topValue = cand.numeric_value;
                best = cand;
                tieCount = 1;
            } else if (cand.numeric_value == topValue) {
                tieCount++;
            }
        }
        if (tieCount == 1) {
            return best;
        }

        // Walk to the first maximum, then skip forward over `remaining` further maxima.
        int remaining = this.context.getRandom().nextInt(tieCount);
        Preference cursor = preference;
        while (cursor.numeric_value != topValue) {
            cursor = cursor.next_candidate;
        }
        while (remaining != 0) {
            cursor = cursor.next_candidate;
            remaining--;
            while (cursor.numeric_value != topValue) {
                cursor = cursor.next_candidate;
            }
        }
        return cursor;
    }

    /**
     * Recomputes {@code numeric_value}, {@code total_preferences_for_candidate}
     * and {@code rl_contribution} of a candidate from the slot's numeric- and
     * binary-indifferent preferences that share the candidate's value.
     *
     * <p>Matching referents are summed. {@code rl_contribution} becomes true
     * when a matching numeric-indifferent preference comes from a production
     * with non-null {@code rlRuleInfo}. If nothing matches, the value is
     * {@code d} with a count of one. In AVG mode the sum is divided by the count.
     *
     * @param preference candidate to update; {@code null} is ignored
     * @param slot slot supplying the indifferent preferences
     * @param d value to use when no indifferent preference matches
     */
    public void exploration_compute_value_of_candidate(Preference preference, Slot slot, double d) {
        if (preference == null) {
            return;
        }
        preference.total_preferences_for_candidate = 0;
        preference.numeric_value = 0.0d;
        preference.rl_contribution = false;

        for (Preference p = slot.getPreferencesByType(PreferenceType.NUMERIC_INDIFFERENT); p != null; p = p.next) {
            if (preference.value == p.value) {
                preference.total_preferences_for_candidate++;
                preference.numeric_value += get_number_from_symbol(p.referent);
                if (p.inst.prod.rlRuleInfo != null) {
                    preference.rl_contribution = true;
                }
            }
        }

        for (Preference p = slot.getPreferencesByType(PreferenceType.BINARY_INDIFFERENT); p != null; p = p.next) {
            if (preference.value == p.value) {
                preference.total_preferences_for_candidate++;
                preference.numeric_value += get_number_from_symbol(p.referent);
            }
        }

        if (preference.total_preferences_for_candidate == 0) {
            preference.numeric_value = d;
            preference.total_preferences_for_candidate = 1;
        }
        if (this.numeric_indifferent_mode == NumericIndifferentMode.NUMERIC_INDIFFERENT_MODE_AVG) {
            preference.numeric_value /= preference.total_preferences_for_candidate;
        }
    }

    /**
     * Converts a symbol to a number.
     *
     * @param symbolImpl symbol to convert; must not be {@code null}
     * @return the symbol's value if it is a double symbol, its value widened to
     *         {@code double} if it is an integer symbol, otherwise 0.0
     */
    public static double get_number_from_symbol(SymbolImpl symbolImpl) {
        DoubleSymbolImpl asDouble = symbolImpl.asDouble();
        if (asDouble != null) {
            return asDouble.getValue();
        }
        // Chained call avoids naming the integer symbol type, which this file does not import.
        if (symbolImpl.asInteger() != null) {
            return symbolImpl.asInteger().getValue();
        }
        return 0.0d;
    }
}
