package org.jsoar.kernel.learning.rl;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.ListIterator;
import java.util.Map;
import org.jsoar.kernel.Agent;
import org.jsoar.kernel.Decider;
import org.jsoar.kernel.PredefinedSymbols;
import org.jsoar.kernel.Production;
import org.jsoar.kernel.ProductionType;
import org.jsoar.kernel.learning.Chunker;
import org.jsoar.kernel.learning.rl.ReinforcementLearningParams;
import org.jsoar.kernel.lhs.Condition;
import org.jsoar.kernel.lhs.GoalIdTest;
import org.jsoar.kernel.lhs.ImpasseIdTest;
import org.jsoar.kernel.lhs.PositiveCondition;
import org.jsoar.kernel.lhs.Test;
import org.jsoar.kernel.lhs.Tests;
import org.jsoar.kernel.lhs.ThreeFieldCondition;
import org.jsoar.kernel.memory.Instantiation;
import org.jsoar.kernel.memory.Preference;
import org.jsoar.kernel.memory.PreferenceType;
import org.jsoar.kernel.memory.RecognitionMemory;
import org.jsoar.kernel.memory.Slot;
import org.jsoar.kernel.memory.WmeImpl;
import org.jsoar.kernel.rete.ProductionAddResult;
import org.jsoar.kernel.rete.Rete;
import org.jsoar.kernel.rete.Token;
import org.jsoar.kernel.rhs.Action;
import org.jsoar.kernel.rhs.MakeAction;
import org.jsoar.kernel.rhs.ReordererException;
import org.jsoar.kernel.rhs.RhsSymbolValue;
import org.jsoar.kernel.symbols.IdentifierImpl;
import org.jsoar.kernel.symbols.StringSymbolImpl;
import org.jsoar.kernel.symbols.SymbolFactoryImpl;
import org.jsoar.kernel.symbols.SymbolImpl;
import org.jsoar.kernel.symbols.Symbols;
import org.jsoar.kernel.tracing.Printer;
import org.jsoar.kernel.tracing.Trace;
import org.jsoar.util.ByRef;
import org.jsoar.util.DefaultSourceLocation;
import org.jsoar.util.SourceLocation;
import org.jsoar.util.adaptables.Adaptable;
import org.jsoar.util.adaptables.Adaptables;
import org.jsoar.util.markers.DefaultMarker;
import org.jsoar.util.properties.PropertyChangeEvent;
import org.jsoar.util.properties.PropertyListener;
import org.jsoar.util.properties.PropertyManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/jsoar/kernel/learning/rl/ReinforcementLearning.class */
public class ReinforcementLearning {
    /** Class-wide SLF4J logger; assigned in the static initializer at the bottom of the class. */
    private static final Logger logger;
    /** Property manager taken from the owning agent in the constructor. */
    private final PropertyManager properties;
    /** RL parameter set, created in the constructor and exposed through {@link #getParams()}. */
    private final ReinforcementLearningParams params;
    /** Source location stamped on productions generated from templates; assigned in the static initializer. */
    private static final SourceLocation NEW_PRODUCTION_SOURCE;
    /** Counter used to number generated template instances; set to 1 by rl_initialize_template_tracking(). */
    private int rl_template_count;
    /** The constructor argument cast to {@link Agent}. */
    private final Agent my_agent;
    /** The adaptable context handed to the constructor; collaborators are looked up on it in initialize(). */
    private final Adaptable myContext;
    // The collaborators below are null until initialize() resolves them from myContext.
    private SymbolFactoryImpl syms;
    private Decider decider;
    private Chunker chunker;
    private RecognitionMemory recMemory;
    private Rete rete;
    private PredefinedSymbols preSyms;
    private Trace trace;
    private Printer printer;
    // Explicit (decompiled) form of the flag javac normally synthesizes for assert statements;
    // true when assertions are disabled for this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    public ReinforcementLearning(Adaptable adaptable) {
        this.myContext = adaptable;
        this.properties = ((Agent) this.myContext).getProperties();
        this.params = new ReinforcementLearningParams(this.properties, this.syms);
        this.my_agent = (Agent) this.myContext;
    }

    /**
     * @return the RL parameter set created in the constructor
     */
    public ReinforcementLearningParams getParams() {
        return params;
    }

    /**
     * Resolves the collaborators from the context, registers a listener that wipes the
     * per-goal RL data whenever the learning property switches to {@code off}, and resets
     * template-id tracking.
     */
    public void initialize() {
        final Class<? extends ReinforcementLearning> self = getClass();
        final Adaptable context = this.myContext;

        this.syms = (SymbolFactoryImpl) Adaptables.require(self, context, SymbolFactoryImpl.class);
        this.decider = (Decider) Adaptables.require(self, context, Decider.class);
        this.chunker = (Chunker) Adaptables.require(self, context, Chunker.class);
        this.recMemory = (RecognitionMemory) Adaptables.require(self, context, RecognitionMemory.class);
        this.rete = (Rete) Adaptables.require(self, context, Rete.class);
        this.preSyms = (PredefinedSymbols) Adaptables.require(self, context, PredefinedSymbols.class);
        this.trace = (Trace) Adaptables.require(self, context, Trace.class);
        this.printer = (Printer) Adaptables.require(self, context, Printer.class);

        final PropertyListener<ReinforcementLearningParams.Learning> resetWhenSwitchedOff =
                new PropertyListener<ReinforcementLearningParams.Learning>() {
                    @Override
                    public void propertyChanged(PropertyChangeEvent<ReinforcementLearningParams.Learning> event) {
                        if (ReinforcementLearningParams.Learning.off == event.getNewValue()) {
                            ReinforcementLearning.this.rl_reset_data();
                        }
                    }
                };
        this.properties.addListener(ReinforcementLearningParams.LEARNING, resetWhenSwitchedOff);

        rl_initialize_template_tracking();
    }

    /**
     * @return {@code true} when the learning parameter is currently {@code on}
     */
    public boolean rl_enabled() {
        return ReinforcementLearningParams.Learning.on == this.params.learning.get();
    }

    /**
     * Resets the RL bookkeeping of every goal, walking from the decider's top goal down
     * through each {@code lower_goal}: traces and previous-operator rules are cleared and
     * all numeric accumulators are zeroed.
     *
     * <p>Decompiler note: JADX widened this method from private to public because the
     * property listener registered in initialize() calls it.
     */
    public void rl_reset_data() {
        for (IdentifierImpl goal = this.decider.top_goal; goal != null; goal = goal.goalInfo.lower_goal) {
            final ReinforcementLearningInfo data = goal.goalInfo.rl_info;

            data.eligibility_traces.clear();
            data.prev_op_rl_rules.clear();

            data.previous_q = 0.0d;
            data.reward = 0.0d;

            data.gap_age = 0L;
            data.hrl_age = 0L;
        }
    }

    /**
     * Drops every reference to {@code production} from the RL data of all goals, starting
     * at the decider's top state. The eligibility trace entry is removed; entries in the
     * previous-operator rule list are overwritten with {@code null} rather than removed,
     * so the list keeps its size.
     *
     * @param production the production being removed
     */
    private void rl_remove_refs_for_prod(Production production) {
        for (IdentifierImpl goal = this.decider.top_state; goal != null; goal = goal.goalInfo.lower_goal) {
            final ReinforcementLearningInfo data = goal.goalInfo.rl_info;
            data.eligibility_traces.remove(production);

            for (ListIterator<Production> cursor = data.prev_op_rl_rules.listIterator(); cursor.hasNext();) {
                if (cursor.next() == production) {
                    cursor.set(null);
                }
            }
        }
    }

    /**
     * Checks whether a production has the shape required of an RL template: exactly one
     * action, and that action is a make action creating either a numeric-indifferent
     * preference or a binary-indifferent preference whose referent is a variable.
     *
     * @param production the production to inspect
     * @return {@code true} if the production qualifies as a template
     */
    static boolean rl_valid_template(Production production) {
        boolean sawNumericIndifferent = false;
        boolean sawVariableBinaryIndifferent = false;
        int actionCount = 0;

        for (Action current = production.getFirstAction(); current != null; current = current.next) {
            actionCount++;

            final MakeAction make = current.asMakeAction();
            if (make == null) {
                continue;
            }

            if (current.preference_type == PreferenceType.NUMERIC_INDIFFERENT) {
                sawNumericIndifferent = true;
            } else if (current.preference_type == PreferenceType.BINARY_INDIFFERENT) {
                final RhsSymbolValue referent = make.referent.asSymbolValue();
                if (referent != null && referent.getSym().asVariable() != null) {
                    sawVariableBinaryIndifferent = true;
                }
            }
        }

        return actionCount == 1 && (sawNumericIndifferent || sawVariableBinaryIndifferent);
    }

    /**
     * Checks whether a production has the shape of an RL rule: exactly one action, which
     * must be a make action creating a numeric-indifferent preference.
     *
     * @param production the production to inspect
     * @return {@code true} if the production qualifies as an RL rule
     */
    private static boolean rl_valid_rule(Production production) {
        int actionCount = 0;
        boolean sawNumericIndifferent = false;

        for (Action current = production.getFirstAction(); current != null; current = current.next) {
            actionCount++;
            final boolean isMake = current.asMakeAction() != null;
            if (isMake && current.preference_type == PreferenceType.NUMERIC_INDIFFERENT) {
                sawNumericIndifferent = true;
            }
        }

        return sawNumericIndifferent && actionCount == 1;
    }

    /**
     * Tells whether every character of {@code str} is a digit according to
     * {@link Character#isDigit(char)}. An empty string yields {@code true}.
     *
     * @param str the text to check
     * @return {@code false} as soon as a non-digit character is found, otherwise {@code true}
     */
    private static boolean is_natural_number(String str) {
        for (final char c : str.toCharArray()) {
            if (!Character.isDigit(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts the numeric id from a generated template-instance name.
     *
     * <p>A name is accepted when it is at least six characters long, starts with
     * {@code "rl*"}, and the text after its last {@code '*'} is a non-empty run of digits.
     *
     * @param str the production name
     * @return the parsed id, or {@code -1} if the name does not have that form or the
     *         digit run does not fit in an {@code int}
     */
    private static int rl_get_template_id(String str) {
        if (str.length() < 6 || !str.startsWith("rl*")) {
            return -1;
        }

        final int lastStar = str.lastIndexOf('*');
        if (lastStar == -1 || lastStar == str.length() - 1) {
            return -1;
        }

        final String suffix = str.substring(lastStar + 1);
        if (!is_natural_number(suffix)) {
            return -1;
        }

        try {
            return Integer.parseInt(suffix);
        } catch (NumberFormatException overflow) {
            // An all-digit suffix can still exceed Integer.MAX_VALUE (e.g. "rl*x*99999999999");
            // such a name cannot carry one of our ids, so treat it like any other non-match.
            return -1;
        }
    }

    /**
     * Restarts template-instance numbering so that the next id handed out is 1.
     */
    public void rl_initialize_template_tracking() {
        rl_template_count = 1;
    }

    /**
     * Moves the template counter past the id embedded in {@code str}, if the name carries
     * one and that id is greater than the current counter.
     *
     * @param str name of a production that was just added
     */
    private void rl_update_template_tracking(String str) {
        final int embeddedId = rl_get_template_id(str);
        if (embeddedId != -1 && embeddedId > this.rl_template_count) {
            this.rl_template_count = embeddedId + 1;
        }
    }

    /**
     * Hands out the current template id and advances the counter by one.
     *
     * @return the id to use for the next template instance
     */
    private int rl_next_template_id() {
        return this.rl_template_count++;
    }

    /**
     * Gives back the most recently issued template id by decrementing the counter.
     */
    private void rl_revert_template_id() {
        this.rl_template_count -= 1;
    }

    /**
     * Records a variable-to-value binding in {@code map} when {@code symbolImpl} is a
     * variable and {@code symbolImpl2} is either not an identifier or is an identifier
     * with a non-zero {@code smem_lti}.
     *
     * @param symbolImpl  symbol from the template conditions
     * @param symbolImpl2 corresponding symbol from the instantiated conditions
     * @param map         receives the binding
     */
    private static void rl_get_symbol_constant(SymbolImpl symbolImpl, SymbolImpl symbolImpl2, Map<SymbolImpl, SymbolImpl> map) {
        if (symbolImpl.asVariable() == null) {
            return;
        }
        final IdentifierImpl boundId = symbolImpl2.asIdentifier();
        final boolean bindable = boundId == null || boundId.smem_lti != 0;
        if (bindable) {
            map.put(symbolImpl, symbolImpl2);
        }
    }

    /**
     * Forwards the referents of two parallel equality tests to
     * rl_get_symbol_constant. Nothing happens if the template-side test is blank or is
     * not an equality test.
     *
     * @param test  test from the template conditions
     * @param test2 corresponding test from the instantiated conditions
     * @param map   receives any binding found
     */
    private static void rl_get_test_constant(Test test, Test test2, Map<SymbolImpl, SymbolImpl> map) {
        if (Tests.isBlank(test)) {
            return;
        }
        if (test.asEqualityTest() == null) {
            return;
        }
        final SymbolImpl templateSym = test.asEqualityTest().getReferent();
        final SymbolImpl instanceSym = test2.asEqualityTest().getReferent();
        rl_get_symbol_constant(templateSym, instanceSym, map);
    }

    /**
     * Walks two condition lists in lock-step and collects variable bindings into
     * {@code map}. Three-field conditions contribute their id, attribute and value tests;
     * conjunctive negations are descended into recursively. The walk ends when the first
     * list is exhausted; the second list is expected to have the same structure.
     *
     * @param condition  head of the template condition list
     * @param condition2 head of the instantiated condition list
     * @param map        receives the bindings
     */
    private void rl_get_template_constants(Condition condition, Condition condition2, Map<SymbolImpl, SymbolImpl> map) {
        for (Condition tmpl = condition, inst = condition2; tmpl != null; tmpl = tmpl.next, inst = inst.next) {
            final ThreeFieldCondition tmplFields = tmpl.asThreeFieldCondition();
            if (tmplFields != null) {
                final ThreeFieldCondition instFields = inst.asThreeFieldCondition();
                rl_get_test_constant(tmplFields.id_test, instFields.id_test, map);
                rl_get_test_constant(tmplFields.attr_test, instFields.attr_test, map);
                rl_get_test_constant(tmplFields.value_test, instFields.value_test, map);
                continue;
            }
            if (tmpl.asConjunctiveNegationCondition() != null) {
                rl_get_template_constants(
                        tmpl.asConjunctiveNegationCondition().top,
                        inst.asConjunctiveNegationCondition().top,
                        map);
            }
        }
    }

    /**
     * Builds a concrete RL rule from a firing of a template production and adds it to the
     * agent's productions.
     *
     * <p>Decompiler note: JADX reported "Multi-variable type inference failed" for this
     * method, which is why raw collection and {@code ByRef} types appear below.
     *
     * <p>NOTE(review): when adding the production throws {@link ReordererException}, the
     * error is only logged; the generated name is still returned and the template id is
     * not reverted - confirm that callers expect this.
     *
     * @param instantiation the template instantiation that fired
     * @param token         passed through to instantiate_rhs_value
     * @param wmeImpl       passed through to instantiate_rhs_value
     * @return the name symbol of the new production, or {@code null} if this binding set
     *         was already instantiated or the production was reported as a duplicate
     */
    public SymbolImpl rl_build_template_instantiation(Instantiation instantiation, Token token, WmeImpl wmeImpl) {
        String str;
        StringSymbolImpl stringSymbolImpl = null;
        RLTemplateInfo rLTemplateInfo = instantiation.prod.rlTemplateInfo;
        // Lazily rebuild the template's condition list from its rete node on first use.
        if (rLTemplateInfo.rl_template_conds == null) {
            rLTemplateInfo.rl_template_conds = this.rete.p_node_to_conditions_and_nots(instantiation.prod.getReteNode(), null, null, false).top;
        }
        if (rLTemplateInfo.rl_template_instantiations == null) {
            rLTemplateInfo.rl_template_instantiations = new HashSet();
        }
        // Collect the bindings of this firing; add() returns false if an equal map was seen before.
        HashMap hashMap = new HashMap();
        rl_get_template_constants(rLTemplateInfo.rl_template_conds, instantiation.top_of_instantiated_conditions, hashMap);
        if (rLTemplateInfo.rl_template_instantiations.add(hashMap)) {
            Production production = instantiation.prod;
            Action firstAction = production.getFirstAction();
            // Keep drawing ids until "rl*<template>*<id>" is not an existing string symbol.
            do {
                str = "rl*" + production.getName() + "*" + rl_next_template_id();
            } while (this.syms.findString(str) != null);
            StringSymbolImpl createString = this.syms.createString(str);
            // Copy the instantiated conditions, add goal tests, then variablize the copy.
            ByRef create = ByRef.create(null);
            ByRef create2 = ByRef.create(null);
            Condition.copy_condition_list(instantiation.top_of_instantiated_conditions, create, create2);
            rl_add_goal_or_impasse_tests_to_conds((Condition) create.value);
            this.syms.getVariableGenerator().reset((Condition) create.value, null);
            this.chunker.variablization_tc = DefaultMarker.create();
            this.chunker.variablize_condition_list((Condition) create.value);
            this.chunker.variablize_nots_and_insert_into_conditions(instantiation.nots, (Condition) create.value);
            // Instantiate the four fields of the template's single action.
            IdentifierImpl asIdentifier = this.recMemory.instantiate_rhs_value(firstAction.asMakeAction().id, -1, 's', token, wmeImpl).asIdentifier();
            SymbolImpl instantiate_rhs_value = this.recMemory.instantiate_rhs_value(firstAction.asMakeAction().attr, asIdentifier.level, 'a', token, wmeImpl);
            char firstLetter = instantiate_rhs_value.getFirstLetter();
            SymbolImpl instantiate_rhs_value2 = this.recMemory.instantiate_rhs_value(firstAction.asMakeAction().value, asIdentifier.level, firstLetter, token, wmeImpl);
            SymbolImpl instantiate_rhs_value3 = this.recMemory.instantiate_rhs_value(firstAction.asMakeAction().referent, asIdentifier.level, firstLetter, token, wmeImpl);
            // The generated rule always carries a numeric-indifferent preference.
            MakeAction rl_make_simple_action = rl_make_simple_action(asIdentifier, instantiate_rhs_value, instantiate_rhs_value2, instantiate_rhs_value3);
            rl_make_simple_action.preference_type = PreferenceType.NUMERIC_INDIFFERENT;
            Production build = Production.newBuilder().type(ProductionType.USER).location(NEW_PRODUCTION_SOURCE).name(createString.toString()).conditions((Condition) create.value, (Condition) create2.value).actions(rl_make_simple_action).build();
            build.rlRuleInfo = new RLRuleInfo();
            // Initial rl_efr is the numeric referent (integer or double); any other referent gives 0.
            double d = 0.0d;
            if (instantiate_rhs_value3.asInteger() != null) {
                d = instantiate_rhs_value3.asInteger().getValue();
            } else if (instantiate_rhs_value3.asDouble() != null) {
                d = instantiate_rhs_value3.asDouble().getValue();
            }
            build.rlRuleInfo.rl_ecr = 0.0d;
            build.rlRuleInfo.rl_efr = d;
            try {
                // A duplicate gives its id back and is reported to the caller as null.
                if (this.my_agent.getProductions().addProduction(build, false) == ProductionAddResult.DUPLICATE_PRODUCTION) {
                    rl_revert_template_id();
                    createString = null;
                }
            } catch (ReordererException e) {
                logger.error("Failed to add template instance production to rete", e);
            }
            stringSymbolImpl = createString;
        }
        return stringSymbolImpl;
    }

    /**
     * Creates a make action whose four fields are the variablized forms of the given
     * symbols. The preference type is left for the caller to set.
     *
     * @param identifierImpl symbol for the action's id field
     * @param symbolImpl     symbol for the attribute field
     * @param symbolImpl2    symbol for the value field
     * @param symbolImpl3    symbol for the referent field
     * @return the populated action
     */
    private MakeAction rl_make_simple_action(IdentifierImpl identifierImpl, SymbolImpl symbolImpl, SymbolImpl symbolImpl2, SymbolImpl symbolImpl3) {
        final MakeAction result = new MakeAction();

        // Field order matters: each variablize_symbol call goes through the shared chunker.
        result.id = this.chunker.variablize_symbol(identifierImpl).toRhsValue();
        result.attr = this.chunker.variablize_symbol(symbolImpl).toRhsValue();
        result.value = this.chunker.variablize_symbol(symbolImpl2).toRhsValue();
        result.referent = this.chunker.variablize_symbol(symbolImpl3).toRhsValue();

        return result;
    }

    /**
     * Adds an id test to the first positive condition that mentions each goal identifier
     * in the list. A fresh marker stored in {@code tc_number} ensures each identifier is
     * handled only once.
     *
     * @param condition head of the condition list to modify in place
     */
    private void rl_add_goal_or_impasse_tests_to_conds(Condition condition) {
        final DefaultMarker visited = DefaultMarker.create();

        for (Condition current = condition; current != null; current = current.next) {
            final PositiveCondition positive = current.asPositiveCondition();
            if (positive == null) {
                continue;
            }

            final IdentifierImpl id = positive.id_test.asEqualityTest().getReferent().asIdentifier();
            if (!id.isGoal() || id.tc_number == visited) {
                continue;
            }

            positive.id_test = Tests.add_new_test_to_test(positive.id_test, id.isGoal() ? GoalIdTest.INSTANCE : ImpasseIdTest.INSTANCE);
            id.tc_number = visited;
        }
    }

    /**
     * Adds the reward currently present on a goal's reward header to that goal's
     * accumulated reward.
     *
     * <p>Nothing happens if the goal has no previous-operator RL rules. Otherwise every
     * numeric value found under the reward structures is summed, scaled by the discount
     * rate raised to the goal's age ({@code hrl_age}, plus {@code gap_age} when temporal
     * discounting is on), and added to {@code reward}. Finally {@code hrl_age} is bumped
     * for every goal except the bottom one when HRL discounting is on.
     *
     * @param identifierImpl the goal to process
     */
    public void rl_tabulate_reward_value_for_goal(IdentifierImpl identifierImpl) {
        final ReinforcementLearningInfo data = identifierImpl.goalInfo.rl_info;
        if (data.prev_op_rl_rules.isEmpty()) {
            return;
        }

        final Slot rewardSlot = Slot.find_slot(identifierImpl.goalInfo.reward_header, this.preSyms.rl_sym_reward);
        final double discountRate = this.params.discount_rate.get().doubleValue();

        if (rewardSlot != null) {
            double total = 0.0d;
            for (WmeImpl rewardWme = rewardSlot.getWmes(); rewardWme != null; rewardWme = rewardWme.next) {
                if (rewardWme.value.asIdentifier() == null) {
                    continue;
                }
                final Slot valueSlot = Slot.find_slot(rewardWme.value.asIdentifier(), this.preSyms.rl_sym_value);
                if (valueSlot == null) {
                    continue;
                }
                for (WmeImpl valueWme = valueSlot.getWmes(); valueWme != null; valueWme = valueWme.next) {
                    // Doubles take precedence over integers; any other value type is ignored.
                    if (valueWme.value.asDouble() != null) {
                        total += valueWme.value.asDouble().getValue();
                    } else if (valueWme.value.asInteger() != null) {
                        total += valueWme.value.asInteger().getValue();
                    }
                }
            }

            long effectiveAge = data.hrl_age;
            if (this.params.temporal_discount.get() == ReinforcementLearningParams.TemporalDiscount.on) {
                effectiveAge += data.gap_age;
            }
            data.reward += total * Math.pow(discountRate, effectiveAge);
        }

        final boolean isBottomGoal = identifierImpl == this.decider.bottom_goal;
        if (!isBottomGoal && this.params.hrl_discount.get() == ReinforcementLearningParams.HrlDiscount.on) {
            data.hrl_age++;
        }
    }

    /**
     * Runs {@link #rl_tabulate_reward_value_for_goal(IdentifierImpl)} on every goal,
     * from the decider's top goal downward.
     */
    public void rl_tabulate_reward_values() {
        for (IdentifierImpl goal = this.decider.top_goal; goal != null; goal = goal.goalInfo.lower_goal) {
            rl_tabulate_reward_value_for_goal(goal);
        }
    }

    /**
     * Remembers, for a goal, which RL rules support the given preference's operator and
     * what its numeric value was.
     *
     * <p>If at least one numeric-indifferent preference for the same operator came from an
     * RL rule, those rules replace the goal's previous-operator rule list. Otherwise, with
     * temporal extension on, a non-empty rule list is kept and {@code gap_age} grows by
     * one; with it off, the list is cleared.
     *
     * @param identifierImpl the goal whose RL data is updated
     * @param preference     the preference whose {@code value} and {@code numeric_value} are stored
     */
    public void rl_store_data(IdentifierImpl identifierImpl, Preference preference) {
        final ReinforcementLearningInfo data = identifierImpl.goalInfo.rl_info;
        final SymbolImpl operator = preference.value;
        data.previous_q = preference.numeric_value;

        final boolean usingGaps = this.params.temporal_extension.get() == ReinforcementLearningParams.TemporalExtension.on;

        // Gather the RL rules that produced a numeric-indifferent preference for this operator.
        int justFired = 0;
        for (Preference candidate = identifierImpl.goalInfo.operator_slot.getPreferencesByType(PreferenceType.NUMERIC_INDIFFERENT);
                candidate != null;
                candidate = candidate.next) {
            if (operator != candidate.value || candidate.inst.prod.rlRuleInfo == null) {
                continue;
            }
            // The old list is discarded only once we know there is something to replace it with.
            if (justFired == 0 && !data.prev_op_rl_rules.isEmpty()) {
                data.prev_op_rl_rules.clear();
            }
            data.prev_op_rl_rules.push(candidate.inst.prod);
            justFired++;
        }

        if (justFired != 0) {
            data.previous_q = preference.numeric_value;
            return;
        }

        if (!usingGaps) {
            if (!data.prev_op_rl_rules.isEmpty()) {
                data.prev_op_rl_rules.clear();
            }
            data.previous_q = preference.numeric_value;
            return;
        }

        if (data.prev_op_rl_rules.isEmpty()) {
            return;
        }
        if (this.trace.isEnabled(Trace.Category.RL) && data.gap_age == 0) {
            this.trace.startNewLine().print(Trace.Category.RL, "gap started (%s)", identifierImpl);
        }
        data.gap_age++;
    }

    /**
     * Convenience overload of the four-argument {@code rl_perform_update} that passes
     * {@code true} as the last argument.
     *
     * @param d              forwarded unchanged
     * @param z              forwarded unchanged
     * @param identifierImpl the goal to update
     */
    public void rl_perform_update(double d, boolean z, IdentifierImpl identifierImpl) {
        final boolean updateFutureReward = true;
        rl_perform_update(d, z, identifierImpl, updateFutureReward);
    }

    /**
     * Applies one learning update to the RL rules recorded for a goal and then resets the
     * goal's reward and age counters.
     *
     * <p>Steps, in order: decay (or clear) the eligibility traces; add this step's share to
     * the trace of every previous-operator rule; compute the update error from the goal's
     * accumulated reward plus the discounted {@code d}; then, for every rule with a trace,
     * pick a learning rate according to the decay mode, update {@code rl_ecr} /
     * {@code rl_efr}, rewrite the rule's action referent and push the new value into the
     * preferences of its live instantiations.
     *
     * @param d              value that is discounted and used as the future-reward target
     *                       (presumably the next operator's value - confirm against callers)
     * @param z              when temporal extension is on, the update is skipped entirely
     *                       unless this is {@code true}
     * @param identifierImpl the goal whose RL data drives the update
     * @param z2             when {@code false}, {@code rl_efr} of each rule is left unchanged
     */
    public void rl_perform_update(double d, boolean z, IdentifierImpl identifierImpl, boolean z2) {
        final boolean usingGaps = this.params.temporal_extension.get() == ReinforcementLearningParams.TemporalExtension.on;
        if (usingGaps && !z) {
            return;
        }

        final ReinforcementLearningInfo data = identifierImpl.goalInfo.rl_info;
        if (!data.prev_op_rl_rules.isEmpty()) {
            final double alpha = this.params.learning_rate.get().doubleValue();
            final double lambda = this.params.et_decay_rate.get().doubleValue();
            final double gamma = this.params.discount_rate.get().doubleValue();
            final double tolerance = this.params.et_tolerance.get().doubleValue();
            final double theta = this.params.meta_learning_rate.get().doubleValue();

            long effectiveAge = data.hrl_age + 1;
            if (this.params.temporal_discount.get() == ReinforcementLearningParams.TemporalDiscount.on) {
                effectiveAge += data.gap_age;
            }
            final double discount = Math.pow(gamma, effectiveAge);

            if (data.gap_age != 0 && usingGaps && this.trace.isEnabled(Trace.Category.RL)) {
                this.trace.startNewLine().print("gap ended (%s)", identifierImpl);
            }

            // Decay existing traces and drop those below tolerance; a zero decay rate
            // means no trace survives a step.
            if (lambda != 0.0d) {
                final Iterator<Map.Entry<Production, Double>> traces = data.eligibility_traces.entrySet().iterator();
                while (traces.hasNext()) {
                    final Map.Entry<Production, Double> trace = traces.next();
                    trace.setValue(Double.valueOf(trace.getValue().doubleValue() * lambda * discount));
                    if (trace.getValue().doubleValue() < tolerance) {
                        traces.remove();
                    }
                }
            } else if (!data.eligibility_traces.isEmpty()) {
                data.eligibility_traces.clear();
            }

            // Sum the current estimates of the previous-operator rules and give each of
            // them an equal share of this step's trace.
            double sumOldEcr = 0.0d;
            double sumOldEfr = 0.0d;
            final double traceIncrement = 1.0d / data.prev_op_rl_rules.size();
            for (final Production rule : data.prev_op_rl_rules) {
                if (rule == null) {
                    // Entries are nulled out when their production is excised.
                    continue;
                }
                sumOldEcr += rule.rlRuleInfo.rl_ecr;
                sumOldEfr += rule.rlRuleInfo.rl_efr;
                final Double existing = data.eligibility_traces.get(rule);
                if (existing != null) {
                    data.eligibility_traces.put(rule, Double.valueOf(existing.doubleValue() + traceIncrement));
                } else {
                    data.eligibility_traces.put(rule, Double.valueOf(traceIncrement));
                }
            }

            final double delta = (data.reward + (discount * d)) - (sumOldEcr + sumOldEfr);

            for (final Map.Entry<Production, Double> entry : data.eligibility_traces.entrySet()) {
                final Production rule = entry.getKey();
                // Kept in decompiled form: a real assert statement would make javac
                // synthesize a second $assertionsDisabled field next to the declared one.
                if (!$assertionsDisabled && rule.rlRuleInfo == null) {
                    throw new AssertionError();
                }
                final RLRuleInfo info = rule.rlRuleInfo;
                final double oldEcr = info.rl_ecr;
                final double oldEfr = info.rl_efr;

                double rate;
                switch (this.params.decay_mode.get()) {
                    case exponential_decay:
                        rate = 1.0d / (info.rl_update_count + 1.0d);
                        break;
                    case logarithmic_decay:
                        rate = 1.0d / (Math.log(info.rl_update_count + 1.0d) + 1.0d);
                        break;
                    case delta_bar_delta_decay:
                        info.rl_delta_bar_delta_beta += theta * delta * info.rl_delta_bar_delta_h;
                        rate = Math.exp(info.rl_delta_bar_delta_beta);
                        info.rl_delta_bar_delta_h =
                                (info.rl_delta_bar_delta_h * Math.max(0.0d, 1.0d - rate)) + (rate * delta);
                        break;
                    case normal_decay:
                    default:
                        rate = alpha;
                        break;
                }

                final double traceValue = entry.getValue().doubleValue();
                final double deltaEcr = rate * traceValue * (data.reward - sumOldEcr);
                final double deltaEfr = z2 ? rate * traceValue * ((discount * d) - sumOldEfr) : 0.0d;
                final double newEcr = oldEcr + deltaEcr;
                final double newEfr = oldEfr + deltaEfr;
                final double newCombined = newEcr + newEfr;

                if (this.trace.isEnabled(Trace.Category.RL)) {
                    final String message = "RL update " + rule.getName() + " " + oldEcr + " " + oldEfr + " " + (oldEcr + oldEfr) + " -> " + newEcr + " " + newEfr + " " + newCombined + "\n";
                    this.trace.startNewLine().print(message);
                    final String logPath = this.params.update_log_path.get();
                    if (!logPath.isEmpty()) {
                        rl_append_to_update_log(logPath, message);
                    }
                }

                rule.getFirstAction().asMakeAction().referent = this.syms.createDouble(newCombined).toRhsValue();
                info.rl_update_count += 1.0d;
                info.rl_ecr = newEcr;
                info.rl_efr = newEfr;

                if (this.params.meta.get() == ReinforcementLearningParams.Meta.on) {
                    // Locale.ROOT keeps '.' as the decimal separator: getDocParam reads these
                    // values back with Double.parseDouble, which rejects e.g. "1,000000".
                    rule.setDocumentation(
                            String.format(java.util.Locale.ROOT, "%s=%f;", "rl-updates", Double.valueOf(info.rl_update_count))
                                    + String.format(java.util.Locale.ROOT, "%s=%f;", "delta-bar-delta-h", Double.valueOf(info.rl_delta_bar_delta_h)));
                }

                // Push the new value into every preference generated by live instantiations.
                for (Instantiation inst = rule.instantiations; inst != null; inst = inst.nextInProdList) {
                    for (Preference pref = inst.preferences_generated; pref != null; pref = pref.inst_next) {
                        pref.referent = this.syms.createDouble(newCombined);
                    }
                }
            }
        }

        data.gap_age = 0L;
        data.hrl_age = 0L;
        data.reward = 0.0d;
    }

    /**
     * Appends one line to the RL update log. The file is opened in append mode so earlier
     * updates are kept, and the writer is always closed. I/O failures are logged and
     * otherwise ignored so that logging problems never interrupt learning.
     *
     * @param path path of the log file
     * @param line text to append; a platform line separator is added after it
     */
    private static void rl_append_to_update_log(String path, String line) {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(path), true))) {
            out.write(String.format("%s%n", line));
        } catch (IOException e) {
            logger.error("Failed to write RL update to log file '{}'", path, e);
        }
    }

    /**
     * Discards all eligibility traces stored on the given goal.
     *
     * @param identifierImpl the goal whose traces are cleared
     */
    public static void rl_watkins_clear(IdentifierImpl identifierImpl) {
        final ReinforcementLearningInfo data = identifierImpl.goalInfo.rl_info;
        data.eligibility_traces.clear();
    }

    /**
     * Performs the RL bookkeeping for a production that is being added.
     *
     * <p>Non-justification, non-template productions that pass rl_valid_rule get a fresh
     * {@link RLRuleInfo} whose {@code rl_efr} is the numeric value of the action's
     * referent. Template-id tracking is updated from the production's name. An invalid
     * template is reported and excised; otherwise, an RL rule with a documentation string
     * has its meta data restored from that string.
     *
     * @param production the production being added
     */
    public void addProduction(Production production) {
        production.rlRuleInfo = null;

        final ProductionType type = production.getType();
        final boolean isTemplate = type == ProductionType.TEMPLATE;

        if (type != ProductionType.JUSTIFICATION && !isTemplate && rl_valid_rule(production)) {
            production.rlRuleInfo = new RLRuleInfo();
            production.rlRuleInfo.rl_efr = Symbols.asDouble(production.getFirstAction().asMakeAction().referent.asSymbolValue().getSym());
        }

        rl_update_template_tracking(production.getName());

        if (isTemplate && !rl_valid_template(production)) {
            this.printer.print("Invalid Soar-RL template (%s)\n\n", production.getName());
            this.my_agent.getProductions().exciseProduction(production, false);
            return;
        }

        if (production.rlRuleInfo != null && production.getDocumentation() != null) {
            rl_rule_meta(production);
        }
    }

    /**
     * Restores a rule's learning meta data from its documentation string when the meta
     * parameter is on. The string is expected to contain {@code rl-updates=<n>;} and
     * {@code delta-bar-delta-h=<h>;} entries; an entry that is absent leaves the
     * corresponding field untouched.
     *
     * @param production an RL rule (its {@code rlRuleInfo} must be set)
     */
    private void rl_rule_meta(Production production) {
        final String doc = production.getDocumentation();
        if (doc == null || this.params.meta.get() != ReinforcementLearningParams.Meta.on) {
            return;
        }

        // NaN never compares equal to anything, itself included, so "x != Double.NaN" is
        // always true; Double.isNaN is the only reliable way to detect a missing entry.
        final double updateCount = getDocParam("rl-updates", doc);
        if (!Double.isNaN(updateCount)) {
            production.rlRuleInfo.rl_update_count = updateCount;
        }

        final double deltaBarDeltaH = getDocParam("delta-bar-delta-h", doc);
        if (!Double.isNaN(deltaBarDeltaH)) {
            production.rlRuleInfo.rl_delta_bar_delta_h = deltaBarDeltaH;
        }
    }

    /**
     * Reads the numeric value of a {@code name=value;} entry from a documentation string.
     *
     * @param str  the entry name
     * @param str2 the documentation string to search
     * @return the parsed value, or {@code Double.NaN} if {@code name=} does not occur or
     *         no {@code ';'} follows it
     * @throws NumberFormatException if the text between {@code '='} and {@code ';'} is
     *                               not a valid double
     */
    private double getDocParam(String str, String str2) {
        final String key = str + "=";

        final int keyAt = str2.indexOf(key);
        if (keyAt < 0) {
            return Double.NaN;
        }

        final int valueStart = keyAt + key.length();
        final int valueEnd = str2.indexOf(";", valueStart);
        if (valueEnd < 0) {
            return Double.NaN;
        }

        return Double.parseDouble(str2.substring(valueStart, valueEnd));
    }

    /**
     * Cleans up RL references to a production that is being excised. Productions without
     * RL rule info need no clean-up.
     *
     * @param production the production being removed
     */
    public void exciseProduction(Production production) {
        if (production.rlRuleInfo == null) {
            return;
        }
        rl_remove_refs_for_prod(production);
    }

    // Static state is assigned here rather than at the declarations (decompiler layout).
    // The three assignments are independent of one another.
    static {
        logger = LoggerFactory.getLogger(ReinforcementLearning.class);
        // Every production generated from a template reports "*RL*" as its source file.
        NEW_PRODUCTION_SOURCE = DefaultSourceLocation.newBuilder().file("*RL*").build();
        // True when assertions are disabled for this class.
        $assertionsDisabled = !ReinforcementLearning.class.desiredAssertionStatus();
    }
}
