def init_Q_Learn_if_needed():
    """Perform the one-time Q_Learn setup if it is still pending.

    Returns immediately when NEED_Q_LEARN_SETUP is false.  Otherwise it:

    1. passes the module-level ALL_STATES, ACTIONS, Q_from_QL,
       update_q_value, is_valid_goal_state and Terminal_state to
       Q_Learn.setup;
    2. makes ALL_STATES[0] the starting state;
    3. clears NEED_Q_LEARN_SETUP;
    4. pushes the learning parameters through update_qlearn_params();
    5. calls Vis.enable_QL_policy_item(True).
    """
    # Only the flag is rebound here; every other module-level name is merely
    # read, so it needs no ``global`` declaration.
    global NEED_Q_LEARN_SETUP

    if not NEED_Q_LEARN_SETUP:
        return

    Q_Learn.setup(
        ALL_STATES,
        ACTIONS,
        Q_from_QL,
        update_q_value,
        is_valid_goal_state,
        Terminal_state,
    )
    Q_Learn.set_starting_state(ALL_STATES[0])

    # Clear the flag *before* delegating: update_qlearn_params() repeats the
    # setup and starting-state calls whenever the flag is still set, so
    # leaving it True at this point made every first-time initialisation run
    # Q_Learn.setup twice.
    NEED_Q_LEARN_SETUP = False
    update_qlearn_params()

    Vis.enable_QL_policy_item(True)
def update_qlearn_params():
    """Send the current ALPHA, EPSILON and GAMMA values to Q_Learn.

    If NEED_Q_LEARN_SETUP is still set, Q_Learn.setup is run first (followed
    by selecting ALL_STATES[0] as the starting state) and the flag is
    cleared.  Q_Learn.set_learning_parameters is called on every invocation,
    whether or not setup was needed.
    """
    # The flag is the only module-level name this function rebinds.
    global NEED_Q_LEARN_SETUP

    if NEED_Q_LEARN_SETUP:
        Q_Learn.setup(
            ALL_STATES,
            ACTIONS,
            Q_from_QL,
            update_q_value,
            is_valid_goal_state,
            Terminal_state,
        )
        Q_Learn.set_starting_state(ALL_STATES[0])
        NEED_Q_LEARN_SETUP = False

    Q_Learn.set_learning_parameters(ALPHA, EPSILON, GAMMA)
def initialize_episode():
    """Reset the per-episode module state and restart Q_Learn from ALL_STATES[0].

    Sets LAST_REWARD to 0, Agent_state to ALL_STATES[0] and TERMINATED to
    False, then passes that same state to Q_Learn.set_starting_state.
    """
    global LAST_REWARD, TERMINATED, Agent_state

    LAST_REWARD = 0

    first_state = ALL_STATES[0]
    Agent_state = first_state

    TERMINATED = False

    # Q_Learn gets the same state object that was just stored in Agent_state.
    Q_Learn.set_starting_state(first_state)