def inc_Q(s, a, alpha, inc):
    """Apply a trace-weighted increment to ``Q[s][a]`` and push it to the world.

    The amount added is ``alpha * inc * E[s][a]``. After the update, the new
    value is handed to ``World.set_cell_score`` for the same ``(s, a)`` pair.

    Args:
        s: Outer key into the module-level ``Q`` and ``E`` tables.
        a: Inner key (per-``s`` entry) into ``Q[s]`` and ``E[s]``.
        alpha: Multiplicative factor on the increment
            (presumably the learning rate -- confirm against callers).
        inc: Multiplicative factor on the increment
            (presumably the TD error -- confirm against callers).
    """
    action_values = Q[s]
    # Scale the raw increment by the matching entry of E before adding it.
    action_values[a] += alpha * inc * E[s][a]
    World.set_cell_score(s, a, action_values[a])
actions = World.actions states = [] Q = {} E = {} for i in range(World.x): for j in range(World.y): states.append((i, j)) for state in states: temp = {} temp_e = {} for action in actions: temp[action] = 0.0 # Set to 0.1 if following greedy policy temp_e[action] = 0.0 World.set_cell_score(state, action, temp[action]) Q[state] = temp E[state] = temp_e for (i, j, c, w) in World.specials: for action in actions: Q[(i, j)][action] = w World.set_cell_score((i, j), action, w) def do_action(action): s = World.player r = -World.score if action == actions[0]: World.try_move(0, -1) elif action == actions[1]: