maze = Maze()
π = create_policy_s(maze)

# Value Iteration
#
# Repeatedly sweeps every non-terminal state and replaces its value with the
# best one-step look-ahead return over the actions available in that state:
#
#     V(s) <- max_a [ r(s, a) + γ * V(s') ]
#
# Sweeps stop once the largest change made to any state value during a full
# sweep (Δ) drops below the threshold Θ.
#
# NOTE(review): γ is not defined in this section; it is assumed to be the
# discount factor defined elsewhere in the file — confirm.
# NOTE(review): each (state, action) pair is treated as having the single
# outcome returned by maze.move(); confirm the maze is deterministic.
Θ = 0.00000000001
V = create_value_funtion(maze)
iterations_number = 0

while True:
    iterations_number += 1
    Δ = 0  # largest value change observed during this sweep

    for state in maze.all_states():
        if maze.is_terminal(state):
            continue

        actions = maze._actions[state]
        if not actions:
            # Nothing to back up from; leave the current estimate untouched.
            continue

        v = V[state]  # value before the backup, used to measure the change

        # The backup is a max over actions. It must not be scaled by a policy
        # probability, and it must be allowed to lower V[state] as well as
        # raise it, so the old value is not a candidate for the max.
        best = float("-inf")
        for action in actions:
            # move() acts on the maze's current state, so reset it first.
            maze.set_state(state)
            r = maze.move(action)
            q = r + γ * V[maze.current_state()]
            if q > best:
                best = q

        V[state] = best
        Δ = max(Δ, abs(v - V[state]))

    if Δ < Θ:
        print("Δ:", Δ)
        # Converged: without this exit the `while True` loop never terminates.
        break