-
Notifications
You must be signed in to change notification settings - Fork 0
/
policy_evaluation.py
62 lines (48 loc) · 1.43 KB
/
policy_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import World
import threading
import time
def run(horizon=20, gamma=0.9, noise=0.2):
    """Iteratively evaluate the module-level ``policy`` and render the result.

    Performs ``horizon`` sweeps over every state in ``World.neighbour_states``.
    Each sweep builds a fresh value table from the previous one (``World.V``),
    publishes it back to ``World.V`` and calls ``World.render_cell_value()``.

    For a non-terminal state the new value is the expected one-step return:
    the policy's action is taken with probability ``1 - noise`` and the
    remaining ``noise`` is split evenly over the other available actions.
    States listed in ``World.end_states`` simply keep their reward as value.

    NOTE: this function reads the global ``policy`` mapping, which this file
    only defines when executed as a script; it must exist before ``run`` is
    called.

    Args:
        horizon: Number of evaluation sweeps to perform.
        gamma: Discount factor applied to the successor state's value.
        noise: Total probability of taking an action other than the one
            prescribed by the policy.
    """
    World.render_cell_value()
    for _ in range(horizon):
        # Brief pause between sweeps (presumably so each rendered sweep is
        # visible in the UI -- confirm against World).
        time.sleep(.1)
        V = {}
        # Iterate over a snapshot of the states, as the original code did.
        for state in list(World.neighbour_states):
            reward = World.R[state]
            V[state] = reward
            if state in World.end_states:
                continue

            all_actions = World.available_actions[state]
            action = policy[state]
            # Alternatives that the noise can divert the agent to.
            others = [candidate for candidate in all_actions if candidate != action]
            if others:
                high_prob = 1 - noise
                # Dividing by the actual number of alternatives keeps the
                # probabilities summing to 1 and avoids dividing by
                # len(all_actions) - 1, which is zero for a single action.
                low_prob = noise / len(others)
            else:
                # No alternative exists, so the chosen action is certain.
                high_prob = 1.0
                low_prob = 0.0

            state_val = 0
            state_val += high_prob * (reward + gamma * World.V[World.next_state(state, action)])
            # Add the contribution of every action reached through noise.
            for other in others:
                state_val += low_prob * (reward + gamma * World.V[World.next_state(state, other)])
            V[state] = state_val
        # Publish the whole sweep at once so every state above was computed
        # from the previous sweep's values.
        World.V = V
        World.render_cell_value()
if __name__ == '__main__':
    # Policy under evaluation: one action code per grid cell it covers.
    # run() looks this mapping up as a module-level global, so the name
    # must stay ``policy``. (Codes presumably mean right/up/left -- confirm
    # against World.)
    policy = {
        (0, 0): 'r', (0, 1): 'u', (0, 2): 'u',
        (1, 0): 'r', (1, 2): 'r',
        (2, 0): 'r', (2, 1): 'u', (2, 2): 'u',
        (3, 2): 'l',
    }

    # Prepare the World display before the evaluation starts.
    World.grid_value_mode()
    World.create_arrow()

    # Run the evaluation on a daemon thread so it does not keep the process
    # alive on its own, then hand the main thread to World.start_game().
    worker = threading.Thread(target=run, daemon=True)
    worker.start()
    World.start_game()