def test_probs(self):
    """Check that probs() returns a normalized distribution for every state.

    For each state produced by states():
      * the values of probs(state) must sum to 1 within 1e-10, and
      * the elements of every key of probs(state) must sum to at most H.
    """
    for state in states():
        # Single-argument call form prints the same on Python 2 and 3.
        print(state)
        p = probs(state)
        total = sum(p.values())
        # Two-sided tolerance: a total that is too small must fail as well,
        # not only one that overshoots 1.
        assert abs(total - 1) <= 1e-10, total
        # Keys are iterables of numbers; presumably H is the capacity
        # bound on a state -- confirm against where H is defined.
        assert all(sum(v) <= H for v in p)
def test_probs(self):
    """Verify that probs(state) is a proper distribution for each state.

    Iterates over states() and asserts, per state, that the transition
    values sum to 1 (absolute error at most 1e-10) and that the elements
    of each key sum to no more than H.
    """
    for state in states():
        # Parenthesized form works under both Python 2 and Python 3.
        print(state)
        p = probs(state)
        total = sum(p.values())
        # Use the absolute deviation so that an undershoot (total < 1)
        # is caught in addition to an overshoot.
        assert abs(total - 1) <= 1e-10, total
        # NOTE(review): H looks like an upper bound on the sum of a key's
        # components -- verify against its definition.
        assert all(sum(v) <= H for v in p)
def value(s, r, V, gamma=0.9):
    """
    Calculate the expected value of state s given reward r, value
    function V and discount factor gamma.

    Accumulates p * (r + gamma * V[sp]) over every (sp, p) pair in
    probs(s).items() and returns the total. V is indexed by the keys of
    probs(s). Returns 0 when probs(s) is empty.
    """
    total = 0
    transitions = probs(s)
    for next_state, prob in transitions.items():
        # Reward plus the discounted value of the successor state.
        backup = r + gamma * V[next_state]
        total += prob * backup
    return total
def value(s, r, V, gamma=0.9):
    """
    Calculate the expected value of state s given reward r, value
    function V and discount factor gamma.

    For each successor/weight pair yielded by probs(s).items(), adds
    weight * (r + gamma * V[successor]) to a running total that starts
    at 0, then returns that total.
    """
    expected = 0
    for successor, weight in probs(s).items():
        # Discount the successor's value before adding the reward.
        discounted = gamma * V[successor]
        expected += weight * (r + discounted)
    return expected