示例#1
0
 def test_probs(self):
     """Check that ``probs`` yields a normalized distribution for every state.

     For each state produced by ``states()``, the values of ``probs(state)``
     must sum to 1 within a tolerance of 1e-10, and every key of the result
     must have an element sum of at most ``H``.
     """
     for state in states():
         # Call form of print: valid syntax on both Python 2 and Python 3.
         print(state)
         p = probs(state)
         s = sum(p.values())
         # Compare with abs() so that a total below 1 fails just like a
         # total above 1; a signed difference would accept any small total.
         assert abs(s - 1) <= 1e-10, s
         assert all(sum(v) <= H for v in p)
示例#2
0
 def test_probs(self):
     """Verify that ``probs(state)`` is a proper distribution for all states.

     Iterates over ``states()`` and asserts, for each state, that the
     values returned by ``probs`` total 1 (within 1e-10) and that the
     element sum of every key does not exceed ``H``.
     """
     for state in states():
         # print(...) with one argument behaves the same on Python 2 and 3.
         print(state)
         p = probs(state)
         s = sum(p.values())
         # Two-sided tolerance check: without abs(), totals smaller than 1
         # (including 0) would satisfy the assertion.
         assert abs(s - 1) <= 1e-10, s
         assert all(sum(v) <= H for v in p)
示例#3
0
def value(s, r, V, gamma=0.9):
    """Return the expected value of state ``s``.

    Each entry of ``probs(s).items()`` is treated as a
    ``(successor, probability)`` pair. The result is the
    probability-weighted sum of ``r + gamma * V[successor]`` over all
    such pairs.

    s     -- state whose value is computed (passed to ``probs``)
    r     -- reward added to every successor's discounted value
    V     -- value function, indexed by successor state
    gamma -- discount factor applied to ``V[successor]``
    """
    expected = 0
    for next_state, prob in probs(s).items():
        # Reward plus the discounted value of the successor state.
        backup = r + gamma * V[next_state]
        expected += prob * backup
    return expected
示例#4
0
def value(s, r, V, gamma=0.9):
    """Compute the expected value of state ``s``.

    Accumulates ``weight * (r + gamma * V[successor])`` for every
    ``(successor, weight)`` pair obtained from ``probs(s).items()``,
    where ``r`` is the reward, ``V`` the value function indexed by
    state, and ``gamma`` the discount factor.
    """
    total = 0
    # Keep an explicit running sum so the accumulation order is preserved.
    for successor, weight in probs(s).items():
        total += weight * (r + gamma * V[successor])
    return total