Пример #1
0
def greedyQPolicy(Qs):
    """Build the greedy policy matrix for a table of Q-values.

    `Qs` is indexed first by state and then by action; the number of
    actions is taken from the length of the first row. The result is a
    (states x actions) array of action probabilities.

    For each state, every action index returned by `all_argmax` for that
    state's Q-values receives an equal share of the probability; all
    other entries remain zero.

    NOTE(review): the result is only deterministic for states where
    `all_argmax` returns a single index. Presumably the helper returns
    every index tied for the maximum -- confirm against its definition.
    """
    n_states = len(Qs)
    n_actions = len(Qs[0])
    policy = zeros((n_states, n_actions))
    for state in range(n_states):
        best = all_argmax(Qs[state])
        # Split the probability evenly among the selected actions.
        for action in best:
            policy[state, action] = 1. / len(best)
    return policy
Пример #2
0
def greedyQPolicy(Qs):
    """Return action probabilities of the greedy policy for given Q-values.

    `Qs[s]` holds the Q-values of state index `s`; the number of actions
    is read from the length of `Qs[0]`. The returned array has one row
    per state and one column per action.

    In each row, the columns whose indices are returned by `all_argmax`
    share the probability equally; every other column is left at zero.

    NOTE(review): `all_argmax` is defined outside this block; it is
    assumed to return all indices attaining the maximum, which makes
    the policy stochastic for tied states -- verify.
    """
    shape = (len(Qs), len(Qs[0]))
    probabilities = zeros(shape)
    for row in range(shape[0]):
        winners = all_argmax(Qs[row])
        for col in winners:
            # Each selected action gets an equal share of the row.
            probabilities[row, col] = 1. / len(winners)
    return probabilities
Пример #3
0
def greedyPolicy(Ts, R, discountFactor, V):
    """Derive the greedy policy (soft tie-breaking) from a value function
    and a full transition model.

    Parameters:
        Ts: sequence with one transition matrix per action; row `s` of
            each matrix is combined with the look-ahead values.
        R: rewards, added to the discounted values.
            (assumed to be broadcast-compatible with `V` -- TODO confirm)
        discountFactor: multiplier applied to `V`.
        V: value function, one entry per state.

    Returns a tuple `(policy, collapsed)`: `policy` is a
    (states x actions) array in which the action indices chosen by
    `all_argmax` for a state share the probability equally, and
    `collapsed` is the result of `collapsedTransitions(Ts, policy)`.
    """
    n_states = len(V)
    n_actions = len(Ts)
    # Discounted value plus reward, used to score each action.
    lookahead = V * discountFactor + R
    policy = zeros((n_states, n_actions))
    for state in range(n_states):
        # Score of each action: its transition row dotted with the look-ahead.
        scores = []
        for T in Ts:
            scores.append(dot(T[state, :], lookahead))
        best = all_argmax(scores)
        for action in best:
            policy[state, action] = 1. / len(best)
    return policy, collapsedTransitions(Ts, policy)
Пример #4
0
def greedyPolicy(Ts, R, discountFactor, V):
    """Compute the greedy policy, with soft tie-breaking, for a value
    function under a full transition model.

    `Ts` supplies one transition matrix per action, `V` one value per
    state, `R` the rewards added to the discounted values, and
    `discountFactor` the factor multiplying `V`.
    (NOTE(review): `R` and `V` are assumed to be arrays of compatible
    shape -- confirm with callers.)

    The function returns a pair: the (states x actions) probability
    array, where the actions selected by `all_argmax` for a state split
    the probability evenly, and whatever `collapsedTransitions` returns
    for `Ts` and that array.
    """
    state_count = len(V)
    action_count = len(Ts)
    backed_up = V * discountFactor + R
    probs = zeros((state_count, action_count))
    for s in range(state_count):
        # One score per action: row s of its matrix dotted with backed_up.
        action_scores = [dot(T[s, :], backed_up) for T in Ts]
        chosen = all_argmax(action_scores)
        for a in chosen:
            probs[s, a] = 1. / len(chosen)
    collapsed = collapsedTransitions(Ts, probs)
    return probs, collapsed