Пример #1
0
def improve_policy(optimal_policy, state_value, params):
    """Run one greedy improvement sweep over every state.

    For each of the ``(params['total_cars'] + 1) ** 2`` states, every action
    in ``-5 .. 5`` is scored with ``update_value``.  Whenever a score is
    strictly greater than the value currently stored for that state, both
    ``state_value`` and ``optimal_policy`` are overwritten in place for that
    state, so later candidates are compared against the raised value.

    Args:
        optimal_policy: 2-D array of actions, indexed like ``state_value``;
            modified in place.
        state_value: 2-D array of state values; modified in place.
        params: mapping providing ``'total_cars'``; it is also forwarded
            untouched to ``update_value``.

    Returns:
        ``(optimal_policy, policy_stable)`` where ``policy_stable`` is True
        only if no state ended the sweep with a different action.
    """
    state_count = (params.get('total_cars') + 1) ** 2
    grid_shape = np.shape(state_value)
    # Candidate actions: every integer from -5 to 5 inclusive.
    candidate_moves = np.array(range(-5, 6, 1))

    any_change = False
    for flat_idx in range(state_count):
        # NOTE(review): ut_ind2sub.main presumably maps a flat index to
        # (first, second) subscripts of the value array -- confirm.
        row, col = ut_ind2sub.main(grid_shape, [flat_idx])
        action_before = optimal_policy[row, col]

        for move in candidate_moves:
            candidate_val = update_value(move, state_value, (row, col), params)
            # Strict comparison: ties keep the action already in place.
            if candidate_val > state_value[row, col]:
                state_value[row, col] = candidate_val
                optimal_policy[row, col] = move

        if action_before != optimal_policy[row, col]:
            any_change = True

    return optimal_policy, not any_change
Пример #2
0
def evaluate_policy(optimal_policy, state_value, params):
    """Sweep the state values in place until they stop changing.

    Each pass visits all ``(params['total_cars'] + 1) ** 2`` states in a
    freshly shuffled order and replaces every state's value with what
    ``update_value`` returns for the action stored in ``optimal_policy``.
    Passes repeat until the largest absolute change seen in a pass is no
    longer greater than ``params['theta']``.

    Args:
        optimal_policy: 2-D array of actions, indexed like ``state_value``.
        state_value: 2-D array of state values; modified in place.
        params: mapping providing ``'theta'`` and ``'total_cars'``; it is
            also forwarded untouched to ``update_value``.

    Returns:
        ``(state_value, n_passes)``: the updated value array and the number
        of full passes performed.
    """
    theta = params.get('theta')
    n_states = (params.get('total_cars') + 1) ** 2
    grid_shape = np.shape(state_value)

    n_passes = 0
    delta_v = float('Inf')
    while delta_v > theta:
        delta_v = 0
        # Rebuild the index list from identity order before shuffling so a
        # seeded RNG produces the same visiting order as it always has.
        visit_order = [[i] for i in range(n_states)]
        shuffle(visit_order)
        for flat_idx in visit_order:
            # NOTE(review): ut_ind2sub.main presumably maps the flat index
            # to subscripts of the value array -- confirm.
            r, c = ut_ind2sub.main(grid_shape, flat_idx)
            old_v = state_value[r, c]
            new_v = update_value(optimal_policy[r, c], state_value, (r, c), params)
            state_value[r, c] = new_v
            # Track the largest change seen in this pass.
            delta_v = np.maximum(delta_v, np.abs(old_v - new_v))
        n_passes += 1
    return state_value, n_passes
Пример #3
0
def main(matrix):
    """Sum the entries of ``matrix`` along each of its diagonals.

    For each of ``2 * rows - 1`` diagonals a run of linear indices spaced
    ``rows + 1`` apart is built, converted to subscripts with
    ``ut_ind2sub.main`` and the addressed entries are summed.

    NOTE(review): the index arithmetic mixes ``shape[0]`` and ``shape[1]``,
    so it looks like a square 2-D input is assumed -- confirm.  The order in
    which diagonals appear in the result depends on the linear-index
    convention of ``ut_ind2sub.main``, which is not visible here.

    Args:
        matrix: 2-D array-like, indexable as ``matrix[i][j]``.

    Returns:
        Array of shape ``(1, 2 * rows - 1)`` holding one sum per diagonal.
    """
    shape = np.shape(matrix)
    n_rows = shape[0]
    diag_count = 2 * n_rows - 1
    totals = np.zeros([1, diag_count])

    # Linear index of the first element on each diagonal.
    first_index = list(range(n_rows - 1, -1, -1))
    first_index += list(range(n_rows, np.prod(shape) - 1, shape[1]))
    # Position of the last stride used by each diagonal (length minus one):
    # grows 0 .. rows-1, then shrinks back to 0.
    last_step = list(range(n_rows)) + list(range(n_rows - 2, -1, -1))
    # Offsets between consecutive elements of one diagonal.
    strides = np.arange(n_rows) * (n_rows + 1)

    for diag in range(diag_count):
        linear = first_index[diag] + strides[:last_step[diag] + 1]
        rows, cols = ut_ind2sub.main(shape, linear)
        picked = [matrix[rows[k]][cols[k]] for k in range(len(rows))]
        totals[0, diag] = np.sum(picked)

    return totals