示例#1
0
"""

import numpy as np
import matplotlib.pyplot as plt
from gridWorld import standardGrid, negativeGrid
from iterativePolicyEvaluation import printPolicy, printValues
from monteCarloControl import argMax
from td0Prediction import randomAction


# Hyper-parameters for this script. Neither GAMMA nor ALPHA is used in the
# visible portion of the file; the meanings below are inferred from the names.
# presumably the discount factor applied to future rewards -- TODO confirm
GAMMA = 0.9
# presumably the learning rate (step size) of the value update -- TODO confirm
ALPHA = 0.1
# Action labels; each one becomes a key of Q[s] in the setup loop below.
# NOTE: this is a set, so the order in which actions are iterated is not defined.
ACTIONS = {'U','D','L','R'}

# Script entry point: build the grid, show its rewards, and create the
# bookkeeping tables. Only the setup is visible here.
if __name__ == '__main__':
    # Grid built by gridWorld.negativeGrid with stepCost=-0.1
    # (presumably a per-step penalty -- confirm against gridWorld).
    grid = negativeGrid(stepCost = -0.1)
    
    # Display the grid's reward table.
    print ("Rewards:")
    printValues(grid.rewards,grid)
    
    # Q[s][a]: nested dict holding one value per (state, action) pair.
    Q = {}
    
    states = grid.allStates()
    
    # Start every (state, action) entry at 0, for every state returned by
    # grid.allStates() and every label in ACTIONS.
    for s in states:
        Q[s] = {}
        for a in ACTIONS:
            Q[s][a] = 0
    
    
    # Starts empty; how it is filled is not visible in this portion of the
    # file (the name suggests per-update counters -- TODO confirm).
    updateCountsSarsa = {}
Created on Mon Jan  7 11:37:45 2019

@author: user
"""

import numpy as np
from gridWorld import standardGrid, negativeGrid
from iterativePolicyEvaluation import printValues, printPolicy

# Module-level constants for this script.
# NOTE: 10e-4 equals 1e-3 (0.001), not 1e-4. EPSILON is not used in the
# visible portion of the file; presumably a convergence threshold -- TODO confirm.
EPSILON = 10e-4
# Not used in the visible portion; presumably the discount factor -- TODO confirm.
GAMMA = 0.9
# Action labels; np.random.choice draws from this tuple to build the initial
# random policy in the main block below.
ACTIONS = ('U', 'D', 'L', 'R')

if __name__ == '__main__':

    grid = negativeGrid()

    policy = {}
    for s in grid.actions.keys():
        policy[s] = np.random.choice(ACTIONS)

    print("Rewards:")
    printValues(grid.rewards, grid)

    print("Initial Policy:")
    printPolicy(policy, grid)

    V = {}
    states = grid.allStates()

    for s in states: