def initializeQLearningObject(self, rlParams):
    """Build the Q-learning agent and the next-state holder for this object.

    Stores a new ``QLearning.QLearning`` instance on ``self.qLearn`` and a new
    ``State.State`` instance on ``self.nextState``.

    ``rlParams`` is accepted but not read in this method; the learner is
    configured with the literal values passed below.
    NOTE(review): the meaning of each positional literal is fixed by the
    ``QLearning.QLearning`` constructor, which is not visible here -- confirm
    against that signature before changing any of them.
    """
    self.qLearn = QLearning.QLearning(
        self.numFeatures,
        self.features,
        self.numActions,
        1024,
        0.50,
        32,
        2,
        0.95,
        0.20,
        0.95,
        self,
    )

    # this will hold the next state object for the bass2 layer
    self.nextState = State.State(self.numFeatures)
# Alternative grid layouts, kept for reference (the first one is partial):
#         [' ','#', 1,'#', 10],
#         ['S',' ',' ',' ',' '],
#         [-10,-10, -10, -10, -10]]
# grid = [[' ',' ',' ',+1],
#         ['#','#',' ','#'],
#         [' ','#',' ',' '],
#         [' ','#','#',' '],
#         ['S',' ',' ',' ']]

# Active layout: a single corridor containing 'S', with 1 at its left end and
# 10 at its right end, bordered above and below by -100 cells and '#' corners.
# NOTE(review): presumably 'S' is the start cell, '#' a wall, ' ' a free cell
# and numbers are rewards -- confirm against GridWorld.
grid = [
    ['#', -100, -100, -100, -100, -100, '#'],
    [1, 'S', ' ', ' ', ' ', ' ', 10],
    ['#', -100, -100, -100, -100, -100, '#'],
]

# NOTE(review): the meaning of 0.01 and -1.0 is defined by GridWorld -- confirm.
Grid = GridWorld(grid, 0.01, -1.0)

# --- Value iteration on the grid ---
# NOTE(review): 0.4 looks like the discount (it matches the Q-learning
# discount below) and 100 an iteration count -- confirm against the class.
M = MarkovDecisonProcess(0.4, Grid, 100)
M.ValueIteration()
values, actions, qvalues = M.getValues(), M.getActions(), M.getQValues()
for i in values.keys():
    print(f"{i} : {values[i]}, {actions[i]}")

# --- Q-learning on the same grid, reported in the same format ---
RL = QLearning(Grid, 50000, alpha=0.6, epsilon=0.2, discount=0.4)
RL.train()
values, actions, qvalues = RL.getValues(), RL.getActions(), RL.getQValues()
for i in values.keys():
    print(f"{i} : {values[i]}, {actions[i]}")