def __init__(self, mcLearning = False, sarsaLearning = False, qLearning = False, randomSpwan=False, p1=0.8, p2=0.1, epsilon = 0.1, gamma = 0.9, alpha = 0.1):
    """Build the grid world and select a learning policy.

    Exactly one of mcLearning / sarsaLearning / qLearning is expected to be
    True; if several are set, the last one in (MC, SARSA, Q) order wins, and
    if none is set no ``self.policy`` attribute is created — both match the
    original behaviour.

    epsilon, gamma, alpha are forwarded to the chosen policy's constructor.
    NOTE(review): randomSpwan, p1 and p2 are accepted but never read here —
    presumably consumed elsewhere or vestigial; confirm against callers.
    """
    self.gridWorld = GridWorld.GridWorld({"grid": 3, "x": 4, "y": 0})
    self.gridWorld.pieceItTogether()
    self.mcLearning = mcLearning
    self.sarsaLearning = sarsaLearning
    self.qLearning = qLearning
    # Epsilon, Gamma, Alpha — pick the policy via a flag/factory table;
    # iterating in order keeps the original "last enabled flag wins" rule.
    candidates = (
        (mcLearning, lambda: mc.monteCarlo(epsilon, gamma, alpha)),
        (sarsaLearning, lambda: sarsa.sarsaLearning(epsilon, gamma, alpha)),
        (qLearning, lambda: q.qlearning(epsilon, gamma, alpha)),
    )
    for enabled, make_policy in candidates:
        if enabled:
            self.policy = make_policy()
def __init__(self, gridWorld, policy):
    """Place an agent at the grid world's start point and register it.

    Direction codes: 1 = North, 2 = East, 3 = South, 4 = West.

    NOTE(review): the ``policy`` argument is accepted but never used — the
    policy is hard-coded to ``mc.monteCarlo(...)`` below; confirm whether the
    injected policy was meant to be assigned instead.
    """
    # Spawn position comes from the world's configured start point.
    start = gridWorld.startPoint
    self.playerX = start['x']
    self.playerY = start['y']
    self.currentGrid = start['grid']
    self.reward = 0
    # NOTE(review): stateGrid() is called twice on purpose? If it returns a
    # fresh object each time, the two arguments are distinct grids — kept
    # as two calls to preserve that.
    self.policy = mc.monteCarlo(gridWorld.stateGrid(), gridWorld.stateGrid())
    # Compass constants used when encoding moves.
    self.North, self.East, self.South, self.West = 1, 2, 3, 4
    self.Done = False
    self.moveCount = 0
    self.stateActionArray = []
    gridWorld.insertAgent(self)
def __init__(self, mcLearning = False, sarsaLearning = False, qLearning = False, randomSpwan=False, plotresults = True):
    """Build the grid world, pick a learning policy, and set up plotting.

    If several learning flags are True, the last one in (MC, SARSA, Q) order
    wins; if none is set, ``self.policy`` is never created and
    ``self.plotter`` stays the raw ``plotresults`` value — both match the
    original behaviour.

    NOTE(review): randomSpwan is accepted but never read here — presumably
    consumed elsewhere or vestigial; confirm against callers.
    """
    self.gridWorld = GridWorld.GridWorld({"grid": 3, "x": 4, "y": 0})
    self.gridWorld.pieceItTogether()
    self.mcLearning = mcLearning
    self.sarsaLearning = sarsaLearning
    self.qLearning = qLearning
    # Starts as the bool flag; replaced with a plotReward instance once a
    # policy is chosen and plotting is enabled (as in the original code).
    self.plotter = plotresults
    # (flag, policy factory, plot title template) — iterated in order so the
    # last enabled flag wins, exactly like the original if-chain.
    candidates = (
        (mcLearning, mc.monteCarlo,
         "Monte Carlo: alpha:{} , gamma: {}, epsilon: {}"),
        (sarsaLearning, sarsa.sarsaLearning,
         "SARSA: alpha:{}, gamma: {}, epsilon: {} Drop"),
        (qLearning, q.qlearning,
         "Q-Learning alpha:{}, gamma: {}, epsilon: {}"),
    )
    for enabled, make_policy, title in candidates:
        if enabled:
            self.policy = make_policy()
            if plotresults:
                self.plotter = plotgrid.plotReward(
                    title.format(self.policy.alpha, self.policy.gamma, self.policy.epsilon))