Exemplo n.º 1
0
	def __init__(self, mcLearning = False, sarsaLearning = False, qLearning = False, randomSpwan=False, p1=0.8, p2=0.1, epsilon = 0.1, gamma = 0.9, alpha = 0.1):
		"""Build the grid world and create a policy object for each enabled learning flag.

		mcLearning, sarsaLearning, qLearning: flags stored on the instance. Every
			truthy flag constructs its policy in that order, so when several are
			set the last one constructed is the one left in self.policy.
		epsilon, gamma, alpha: forwarded positionally, in that order, to the
			policy constructor.

		NOTE(review): randomSpwan, p1 and p2 are not read in the visible body.
		NOTE(review): self.policy is not assigned here when no flag is truthy.
		"""
		worldSpec = {"grid": 3, "x" : 4 , "y" : 0}
		self.gridWorld = GridWorld.GridWorld(worldSpec)
		self.gridWorld.pieceItTogether()

		self.mcLearning, self.sarsaLearning, self.qLearning = mcLearning, sarsaLearning, qLearning

		# Argument order expected by the policy constructors: epsilon, gamma, alpha
		hyperParams = (epsilon, gamma, alpha)
		if self.mcLearning:
			self.policy = mc.monteCarlo(*hyperParams)
		if self.sarsaLearning:
			self.policy = sarsa.sarsaLearning(*hyperParams)
		if self.qLearning:
			self.policy = q.qlearning(*hyperParams)
Exemplo n.º 2
0
 def __init__(self, gridWorld, policy):
     """Initialise the agent from the grid world's start point and hand it to the world.

     gridWorld: object providing startPoint (indexed by 'x', 'y', 'grid'),
         stateGrid() and insertAgent().
     policy: NOTE(review): accepted but never read here; self.policy is always
         built with mc.monteCarlo from two stateGrid() results.
     """
     # Direction codes: 1 = North, 2 = East, 3 = South, 4 = West
     self.North, self.East, self.South, self.West = 1, 2, 3, 4

     start = gridWorld.startPoint
     self.playerX = start['x']
     self.playerY = start['y']
     self.currentGrid = start['grid']

     # Counters and flags all start from their zero state.
     self.reward = 0
     self.moveCount = 0
     self.Done = False
     self.stateActionArray = []

     # stateGrid() is deliberately called twice so each argument comes from its own call.
     firstGrid = gridWorld.stateGrid()
     secondGrid = gridWorld.stateGrid()
     self.policy = mc.monteCarlo(firstGrid, secondGrid)

     # Passed to the world last, once every attribute above is set.
     gridWorld.insertAgent(self)
Exemplo n.º 3
0
	def __init__(self, mcLearning = False, sarsaLearning = False, qLearning = False, randomSpwan=False, plotresults = True):
		"""Build the grid world, create a policy per enabled flag and, optionally, a reward plotter.

		mcLearning, sarsaLearning, qLearning: flags stored on the instance. Every
			truthy flag constructs its policy (with no arguments) in that order,
			so a later flag replaces the policy and plotter of an earlier one.
		plotresults: stored in self.plotter as-is; when truthy and a flag is set,
			self.plotter is replaced by a plotgrid.plotReward built from a title
			containing the policy's alpha, gamma and epsilon.

		NOTE(review): randomSpwan is not read in the visible body.
		NOTE(review): self.policy is not assigned here when no flag is truthy.
		"""
		self.gridWorld = GridWorld.GridWorld({"grid": 3, "x" : 4 , "y" : 0})
		self.gridWorld.pieceItTogether()

		self.mcLearning, self.sarsaLearning, self.qLearning = mcLearning, sarsaLearning, qLearning
		# Holds the raw flag unless a branch below swaps in a plotReward object.
		self.plotter = plotresults

		if self.mcLearning:
			self.policy = mc.monteCarlo()
			if plotresults:
				learner = self.policy
				title = "Monte Carlo: alpha:{} , gamma: {}, epsilon: {}".format(learner.alpha, learner.gamma, learner.epsilon)
				self.plotter = plotgrid.plotReward(title)

		if self.sarsaLearning:
			self.policy = sarsa.sarsaLearning()
			if plotresults:
				learner = self.policy
				title = "SARSA: alpha:{}, gamma: {}, epsilon: {} Drop".format(learner.alpha, learner.gamma, learner.epsilon)
				self.plotter = plotgrid.plotReward(title)

		if self.qLearning:
			self.policy = q.qlearning()
			if plotresults:
				learner = self.policy
				title = "Q-Learning alpha:{}, gamma: {}, epsilon: {}".format(learner.alpha, learner.gamma, learner.epsilon)
				self.plotter = plotgrid.plotReward(title)