Example #1
    def __init__(self, randomSpawn=False):
        start = {"grid": 1, "x": 2, "y": 2}
        goal = {"grid": 3, "x": 4, "y": 0}
        self.gridWorld = GridWorld.GridWorld(start=start, goal=goal)
        if randomSpawn:
            # NOTE: both branches currently build the same agent; the random
            # spawn path is not implemented yet (see the sketch at the end of
            # this example).
            agent = gridAgent.Agent(self.gridWorld, None)
        else:
            agent = gridAgent.Agent(self.gridWorld, None)

        self.gridWorld.pieceItTogether()
        #self.plotgrid = plotgrid.plotReward("DunRun Reward Plot")
        self.episodeList = plotgrid.plotReward("Episode Graph")
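
Both branches of the `randomSpawn` check above construct an identical agent, so the flag currently has no effect. A minimal sketch of what the random-spawn path could look like, assuming a hypothetical `randomStart` helper that draws a start cell the same way Example #3 draws its `agentArray` spawns (cells 0-4, grids numbered from 1):

import random

def randomStart(numGrids, size=5):
    # Hypothetical helper (not in the original code): pick a random cell
    # on a random grid, mirroring the random.randint(0, 4) spawns built
    # for agentArray in Example #3.
    return {"grid": random.randint(1, numGrids),
            "x": random.randint(0, size - 1),
            "y": random.randint(0, size - 1)}

# Usage sketch: start = randomStart(len(gridWorld.arrayOfGrids))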
Example #2
	def run(self, iterations, episodes):
		totalSuccess = 0
		
		#agentArray= [[0,3,3],[0,2,3], [0,1,3], [1,4,3],[2,4,3],[random.randint(0,4), random.randint(0,4), 3],[random.randint(0,4), random.randint(0,4), 3],[random.randint(0,4), random.randint(0,4), 3],[random.randint(0,4), random.randint(0,4), 3]]
		
		#for i in range(episodes - len(agentArray) +1): agentArray.append([random.randint(0,4),random.randint(0,4),random.randint(0,len(self.gridWorld.arrayOfGrids)-1)+1])
		
		for iteration in range(1, iterations+1):
			
			agent = gridAgent.Agent(2, 2, 1, self.policy)
			agent.playerStateSetUp(self.gridWorld.understandingState())
			for ep in range(1,episodes+1):
				steps = 0  # number of moves taken this episode
				while True:
					print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
					#print(self.policy.epsilon)
					#self.gridWorld.printOut(agent.agentState(), True)
					agent.results(self.gridWorld.agentMove(agent.agentState(), agent.move()))
					#if self.plotter: self.plotter.LogResults(agent.reward)
					if self.sarsaLearning or self.qLearning: agent.sarsaUpdate()  # per-step TD update (SARSA and Q-learning share this path)
					steps += 1
					if debug:  # step-by-step tracing; assumes a module-level `debug` flag
						print(self.policy.epsilon)
						print("Iter: {}, Ep: {}, {}".format(iteration, ep, agent.playerStatus()))
					# Check whether the goal state has been reached
					if self.gridWorld.finished: totalSuccess += 1
					if self.gridWorld.finished or steps > 5000:
						# Reset the finished flag for the next episode
						self.gridWorld.finished = False
						if self.plotter: self.plotter.LogResults(steps, ep)
						break
				# End of episode: update the policy and retrieve it from the agent to seed the next agent
				if self.mcLearning: agent.mcUpdate()
				if self.plotter: self.plotter.plot()
				'''
				if ep % 75 == 0:
						agent.policy.epsilon -= 0.05
						if agent.policy.epsilon < 0:
							agent.policy.epsilon = 0
				'''
				self.policy = agent.policyRetrieval()
				
				agent = gridAgent.Agent(2, 2, 1, self.policy)
				#agent = gridAgent.Agent(agentArray[ep][0],agentArray[ep][1],agentArray[ep][2] ,self.policy)
		print("The total # of successful runs: {}".format(totalSuccess))
		self.policy.printOut()
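
The triple-quoted block above preserves a disabled epsilon-decay schedule: subtract 0.05 from epsilon every 75 episodes, clamped at zero. A minimal standalone sketch of the same schedule as a pure function (the name `decayedEpsilon` and the `initial` parameter are illustrative, not from the original code):

def decayedEpsilon(initial, ep, step=0.05, every=75):
    # Epsilon after `ep` episodes under the commented-out schedule:
    # one `step` of decay per `every` episodes, never below zero.
    return max(0.0, initial - step * (ep // every))

# e.g. with initial=0.5: ep=74 -> 0.5, ep=75 -> 0.45, ep=750 -> 0.0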
Example #3
	def run(self, iterations, episodes, steps, printInfo=False):
		totalSuccess = 0
		
		agentArray = []
		# Pre-draw a random (x, y, grid) spawn per episode; index 0 is unused
		# because the episode loop below starts at ep = 1.
		for i in range(episodes + 1):
			x, y = random.randint(0, 4), random.randint(0, 4)
			grid = random.randint(1, len(self.gridWorld.arrayOfGrids))
			agentArray.append([x, y, grid])
		
		for iteration in range(1, iterations+1):
			
			iterationStart = time.time()
			writeUp = ""
			agent = gridAgent.Agent(2, 3, 3, self.policy)
			agent.playerStateSetUp(self.gridWorld.understandingState())
			episodeAverageTime = 0  # running mean of episode wall time; must persist across episodes
			for ep in range(1, episodes+1):
				episodeStart = time.time()
				while True:
					if printInfo:
						print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
						self.gridWorld.printOut(agent.agentState(), True)
					agent.results(self.gridWorld.agentMove(agent.agentState(), agent.move()))
					if self.sarsaLearning or self.qLearning: agent.sarsaUpdate()  # per-step TD update (SARSA and Q-learning share this path)
					print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus())) if debug else False#debug
					# Check whether the goal state has been reached
					if self.gridWorld.finished: totalSuccess += 1
					if self.gridWorld.finished or agent.moves > steps:
						# Reset the finished flag for the next episode
						self.gridWorld.finished = False
						break
				# End of episode: update the policy and retrieve it from the agent to seed the next agent
				episodeEnd = time.time()
				# Incremental mean: avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n
				episodeAverageTime = episodeAverageTime + 1/ep * ((episodeEnd - episodeStart) - episodeAverageTime)
				if self.mcLearning: agent.mcUpdate()
				
				self.policy = agent.policyRetrieval()
				writeUp += "Episode {} ".format(ep) + agent.agentInformation() + " Time Taken: {} \n".format(episodeEnd - episodeStart)
				#agent = gridAgent.Agent(1,2,3,self.policy)
				agent = gridAgent.Agent(agentArray[ep][0], agentArray[ep][1], agentArray[ep][2], self.policy)
			iterationEnd = time.time()
			
		print("Last Iterations Time is: {} \nAverage Episode time is: {} with a total # of successful runs: {}".format(iterationEnd - iterationStart, episodeAverageTime, totalSuccess))
		self.policy.printOut()
		fileName = "Algorithm {} Epsilon={} Alpha={} Gamma={}.txt".format(self.policy.name, self.policy.epsilon, self.policy.alpha, self.policy.gamma)
		# os.path.join keeps the path portable; the original hard-coded a "\" separator.
		with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), fileName), "w") as f:
			f.write(writeUp)
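
The `episodeAverageTime` update in Example #3 is the standard incremental-mean recurrence avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n, which tracks a running average without storing every episode time. A self-contained check that it reproduces the batch mean (the durations below are made-up illustrative values):

durations = [0.12, 0.08, 0.15, 0.11]  # illustrative per-episode times

avg = 0.0
for n, x in enumerate(durations, start=1):
    avg = avg + (x - avg) / n  # same recurrence as in run() above

assert abs(avg - sum(durations) / len(durations)) < 1e-12
print(avg)  # 0.115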