# Assumed module-level imports for this fragment (not shown here):
# GridWorld, gridAgent, plotgrid, plus random, time, and os from the
# standard library.

def __init__(self, randomSpwan=False):
    start = {"grid": 1, "x": 2, "y": 2}
    goal = {"grid": 3, "x": 4, "y": 0}
    self.gridWorld = GridWorld.GridWorld(start=start, goal=goal)
    # Note: both branches currently construct the same Agent, so the
    # randomSpwan flag has no effect yet, and `agent` is not stored.
    if randomSpwan:
        agent = gridAgent.Agent(self.gridWorld, None)
    else:
        agent = gridAgent.Agent(self.gridWorld, None)
    self.gridWorld.pieceItTogether()
    #self.plotgrid = plotgrid.plotReward("DunRun Reward Plot")
    self.episodeList = plotgrid.plotReward("Episode Graph")
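# A minimal sketch of how a random spawn could be drawn when randomSpwan is
# True, reusing the same (x, y, grid) draw that run() uses below for
# agentArray (x and y in 0..4, grid index 1..len(arrayOfGrids)). The helper
# name `random_spawn` is illustrative only and not part of the project.
def random_spawn(grid_world):
    # equivalent to random.randint(0, len(arrayOfGrids) - 1) + 1
    return [random.randint(0, 4),
            random.randint(0, 4),
            random.randint(1, len(grid_world.arrayOfGrids))]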
def run(self, iterations, episodes):
    totalSuccess = 0
    #agentArray = [[0,3,3], [0,2,3], [0,1,3], [1,4,3], [2,4,3],
    #              [random.randint(0,4), random.randint(0,4), 3],
    #              [random.randint(0,4), random.randint(0,4), 3],
    #              [random.randint(0,4), random.randint(0,4), 3],
    #              [random.randint(0,4), random.randint(0,4), 3]]
    #for i in range(episodes - len(agentArray) + 1):
    #    agentArray.append([random.randint(0,4), random.randint(0,4),
    #                       random.randint(0, len(self.gridWorld.arrayOfGrids) - 1) + 1])
    for iter in range(1, iterations + 1):
        agent = gridAgent.Agent(2, 2, 1, self.policy)
        agent.playerStateSetUp(self.gridWorld.understandingState())
        for ep in range(1, episodes + 1):
            steps = 0  # step counter for the current episode
            while True:
                print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
                #print(self.policy.epsilon)
                #self.gridWorld.printOut(agent.agentState(), True)
                agent.results(self.gridWorld.agentMove(agent.agentState(), agent.move()))
                #if self.plotter: self.plotter.LogResults(agent.reward)
                if self.sarsaLearning or self.qLearning:
                    agent.sarsaUpdate()
                steps += 1
                print(self.policy.epsilon)
                if debug:  # assumes a module-level debug flag
                    print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
                # Check whether the goal state has been reached
                if self.gridWorld.finished:
                    totalSuccess += 1
                if self.gridWorld.finished or steps > 5000:
                    # Reset goal state
                    self.gridWorld.finished = False
                    if self.plotter:
                        self.plotter.LogResults(steps, ep)
                    break
            # At end of episode, update policy and hand it to a fresh agent
            if self.mcLearning:
                agent.mcUpdate()
            if self.plotter:
                self.plotter.plot()
            '''
            if ep % 75 == 0:
                agent.policy.epsilon -= 0.05
                if agent.policy.epsilon < 0:
                    agent.policy.epsilon = 0
            '''
            self.policy = agent.policyRetrieval()
            agent = gridAgent.Agent(2, 2, 1, self.policy)
            #agent = gridAgent.Agent(agentArray[ep][0], agentArray[ep][1], agentArray[ep][2], self.policy)
    print("The total # of successful runs: {}".format(totalSuccess))
    self.policy.printOut()
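# A minimal sketch of the epsilon-decay schedule that is commented out in
# run() above (subtract 0.05 every 75 episodes, clamped at 0). `policy`
# stands for any object with an `epsilon` attribute; the helper name
# `decay_epsilon` is illustrative only and not part of the project.
def decay_epsilon(policy, ep, step=0.05, interval=75):
    if ep % interval == 0:
        policy.epsilon = max(0.0, policy.epsilon - step)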
# Note: if this second run() lives in the same class as the one above, it
# overrides the earlier definition.
def run(self, iterations, episodes, steps, printInfo=False):
    totalSuccess = 0
    agentArray = []
    for i in range(episodes - len(agentArray) + 1):
        agentArray.append([random.randint(0, 4), random.randint(0, 4),
                           random.randint(0, len(self.gridWorld.arrayOfGrids) - 1) + 1])
    for iter in range(1, iterations + 1):
        iterationStart = time.time()
        writeUp = ""
        agent = gridAgent.Agent(2, 3, 3, self.policy)
        agent.playerStateSetUp(self.gridWorld.understandingState())
        for ep in range(1, episodes + 1):
            episodeStart = time.time()
            episodeAverageTime = 0
            while True:
                if printInfo:
                    print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
                    self.gridWorld.printOut(agent.agentState(), True)
                agent.results(self.gridWorld.agentMove(agent.agentState(), agent.move()))
                if self.sarsaLearning or self.qLearning:
                    agent.sarsaUpdate()
                if debug:  # assumes a module-level debug flag
                    print("Iter: {}, Ep: {}, {}".format(iter, ep, agent.playerStatus()))
                # Check whether the goal state has been reached
                if self.gridWorld.finished:
                    totalSuccess += 1
                if self.gridWorld.finished or agent.moves > steps:
                    # Reset goal state
                    self.gridWorld.finished = False
                    break
            # At end of episode, update policy and hand it to a fresh agent
            episodeEnd = time.time()
            # Incremental running mean of episode duration: avg += (1/n) * (sample - avg)
            episodeAverageTime = episodeAverageTime + 1 / ep * ((episodeEnd - episodeStart) - episodeAverageTime)
            if self.mcLearning:
                agent.mcUpdate()
            self.policy = agent.policyRetrieval()
            writeUp += "Episode {} ".format(ep) + agent.agentInformation() + " Time Taken: {} \n".format(episodeEnd - episodeStart)
            #agent = gridAgent.Agent(1, 2, 3, self.policy)
            agent = gridAgent.Agent(agentArray[ep][0], agentArray[ep][1], agentArray[ep][2], self.policy)
    iterationEnd = time.time()
    print("Last Iterations Time is: {} \nAverage Episode time is: {} with a total # of successful runs: {}".format(iterationEnd - iterationStart, episodeAverageTime, totalSuccess))
    self.policy.printOut()
    f = open(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                          "Algorithm {} Epsilon={} Alpha={} Gamma={}.txt".format(
                              self.policy.name, self.policy.epsilon, self.policy.alpha, self.policy.gamma)), "w")
    f.write(writeUp)
    f.close()
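# The episode timer above uses an incremental (running) mean,
#     avg_n = avg_{n-1} + (1/n) * (x_n - avg_{n-1}),
# which avoids storing every sample. A minimal standalone sketch of that
# same update for reference; the function name `running_mean` is
# illustrative only and not part of the project.
def running_mean(samples):
    avg = 0.0
    for n, x in enumerate(samples, start=1):
        avg += (x - avg) / n  # same update as episodeAverageTime above
    return avg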