def get_agent(name):
    """Build and return a TpFinalPlayer trained against a random player.

    Trains for 3000 self-played games and prints the collected
    performance statistics before returning.

    Args:
        name: identifier handed to the TpFinalPlayer constructor.

    Returns:
        The trained TpFinalPlayer instance.
    """
    learner = TpFinalPlayer(name)
    opponent = RandomPlayer('PC')
    game = TpFinalGame([learner, opponent])
    print('Training with a random player, please wait...')
    # Redundant with the constructor argument above, but kept because the
    # game class's constructor behavior is not visible here — confirm.
    game.agents = [learner, opponent]
    per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerB'])
    for _ in range(3000):
        game.run()
    per.show_statistics()
    return learner
else: return -1 return 0 if __name__ == '__main__': a = TicTacToePlayer('X') b = RandomPlayer('O') c = HumanPlayer('O') game = TicTacToeGame([a, b]) print ('Training with a random player, please wait...') game.agents = [a, b] for i in range(3000): game.run() a.dump('qlearner_agent') d = TicTacToePlayer.load('qlearner_agent') d.play_with = 'O' game.agents = [a, d] per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerD']) for i in range(3000): game.run() per.show_statistics() game.agents = [a, c] print ('Do you like to play?') game.run() print game.state
self.last_agent = None random.shuffle(self.agents) self.run() def reward(self, state, agent): if self.is_completed(state): if self.last_agent is agent: return -1 else: return 1 return 0 def get_agent(name): res = Player(name) rand = RandomPlayer('random') env = TrainingEnvironment([res, rand]) for i in range(3000): env.play() return res if __name__ == '__main__': res = Player('name') rand = RandomPlayer('random') env = TrainingEnvironment([res, rand]) per = PerformanceCounter(env.agents, ['q', 'random']) for i in range(3000): env.play() per.show_statistics()
def is_completed(self, state): return state in self.rewards.keys() def reward(self, state, agent): return self.rewards.get(state, -0.08) class WumpusProblem(RLProblem): def actions(self, state): actions = ['up', 'down', 'left', 'rigth'] return actions if __name__ == '__main__': agent = TDQLearner(WumpusProblem(), temperature_function=make_exponential_temperature(1000, 0.01), discount_factor=0.8) game = WumpusEnvironment(agent) p = PerformanceCounter([agent], ['Q-learner Agent']) print 'Training...' for i in range(10000): game.run() p.show_statistics() game.run(viewer=WumpusViewer(game))
def is_completed(self, state): return state in self.rewards.keys() def make_reward(self, state, agent): return self.rewards.get(state, -0.08) class GridProblem(RLProblem): def actions(self, state): actions = ['up', 'down', 'left', 'rigth'] return actions if __name__ == '__main__': agent = TDQLearner(GridProblem(), temperature_function=make_exponential_temperature(1000, 0.005), discount_factor=0.9) game = GridGame(agent) p = PerformanceCounter([agent], ['Q-learner Agent']) for i in range(3000): game.run() p.show_statistics() game.run(viewer=Viewer())