def run(self):
    """Pit DeepQLearningAgent(8, 12) against TabularQLearningAgent on WindJammers.

    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = "./logs/bastille/DeepQLearning_Vs_TabularQLearning/" + str(time())
    print(output_path)

    # Agents are built in the same order as the runner's positional arguments.
    first_player = DeepQLearningAgent(8, 12)
    second_player = TabularQLearningAgent()
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit TabularQLearningAgent against ReinforceClassicAgent(8, 12) on WindJammers.

    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = "./logs/bastille/TabularQLearning_ReinforceClassic/" + str(time())
    print(output_path)

    # Agents are built in the same order as the runner's positional arguments.
    first_player = TabularQLearningAgent()
    second_player = ReinforceClassicAgent(8, 12)
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit TabularQLearningAgent against DoubleQLearningAgent on WindJammers.

    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = "./logs/bastille/TabularQLearning_DoubleQLearning/" + str(time())
    print(output_path)

    # Agents are built in the same order as the runner's positional arguments.
    first_player = TabularQLearningAgent()
    second_player = DoubleQLearningAgent()
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit ReinforceClassicWithMultipleTrajectoriesAgent(8, 12) against TabularQLearningAgent.

    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = (
        "./logs/bastille/ReinforceClassicWithMultipleTrajectories_Vs_TabularQLearningAgent/"
        + str(time())
    )
    print(output_path)

    # Agents are built in the same order as the runner's positional arguments.
    first_player = ReinforceClassicWithMultipleTrajectoriesAgent(8, 12)
    second_player = TabularQLearningAgent()
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit MOISMCTSWithRandomRolloutsAgent against TabularQLearningAgent on WindJammers.

    The MCTS agent is given 100 and a SafeWindJammersRunner of two RandomAgents.
    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = (
        "./logs/bastille/MOISMCTSWithRandomRollouts_Vs_TabularQLearning/"
        + str(time())
    )
    print(output_path)

    # Construction order mirrors the nested call: simulator, MCTS agent, opponent.
    simulator = SafeWindJammersRunner(RandomAgent(), RandomAgent())
    first_player = MOISMCTSWithRandomRolloutsAgent(100, simulator)
    second_player = TabularQLearningAgent()
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit TabularQLearningAgent against PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12).

    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    # The trailing "/" was missing here, unlike every sibling run(): the
    # timestamp was appended to the matchup name itself instead of naming a
    # sub-directory beneath it.
    log_dir = (
        "./logs/bastille/TabularQLearning_PPOWithMultipleTrajectoriesMultiOutputs/"
        + str(time())
    )
    print(log_dir)

    runner = TensorboardWindJammersRunner(
        TabularQLearningAgent(),
        PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12),
        checkpoint=100,
        log_dir=log_dir,
    )
    print(runner.run(1000000))
def run(self):
    """Pit TabularQLearningAgent against MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent.

    The MCTS agent is given 100, a SafeWindJammersRunner of two RandomAgents,
    and the values 8 and 12. Prints the log directory (matchup path plus the
    current ``time()`` value), then prints whatever
    ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = (
        "./logs/bastille/TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice/"
        + str(time())
    )
    print(output_path)

    # Construction order mirrors the nested call: first agent, simulator, MCTS agent.
    first_player = TabularQLearningAgent()
    simulator = SafeWindJammersRunner(RandomAgent(), RandomAgent())
    second_player = MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
        100, simulator, 8, 12
    )
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit TabularQLearningAgent against MOISMCTSWithValueNetworkAgent on WindJammers.

    The MCTS agent is given 100 and a SafeWindJammersRunner of two RandomAgents.
    Prints the log directory (matchup path plus the current ``time()`` value),
    then prints whatever ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = (
        "./logs/bastille/TabularQLearning_MOISMCTSWithValueNetwork/" + str(time())
    )
    print(output_path)

    # Construction order mirrors the nested call: first agent, simulator, MCTS agent.
    first_player = TabularQLearningAgent()
    simulator = SafeWindJammersRunner(RandomAgent(), RandomAgent())
    second_player = MOISMCTSWithValueNetworkAgent(100, simulator)
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run(self):
    """Pit TabularQLearningAgent against RandomRolloutAgent on WindJammers.

    The rollout agent is given 100 and a SafeWindJammersRunner of two
    RandomAgents. Prints the log directory (matchup path plus the current
    ``time()`` value), then prints whatever
    ``TensorboardWindJammersRunner.run(1000000)`` returns.
    """
    output_path = "./logs/bastille/TabularQLearning_RandomRollout_100/" + str(time())
    print(output_path)

    # Construction order mirrors the nested call: first agent, simulator, rollout agent.
    first_player = TabularQLearningAgent()
    simulator = SafeWindJammersRunner(RandomAgent(), RandomAgent())
    second_player = RandomRolloutAgent(100, simulator)
    match = TensorboardWindJammersRunner(
        first_player,
        second_player,
        checkpoint=100,
        log_dir=output_path,
    )
    outcome = match.run(1000000)
    print(outcome)
def run():
    """Run every bastilleMP WindJammers matchup, one after another.

    For each matchup, in the listed order: build the log directory
    ``./logs/bastilleMP/<matchup>/<time()>``, print it, construct the two
    agents, and print the value returned by
    ``TensorboardWindJammersRunner(...).run(100000)``.

    Agents are created through factories so that each one is instantiated
    only when its matchup starts, never all up front.
    """
    log_root = "./logs/bastilleMP/"
    rounds = 100000

    # --- agent factories (a fresh instance per matchup) ---------------------
    def simulator():
        return SafeWindJammersRunner(RandomAgent(), RandomAgent())

    def random_agent():
        return RandomAgent()

    def tabular():
        return TabularQLearningAgent()

    def deep_q():
        return DeepQLearningAgent(8, 12)

    def double_q():
        return DoubleQLearningAgent()

    def reinforce():
        return ReinforceClassicAgent(8, 12)

    def reinforce_multi():
        return ReinforceClassicWithMultipleTrajectoriesAgent(8, 12)

    def ppo():
        return PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12)

    def random_rollout():
        return RandomRolloutAgent(100, simulator())

    def mcts_rollouts():
        return MOISMCTSWithRandomRolloutsAgent(100, simulator())

    def mcts_expert():
        return MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
            100, simulator(), 8, 12)

    def mcts_value():
        return MOISMCTSWithValueNetworkAgent(100, simulator())

    # (log sub-directory, first agent factory, second agent factory)
    matchups = [
        # NOTE(review): this matchup and the fourth one play the same pair of
        # agents under two different directory names; both are kept as-is.
        ("ReinforceWithMultipleTraj_Vs_TabularQLearning", reinforce_multi, tabular),
        ("DeepQLearning_Vs_TabularQLearning", deep_q, tabular),
        ("MOISMCTSWithRandomRollouts_Vs_TabularQLearning", mcts_rollouts, tabular),
        ("ReinforceClassicWithMultipleTrajectories_Vs_TabularQLearningAgent",
         reinforce_multi, tabular),
        ("Random_Vs_TabularQLearningAgent", random_agent, tabular),
        ("Random_Vs_RandomRollout_100", random_agent, random_rollout),
        ("Random_Vs_DeepQLearning", random_agent, deep_q),
        ("Random_Vs_DoubleQLearning", random_agent, double_q),
        ("Random_Vs_ReinforceClassic", random_agent, reinforce),
        ("Random_Vs_ReinforceClassicWithMultipleTrajectories",
         random_agent, reinforce_multi),
        ("Random_Vs_PPOWithMultipleTrajectoriesMultiOutputs", random_agent, ppo),
        ("Random_Vs_MOISMCTSWithRandomRollouts", random_agent, mcts_rollouts),
        ("Random_Vs_MOISMCTSWithRandomRolloutsExpertThenApprentice",
         random_agent, mcts_expert),
        ("Random_Vs_MOISMCTSWithValueNetwork", random_agent, mcts_value),
        ("TabularQLearning_RandomRollout_100", tabular, random_rollout),
        ("TabularQLearning_DeepQLearning", tabular, deep_q),
        ("TabularQLearning_DoubleQLearning", tabular, double_q),
        ("TabularQLearning_ReinforceClassic", tabular, reinforce),
        ("TabularQLearning_ReinforceClassicWithMultipleTrajectories",
         tabular, reinforce_multi),
        ("TabularQLearning_PPOWithMultipleTrajectoriesMultiOutputs", tabular, ppo),
        ("TabularQLearning_MOISMCTSWithRandomRollouts", tabular, mcts_rollouts),
        ("TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice",
         tabular, mcts_expert),
        ("TabularQLearning_MOISMCTSWithValueNetwork", tabular, mcts_value),
    ]

    for matchup, make_first, make_second in matchups:
        # The "/" separator is added uniformly here; the two PPO entries used
        # to lack it, which glued the timestamp onto the matchup name.
        log_dir = log_root + matchup + "/" + str(time())
        print(log_dir)
        runner = TensorboardWindJammersRunner(
            make_first(),
            make_second(),
            checkpoint=100,
            log_dir=log_dir,
        )
        print(runner.run(rounds))
def run(self):
    """Run the TicTacToe matchup selected by ``self.opponent``.

    Reads ``self.opponent`` (opponent name), ``self.log_dir_root`` and
    ``self.time`` (both concatenated as strings into the log directory).
    For a known opponent, prints the log directory and then the value
    returned by ``TensorboardTicTacToeRunner(...).run(rounds)``.
    For any other value, prints "Unknown opponent" and returns.
    """
    short_run = 100000000
    long_run = 1000000000
    nb_rollouts = 3

    def simulator():
        return SafeTicTacToeRunner(RandomAgent(), RandomAgent())

    # opponent name -> (log label, first agent factory, second agent factory,
    # argument passed to run()). Lambdas keep agent construction lazy: only
    # the selected matchup's agents are ever built.
    matchups = {
        "RandomAgent": (
            "DoubleQLearningAgent_VS_RandomAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: RandomAgent(),
            short_run,
        ),
        "TabularQLearningAgent": (
            "DoubleQLearningAgent_VS_TabularQLearningAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: TabularQLearningAgent(),
            short_run,
        ),
        "DeepQLearningAgent": (
            "DoubleQLearningAgent_VS_DeepQLearningAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: DeepQLearningAgent(9, 9),
            short_run,
        ),
        "ReinforceClassicAgent": (
            "DoubleQLearningAgent_VS_ReinforceClassicAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: ReinforceClassicAgent(9, 9),
            short_run,
        ),
        "ReinforceClassicWithMultipleTrajectoriesAgent": (
            "DoubleQLearningAgent_VS_ReinforceClassicWithMultipleTrajectoriesAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
            short_run,
        ),
        "PPOWithMultipleTrajectoriesMultiOutputsAgent": (
            "DoubleQLearningAgent_VS_PPOWithMultipleTrajectoriesMultiOutputsAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
            short_run,
        ),
        "MOISMCTSWithRandomRolloutsAgent": (
            "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: MOISMCTSWithRandomRolloutsAgent(100, simulator()),
            long_run,
        ),
        "MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent": (
            "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                100, simulator(), 9, 9),
            long_run,
        ),
        "MOISMCTSWithValueNetworkAgent": (
            "DoubleQLearningAgent_VS_MOISMCTSWithValueNetworkAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: MOISMCTSWithValueNetworkAgent(100, simulator()),
            long_run,
        ),
        # This branch used to pass log_dir=log_dir9, a name never assigned on
        # this path (UnboundLocalError). It now logs to its own directory.
        "DoubleQLearningAgent": (
            "DoubleQLearningAgent_VS_DoubleQLearningAgent_",
            lambda: DoubleQLearningAgent(),
            lambda: DoubleQLearningAgent(),
            long_run,
        ),
        # NOTE(review): unlike every other entry, the first player here is a
        # RandomAgent and the label starts with "RandomAgent_VS_". Kept as in
        # the original; confirm whether DoubleQLearningAgent was intended.
        "RandomRolloutAgent": (
            "RandomAgent_VS_RandomRolloutAgent(" + str(nb_rollouts) + ")_",
            lambda: RandomAgent(),
            lambda: RandomRolloutAgent(nb_rollouts, simulator()),
            long_run,
        ),
    }

    entry = matchups.get(self.opponent)
    if entry is None:
        print("Unknown opponent")
        return

    label, make_first, make_second, rounds = entry
    log_dir = self.log_dir_root + label + self.time
    print(log_dir)
    runner = TensorboardTicTacToeRunner(
        make_first(),
        make_second(),
        log_and_reset_score_history_threshold=10000,
        log_dir=log_dir,
    )
    print(runner.run(rounds))
round_step += 1 self.writer.add_summary( tf.Summary(value=[ tf.Summary.Value( tag="agent1_action_mean_duration", simple_value=self.mean_action_duration_sum[0] / round_step), tf.Summary.Value( tag="agent2_action_mean_duration", simple_value=self.mean_action_duration_sum[1] / round_step), tf.Summary.Value( tag="agent1_accumulated_reward", simple_value=self.mean_accumulated_reward_sum[0]), tf.Summary.Value( tag="agent2_accumulated_reward", simple_value=self.mean_accumulated_reward_sum[1]) ], ), episode_id) episode_id += 1 if __name__ == "__main__": print("Rdm vs Rdm") print( TensorboardInstrumentedWindJammersRunner( TabularQLearningAgent(), RandomAgent(), log_dir_root="./logs/Rdm_Vs_Rdm").run(1000))
self.stuck_on_same_score = 0 if (self.replace_player1_with_commandline_after_similar_results is not None and self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results): self.agents = (CommandLineAgent(), self.agents[1]) self.stuck_on_same_score = 0 score_history = np.array((0, 0, 0)) self.execution_time = np.array((0.0, 0.0)) return tuple(score_history) if __name__ == "__main__": number = [1000, 10000, 100000, 1000000] versus_name = ['RandomAgent', 'Tabular', 'DQN', 'DDQN', 'Reinforce', 'Reinforce A2C Style', 'PPO', 'MCTS'] versus_agent = [RandomAgent(), TabularQLearningAgent(), DeepQLearningAgent(9, 9), DoubleDeepQLearningAgent(9, 9), ReinforceClassicAgent(9, 9), ReinforceClassicWithMultipleTrajectoriesAgent(9, 9), PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9), MOISMCTSWithValueNetworkAgent(9, 9, 2)] versus = [versus_name, versus_agent] for num in number: for i in range(len(versus_name)): with open("D:/DEEP_LEARNING/Reinforcement/TabularVS" + str(versus[0][i]) +"_NB_"+ str(num) + ".csv", 'w+') as f: #Ici change TabularVS par le nom de l'agent que tu lance contre tout le reste print("New Fight" + str(versus[0][i]) + " " + str(num)) begin = time() f.write("scoreJ1;execJ1;scoreJ2;execJ2;scoreEqual\n")