def __init__(self, game):
    """Bagging agent that combines a symmetric and an asymmetric sub-watten network.

    Both networks are sized from the game's observation/action spaces and
    loaded from their pre-trained weight files.
    """
    super().__init__(name="SUB WATTEN BAGGING AGENT")
    self.game = game

    x, y = game.get_observation_size()
    n_actions = game.get_action_size()

    # One NNet agent per network flavour.
    self.agent_symmetric = AgentNNet(SubWattenNNet(x, y, 1, n_actions))
    self.agent_asymmetric = AgentNNet(AsymmetricSubWattenNNet(x, y, 1, n_actions))

    try:
        self.agent_symmetric.load("games/sub_watten/training/best.h5")
        self.agent_asymmetric.load(
            "games/asymmetric_sub_watten/training/v2/best.h5")
    except OSError:
        # Relative paths differ when instantiated from the test tree;
        # retry with test-relative locations.
        print("File not found with games/sub_watten/training/best.h5")
        print(
            "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
        )
        self.agent_symmetric.load("../../sub_watten/training/best.h5")
        self.agent_asymmetric.load(
            "../../asymmetric_sub_watten/training/v2/best.h5")
def build_hand_watten_evaluate_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return a bare NNet agent (no MCTS) for the hand-watten evaluate profiles.

    Returns None when the profile matches none of the known variants.
    """
    game = self.game_mapping[agent_profile.game]

    # The dense variants differ only in network class; all use a (x, y, 1) layout.
    for profile, net_cls in (
        (EnvironmentSelector.HAND_WATTEN_EVALUATE, HandWattenNNet),
        (EnvironmentSelector.HAND_WATTEN_EVALUATE_M_M, MediumMediumNNet),
        (EnvironmentSelector.HAND_WATTEN_EVALUATE_S_S, EasyEasyNNet),
    ):
        if agent_profile == profile:
            x, y = game.get_observation_size()
            return AgentNNet(net_cls(x, y, 1, game.get_action_size()))

    # The CNN variant keeps the full 3-dimensional observation.
    if agent_profile == EnvironmentSelector.HAND_WATTEN_EVALUATE_CNN:
        x, y, z = game.get_observation_size()
        return AgentNNet(CNNWatten(x, y, z, game.get_action_size()))

    return None
def build_hand_watten_train_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return an MCTS training agent for the hand-watten profiles.

    All four profiles share the exact same MCTS configuration; they differ
    only in the network architecture (and a log message), so the per-profile
    duplication of the AgentMCTS call is collapsed into one shared return.
    Returns None for unknown profiles.
    """
    game = self.game_mapping[agent_profile.game]

    # Dense variants flatten the observation to (x, y, 1).
    dense_variants = (
        (EnvironmentSelector.HAND_WATTEN_TRAIN, HandWattenNNet,
         "Configuring build_hand_watten_train_agent..."),
        (EnvironmentSelector.HAND_WATTEN_TRAIN_S_S, EasyEasyNNet,
         "Configuring build_hand_watten_s_s..."),
        (EnvironmentSelector.HAND_WATTEN_TRAIN_M_M, MediumMediumNNet,
         "Configuring build_hand_watten_m_m..."),
    )

    nnet = None
    for profile, net_cls, message in dense_variants:
        if agent_profile == profile:
            x, y = game.get_observation_size()
            nnet = net_cls(x, y, 1, game.get_action_size())
            print(message)
            break

    if nnet is None and agent_profile == EnvironmentSelector.HAND_WATTEN_TRAIN_CNN:
        # The CNN variant keeps the full 3-dimensional observation.
        x, y, z = game.get_observation_size()
        nnet = CNNWatten(x, y, z, game.get_action_size())
        print("Configuring build hand watten CNN....")

    if nnet is None:
        return None

    # Shared MCTS configuration for every hand-watten training profile.
    return AgentMCTS(AgentNNet(nnet),
                     exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                     numMCTSSims=30,
                     max_predict_time=10,
                     num_threads=1)
def build_watten_train_4_512_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return an agent built around the 4x512 watten network.

    Depending on the profile this is an MCTS train/evaluate agent, a human
    agent, or the raw network agent loaded with pre-trained weights.
    Returns None for unknown profiles.
    """
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()
    agent_nnet = AgentNNet(WattenNNet4x512(x, y, 1, game.get_action_size()))

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_4_512_TRAIN:
        print("Configuring build_watten_train_4_512_agent...")
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10,
                         num_threads=16)

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_4_512_EVALUATE:
        print("Configuring build_watten_evaluate_4_512_agent...")
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=30,
                         max_predict_time=10,
                         num_threads=16,
                         name="build_watten_evaluate_4_512_agent")

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_HUMAN:
        return WattenHumanAgent(game)

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_NNET:
        # Raw network agent: pre-trained weights, no MCTS on top.
        weights_path = os.path.abspath(
            "../games/watten/training/best-4-512-new-4.h5")
        agent_nnet.load(weights_path)
        return agent_nnet

    return None
def build_watten_train_big_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return an agent around the big-first-layer watten network.

    Covers the train profile, the evaluate profile, and the human player;
    returns None for anything else.
    """
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()
    agent_nnet = AgentNNet(WattenNNetFirstLayerBig(x, y, 1, game.get_action_size()))

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_BIG_TRAIN:
        print("Configuring build_watten_train_big_agent...")
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10,
                         num_threads=16)

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_BIG_EVALUATE:
        print("Configuring build_watten_evaluate_big_agent...")
        # NOTE(review): numMCTSSims=2 is far below the other evaluate
        # profiles (30) — presumably intentional for speed; confirm.
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=2,
                         max_predict_time=10,
                         num_threads=16,
                         name="build_watten_evaluate_big_agent")

    if agent_profile == EnvironmentSelector.WATTEN_AGENT_HUMAN:
        return WattenHumanAgent(game)

    return None
def build_sub_watten_train_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return an MCTS training agent for the sub-watten profiles.

    Fix: the original left ``nnet`` unbound when the profile matched neither
    branch, so ``AgentNNet(nnet)`` raised UnboundLocalError; now we return
    None early, consistent with the other builder methods.
    """
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()

    if agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN:
        nnet = SubWattenNNet(x, y, 1, game.get_action_size())
    elif agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN_SIMPLE:
        nnet = SubWattenSimplerNNet(x, y, 1, game.get_action_size())
    else:
        # Unknown profile: bail out before touching the (unbound) network.
        return None

    print("Configuring build_sub_watten_train_agent...")
    return AgentMCTS(AgentNNet(nnet),
                     exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                     numMCTSSims=100,
                     max_predict_time=10,
                     num_threads=1)
def build_horovod_checkers_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return a Horovod-distributed checkers MCTS agent (train or test profile).

    Horovod handles distribution itself, so native multi-GPU mode is rejected.
    Returns None for unknown profiles.
    """
    from games.checkers.nnet.CheckersResNNetDistributed import CheckersResNNetDistributed

    assert not native_multi_gpu_enabled, "ERROR: Horovod NNet does not support native multi-gpu mode!"

    game = self.game_mapping[agent_profile.game]
    nnet = CheckersResNNetDistributed(game.get_observation_size()[0],
                                      game.get_observation_size()[1],
                                      game.get_observation_size()[2],
                                      game.get_action_size(),
                                      horovod_distributed=True)
    agent = AgentNNet(nnet)

    if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_DISTRIBUTED:
        return AgentMCTS(agent,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                         numMCTSSims=1500,
                         max_predict_time=5)

    if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TEST_AGENT_RCNN_DISTRIBUTED:
        # Test profile: no exploration, longer prediction budget.
        return AgentMCTS(agent,
                         exp_rate=AgentMCTS.NO_EXPLORATION,
                         numMCTSSims=1500,
                         max_predict_time=10)

    return None
def build_tpu_checkers_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return a TPU-backed checkers MCTS training agent, or None.

    TPU execution is incompatible with native multi-GPU mode, hence the assert.
    """
    from games.checkers.nnet.CheckersResNNetTPU import CheckersResNNetTPU

    assert not native_multi_gpu_enabled, "ERROR: TPU NNet does not support native multi-gpu mode!"

    game = self.game_mapping[agent_profile.game]
    nnet = CheckersResNNetTPU(game.get_observation_size()[0],
                              game.get_observation_size()[1],
                              game.get_observation_size()[2],
                              game.get_action_size())
    agent = AgentNNet(nnet)

    if agent_profile != EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_TPU:
        return None

    return AgentMCTS(agent,
                     exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                     numMCTSSims=200,
                     max_predict_time=None,
                     verbose=False,
                     num_threads=1)
def build_native_checkers_rcnn_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return a checkers ResNet MCTS agent, optionally spread over all native GPUs.

    Fix/cleanup: the two branches duplicated the full constructor call and
    re-invoked ``game.get_observation_size()`` six times; only the multi-GPU
    keyword arguments actually differ, so the call is made once.
    Returns None for unknown profiles.
    """
    game = self.game_mapping[agent_profile.game]
    obs = game.get_observation_size()

    # Only the multi-GPU keyword arguments differ between the two modes.
    gpu_kwargs = {}
    if native_multi_gpu_enabled:
        gpu_kwargs = {"multi_gpu": True, "multi_gpu_n": len(GPUtil.getGPUs())}

    nnet = CheckersResNNet(obs[0], obs[1], obs[2],
                           game.get_action_size(),
                           **gpu_kwargs)
    agent_nnet = AgentNNet(nnet)

    if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_DEFAULT:
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                         numMCTSSims=1500,
                         max_predict_time=3,
                         num_threads=1)

    if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TEST_AGENT_RCNN_DEFAULT:
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.NO_EXPLORATION,
                         numMCTSSims=1500,
                         max_predict_time=10,
                         num_threads=2,
                         verbose=True)

    return None
def build_asymmetric_sub_watten_evaluate_agent(
        self, agent_profile, native_multi_gpu_enabled=False):
    """Wrap an asymmetric sub-watten network in a plain NNet agent."""
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()
    return AgentNNet(AsymmetricSubWattenNNet(x, y, 1, game.get_action_size()))
def build_sub_watten_evaluate_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return a bare NNet agent for the sub-watten evaluate profiles.

    Fix: the original left ``nnet`` unbound when the profile matched neither
    branch, so ``AgentNNet(nnet)`` raised UnboundLocalError; now we return
    None, consistent with the other builder methods.
    """
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()

    if agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE:
        nnet = SubWattenNNet(x, y, 1, game.get_action_size())
    elif agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE_SIMPLE:
        nnet = SubWattenSimplerNNet(x, y, 1, game.get_action_size())
    else:
        # Unknown profile: bail out before touching the (unbound) network.
        return None

    return AgentNNet(nnet)
def build_durak_train_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return the Durak MCTS training agent; None for any other profile."""
    game = self.game_mapping[agent_profile.game]
    x, y = game.get_observation_size()
    agent = AgentNNet(DurakNNet(x, y, 1, game.get_action_size()))

    if agent_profile == EnvironmentSelector.DURAK_AGENT_TRAIN:
        return AgentMCTS(agent,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10)

    return None
def build_tictactoe_train_agent(self, agent_profile, native_multi_gpu_enabled=False):
    """Return the tic-tac-toe MCTS training agent; None for any other profile.

    Cleanup: the original re-invoked ``game.get_observation_size()`` once per
    dimension; a single call is made and indexed instead.
    """
    game = self.game_mapping[agent_profile.game]
    obs = game.get_observation_size()
    nnet = TicTacToeNNet(obs[0], obs[1], obs[2], game.get_action_size())
    agent_nnet = AgentNNet(nnet)

    if agent_profile == EnvironmentSelector.TICTACTOE_AGENT_TRAIN:
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10)

    return None
def test_nn_agent_prediction(self):
    """A cloned game must yield exactly the same prediction as the original.

    Fix: ``assertEqual(pi_values.all(), clone_pi_values.all())`` only checked
    that both arrays were entirely truthy — it never compared them element by
    element, so the test could pass on two different policies. Replaced with
    an element-wise equality check.
    """
    sub_watten_game = WattenSubGame()
    clone_sub_watten_game = sub_watten_game.clone()

    x, y = sub_watten_game.get_observation_size()
    nnet = SubWattenNNet(x, y, 1, sub_watten_game.get_action_size())
    agent_nnet = AgentNNet(nnet)
    agent_nnet.load("../../sub_watten/training/best.h5")

    pi_values, v = agent_nnet.predict(sub_watten_game,
                                      sub_watten_game.get_cur_player())
    clone_pi_values, clone_v = agent_nnet.predict(
        clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

    # Element-wise array equality, not equality of the two .all() reductions.
    self.assertTrue((pi_values == clone_pi_values).all())
    self.assertEqual(v, clone_v)
def sub_watten_non_human_agent_for_total_watten(self):
    """Build the sub-watten NNet agent used inside total_watten.

    Loads the best pre-trained sub_watten model, falling back to the
    test-relative path when instantiated from the test tree.

    Fix: the OSError message claimed ``games/sub_watten/training/best.h5``
    while the path actually attempted is ``.../default_nn/best.h5``; the
    message now names the real path.
    """
    game = WattenSubGame()
    x, y = game.get_observation_size()
    nnet = SubWattenNNet(x, y, 1, game.get_action_size())
    agent_nnet = AgentNNet(nnet)

    print('Building sub_watten non human agent for total_watten')

    # load here best sub_watten model
    try:
        agent_nnet.load("games/sub_watten/training/default_nn/best.h5")
    except OSError:
        print("File not found with games/sub_watten/training/default_nn/best.h5")
        print(
            "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
        )
        agent_nnet.load("../../sub_watten/training/default_nn/best.h5")

    return agent_nnet