Exemplo n.º 1
0
    def __init__(self, game):
        """Create the bagging agent and load weights for its two networks.

        Two network-backed agents are built from the observation and action
        sizes reported by ``game``: ``self.agent_symmetric`` wraps a
        ``SubWattenNNet`` and ``self.agent_asymmetric`` wraps an
        ``AsymmetricSubWattenNNet``.

        Weights are first loaded from the ``games/...`` paths. If any of
        those loads raises ``OSError``, both agents are loaded from the
        ``../../...`` paths instead; an ``OSError`` raised by that second
        attempt is not caught here.

        Args:
            game: Object providing ``get_observation_size()`` (unpacked into
                two values) and ``get_action_size()``.
        """
        super().__init__(name="SUB WATTEN BAGGING AGENT")
        self.game = game

        obs_x, obs_y = game.get_observation_size()
        symmetric_net = SubWattenNNet(obs_x, obs_y, 1, game.get_action_size())
        asymmetric_net = AsymmetricSubWattenNNet(obs_x, obs_y, 1,
                                                 game.get_action_size())

        # One agent per network: symmetric first, asymmetric second.
        self.agent_symmetric = AgentNNet(symmetric_net)
        self.agent_asymmetric = AgentNNet(asymmetric_net)

        # Weight files, listed in the same order as ``agents`` below.
        primary_paths = (
            "games/sub_watten/training/best.h5",
            "games/asymmetric_sub_watten/training/v2/best.h5",
        )
        fallback_paths = (
            "../../sub_watten/training/best.h5",
            "../../asymmetric_sub_watten/training/v2/best.h5",
        )
        agents = (self.agent_symmetric, self.agent_asymmetric)

        try:
            for agent, weights in zip(agents, primary_paths):
                agent.load(weights)
        except OSError:
            print("File not found with games/sub_watten/training/best.h5")
            print(
                "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
            )
            # Both agents are reloaded, even if only one primary load failed.
            for agent, weights in zip(agents, fallback_paths):
                agent.load(weights)
    def build_hand_watten_evaluate_agent(self,
                                         agent_profile,
                                         native_multi_gpu_enabled=False):
        """Build the network-backed agent for a hand-watten evaluation profile.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.HAND_WATTEN_EVALUATE*`` constants; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentNNet`` wrapping the network selected for the profile,
            or ``None`` when the profile matches none of the handled
            constants.
        """
        game = self.game_mapping[agent_profile.game]

        if agent_profile == EnvironmentSelector.HAND_WATTEN_EVALUATE:
            net_cls = HandWattenNNet
        elif agent_profile == EnvironmentSelector.HAND_WATTEN_EVALUATE_M_M:
            net_cls = MediumMediumNNet
        elif agent_profile == EnvironmentSelector.HAND_WATTEN_EVALUATE_S_S:
            net_cls = EasyEasyNNet
        elif agent_profile == EnvironmentSelector.HAND_WATTEN_EVALUATE_CNN:
            # The CNN profile unpacks a three-value observation size and
            # forwards the third value instead of the constant 1.
            obs_x, obs_y, obs_z = game.get_observation_size()
            return AgentNNet(
                CNNWatten(obs_x, obs_y, obs_z, game.get_action_size()))
        else:
            return None

        # Remaining profiles share the two-value observation size.
        obs_x, obs_y = game.get_observation_size()
        return AgentNNet(net_cls(obs_x, obs_y, 1, game.get_action_size()))
    def build_hand_watten_train_agent(self,
                                      agent_profile,
                                      native_multi_gpu_enabled=False):
        """Build the MCTS training agent for a hand-watten training profile.

        Every handled profile gets the same ``AgentMCTS`` settings; the
        profiles differ only in the network class and the message printed.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.HAND_WATTEN_TRAIN*`` constants; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` around the selected network, or ``None`` when
            the profile matches none of the handled constants.
        """
        game = self.game_mapping[agent_profile.game]

        def as_mcts_agent(nnet, banner):
            # Wrap the network, announce the configuration, then build the
            # MCTS agent with the settings shared by all profiles.
            wrapped = AgentNNet(nnet)
            print(banner)
            return AgentMCTS(wrapped,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                             numMCTSSims=30,
                             max_predict_time=10,
                             num_threads=1)

        if agent_profile == EnvironmentSelector.HAND_WATTEN_TRAIN:
            obs_x, obs_y = game.get_observation_size()
            return as_mcts_agent(
                HandWattenNNet(obs_x, obs_y, 1, game.get_action_size()),
                "Configuring build_hand_watten_train_agent...")

        if agent_profile == EnvironmentSelector.HAND_WATTEN_TRAIN_S_S:
            obs_x, obs_y = game.get_observation_size()
            return as_mcts_agent(
                EasyEasyNNet(obs_x, obs_y, 1, game.get_action_size()),
                "Configuring build_hand_watten_s_s...")

        if agent_profile == EnvironmentSelector.HAND_WATTEN_TRAIN_M_M:
            obs_x, obs_y = game.get_observation_size()
            return as_mcts_agent(
                MediumMediumNNet(obs_x, obs_y, 1, game.get_action_size()),
                "Configuring build_hand_watten_m_m...")

        if agent_profile == EnvironmentSelector.HAND_WATTEN_TRAIN_CNN:
            # The CNN profile unpacks a three-value observation size.
            obs_x, obs_y, obs_z = game.get_observation_size()
            return as_mcts_agent(
                CNNWatten(obs_x, obs_y, obs_z, game.get_action_size()),
                "Configuring build hand watten CNN....")

        return None
    def build_watten_train_4_512_agent(self,
                                       agent_profile,
                                       native_multi_gpu_enabled=False):
        """Build an agent for one of the ``WattenNNet4x512`` profiles.

        A ``WattenNNet4x512`` wrapped in ``AgentNNet`` is always constructed
        first, including for the human profile, which does not use it.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.WATTEN_AGENT_*`` constants; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` for the train and evaluate profiles, a
            ``WattenHumanAgent`` for the human profile, the ``AgentNNet``
            with weights loaded for the nnet profile, or ``None`` for any
            other profile.
        """
        game = self.game_mapping[agent_profile.game]

        obs_x, obs_y = game.get_observation_size()
        net_agent = AgentNNet(
            WattenNNet4x512(obs_x, obs_y, 1, game.get_action_size()))

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_4_512_TRAIN:
            print("Configuring build_watten_train_4_512_agent...")
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                             numMCTSSims=100,
                             max_predict_time=10,
                             num_threads=16)

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_4_512_EVALUATE:
            print("Configuring build_watten_evaluate_4_512_agent...")
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                             numMCTSSims=30,
                             max_predict_time=10,
                             num_threads=16,
                             name="build_watten_evaluate_4_512_agent")

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_HUMAN:
            return WattenHumanAgent(game)

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_NNET:
            # The relative path is made absolute before loading.
            weights_file = os.path.abspath(
                "../games/watten/training/best-4-512-new-4.h5")
            net_agent.load(weights_file)
            return net_agent

        return None
    def build_watten_train_big_agent(self,
                                     agent_profile,
                                     native_multi_gpu_enabled=False):
        """Build an agent for one of the ``WattenNNetFirstLayerBig`` profiles.

        A ``WattenNNetFirstLayerBig`` wrapped in ``AgentNNet`` is always
        constructed first, including for the human profile, which does not
        use it.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.WATTEN_AGENT_BIG_*`` and
                ``WATTEN_AGENT_HUMAN`` constants; its ``game`` attribute is
                the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` for the train and evaluate profiles, a
            ``WattenHumanAgent`` for the human profile, or ``None`` for any
            other profile.
        """
        game = self.game_mapping[agent_profile.game]

        obs_x, obs_y = game.get_observation_size()
        net_agent = AgentNNet(
            WattenNNetFirstLayerBig(obs_x, obs_y, 1, game.get_action_size()))

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_BIG_TRAIN:
            print("Configuring build_watten_train_big_agent...")
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                             numMCTSSims=100,
                             max_predict_time=10,
                             num_threads=16)

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_BIG_EVALUATE:
            print("Configuring build_watten_evaluate_big_agent...")
            # NOTE(review): the evaluate profile runs only 2 simulations,
            # versus 100 for training - confirm this is intentional.
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                             numMCTSSims=2,
                             max_predict_time=10,
                             num_threads=16,
                             name="build_watten_evaluate_big_agent")

        if agent_profile == EnvironmentSelector.WATTEN_AGENT_HUMAN:
            return WattenHumanAgent(game)

        return None
    def build_sub_watten_train_agent(self,
                                     agent_profile,
                                     native_multi_gpu_enabled=False):
        """Build the MCTS training agent for a sub-watten training profile.

        Args:
            agent_profile: Profile compared against
                ``EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN`` and
                ``EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN_SIMPLE``; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` around a ``SubWattenNNet`` (train profile) or a
            ``SubWattenSimplerNNet`` (simple train profile), or ``None`` for
            any other profile.
        """
        game = self.game_mapping[agent_profile.game]

        x, y = game.get_observation_size()

        if agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN:
            nnet = SubWattenNNet(x, y, 1, game.get_action_size())
        elif agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_TRAIN_SIMPLE:
            nnet = SubWattenSimplerNNet(x, y, 1, game.get_action_size())
        else:
            # Unknown profile: return None explicitly. Without this branch
            # ``nnet`` would be unbound and wrapping it would raise
            # UnboundLocalError before the fallback return was reached.
            return None

        agent_nnet = AgentNNet(nnet)

        print("Configuring build_sub_watten_train_agent...")
        return AgentMCTS(agent_nnet,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10,
                         num_threads=1)
Exemplo n.º 7
0
    def build_horovod_checkers_agent(self,
                                     agent_profile,
                                     native_multi_gpu_enabled=False):
        """Build an MCTS agent around the Horovod-distributed checkers network.

        ``CheckersResNNetDistributed`` is imported inside the function and
        constructed with ``horovod_distributed=True``.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.CHECKERS_AGENT_*_RCNN_DISTRIBUTED``
                constants; its ``game`` attribute is the key into
                ``self.game_mapping``.
            native_multi_gpu_enabled: Must be falsy; an ``assert`` rejects a
                truthy value.

        Returns:
            An ``AgentMCTS`` for the train or test profile, or ``None`` for
            any other profile.
        """
        from games.checkers.nnet.CheckersResNNetDistributed import CheckersResNNetDistributed

        assert not native_multi_gpu_enabled, "ERROR: Horovod NNet does not support native multi-gpu mode!"

        game = self.game_mapping[agent_profile.game]

        # The observation size is queried once per component.
        obs_x = game.get_observation_size()[0]
        obs_y = game.get_observation_size()[1]
        obs_z = game.get_observation_size()[2]
        distributed_net = CheckersResNNetDistributed(obs_x,
                                                     obs_y,
                                                     obs_z,
                                                     game.get_action_size(),
                                                     horovod_distributed=True)
        net_agent = AgentNNet(distributed_net)

        if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_DISTRIBUTED:
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                             numMCTSSims=1500,
                             max_predict_time=5)

        if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TEST_AGENT_RCNN_DISTRIBUTED:
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.NO_EXPLORATION,
                             numMCTSSims=1500,
                             max_predict_time=10)

        return None
Exemplo n.º 8
0
    def build_tpu_checkers_agent(self,
                                 agent_profile,
                                 native_multi_gpu_enabled=False):
        """Build an MCTS training agent around the TPU checkers network.

        ``CheckersResNNetTPU`` is imported inside the function.

        Args:
            agent_profile: Profile compared against
                ``EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_TPU``; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Must be falsy; an ``assert`` rejects a
                truthy value.

        Returns:
            An ``AgentMCTS`` for the TPU train profile, or ``None`` for any
            other profile.
        """
        from games.checkers.nnet.CheckersResNNetTPU import CheckersResNNetTPU

        assert not native_multi_gpu_enabled, "ERROR: TPU NNet does not support native multi-gpu mode!"

        game = self.game_mapping[agent_profile.game]

        # The observation size is queried once per component.
        obs_x = game.get_observation_size()[0]
        obs_y = game.get_observation_size()[1]
        obs_z = game.get_observation_size()[2]
        net_agent = AgentNNet(
            CheckersResNNetTPU(obs_x, obs_y, obs_z, game.get_action_size()))

        is_tpu_training = (
            agent_profile == EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_TPU)
        if not is_tpu_training:
            return None

        return AgentMCTS(net_agent,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                         numMCTSSims=200,
                         max_predict_time=None,
                         verbose=False,
                         num_threads=1)
Exemplo n.º 9
0
    def build_native_checkers_rcnn_agent(self,
                                         agent_profile,
                                         native_multi_gpu_enabled=False):
        """Build an MCTS agent around ``CheckersResNNet``.

        Args:
            agent_profile: Profile compared against the
                ``EnvironmentSelector.CHECKERS_AGENT_*_RCNN_DEFAULT``
                constants; its ``game`` attribute is the key into
                ``self.game_mapping``.
            native_multi_gpu_enabled: When truthy, the network is built with
                ``multi_gpu=True`` and ``multi_gpu_n`` set to the number of
                entries returned by ``GPUtil.getGPUs()``.

        Returns:
            An ``AgentMCTS`` for the train or test profile, or ``None`` for
            any other profile.
        """
        game = self.game_mapping[agent_profile.game]

        # The observation size is queried once per component.
        obs_x = game.get_observation_size()[0]
        obs_y = game.get_observation_size()[1]
        obs_z = game.get_observation_size()[2]
        action_count = game.get_action_size()

        # The multi-GPU keywords are passed only when the mode is enabled.
        gpu_options = {}
        if native_multi_gpu_enabled:
            gpu_options = {
                "multi_gpu": True,
                "multi_gpu_n": len(GPUtil.getGPUs()),
            }

        net_agent = AgentNNet(
            CheckersResNNet(obs_x, obs_y, obs_z, action_count, **gpu_options))

        if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TRAIN_RCNN_DEFAULT:
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.EXPLORATION_RATE_INIT,
                             numMCTSSims=1500,
                             max_predict_time=3,
                             num_threads=1)

        if agent_profile == EnvironmentSelector.CHECKERS_AGENT_TEST_AGENT_RCNN_DEFAULT:
            return AgentMCTS(net_agent,
                             exp_rate=AgentMCTS.NO_EXPLORATION,
                             numMCTSSims=1500,
                             max_predict_time=10,
                             num_threads=2,
                             verbose=True)

        return None
    def build_asymmetric_sub_watten_evaluate_agent(
            self, agent_profile, native_multi_gpu_enabled=False):
        """Build the network-backed agent for asymmetric sub-watten evaluation.

        Args:
            agent_profile: Profile whose ``game`` attribute is the key into
                ``self.game_mapping``; it is not otherwise inspected.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentNNet`` wrapping a freshly constructed
            ``AsymmetricSubWattenNNet``; no weights are loaded here.
        """
        game = self.game_mapping[agent_profile.game]

        obs_x, obs_y = game.get_observation_size()
        asymmetric_net = AsymmetricSubWattenNNet(obs_x, obs_y, 1,
                                                 game.get_action_size())
        return AgentNNet(asymmetric_net)
    def build_sub_watten_evaluate_agent(self,
                                        agent_profile,
                                        native_multi_gpu_enabled=False):
        """Build the network-backed agent for a sub-watten evaluation profile.

        Args:
            agent_profile: Profile compared against
                ``EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE`` and
                ``EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE_SIMPLE``; its
                ``game`` attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentNNet`` wrapping a ``SubWattenNNet`` (evaluate profile)
            or a ``SubWattenSimplerNNet`` (simple evaluate profile), or
            ``None`` for any other profile.
        """
        game = self.game_mapping[agent_profile.game]

        x, y = game.get_observation_size()
        if agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE:
            nnet = SubWattenNNet(x, y, 1, game.get_action_size())
        elif agent_profile == EnvironmentSelector.SUB_WATTEN_AGENT_EVALUATE_SIMPLE:
            nnet = SubWattenSimplerNNet(x, y, 1, game.get_action_size())
        else:
            # Unknown profile: without this branch ``nnet`` would be unbound
            # and wrapping it would raise UnboundLocalError. Return None,
            # as build_hand_watten_evaluate_agent does for unknown profiles.
            return None

        return AgentNNet(nnet)
    def build_durak_train_agent(self,
                                agent_profile,
                                native_multi_gpu_enabled=False):
        """Build the MCTS training agent for Durak.

        The ``DurakNNet`` and its ``AgentNNet`` wrapper are constructed
        before the profile is checked.

        Args:
            agent_profile: Profile compared against
                ``EnvironmentSelector.DURAK_AGENT_TRAIN``; its ``game``
                attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` for the Durak train profile, or ``None`` for
            any other profile.
        """
        game = self.game_mapping[agent_profile.game]

        obs_x, obs_y = game.get_observation_size()
        net_agent = AgentNNet(
            DurakNNet(obs_x, obs_y, 1, game.get_action_size()))

        is_training = agent_profile == EnvironmentSelector.DURAK_AGENT_TRAIN
        if not is_training:
            return None

        return AgentMCTS(net_agent,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10)
    def build_tictactoe_train_agent(self,
                                    agent_profile,
                                    native_multi_gpu_enabled=False):
        """Build the MCTS training agent for Tic-Tac-Toe.

        The ``TicTacToeNNet`` and its ``AgentNNet`` wrapper are constructed
        before the profile is checked.

        Args:
            agent_profile: Profile compared against
                ``EnvironmentSelector.TICTACTOE_AGENT_TRAIN``; its ``game``
                attribute is the key into ``self.game_mapping``.
            native_multi_gpu_enabled: Accepted but not used by this builder.

        Returns:
            An ``AgentMCTS`` for the Tic-Tac-Toe train profile, or ``None``
            for any other profile.
        """
        game = self.game_mapping[agent_profile.game]

        # The observation size is queried once per component.
        obs_x = game.get_observation_size()[0]
        obs_y = game.get_observation_size()[1]
        obs_z = game.get_observation_size()[2]
        net_agent = AgentNNet(
            TicTacToeNNet(obs_x, obs_y, obs_z, game.get_action_size()))

        is_training = agent_profile == EnvironmentSelector.TICTACTOE_AGENT_TRAIN
        if not is_training:
            return None

        return AgentMCTS(net_agent,
                         exp_rate=AgentMCTS.EXPLORATION_RATE_MEDIUM,
                         numMCTSSims=100,
                         max_predict_time=10)
    def test_nn_agent_prediction(self):
        """Check that the network agent predicts the same for a game and its clone.

        A ``WattenSubGame`` and its ``clone()`` are passed to the same
        ``AgentNNet`` (weights loaded from
        ``../../sub_watten/training/best.h5``); the policy values and the
        value estimate must match between the two.
        """
        # Imported locally so this method does not depend on a module-level
        # numpy import.
        import numpy as np

        sub_watten_game = WattenSubGame()

        clone_sub_watten_game = sub_watten_game.clone()

        x, y = sub_watten_game.get_observation_size()
        nnet = SubWattenNNet(x, y, 1, sub_watten_game.get_action_size())

        agent_nnet = AgentNNet(nnet)

        agent_nnet.load("../../sub_watten/training/best.h5")

        pi_values, v = agent_nnet.predict(sub_watten_game,
                                          sub_watten_game.get_cur_player())

        clone_pi_values, clone_v = agent_nnet.predict(
            clone_sub_watten_game, clone_sub_watten_game.get_cur_player())

        # Compare the policies element by element. Comparing
        # ``pi_values.all()`` with ``clone_pi_values.all()`` only compares
        # two reduced booleans, so differing policies would still pass.
        np.testing.assert_array_equal(pi_values, clone_pi_values)
        self.assertEqual(v, clone_v)
    def sub_watten_non_human_agent_for_total_watten(self):
        """Build a sub-watten network agent with its trained weights loaded.

        A ``SubWattenNNet`` sized from a new ``WattenSubGame`` is wrapped in
        ``AgentNNet``. Weights are loaded from the ``games/...`` path first;
        if that raises ``OSError`` the ``../../...`` path is tried, and an
        ``OSError`` from that second attempt is not caught here.

        Returns:
            The ``AgentNNet`` with weights loaded.
        """
        game = WattenSubGame()

        x, y = game.get_observation_size()
        nnet = SubWattenNNet(x, y, 1, game.get_action_size())

        agent_nnet = AgentNNet(nnet)

        print('Building sub_watten non human agent for total_watten')

        # Load the best sub_watten model here.
        primary_path = "games/sub_watten/training/default_nn/best.h5"
        fallback_path = "../../sub_watten/training/default_nn/best.h5"
        try:
            agent_nnet.load(primary_path)
        except OSError:
            # Report the path that was actually attempted; the message used
            # to name a different file (without the default_nn directory).
            print("File not found with " + primary_path)
            print(
                "Maybe you are creating an agent for test purposes. I'll try to load the model from a different path"
            )
            agent_nnet.load(fallback_path)

        return agent_nnet