    def test_lines_wins_for_player_0(self):
        # Each iteration fills row i for player 0 while player 1 plays in
        # a different row; the third mark in the row must end the game.
        for i in range(3):
            gs = Power4GameState()
            gs.step(0, i * 3 + 0)
            gs.step(1, ((i + 1) % 3) * 3 + 0)
            gs.step(0, i * 3 + 1)
            gs.step(1, ((i + 1) % 3) * 3 + 1)
            _, rew, term = gs.step(0, i * 3 + 2)
            assert (rew, term) == (1, True)
    def test_columns_wins_for_player_0(self):
        # The row test transposed: player 0 fills column i while player 1
        # plays in a different column.
        for i in range(3):
            gs = Power4GameState()
            gs.step(0, i + 0)
            gs.step(1, ((i + 1) % 3) + 0)
            gs.step(0, i + 1 * 3)
            gs.step(1, ((i + 1) % 3) + 1 * 3)
            _, rew, term = gs.step(0, i + 2 * 3)
            assert (rew, term) == (1, True)
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: 'Optional[Power4GameState]' = None
    ) -> 'Tuple[int, int, int]':
        # A None default avoids sharing one mutable game-state instance
        # across calls.
        if initial_game_state is None:
            initial_game_state = Power4GameState()
        round_id = 0

        score_history = np.array((0, 0, 0))
        # max_rounds == -1 means "run until stopped".
        while max_rounds == -1 or round_id < max_rounds:
            if round_id == 0:
                gs = initial_game_state.copy_game_state()
            else:
                gs.newGameState()  # presumably resets the state for a new round
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player()
                info_state = gs.get_information_state_for_player(
                    current_player)
                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)

                # WARNING: assumes a two-player zero-sum game
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            round_id += 1

        return tuple(score_history)
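A minimal usage sketch for this runner. Runner and RandomAgent are hypothetical names standing in for the surrounding class and for any agent exposing the act()/observe() interface used above; neither is part of the original example.

# Hypothetical usage; Runner and RandomAgent are illustrative names.
runner = Runner(agents=(RandomAgent(), RandomAgent()))
p0_wins, p1_wins, draws = runner.run(max_rounds=1000)
print("P0 wins:", p0_wins, "P1 wins:", p1_wins, "draws:", draws)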
    def test_diagonal_wins_for_player_0(self):
        # Main diagonal: cells 0, 4, 8.
        gs = self.gs
        gs.step(0, 0)
        gs.step(1, 1)
        gs.step(0, 4)
        gs.step(1, 5)
        _, rew, term = gs.step(0, 8)
        assert (rew, term) == (1, True)

        # Anti-diagonal: cells 2, 4, 6.
        gs = Power4GameState()
        gs.step(0, 2)
        gs.step(1, 1)
        gs.step(0, 4)
        gs.step(1, 5)
        _, rew, term = gs.step(0, 6)
        assert (rew, term) == (1, True)
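The row, column, and diagonal tests above all assume that actions 0 through 8 address a 3x3 board in row-major order. Below is a minimal sketch of the win check such a game state could implement; the names and the flat-board representation are assumptions, not the source's actual implementation.

import numpy as np

# The eight winning lines of a 3x3 board, as row-major cell indices.
WIN_LINES = (
    (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
    (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
    (0, 4, 8), (2, 4, 6),             # diagonals
)

def has_won(board: np.ndarray, player_id: int) -> bool:
    # True if player_id occupies every cell of at least one winning line.
    flat = board.reshape(-1)
    return any(all(flat[i] == player_id for i in line) for line in WIN_LINES)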
Example No. 5
    def create_game_state_from_information_state(self):
        # Imported locally, presumably to avoid a circular import between
        # the information-state and game-state modules.
        from games.power4.Power4GameState import Power4GameState
        gs = Power4GameState()
        gs.board = self.board.copy()
        gs.current_player = self.current_player
        return gs
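An illustrative use of this reconstruction: rebuild a playable state from an information state so moves can be simulated on a copy. The gs variable and the accessor follow the runner code elsewhere on this page; the concrete move is arbitrary.

# Rebuild a playable state from player 0's information state, then
# simulate a move on the copy; the live game state is left untouched.
info_state = gs.get_information_state_for_player(0)
sim_gs = info_state.create_game_state_from_information_state()
sim_gs.step(0, 4)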
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: 'Optional[Power4GameState]' = None
    ) -> 'Tuple[int, int, int]':
        if initial_game_state is None:
            initial_game_state = Power4GameState()
        round_id = 0
        checkpoints = (1000, 10000, 100000, 1000000)

        # Logging: create (or truncate) one log file per agent match-up.
        filename = "../logs/" + type(self.agents[0]).__name__ + "_VS_" + type(
            self.agents[1]).__name__ + str(time.time()) + ".txt"
        open(filename, "w").close()
        print(type(self.agents[1]).__name__)

        score_history = np.array((0, 0, 0))
        while max_rounds == -1 or round_id < max_rounds:
            gs = initial_game_state.copy_game_state()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(
                    current_player)
                info_state = gs.get_information_state_for_player(
                    current_player)

                # Timer instrumentation: measure how long act() takes at
                # selected checkpoint rounds.
                if round_id in checkpoints:
                    timer = time.time()

                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)

                if round_id in checkpoints:
                    with open(filename, "a") as logs_scores_file:
                        logs_scores_file.write(
                            "TIMER " + str(round_id) + " PLAYER " +
                            str(current_player) + " == " +
                            str(time.time() - timer) + "\n")

                # WARNING: assumes a two-player zero-sum game
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            round_id += 1
            if round_id in checkpoints:
                with open(filename, "a") as logs_scores_file:
                    logs_scores_file.write(
                        "CHECKPOINT " + str(round_id) + " == " +
                        str(score_history /
                            self.print_and_reset_score_history_threshold) +
                        "\n")
            if self.print_and_reset_score_history_threshold is not None and \
                    round_id % self.print_and_reset_score_history_threshold == 0:
                print(score_history /
                      self.print_and_reset_score_history_threshold)
                # Detect (win, loss, draw) counts frozen across windows.
                if self.prev_history is not None and \
                        np.array_equal(score_history, self.prev_history):
                    self.stuck_on_same_score += 1
                else:
                    self.prev_history = score_history
                    self.stuck_on_same_score = 0
                limit = self.replace_player1_with_commandline_after_similar_results
                if limit is not None and self.stuck_on_same_score >= limit:
                    # Swap player 0 for a human once results stagnate.
                    self.agents = (CommandLineAgent(), self.agents[1])
                    self.stuck_on_same_score = 0
                score_history = np.array((0, 0, 0))
        return tuple(score_history)
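The runner above writes plain-text TIMER and CHECKPOINT lines. Here is a small sketch for reading the timing entries back, assuming exactly the "TIMER <round> PLAYER <id> == <seconds>" format produced in run():

def read_timer_entries(path):
    # Returns (round_id, player_id, seconds) tuples from a runner log file.
    entries = []
    with open(path) as f:
        for line in f:
            if line.startswith("TIMER"):
                parts = line.split()
                entries.append((int(parts[1]), int(parts[3]), float(parts[5])))
    return entries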
Example No. 7
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: 'Optional[Power4GameState]' = None
    ) -> 'Tuple[int, int, int]':
        if initial_game_state is None:
            initial_game_state = Power4GameState()
        round_id = 0

        score_history = np.array((0, 0, 0))
        while max_rounds == -1 or round_id < max_rounds:
            if round_id == 0:
                gs = initial_game_state.copy_game_state()
            else:
                gs.newGameState()  # presumably resets the state for a new round
            terminal = False
            while not terminal:
                # print(gs)
                # sleep(0.1)
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player()
                info_state = gs.get_information_state_for_player(
                    current_player)
                # Time each act() call to report a mean duration per agent.
                action_time = time()
                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)
                self.mean_action_duration_sum[current_player] += time() - action_time

                # WARNING: assumes a two-player zero-sum game
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            round_id += 1
            if self.log_and_reset_score_history_threshold is not None and \
                    round_id % self.log_and_reset_score_history_threshold == 0:
                print(score_history /
                      self.log_and_reset_score_history_threshold)
                # Detect (win, loss, draw) counts frozen across windows.
                if self.prev_history is not None and \
                        np.array_equal(score_history, self.prev_history):
                    self.stuck_on_same_score += 1
                else:
                    self.prev_history = score_history
                    self.stuck_on_same_score = 0
                limit = self.replace_player1_with_commandline_after_similar_results
                if limit is not None and self.stuck_on_same_score >= limit:
                    # Swap player 0 for a human once results stagnate.
                    self.agents = (CommandLineAgent(), self.agents[1])
                    self.stuck_on_same_score = 0

                # Emit TensorBoard summaries (TensorFlow 1.x API): the mean
                # action duration per agent, then win/draw rates over the
                # last reporting window.
                for writer, duration_sum in (
                        (self.writerTimeAgent1,
                         self.mean_action_duration_sum[0]),
                        (self.writerTimeAgent2,
                         self.mean_action_duration_sum[1])):
                    writer.add_summary(
                        tf.Summary(value=[
                            tf.Summary.Value(
                                tag="Time",
                                simple_value=duration_sum / round_id)
                        ]), round_id)
                    writer.flush()
                for writer, count in (
                        (self.writerAgent1, score_history[0]),
                        (self.writerAgent2, score_history[1]),
                        (self.writerDraw, score_history[2])):
                    writer.add_summary(
                        tf.Summary(value=[
                            tf.Summary.Value(
                                tag="Score",
                                simple_value=count /
                                self.log_and_reset_score_history_threshold)
                        ]), round_id)
                    writer.flush()

                self.mean_action_duration_sum = {0: 0.0, 1: 0.0}
                self.mean_accumulated_reward_sum = {0: 0.0, 1: 0.0}
                score_history = np.array((0, 0, 0))
        return tuple(score_history)
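The five summary writers used above are created elsewhere in the original class. Under the TensorFlow 1.x API they would typically be tf.summary.FileWriter instances, one log directory per curve; the helper below is a sketch, and the directory names are illustrative, not from the source.

import tensorflow as tf  # TensorFlow 1.x summary API assumed

def make_writers(log_root="./logs"):
    # One FileWriter per curve, e.g. assigned to the runner's attributes
    # (writerTimeAgent1, writerAgent1, ...) in its __init__.
    return {
        "time_agent1": tf.summary.FileWriter(log_root + "/agent1_time"),
        "time_agent2": tf.summary.FileWriter(log_root + "/agent2_time"),
        "score_agent1": tf.summary.FileWriter(log_root + "/agent1_score"),
        "score_agent2": tf.summary.FileWriter(log_root + "/agent2_score"),
        "draws": tf.summary.FileWriter(log_root + "/draws"),
    }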
    def setUp(self):
        self.gs = Power4GameState()