Exemplo n.º 1
0
    def select_move(self, game_state):
        """Pick a random valid move that does not fill one of our own eyes.

        Every point on the board is visited in row-major order (rows and
        columns are numbered from 1). Points that are valid to play and are
        not an eye for the player to move are gathered, and one of them is
        chosen with ``random.choice``. When nothing qualifies, the bot passes.

        Args:
            game_state: Current game state; must expose ``board``,
                ``next_player`` and ``is_valid_move``.

        Returns:
            ``Move.play(point)`` for the chosen point, or ``Move.pass_turn()``
            when there is no acceptable point.
        """
        board = game_state.board
        every_point = (
            Point(row=row, col=col)
            for row in range(1, board.num_rows + 1)
            for col in range(1, board.num_cols + 1)
        )
        # Validity is checked first so the eye test only runs on playable points.
        playable = [
            point
            for point in every_point
            if game_state.is_valid_move(Move.play(point))
            and not is_point_an_eye(board, point, game_state.next_player)
        ]
        if playable:
            return Move.play(random.choice(playable))
        return Move.pass_turn()
Exemplo n.º 2
0
    def select_move(self, game_state):
        """Choose a random valid move that preserves our own eyes.

        Candidate points are read from ``self.point_cache``; the cache is
        refreshed through ``_update_cache`` whenever the board dimensions
        differ from ``self.dim``. Points are tried in a shuffled order and the
        first acceptable one is played, so no full candidate list is built.

        Args:
            game_state: Current game state; must expose ``board``,
                ``next_player`` and ``is_valid_move``.

        Returns:
            ``Move.play(point)`` for the first acceptable point in the
            shuffled order, or ``Move.pass_turn()`` if none qualifies.
        """
        board = game_state.board
        board_shape = (board.num_rows, board.num_cols)
        if board_shape != self.dim:
            self._update_cache(board_shape)

        # Shuffle indices rather than the cache itself so the cache order is
        # left untouched between calls.
        visit_order = np.arange(len(self.point_cache))
        np.random.shuffle(visit_order)
        for position in visit_order:
            point = self.point_cache[position]
            if not game_state.is_valid_move(Move.play(point)):
                continue
            if is_point_an_eye(board, point, game_state.next_player):
                continue
            return Move.play(point)
        return Move.pass_turn()
Exemplo n.º 3
0
 def select_move(self, game_state):
     """Sample a move from the sharpened predicted move distribution.

     The probabilities returned by ``self.predict`` are cubed, clipped away
     from 0 and 1, and re-normalized. All point indices are then drawn
     without replacement using those weights, and the first drawn point that
     is a valid move and not an eye for the player to move is played.

     Args:
         game_state: Current game state; must expose ``board``,
             ``next_player`` and ``is_valid_move``.

     Returns:
         ``goboard.Move.play(point)`` for the first acceptable sampled point,
         or ``goboard.Move.pass_turn()`` if no point qualifies.
     """
     board_size = self.encoder.board_width * self.encoder.board_height
     # Cubing increases the distance between the most likely and the least
     # likely moves.
     sharpened = self.predict(game_state) ** 3
     # Clipping prevents move probabilities from getting stuck at 0 or 1.
     floor = 1e-6
     bounded = np.clip(sharpened, floor, 1 - floor)
     # Re-normalize to get another probability distribution.
     weights = bounded / np.sum(bounded)

     # Turn the probabilities into a ranked list of moves: sample every point
     # index exactly once, weighted by the distribution above.
     point_indices = np.arange(board_size)
     sampling_order = np.random.choice(
         point_indices, board_size, replace=False, p=weights)
     for sampled_idx in sampling_order:
         point = self.encoder.decode_point_index(sampled_idx)
         # Take the first sampled point that is valid and does not fill an
         # eye of the player to move.
         if not game_state.is_valid_move(goboard.Move.play(point)):
             continue
         if is_point_an_eye(game_state.board, point, game_state.next_player):
             continue
         return goboard.Move.play(point)
     # No valid, non-eye-filling point was found, so pass.
     return goboard.Move.pass_turn()
Exemplo n.º 4
0
    def select_move(self, game_state):
        """Select a move by sampling from the policy, with random exploration.

        With probability ``self._temperature`` a uniform distribution over all
        points is used; otherwise the distribution comes from
        ``self._model.predict`` on the encoded board. The probabilities are
        clipped and re-normalized, all point indices are drawn without
        replacement using those weights, and the first drawn point that is a
        valid move and not an eye for the player to move is played. If a
        collector is attached, the encoded board and the chosen point index
        are recorded before returning.

        Args:
            game_state: Current game state; must expose ``board``,
                ``next_player`` and ``is_valid_move``.

        Returns:
            ``goboard.Move.play(point)`` for the first acceptable sampled
            point, or ``goboard.Move.pass_turn()`` if no point qualifies.
        """
        board_size = self._encoder.board_width * self._encoder.board_height
        encoded_board = self._encoder.encode(game_state)

        explore = np.random.random() < self._temperature
        if explore:
            # Explore: every point is equally likely.
            raw_probs = np.ones(board_size) / board_size
        else:
            # Exploit: follow the current policy; the model takes a batch, so
            # wrap the single board in a batch of one.
            batch = np.array([encoded_board])
            raw_probs = self._model.predict(batch)[0]

        # Keep probabilities away from exactly 0 or 1, then re-normalize so
        # they form a probability distribution again.
        floor = 1e-5
        bounded = np.clip(raw_probs, floor, 1 - floor)
        weights = bounded / np.sum(bounded)

        # Turn the probabilities into a ranked list of moves by sampling every
        # point index exactly once.
        sampling_order = np.random.choice(
            np.arange(board_size), board_size, replace=False, p=weights)
        for sampled_idx in sampling_order:
            point = self._encoder.decode_point_index(sampled_idx)
            if not game_state.is_valid_move(goboard.Move.play(point)):
                continue
            if is_point_an_eye(game_state.board, point,
                               game_state.next_player):
                continue
            if self._collector is not None:
                self._collector.record_decision(state=encoded_board,
                                                action=sampled_idx)
            return goboard.Move.play(point)
        # No legal, non-self-destructive moves left.
        return goboard.Move.pass_turn()