示例#1
0
def evaluate_model_at_gamestate(gamestate, model):
    """
    Evaluates the model at a given GameState.

    The board is split into two 0/1 planes, one per player, ordered so that
    the plane of the player who created this state comes first. The planes
    are fed to the network as a single sample of shape (1, 2, 8, 8).

    :param gamestate: GameState: represents the state of the game
    :param model: passed unchanged to load_model to obtain the network
    :return: tuple: (first element of the value output as numpy data,
        8x8 numpy array holding exp() of the policy output)
    """
    # Transform full board into 2 boards of 0s and 1s.
    # A cell is 1 on a player's plane unless it is empty (0) or holds the
    # other player's piece.
    board = gamestate.board
    player1_board = np.array(
        [[0 if val == 0 or val == 2 else 1 for val in row] for row in board])
    player2_board = np.array(
        [[0 if val == 0 or val == 1 else 1 for val in row] for row in board])

    # Player is the player who created this state (NOT next)
    if other_player(gamestate.next_player) == 1:
        channels = [player1_board, player2_board]
    else:
        channels = [player2_board, player1_board]

    x_train = np.stack(channels).reshape(1, 2, 8, 8)

    value, policy = _evaluate(x_train, load_model(model))

    # .cpu() returns the tensor itself when it already lives on the CPU, so
    # the same conversion works both locally and on a GPU machine.
    value_np = value.detach().cpu().numpy()
    policy_np = policy.detach().cpu().numpy()

    # NOTE(review): exp() suggests the network emits log-probabilities;
    # confirm against the model definition.
    return value_np[0], np.exp(policy_np.reshape(8, 8))
示例#2
0
文件: run.py 项目: kedillon/othello
    def export_game_data(self, path):
        """
        Appends game data to a json file.

        Every entry of self.all_game_states becomes one record holding the
        player who created the state, the board, the move visits and the
        win/loss value. If the file is missing, unreadable or does not hold
        a JSON list, it is (re)created with only this game's records.

        :param path: string: path to json file.
        """
        game_meta = [
            {
                "created_by": other_player(state[0].next_player),
                "board": state[0].board,
                "move_visits": state[1],
                "winloss": state[2]
            }
            for state in self.all_game_states
        ]

        # Only failures while READING the previous data fall back to a fresh
        # file. OSError covers a missing/unreadable file, ValueError covers
        # malformed JSON (json.JSONDecodeError) and undecodable bytes.
        try:
            with open(path) as infile:
                data = json.load(infile)
        except (OSError, ValueError):
            data = None

        if isinstance(data, list):
            data.extend(game_meta)
            print("TRAINING SET SIZE: {}".format(len(data)))
        else:
            data = game_meta

        # Single write path: an error here propagates instead of triggering
        # a second truncating write of the same file.
        with open(path, 'w') as outfile:
            json.dump(data, outfile)
示例#3
0
文件: game.py 项目: kedillon/othello
 def promising(r, c):
     """
     Tell whether the square at (r, c) holds a piece of the opponent.
     :param r: int: Row of the square to inspect.
     :param c: int: Column of the square to inspect.
     :return: bool. True if the square is on the board and occupied by the
         opponent of the player making the move, False otherwise.
     """
     # The bounds check comes first so the board is never indexed off-grid.
     on_board = in_bounds(r, c)
     return bool(
         on_board and self.board[r][c] == other_player(action.player))
示例#4
0
文件: run.py 项目: kedillon/othello
    def store_game_result(self, result):
        """
        Labels every recorded state of this game with its outcome.

        The label is appended to each entry of self.all_game_states and is
        seen from the side of the player who created the state, i.e. the
        opponent of that state's next_player.
        :param result: int: win/loss value for the game.
        """
        for record in self.all_game_states:
            to_move = record[0].next_player

            if other_player(to_move) == result:
                # The player who created this state won the game.
                outcome = 1
            elif to_move == result:
                # The player about to move won, so the creator lost.
                outcome = 0
            else:
                # Result matches neither player.
                outcome = 0.5

            record.append(outcome)
示例#5
0
文件: game.py 项目: kedillon/othello
    def game_result(self):
        """
        Returns the outcome of the game, if it is over.

        The game is over once neither player has a legal move. The winner is
        then the player with more pieces on the board.
        :return: int or None: the winning player, 0 if both players hold the
            same number of pieces, or None while at least one player can
            still make a legal move.
        """
        # The game continues as long as either player can move, so the
        # opponent's moves are only scanned when the next player has none.
        if self.get_player_legal_moves(self.next_player):
            return None

        opponent = other_player(self.next_player)
        if self.get_player_legal_moves(opponent):
            return None

        # Neither player can make legal moves: count pieces to decide.
        total_next = sum(row.count(self.next_player) for row in self.board)
        total_other = sum(row.count(opponent) for row in self.board)

        if total_next > total_other:
            return self.next_player
        if total_other > total_next:
            return opponent
        return 0
示例#6
0
    def backpropagate(self, game_result):
        """
        Records one rollout outcome on this node and on all its ancestors.

        Each node on the path gets one more visit. Its win score grows by 1
        when the player who created the node's state won, and by 0.5 on a
        draw.
        :param game_result: int: result of the game after rollout.
        """
        self.visit_count += 1

        # The state was created by the opponent of the player about to move,
        # so a win is credited from that player's point of view.
        creator = other_player(self.state.next_player)
        if game_result == creator:
            self.win_score += 1
        elif game_result == 0:
            # A result of 0 means the game ended in a draw.
            self.win_score += 0.5

        # Continue up the tree until a node without a parent is reached.
        ancestor = self.parent
        if ancestor:
            ancestor.backpropagate(game_result)
示例#7
0
文件: game.py 项目: kedillon/othello
    def move(self, action):
        """
        Make a move and return updated game state.

        A move whose row and col are both None is a pass: the board is
        copied unchanged and the turn goes to the other player. Otherwise
        the piece is placed on a copy of the board and every run of opponent
        pieces enclosed between the new piece and another piece of the same
        player is flipped. self.board itself is never modified.
        :param action: Move: represents a move
        :return: GameState: updated game state
        :raises Exception: if the move is not legal
        """
        # If no move was made by player, copy state and switch player.
        # TODO: In future iterations, the "pass" move should be treated as
        #  its own kind of move. Currently, the nn ignores passes and the mcts
        #  doesn't backpropogate a pass. This could affect the model's loss
        #  significantly if players are passing often and this information is
        #  not being reflected.
        if action.row is None and action.col is None:
            new_state = copy.deepcopy(self.board)
            return GameState(other_player(action.player), new_state)

        if not self.move_is_legal(action):
            raise Exception("Illegal move")

        new_state = copy.deepcopy(self.board)
        new_state[action.row][action.col] = action.player
        opponent = other_player(action.player)

        def promising(r, c):
            """
            Check that piece at row r and column c was played by opponent.
            :param r: int: Represents the row.
            :param c: int: Represents the column.
            :return: bool. True if piece was placed by opponent, False otherwise.
            """
            return bool(in_bounds(r, c) and self.board[r][c] == opponent)

        def flip_in_direction(direction):
            """
            Flip the opponent's pieces enclosed in one direction.

            Walks from the placed piece over consecutive opponent pieces.
            They are flipped in new_state only if the run ends on a piece of
            the moving player.
            :param direction: string: Represents the direction.
                   One of: ["N", "S", "E", "W", "NE", "NW", "SE", "SW"]
            :return: None. new_state is updated in place.
            """
            flips = []
            row, col = increment_row_col(action.row, action.col, direction)
            while promising(row, col):
                flips.append((row, col))
                row, col = increment_row_col(row, col, direction)

            # The run only counts when it is closed off by the mover's own
            # piece; reads go to self.board so earlier flips cannot interfere.
            if flips and in_bounds(row, col) \
                    and self.board[row][col] == action.player:
                for flip_row, flip_col in flips:
                    new_state[flip_row][flip_col] = action.player

        for direction in ("N", "S", "E", "W", "NE", "NW", "SE", "SW"):
            flip_in_direction(direction)

        return GameState(opponent, new_state)