# NOTE: ChessEnv, Node, MCTS, Config, PolicyValNetwork_Giraffe, Stockfish,
# softmax, board_to_feature, human_play and load_model are project-local helpers
# assumed to be importable from this repository; `args` is assumed to be parsed
# elsewhere (e.g. via argparse). Only the imports below are standard/third-party.
import glob
import os
import time

import chess
import numpy as np


def value_policy(board: chess.Board):
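    # Return (value, policy) for the position: the value is Stockfish's
    # evaluation of the board, and the policy is a softmax over Stockfish
    # evaluations of every legal successor position.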
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        return score, []
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)

    actions_value = []
    for state in next_states:
        actions_value.append(evaluate_state(state))

    policy = softmax(actions_value)

    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
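    # Scatter the move probabilities into the fixed 5120-entry action vector
    # indexed via Config.MOVETOINDEX.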
    policy_map = np.zeros((5120,))
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    assert policy.sum() > 0.999
    return value, policy_map


def evaluate_state(board):
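    # Score a single position: return the game result if it is terminal,
    # otherwise Stockfish's evaluation of the board.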
    env = ChessEnv(board=board)
    game_over, score = env.is_game_over()
    if game_over:
        return score
    value = env.stockfish.stockfish_eval(env.board, timeout=100)
    return value


    def generate_game(self, model: PolicyValNetwork_Giraffe):
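        # Generate one game: at each step the MCTS-guided network selects a move
        # and the reply is chosen via human_play, until the game is over.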
        np.random.seed()
        triplets = []
        step_game = 0
        temperature = 1
        game_over = False
        moves = 0
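        # Start a fresh game and create the root of the search tree.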
        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        while not game_over:
            moves += 1
            step_game += 1
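            # After 50 steps, drop the temperature so move selection becomes
            # effectively greedy.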
            if step_game == 50:
                temperature = 10e-6

            start = time.time()
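            # MCTS returns the search policy (discarded here), the chosen
            # successor node and the root node.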
            _, successor, root_node = MCTS(temp=temperature,
                                           network=model,
                                           root=root_node)
            game_over, z = root_node.env.is_game_over(moves, res_check=True)

            root_node = successor

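            # The reply move is selected via human_play from the updated root.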
            successor, root_node = human_play(
                root=root_node, explore_factor=Config.EXPLORE_FACTOR)

            #print("Calculated next move in {}ms".format(time.time() - start))
            feature = board_to_feature(root_node.env.board)
            #print('')
            #print(root_node.env.board)
            #print("Running on {} ".format(mp.current_process()))
            root_node = successor
            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        return
    def run_tournament(self, candidate, candidate_alpha_scores,
                       incumbent_alpha_scores, _):
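        # Play one evaluation game between the candidate network and the current
        # incumbent policy, with colours assigned by a coin flip, and append the
        # signed result to each side's score list.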
        moves = 0
        temperature = 10e-6

        p = np.random.binomial(1, 0.5) == 1
        white, black = (self.current_policy,
                        candidate) if p else (candidate, self.current_policy)
        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            if root_node.env.white_to_move:
                player = white
            else:
                player = black

            pi, successor, root_node = MCTS(temp=temperature,
                                            network=player,
                                            root=root_node)
            root_node = successor
            moves += 1
            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        # z is the game result from white's perspective.

        if white == candidate:
            candidate_alpha_scores.append(+z)
            incumbent_alpha_scores.append(-z)
        else:
            candidate_alpha_scores.append(-z)
            incumbent_alpha_scores.append(+z)

        if candidate_alpha_scores[-1] > 0:
            print("Candidate won!")
        elif candidate_alpha_scores[-1] < 0:
            print("Incumbent won!")
        else:
            print("Draw.")
def main():
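    # Pit the loaded network (playing via MCTS) against a uniformly random mover
    # for args.numgames games, with colours decided by a coin flip each game.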
    network = load_model(args.newnetwork)

    score_net = 0
    score_random = 0
    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6
        black = None
        p = np.random.binomial(1, 0.5) == 1
        white = network if p else None
        if white is None:
            black = network

        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            if root_node.env.white_to_move:
                player = white
            else:
                player = black
            #print(root_node.env.board)
            start = time.time()
            if player == network:
                pi, successor, root_node = MCTS(temp=temperature,
                                                network=player,
                                                root=root_node)
                print("MCTS completed move {} in: {}".format(
                    moves,
                    time.time() - start))
                root_node = successor
            else:
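                # Random opponent: pick a uniformly random legal move and lazily
                # expand the corresponding child node if needed.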
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)

                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    next_env = root_node.env.copy()

                    next_env.step(root_node.legal_moves[move])

                    root_node.children[move] = Node(next_env,
                                                    temperature,
                                                    parent=root_node,
                                                    child_id=move)
                root_node = root_node.children[move]
            moves = moves + 1

            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        # z is the game result from white's perspective.

        if white == network:
            if z >= 1:
                score_net += 1
            else:
                score_random += 1
        else:
            if z <= -1:
                score_net += 1
            else:
                score_random += 1

        print("Game {} complete. Net: {} Random: {}".format(
            game, score_net, score_random))

    print("New network score total wins: {} Average Score: {}".format(
        score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(
        score_random, score_random / args.numgames))
Example #6
def main():
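    # Evaluate a newly trained network against an old reference network over
    # args.numgames MCTS-guided games, with colours decided by a coin flip.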
    old_network = PolicyValNetwork_Giraffe(pretrain=False)
    new_network, _ = load_model(args.newnetwork)
    if args.oldnetwork is None:
        list_of_files = glob.glob('./*.pt')
        if len(list_of_files) > 0:
            new_network = load_model(max(list_of_files, key=os.path.getctime))
            print('New network will be: {}'.format(new_network))
        else:
            print("No new network to test.")
            quit()

    score1 = 0
    score2 = 0
    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6

        p = np.random.binomial(1, 0.5) == 1
        white, black = ((new_network, old_network) if p
                        else (old_network, new_network))

        env = ChessEnv()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            if root_node.env.white_to_move:
                player = white
            else:
                player = black

            start = time.time()
            pi, successor, root_node = MCTS(temp=temperature,
                                            network=player,
                                            root=root_node)
            print("MCTS completed move {} in: {}".format(
                moves,
                time.time() - start))

            root_node = successor
            moves = moves + 1

            game_over, z = root_node.env.is_game_over(moves, res_check=True)

        print("Game {} complete. New: {} Old: {}")

        # z is the game result from white's perspective.

        if white == new_network:
            if z >= 1:
                score1 = score1 + 1
            else:
                score2 = score2 + 1
        else:
            if z <= -1:
                score1 = score1 + 1
            else:
                score2 = score2 + 1

        print("Game {} complete. New: {} Old: {}".format(game, score1, score2))

    print("New network score total wins: {} Average Score: {}".format(
        score1, score1 / args.numgames))
    print("Old network score total wins: {} Average Score: {}".format(
        score2, score2 / args.numgames))