import chess
import numpy as np

# ChessEnv, Stockfish, Config, evaluate_state, and softmax are
# project-local helpers assumed to be importable in this module.


def value_policy(board: chess.Board):
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        # Terminal position: the game outcome is the value, and there is no policy.
        return score, []
    # Value target: Stockfish's evaluation of the current position.
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)
    # Expand each legal move into its successor position.
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)

    # Policy target: a softmax over the evaluations of the successor states.
    actions_value = [evaluate_state(state) for state in next_states]
    policy = softmax(actions_value)

    # Scatter the move probabilities into a fixed-size policy vector,
    # using the project's UCI-move-to-index table.
    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    policy_map = np.zeros((5120,))  # one slot per encodable move; avoids shadowing builtin `map`
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    # Every legal move's probability must have landed in the vector.
    assert policy_map.sum() > 0.999
    return value, policy_map
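
softmax is not defined in this snippet; a minimal, numerically stable sketch of what it is assumed to do (turn the raw state evaluations into a probability distribution, returned as a NumPy array) is:

import numpy as np

def softmax(values):
    # Shift by the max before exponentiating for numerical stability.
    values = np.asarray(values, dtype=np.float64)
    exps = np.exp(values - values.max())
    return exps / exps.sum()

With a softmax of this shape, policy sums to 1 by construction, and the assertion at the end of value_policy then checks that the whole distribution survived the scatter into policy_map.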
Example #2
from random import shuffle

import torch
from torch.autograd import Variable  # legacy API; plain tensors suffice on modern PyTorch

# get_board_position, board_to_feature, do_backprop, Stockfish, and Config
# are project-local helpers assumed to be importable in this module.


def pretrain(model):
    feature_batch = []
    targets_batch = []
    board_positions = get_board_position()
    shuffle(board_positions)
    print("Pretraining on {} board positions...".format(len(board_positions)))
    stockfish = Stockfish()

    for epoch in range(Config.PRETRAIN_EPOCHS):
        for board_position in board_positions:
            feature_batch.append(board_to_feature(board_position))
            targets_batch.append(
                stockfish.stockfish_eval(board_position, 10))
            # Run a gradient step once a full minibatch has accumulated;
            # any trailing partial batch carries over into the next epoch.
            if len(feature_batch) == Config.minibatch_size:
                feature_batch = torch.FloatTensor(feature_batch)
                targets_batch = Variable(torch.FloatTensor(targets_batch))
                do_backprop(feature_batch, targets_batch, model)
                feature_batch = []
                targets_batch = []
        print("Completed epoch {} of {}".format(epoch + 1, Config.PRETRAIN_EPOCHS))