def get_buffer(config, game) -> "tuple[ChessEnv, list]":
    """
    Replay a PGN game and collect supervised-learning data from it.

    :param Config config: config used to weight the recorded move policies
    :param pgn.Game game: parsed PGN game whose mainline is replayed
    :return: tuple ``(env, data)`` — the finished ChessEnv and the list of
        per-move records interleaved white/black in move order (each record
        is whatever ChessPlayer.sl_action / finish_game stores in ``moves``)
    :raises KeyError: if the PGN headers lack "Result", "WhiteElo" or "BlackElo"
    """
    env = ChessEnv().reset()
    # Dummy players only record the supplied moves; no search is run.
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]
    white_elo, black_elo = int(game.headers["WhiteElo"]), int(game.headers["BlackElo"])
    # Stronger players contribute sharper policy targets.
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # Walk the mainline (variation 0) collecting UCI move strings.
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())

    # Replay through the environment; each dummy player records its side's
    # (state, policy) pairs via sl_action.
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation, actions[k], weight=white_weight)
        else:
            action = black.sl_action(env.observation, actions[k], weight=black_weight)
        env.step(action, False)
        k += 1

    # A decisive result without a game-over board means a resignation.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    # Attach the final outcome to each side's recorded moves.
    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Interleave white/black records in move order; white may hold one extra
    # move when the game ends on a white move.
    data = []
    for i in range(len(white.moves)):
        data.append(white.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    return env, data
# Example #2 — alternative implementation (guards missing Elo headers, returns numpy arrays)
def get_buffer(config, game) -> "tuple[ChessEnv, list, np.ndarray, np.ndarray, bool]":
    """
    Replay a PGN game and collect supervised-learning data as numpy arrays.

    :param Config config: config used to weight the recorded move policies
    :param pgn.Game game: parsed PGN game whose mainline is replayed
    :return: tuple ``(env, fen_data, moves_array, scores, ok)`` where
        ``fen_data`` is the list of position strings interleaved white/black,
        ``moves_array`` is a float16 array of shape (num_moves, labels_n) of
        policy targets, ``scores`` is an int8 array of per-move outcomes, and
        ``ok`` is False (with all other elements None) when the PGN headers
        lack Elo ratings and the game must be skipped.
    """
    env = ChessEnv().reset()
    # Dummy players only record the supplied moves; no search is run.
    white = ChessPlayer(config, dummy=True)
    black = ChessPlayer(config, dummy=True)
    result = game.headers["Result"]

    # Rare cases where elo ratings are not in the headers: skip the game.
    if "WhiteElo" not in game.headers or "BlackElo" not in game.headers:
        return None, None, None, None, False

    white_elo, black_elo = int(game.headers["WhiteElo"]), int(
        game.headers["BlackElo"])
    # Stronger players contribute sharper policy targets.
    white_weight = clip_elo_policy(config, white_elo)
    black_weight = clip_elo_policy(config, black_elo)

    # Walk the mainline (variation 0) collecting UCI move strings.
    actions = []
    while not game.is_end():
        game = game.variation(0)
        actions.append(game.move.uci())

    # Replay through the environment; each dummy player records its side's
    # (state, policy) pairs via sl_action.
    k = 0
    while not env.done and k < len(actions):
        if env.white_to_move:
            action = white.sl_action(env.observation,
                                     actions[k],
                                     weight=white_weight)
        else:
            action = black.sl_action(env.observation,
                                     actions[k],
                                     weight=black_weight)
        env.step(action, False)
        k += 1

    # A decisive result without a game-over board means a resignation.
    if not env.board.is_game_over() and result != '1/2-1/2':
        env.resigned = True
    if result == '1-0':
        env.winner = Winner.white
        black_win = -1
    elif result == '0-1':
        env.winner = Winner.black
        black_win = 1
    else:
        env.winner = Winner.draw
        black_win = 0

    # Attach the final outcome to each side's recorded moves.
    black.finish_game(black_win)
    white.finish_game(-black_win)

    # Pack the per-move records into pre-sized arrays, interleaving
    # white (even indices) and black (odd indices) in move order; white may
    # hold one extra move when the game ends on a white move.
    fen_data = []
    moves_array = np.zeros(
        (len(white.moves) + len(black.moves), white.labels_n),
        dtype=np.float16)
    scores = np.zeros((len(white.moves) + len(black.moves)), dtype=np.int8)
    for i in range(len(white.moves)):
        fen_data.append(white.moves[i][0])
        moves_array[i * 2] = white.moves[i][1]
        scores[i * 2] = white.moves[i][2]
        if i < len(black.moves):
            fen_data.append(black.moves[i][0])
            moves_array[i * 2 + 1] = black.moves[i][1]
            scores[i * 2 + 1] = black.moves[i][2]

    return env, fen_data, moves_array, scores, True