def get_agent(num_filters: int) -> AlphaZeroArgMaxAgent:
    """Build an argmax AlphaZero agent from a pretrained 4-block dual ResNet with `num_filters` filters."""
    path_to_weights = os.path.join('pretrained', f'ttt_4blocks_{num_filters}filters.pth')
    config['num_resnet_filters'] = num_filters
    config['value_head_hidden_dim'] = num_filters
    net = dual_resnet(game, config)
    net.load_state_dict(torch.load(path_to_weights))
    net.eval()  # inference only: the weights are pretrained
    return AlphaZeroArgMaxAgent(game, state_encoder, net, config)
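
A minimal usage sketch of this variant; which filter widths actually have checkpoints under pretrained/ is an assumption:

# Hypothetical call sites: the available widths (64, 128) are assumed, not confirmed.
agent_small = get_agent(64)    # loads pretrained/ttt_4blocks_64filters.pth
agent_large = get_agent(128)   # loads pretrained/ttt_4blocks_128filters.pth
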
def get_agent(model_name: str) -> AlphaZeroArgMaxAgent:
    """Build an agent from a named pretrained checkpoint: names starting with 'dualres'
    select the 4-block dual_resnet architecture, everything else the 8-block resnet."""
    path_to_weights = os.path.join('pretrained', f'ttt_{model_name}.pth')
    if model_name.startswith('dualres'):
        config['num_res_blocks'] = 4
        net = dual_resnet(game, config)
    else:
        config['num_res_blocks'] = 8
        net = resnet(game, config)
    net.load_state_dict(torch.load(path_to_weights))
    net.eval()  # inference only: the weights are pretrained
    return AlphaZeroArgMaxAgent(game, state_encoder, net, config)
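
A sketch of how the name-based dispatch might be called; 'dualres_comp' matches the checkpoint used by the play script below, while the second name is purely hypothetical:

dual_agent = get_agent('dualres_comp')    # 'dualres' prefix: dual_resnet with 4 blocks
other_agent = get_agent('plain8_comp')    # hypothetical name: resnet with 8 blocks
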
Example 3
def get_agent(num_blocks: int) -> AlphaZeroArgMaxAgent:
    """Build an agent from a pretrained 256-filter dual ResNet with `num_blocks` residual blocks."""
    path_to_weights = os.path.join('pretrained', f'ttt_{num_blocks}blocks_256filters.pth')
    config['num_res_blocks'] = num_blocks
    net = dual_resnet(game, config)
    net.load_state_dict(torch.load(path_to_weights))
    net.eval()  # inference only: the weights are pretrained
    return AlphaZeroArgMaxAgent(game, state_encoder, net, config)
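
As with the other variants, a hedged usage sketch; which block counts actually have 256-filter checkpoints is an assumption:

# Illustrative depth sweep over assumed checkpoints.
agents_by_depth = {n: get_agent(n) for n in (4, 8)}
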
Example 4
# The opening `with open(...)` line is not part of this excerpt; 'config.yaml' is an assumed filename.
with open('config.yaml') as f:
    config = yaml.safe_load(f)

config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'


def read_move(player: TicTacToePlayer) -> TicTacToeMove:
    """Ask the human player for a move entered as two whitespace-separated integers, e.g. `1 2`."""
    x, y = input(f"{player.name} move: ").split()
    x, y = int(x), int(y)
    return TicTacToeMove(x, y)
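
Since input() accepts arbitrary text, a slightly more defensive reader can be sketched; this is not part of the original script and assumes TicTacToeMove takes the same two integer coordinates:

def read_move_safe(player: TicTacToePlayer) -> TicTacToeMove:
    # Re-prompt until the player enters exactly two integers.
    while True:
        tokens = input(f"{player.name} move: ").split()
        if len(tokens) == 2 and all(t.lstrip('-').isdigit() for t in tokens):
            return TicTacToeMove(int(tokens[0]), int(tokens[1]))
        print("Enter the move as two integers, e.g. '1 2'.")
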


if __name__ == '__main__':
    game = TicTacToeGame(config['game_size'])
    state_encoder = TicTacToeStateEncoder(config['device'])

    # Policy/value network plus the tree search that will query it during play.
    net = dual_resnet(game, config)
    mcts = MonteCarloTreeSearch(game=game,
                                state_encoder=state_encoder,
                                nn=net,
                                config=config)

    # Load the pretrained weights and switch the network to inference mode.
    net.load_state_dict(
        torch.load(os.path.join('pretrained', 'ttt_dualres_comp.pth')))
    # net.load_state_dict(torch.load(os.path.join(config['log_dir'], 'best.pth')))
    net.eval()
    agent = AlphaZeroArgMaxAgent(game, state_encoder, net, config)

    # Randomly assign the agent to X or O.
    agent_role = random.choice([TicTacToePlayer.X, TicTacToePlayer.O])

    while not game.is_over:
        game.show_board()