Example #1
def eval_latest(env,seed,nS,nA,nB,training_params,network_params,duplicate_decks):
    device = network_params['device']
    weight_paths = load_paths(training_params['actor_path'])
    model_names = list(weight_paths.keys())
    model_names.sort(key=lambda l: int(grep(r"\d+", l)))
    latest_actor = model_names[-1]
    latest_net = OmahaActor(seed,nS,nA,nB,network_params).to(device)
    load_weights(latest_net,weight_paths[latest_actor])
    # Build matchups
    last_n_models = min(len(model_names),3)
    matchups = [(latest_actor,model) for model in model_names[-last_n_models:-1]]
    # create array to store results
    result_array = np.zeros(len(matchups))
    data_row_dict = {model:i for i,model in enumerate(model_names[-last_n_models:-1])}
    for match in matchups:
        net2 = OmahaActor(seed,nS,nA,nB,network_params).to(device)
        net2_path = weight_paths[match[1]]
        load_weights(net2,net2_path)
        results,stats = tournament(env,latest_net,net2,match,training_params,duplicate_decks)
        result_array[data_row_dict[match[1]]] = (
            (results[match[0]]['SB'] + results[match[0]]['BB'])
            - (results[match[1]]['SB'] + results[match[1]]['BB'])
        )
        print_stats(stats)
    # Create Results Table
    table = PrettyTable(["Model Name", *model_names[-last_n_models:-1]])
    table.add_row([latest_actor,*result_array])
    print(table)
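The sort above relies on two small project helpers that these examples never define: `grep`, which pulls the first integer out of a checkpoint name, and `load_paths`, which maps checkpoint names to file paths. A minimal sketch of what they presumably do, inferred purely from how they are called here (the bodies are assumptions, not the repository's actual code):

import os
import re

def grep(pattern, string):
    # Return the first substring of `string` matching `pattern`; used above
    # to extract the epoch number embedded in a checkpoint name.
    match = re.search(pattern, string)
    return match.group(0) if match else ''

def load_paths(folder):
    # Map each checkpoint file name in `folder` to its full path.
    return {name: os.path.join(folder, name) for name in os.listdir(folder)}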
Example #2
def load_villain(seed,nS,nA,nB,network_params,device,baseline_path):
    baseline_path = return_latest_baseline_path(baseline_path)
    if baseline_path:
        villain = OmahaActor(seed,nS,nA,nB,network_params).to(device)
        load_weights(villain,baseline_path)
    else:
        villain = BetAgent()
    return villain
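A hypothetical call site, with `seed`, `nS`, `nA`, `nB`, `network_params`, and `device` defined as in the other examples and `config.baseline_path` borrowed from Example #6:

# Falls back to the hardcoded BetAgent when no baseline checkpoint exists yet.
villain = load_villain(seed, nS, nA, nB, network_params, device,
                       config.baseline_path)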
Example #3
def load_villain(rank, network_params, baseline_path):
    baseline_path = return_latest_baseline_path(baseline_path)
    if baseline_path:
        seed = network_params['seed']
        nS = network_params['nS']
        nA = network_params['nA']
        nB = network_params['nB']
        villain = OmahaActor(seed, nS, nA, nB, network_params).to(rank)
        load_weights(villain, baseline_path)
    else:
        villain = BetAgent()
    return villain
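This variant differs from Example #2 in two ways: it takes a process `rank` instead of a `device`, as is typical for DistributedDataParallel workers where the rank doubles as the GPU index, and it reads the network dimensions out of `network_params` rather than receiving them as arguments. A hypothetical call from inside a spawned worker:

# Each worker process loads its own villain copy onto GPU `rank`.
villain = load_villain(rank, network_params, config.baseline_path)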
Example #4
                 e + 1) * training_params['training_epochs']
     # save weights
     torch.save(alphaPoker.state_dict(),
                os.path.join(path, 'OmahaCombinedFinal'))
     print(
         f'Saved model weights to {os.path.join(path,"OmahaCombinedFinal")}'
     )
 elif args.network_type == 'dual':
     actor = OmahaActor(seed, nS, nA, nB, network_params).to(device)
     critic = OmahaObsQCritic(seed, nS, nA, nB, network_params).to(device)
     if args.resume:
         latest_actor_path = return_latest_training_model_path(
             training_params['actor_path'])
         latest_critic_path = return_latest_training_model_path(
             training_params['critic_path'])
         load_weights(actor, latest_actor_path, 0, ddp=False)
         load_weights(critic, latest_critic_path, 0, ddp=False)
     elif args.frozen:
         # Load pretrained hand recognizer
         copy_weights(actor, network_params['actor_hand_recognizer_path'])
         copy_weights(critic, network_params['critic_hand_recognizer_path'])
     actor.summary()
     critic.summary()
     target_actor = OmahaActor(seed, nS, nA, nB, network_params).to(device)
     target_critic = OmahaObsQCritic(seed, nS, nA, nB,
                                     network_params).to(device)
     hard_update(actor, target_actor)
     hard_update(critic, target_critic)
     actor_optimizer = optim.Adam(actor.parameters(),
                                  lr=config.agent_params['actor_lr'],
                                  weight_decay=config.agent_params['L2'])
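`hard_update` initializes each target network as an exact copy of its online counterpart before training begins. A minimal sketch of the conventional implementation (an assumption; the repository's version may differ):

def hard_update(source, target):
    # Overwrite every target parameter with its source counterpart.
    for src_param, tgt_param in zip(source.parameters(), target.parameters()):
        tgt_param.data.copy_(src_param.data)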
Example #5
     # load all file paths
     weight_paths = load_paths(training_params['actor_path'])
     print('weight_paths',weight_paths)
     # all combinations
     model_names = list(weight_paths.keys())
     model_names.sort(key=lambda l: int(grep(r"\d+", l)))
     matchups = list(combinations(model_names,2))
     # create array to store results
     result_array = np.zeros((len(model_names),len(model_names)))
     data_row_dict = {model:i for i,model in enumerate(model_names)}
     for match in matchups:
         net1 = OmahaActor(seed,nS,nA,nB,network_params).to(device)
         net2 = OmahaActor(seed,nS,nA,nB,network_params).to(device)
         net1_path = weight_paths[match[0]]
         net2_path = weight_paths[match[1]]
         load_weights(net1,net1_path)
         load_weights(net2,net2_path)
         results,stats = tournament(env,net1,net2,match,training_params,duplicate_decks)
         result_array[data_row_dict[match[0]], data_row_dict[match[1]]] = (
             results[match[0]]['SB'] + results[match[0]]['BB'])
         result_array[data_row_dict[match[1]], data_row_dict[match[0]]] = (
             results[match[1]]['SB'] + results[match[1]]['BB'])
     # Create Results Table
     table = PrettyTable(["Model Name", *model_names])
     for i,model in enumerate(model_names):
         row = list(result_array[i])
         row[i] = 'x'
         table.add_row([model,*row])
     print(table)
 else:
     print(f'Evaluating {model_name}, from {os.path.join(training_params["actor_path"],model_name)}')
     load_weights(trained_model,os.path.join(training_params['actor_path'],model_name))
     if args.baseline == 'hardcoded':
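In the round-robin table above, each off-diagonal cell holds the row model's combined small-blind and big-blind winnings against the column model, and the diagonal is masked with 'x' since a model never plays itself. A toy rendering with made-up model names and winnings, just to show the layout:

import numpy as np
from prettytable import PrettyTable

model_names = ['OmahaActor_1', 'OmahaActor_2']      # hypothetical names
result_array = np.array([[0.0, 1.5], [-1.5, 0.0]])  # made-up winnings
table = PrettyTable(["Model Name", *model_names])
for i, model in enumerate(model_names):
    row = list(result_array[i])
    row[i] = 'x'  # mask the self-play cell
    table.add_row([model, *row])
print(table)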
Example #6
     print(table)
 else:
     print(
         f'Evaluating {model_name}, from {os.path.join(training_params["actor_path"],model_name)}'
     )
     trained_model.load_state_dict(
         torch.load(os.path.join(training_params['actor_path'],
                                 model_name)))
     if args.baseline == 'hardcoded':
         baseline_evaluation = BetAgent()
     else:
         baseline_evaluation = OmahaActor(seed, nS, nA, nB,
                                          network_params).to(device)
          # Get latest baseline
          baseline_path = return_latest_baseline_path(config.baseline_path)
          print('baseline_path', baseline_path)
          load_weights(baseline_evaluation, baseline_path)
     model_names = ['baseline_evaluation', 'trained_model']
     results, stats = tournament(env, baseline_evaluation, trained_model,
                                 model_names, training_params)
     print(results)
     for model, data in stats.items():
         print(model)
         table = PrettyTable([
             'Street', 'Hand Category', 'Check', 'Fold', 'Call', 'Bet',
             'Raise', 'Hand Counts'
         ])
         for street in tuple(data.keys()):
             values = data[street]
             if street == pdt.StreetStrs.RIVER:
                 counts = values['counts']