示例#1
0
文件: nfsp.py 项目: Michael-Z/rlpoker
 def objective(hypers):
     """Run one NFSP training session with ``hypers`` and score it.

     The score is the largest value found in the exploitabilities mapping
     returned by ``nfsp``.

     Args:
         hypers: Hyperparameters forwarded unchanged to ``nfsp``.

     Returns:
         The maximum of ``exploitabilities.values()``.
     """
     # Reset the default graph before building the next model (presumably so
     # repeated calls do not share graph state -- confirm against the caller).
     tf.reset_default_graph()

     deck = get_deck(num_values=args.num_values, num_suits=args.num_suits)
     game = LeducNFSP(deck)
     _unused, exploitabilities = nfsp(
         game, hypers, max_train_steps=args.max_train_steps)

     worst = max(exploitabilities.values())

     report = "OBJECTIVE. Exploitabilities: {}, Value: {}"
     print(report.format(exploitabilities, worst))
     return worst
示例#2
0
文件: nfsp.py 项目: Michael-Z/rlpoker
def run_nfsp(hypers_list):
    """Run NFSP once per hyperparameter setting and collect the results.

    For every entry in ``hypers_list`` a short progress report is printed,
    the default graph is reset, a fresh Leduc game is built from the
    command-line deck settings, and ``nfsp`` is run on it.

    Args:
        hypers_list: Iterable of hyperparameter settings, each passed
            unchanged to ``nfsp``.

    Returns:
        A pair ``(hypers_list, exploitabilities_list)`` where the second
        item holds the exploitabilities returned by each run, in run order.
    """
    collected = []

    for run_number, run_hypers in enumerate(hypers_list):
        # Progress report for this run.
        print("NFSP run {}".format(run_number))
        print("Using hyperparameters: {}".format(run_hypers))
        print("Training for {} steps".format(args.max_train_steps))

        # Reset the default graph before each run's model is built.
        tf.reset_default_graph()

        deck = get_deck(num_values=args.num_values, num_suits=args.num_suits)
        game = LeducNFSP(deck)
        _unused, run_exploitabilities = nfsp(
            game, run_hypers, max_train_steps=args.max_train_steps)
        collected.append(run_exploitabilities)

    return hypers_list, collected
示例#3
0
    # Register --num_values: integer option, default 3.
    parser.add_argument(
        '--num_values',
        default=3,
        type=int,
        help='In OneCardPoker or Leduc, pass the number of cards to use.')
    # Register --num_suits: integer option, default 2.
    parser.add_argument('--num_suits',
                        default=2,
                        type=int,
                        help='In Leduc, pass the number of suits to use.')

    args = parser.parse_args()

    # Build the game object selected by --game.
    # NOTE(review): there is no fallback branch here. Unless n_game is
    # assigned earlier (outside this excerpt) or the parser restricts --game
    # to these two values, any other value leaves n_game unbound and the
    # deep_cfr call below raises NameError -- confirm.
    if args.game == 'Leduc':
        print("Solving Leduc Hold'em")
        # The deck is sized by the --num_values / --num_suits options.
        cards = card.get_deck(num_values=args.num_values,
                              num_suits=args.num_suits)
        n_game = leduc.create_neural_leduc(cards)
    elif args.game == 'RockPaperScissors':
        print("Solving rock paper scissors")
        n_game = rock_paper_scissors.create_neural_rock_paper_scissors()

    # Run Deep CFR on the chosen game with the command-line settings; the
    # call returns a strategy together with exploitabilities.
    strategy, exploitabilities = deep_cfr.deep_cfr(
        n_game,
        num_iters=args.num_iters,
        num_traversals=args.num_traversals,
        advantage_maxlen=args.advantage_maxlen,
        strategy_maxlen=args.strategy_maxlen,
        batch_size=args.batch_size,
        num_sgd_updates=args.num_sgd_updates)

    # Evaluate the returned strategy against the game's extensive_game.
    exploitability = compute_exploitability(n_game.extensive_game, strategy)