def objective(hypers):
    """Run one NFSP training session and return its largest exploitability.

    Args:
        hypers: Hyperparameters forwarded unchanged to ``nfsp``.

    Returns:
        The maximum over the values of the exploitabilities mapping that
        ``nfsp`` returns.
    """
    # Reset TensorFlow's default graph before the game is constructed.
    tf.reset_default_graph()

    deck = get_deck(num_values=args.num_values, num_suits=args.num_suits)
    game = LeducNFSP(deck)
    _, exploitabilities = nfsp(
        game, hypers, max_train_steps=args.max_train_steps)

    largest = max(exploitabilities.values())
    print("OBJECTIVE. Exploitabilities: {}, Value: {}".format(
        exploitabilities, largest))
    return largest
def run_nfsp(hypers_list):
    """Run NFSP once per hyperparameter setting and collect the results.

    Args:
        hypers_list: Iterable of hyperparameter settings; each one is
            forwarded unchanged to ``nfsp``.

    Returns:
        A tuple ``(hypers_list, exploitabilities_list)`` where the second
        element holds, in run order, the exploitabilities returned by each
        ``nfsp`` call.
    """
    collected = []
    for run_index, hypers in enumerate(hypers_list):
        print("NFSP run {}".format(run_index))
        print("Using hyperparameters: {}".format(hypers))
        print("Training for {} steps".format(args.max_train_steps))

        # Each run starts with a freshly reset default graph.
        tf.reset_default_graph()

        deck = get_deck(num_values=args.num_values, num_suits=args.num_suits)
        game = LeducNFSP(deck)
        _, run_exploitabilities = nfsp(
            game, hypers, max_train_steps=args.max_train_steps)
        collected.append(run_exploitabilities)

    return hypers_list, collected
# Deck-size options; the deck is only built in the Leduc branch below.
parser.add_argument(
    '--num_values',
    default=3,
    type=int,
    help='In OneCardPoker or Leduc, pass the number of cards to use.')
parser.add_argument(
    '--num_suits',
    default=2,
    type=int,
    help='In Leduc, pass the number of suits to use.')
args = parser.parse_args()

# Build the neural game selected by --game.
if args.game == 'Leduc':
    print("Solving Leduc Hold'em")
    cards = card.get_deck(num_values=args.num_values, num_suits=args.num_suits)
    n_game = leduc.create_neural_leduc(cards)
elif args.game == 'RockPaperScissors':
    print("Solving rock paper scissors")
    n_game = rock_paper_scissors.create_neural_rock_paper_scissors()
else:
    # Without this branch n_game would stay unbound and the deep_cfr call
    # below would fail with a NameError that hides the real cause.
    parser.error(
        "Unsupported game {!r}; expected 'Leduc' or "
        "'RockPaperScissors'.".format(args.game))

# Run Deep CFR on the selected game using the command-line settings.
strategy, exploitabilities = deep_cfr.deep_cfr(
    n_game,
    num_iters=args.num_iters,
    num_traversals=args.num_traversals,
    advantage_maxlen=args.advantage_maxlen,
    strategy_maxlen=args.strategy_maxlen,
    batch_size=args.batch_size,
    num_sgd_updates=args.num_sgd_updates)

# Compute the exploitability of the returned strategy on the underlying
# extensive-form game.
exploitability = compute_exploitability(n_game.extensive_game, strategy)