start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
            ),

            # enables simplified heads-up observations; the default observation format also works for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=1,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=False,
            ),
        ),
        eval_methods={
            "lbr": 99,  # lbr, br, h2h
        },
        n_iterations=64)
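    # Hedged sketch (assumption: the cut-off Driver(...) call above is what produces `ctrl`);
    # as Example #3 below shows, training is started with:
    ctrl.run()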

    # pick one particular pair of aces and compute its equity for player 0 on the chosen round (here the turn)
    hand = np.array([[12, 2], [12, 0]])
    player = 0
    g_round = Poker.TURN
    eq = hand_and_round_eq(hand=hand, round=g_round, ctrl=ctrl)
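    # Hedged sketch (assumption: PokerRL encodes a card as [rank, suit] with ranks 0..12,
    # 12 = Ace, and suits 0..3, so `hand` above is a pair of aces). The same lookup can be
    # repeated for every ace combination:
    from itertools import combinations
    ACE = 12
    for s1, s2 in combinations(range(4), 2):
        aces = np.array([[ACE, s1], [ACE, s2]])
        print(s1, s2, hand_and_round_eq(hand=aces, round=g_round, ctrl=ctrl))
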
    def __init__(self):
        t_prof = TrainingProfile(
            name="NLH_EXPLOITABILITY_PLO",
            nn_type="feedforward",
            DISTRIBUTED=False,
            CLUSTER=False,
            n_learner_actor_workers=2,  # 20 workers
            max_buffer_size_adv=1e6,
            max_buffer_size_avrg=1e6,
            export_each_net=False,
            checkpoint_freq=8,
            eval_agent_export_freq=4,  # produces GBs!

            # how many of the legal actions at the current step are branched into randomly (action breadth limit)
            n_actions_traverser_samples=4,  # 3 is the default, 4 is the current max for b_2
            # the number of traversals determines how many outcomes the networks are trained on;
            # the buffers are appended with new data at every step (mult = 1...4)
            n_traversals_per_iter=30,
            # number of mini_batch fetches and model updates on each step
            n_batches_adv_training=801,  # 1024
            n_batches_avrg_training=2048,  # 2048
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,
            n_units_final_adv=64,

            # size of each mini-batch fed to the NN at once, sampled randomly from the buffer.
            mini_batch_size_adv=512,  # 256
            mini_batch_size_avrg=512,  # 512
            init_adv_model="random",  # "random" re-initializes each iteration; "last" warm-starts from the previous iteration's weights
            init_avrg_model="random",
            # use_pre_layers_avrg=False,  # shallower nets
            lr_avrg=0.001,
            game_cls=DiscretizedNLHoldem,  # PLO or DiscretizedNLHoldem
            env_bldr_cls=VanillaEnvBuilder,
            agent_bet_set=bet_sets.PL_2,
            n_seats=2,
            start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
            ),

            # enables simplified heads-up observations; the default observation format also works for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=100,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=True,
            ),
        )

        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        stk = [10000, 10000]
        self._env = self._eval_env_bldr.get_new_env(is_evaluating=True,
                                                    stack_size=stk)
        self.t_prof = t_prof
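
    # Hedged usage sketch (assumptions: PokerRL envs expose a gym-style reset()/step() API
    # and a get_legal_actions() helper; the method name below is hypothetical, not part of
    # the original example). Shows how the evaluation env built above could be driven.
    def play_one_hand_randomly(self):
        self._env.reset()                                # deal a new hand
        done = False
        rews = None
        while not done:
            a = self._env.get_legal_actions()[0]         # always take the first legal action
            obs, rews, done, info = self._env.step(a)
        return rews                                      # per-seat winnings for the hand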
Example #3
                                         # You can specify one or both modes. Choosing both is useful to compare them.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                                         ),

                                         DISTRIBUTED=True,
                                         log_verbose=True,
                                         rl_br_args=RLBRArgs(rlbr_bet_set=None,
                                                             n_hands_each_seat=200,
                                                             n_workers=1,
                                                             # Training
                                                             DISTRIBUTED=False,
                                                             n_iterations=100,
                                                             play_n_games_per_iter=50,
                                                             # The DDQN
                                                             batch_size=512,
                                                             ),
                                         lbr_args=LBRArgs(n_lbr_hands_per_seat=30000,
                                                          n_parallel_lbr_workers=10,
                                                          DISTRIBUTED=True,
                                                          ),
                                         ),
                  eval_methods={'br': 1,
                                #'rlbr': 1,
                                'lbr': 1,
                  },
                  n_iterations=12)
    ctrl.run()
    pdb.set_trace()
Example #4
# from PokerRL.eval.lbr.LBRArgs import LBRArgs
# agent_prof.module_args['lbr'] = LBRArgs()
# lbr_chief = Chief(t_prof=agent_prof)
# eval_master = LBRMaster(t_prof=agent_to_eval.t_prof,
#                                chief_handle=lbr_chief)
# num_workers = 3
# LBR_workers = [LBRWorker(t_prof=agent_prof, chief_handle=lbr_chief, eval_agent_cls=EvalAgentDeepCFR) for _ in range(num_workers)]
# eval_master.set_worker_handles(*LBR_workers)
# eval_master.evaluate(0)

#-------- Driver Approach
from DeepCFR.workers.driver.Driver import Driver
from PokerRL.eval.lbr.LBRArgs import LBRArgs
agent_prof.module_args['lbr'] = LBRArgs()
ctrl = Driver(agent_prof, eval_methods={'lbr': 1})
ctrl.chief_handle._strategy_buffers = agent_to_eval._strategy_buffers
w, info = ctrl.chief_handle._pull_current_eval_strategy()

ctrl.eval_masters['lbr'][0]._eval_agent = agent_to_eval
ctrl.eval_masters['lbr'][0].weights_for_eval_agent = w
ctrl.eval_masters['lbr'][0].evaluate(0)

# ctrl.eval_masters['br'][0]._eval_agent = agent_to_eval
# ctrl.eval_masters['br'][0].evaluate(0)

# agent_prof.module_args['lbr'] = LBRArgs()
# ctrl = Driver(agent_prof, eval_methods={'lbr': 1})
#
# ctrl.eval_masters['lbr'][0]._eval_agent = agent_to_eval