Example #1
    def __init__(self, t_prof, chief_handle):
        assert t_prof.n_seats == 2

        EvaluatorMasterBase.__init__(
            self,
            t_prof=t_prof,
            eval_env_bldr=_util.get_env_builder_lbr(t_prof=t_prof),
            chief_handle=chief_handle,
            eval_type="LBR",
            log_conf_interval=True)

        self.lbr_args = t_prof.module_args["lbr"]

        self.weights_for_eval_agent = None
        self.alive_worker_handles = None
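Example #1 passes log_conf_interval=True, so the master reports LBR results with a confidence interval rather than a bare mean winrate. A small, self-contained sketch of the usual computation over per-hand winnings; this is generic statistics, not code taken from the evaluator:

import numpy as np

def mean_and_conf_interval(winnings_per_hand, z=1.96):
    # Mean winrate and the half-width of an approximate 95% confidence interval.
    w = np.asarray(winnings_per_hand, dtype=np.float64)
    half_width = z * w.std(ddof=1) / np.sqrt(len(w))
    return w.mean(), half_width

# e.g. winnings (in chips) of 100 LBR hands played against the agent
mean, ci = mean_and_conf_interval(np.random.randn(100) * 50)
print(mean, "+/-", ci)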
Example #2
    def __init__(self, t_prof, chief_handle, eval_agent_cls):
        assert t_prof.n_seats == 2

        self.t_prof = t_prof
        self.lbr_args = t_prof.module_args["lbr"]
        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        self.check_to_round = self.lbr_args.lbr_check_to_round

        self.chief_handle = chief_handle

        self.agent = _AgentWrapper(t_prof=t_prof, lbr_args=self.lbr_args, eval_agent_cls=eval_agent_cls)

        # The eval env can have different raise sizes than the agent's env! This must be accounted for when updating the envs after the opponent acts.
        self._env = None
        self.agent_range = PokerRange(env_bldr=self._eval_env_bldr)

        assert self.check_to_round is None or (self.check_to_round in self._eval_env_bldr.rules.ALL_ROUNDS_LIST)
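The comment above self._env is the key subtlety in Example #2: the LBR eval env is built from lbr_args.lbr_bet_set, while the agent plays in an env discretized with agent_bet_set, so a raise picked in one env cannot simply be forwarded as an action index to the other. A minimal, self-contained sketch of one way to reconcile the two; the bet sets and the helper below are illustrative placeholders, not PokerRL's API:

# Illustrative raise sizes as pot fractions; the real bet sets differ.
AGENT_BET_SET = (0.5, 1.0, 2.0)  # raise sizes in the agent's discretized env
LBR_BET_SET = (1.0,)             # raise sizes in the LBR eval env

def lbr_raise_to_agent_action(lbr_raise_idx):
    # Map an LBR raise to the agent-env raise whose pot fraction is closest.
    frac = LBR_BET_SET[lbr_raise_idx]
    return min(range(len(AGENT_BET_SET)), key=lambda i: abs(AGENT_BET_SET[i] - frac))

print(lbr_raise_to_agent_action(0))  # -> 1, the pot-sized raise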
Example #3
    def __init__(self):
        t_prof = TrainingProfile(
            name="NLH_EXPLOITABILITY_PLO",
            nn_type="feedforward",
            DISTRIBUTED=False,
            CLUSTER=False,
            n_learner_actor_workers=2,  # 20 workers
            max_buffer_size_adv=1e6,
            max_buffer_size_avrg=1e6,
            export_each_net=False,
            checkpoint_freq=8,
            eval_agent_export_freq=4,  # produces GBs!

            # How many of the legal actions at the current step to branch into at random (action breadth limit)
            n_actions_traverser_samples=4,  # 3 is the default; 4 is the current max for b_2
            # The number of traversals determines how many outcomes are gathered to train the network on
            # mult = 1...4; the buffer is appended with new data at every step
            n_traversals_per_iter=30,
            # Number of mini-batch fetches and model updates per iteration
            n_batches_adv_training=801,  # 1024
            n_batches_avrg_training=2048,  # 2048
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,
            n_units_final_adv=64,

            # Mini-batch size fed to the NN at once, sampled randomly from the buffer.
            mini_batch_size_adv=512,  # 256
            mini_batch_size_avrg=512,  # 512
            init_adv_model="random",  # "random" re-initializes each iter; "last" warm-starts from the previous iteration's weights
            init_avrg_model="random",
            # use_pre_layers_avrg=False,  # shallower nets
            lr_avrg=0.001,
            game_cls=DiscretizedNLHoldem,  # PLO or DiscretizedNLHoldem
            env_bldr_cls=VanillaEnvBuilder,
            agent_bet_set=bet_sets.PL_2,
            n_seats=2,
            start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
            ),

            # Enables simplified heads-up observations; the default (False) also works for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=100,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=True,
            ),
        )

        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        stk = [10000, 10000]
        self._env = self._eval_env_bldr.get_new_env(is_evaluating=True,
                                                    stack_size=stk)
        self.t_prof = t_prof
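For context, a hedged sketch of how a profile like the one above is typically handed to training in the Deep-CFR codebase. The Driver import path, the "lbr" key in eval_methods, and the frequencies below are assumptions, not taken from this example:

# Assumes `t_prof` is the TrainingProfile constructed above.
from DeepCFR.workers.driver.Driver import Driver  # import path is an assumption

if __name__ == '__main__':
    ctrl = Driver(t_prof=t_prof,
                  eval_methods={"lbr": 15},  # evaluate with LBR every 15 iterations (key assumed)
                  n_iterations=150)
    ctrl.run()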