Example #1
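All snippets in this listing use the same set of names from the Deep-CFR / PokerRL code bases (TrainingProfile, Driver, EvalAgentDeepCFR, the game classes, bet_sets, Poker, and the LBRArgs/RLBRArgs/H2HArgs evaluator configs). A minimal import preamble, assuming the module layout of the public repositories, would look roughly like this; exact paths may differ between versions and forks, and fork- or script-specific names such as PLO, VanillaEnvBuilder, N_WORKERS, and N_LBR_WORKERS are not covered here.

# Assumed imports -- adjust to your installed Deep-CFR / PokerRL version:
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.workers.driver.Driver import Driver
from PokerRL.game import bet_sets
from PokerRL.game.Poker import Poker
from PokerRL.game.games import StandardLeduc, BigLeduc, DiscretizedNLHoldem, Flop5Holdem, LimitHoldem
from PokerRL.eval.lbr.LBRArgs import LBRArgs
from PokerRL.eval.rl_br.RLBRArgs import RLBRArgs
from PokerRL.eval.head_to_head.H2HArgs import H2HArgs
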
def new_tp(i):
    TP = TrainingProfile(name=exp_name,
                         nn_type="feedforward",

                         eval_agent_export_freq=3,
                         checkpoint_freq=3,

                         max_buffer_size_adv=1e6,
                         n_traversals_per_iter=500,
                         n_batches_adv_training=250,
                         mini_batch_size_adv=2048,

                         game_cls=StandardLeduc,


                         n_units_final_adv=64,
                         n_merge_and_table_layer_units_adv=64,
                         init_adv_model="random",  # warm start neural weights with init from last iter
                         use_pre_layers_adv=False,  # shallower nets
                         use_pre_layers_avrg=False,  # shallower nets

                         # You can specify one or both modes. Choosing both is useful to compare them.
                         eval_modes_of_algo=(
                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                         ),

                         DISTRIBUTED=True,
                         log_verbose=True,
                         rl_br_args=RLBRArgs(rlbr_bet_set=None,

                                             #n_hands_each_seat=5*i,
                                             n_workers=5,
                                             # Training
                                             DISTRIBUTED=True,
                                             n_iterations=1000*(i+1),
                                             play_n_games_per_iter=50,
                                             # The DDQN
                                             batch_size=512,
                                             )
                         #lbr_args = LBRArgs()
                         )
    return TP
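
A hypothetical way to consume this factory, patterned on the Driver calls in the later examples. Note that exp_name is assumed to be defined in the enclosing scope, and the eval_methods key "br", the iteration count, and ctrl.run() are assumptions rather than part of the original snippet.

if __name__ == '__main__':
    for i in range(3):  # e.g. three runs with progressively longer RL-BR training
        ctrl = Driver(t_prof=new_tp(i),
                      eval_methods={"br": 3},  # as in Example #5; the value is presumably the eval frequency
                      n_iterations=150)
        ctrl.run()  # assumed entry point that starts the training loop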
Example #2
    ctrl = Driver(t_prof=TrainingProfile(
#                                            name="SD-CFR_LEDUC_EXAMPLE",
#                                         nn_type="feedforward",
#                                         n_learner_actor_workers=N_WORKERS,
#                                         max_buffer_size_adv=3e6,
#                                         eval_agent_export_freq=20,  # export API to play against the agent
#                                         n_traversals_per_iter=1500,
#                                         n_batches_adv_training=750,
#                                         n_batches_avrg_training=2000,
#                                         n_merge_and_table_layer_units_adv=64,
#                                         n_merge_and_table_layer_units_avrg=64,
#                                         n_units_final_adv=64,
#                                         n_units_final_avrg=64,
#                                         mini_batch_size_adv=2048,
#                                         mini_batch_size_avrg=2048,
#                                         init_adv_model="last",
#                                         init_avrg_model="last",
#                                         use_pre_layers_adv=False,
#                                         use_pre_layers_avrg=False,

                                         eval_agent_export_freq=9999999,  # Don't export
                                         
                                         max_buffer_size_adv=3.636e5,  # 364k * 11 = ~4M
                                         max_buffer_size_avrg=3.636e5,  # 364k * 11 = ~4M
                                         
                                          n_traversals_per_iter=5,
                                         
                                         n_batches_adv_training=12,
                                         n_batches_avrg_training=100,  # trained far more than necessary
                                         n_merge_and_table_layer_units_adv=64,
                                         n_merge_and_table_layer_units_avrg=64,
                                         n_units_final_adv=64,
                                         n_units_final_avrg=64,
                                         n_cards_state_units_adv=64,
                                         n_cards_state_units_avrg=64,
                                          mini_batch_size_adv=8,
                                          mini_batch_size_avrg=16,
                                         init_adv_model="last",  # warm start neural weights with init from last iter
                                         init_avrg_model="random",
                                         use_pre_layers_adv=True,
                                         use_pre_layers_avrg=True,
                                         
                                         
                                         
                                         
                                         name="DISNLHT_DISTRIBUTED_LH_RNN",

                                         DISTRIBUTED=False,
                                         n_learner_actor_workers=N_WORKERS,

                                         nn_type="recurrent",

                                         game_cls=DiscretizedNLHoldem,
                                         agent_bet_set=bet_sets.B_5,
#                                         checkpoint_freq=1,

#                                         use_pre_layers_br=True,
#                                         use_pre_layers_avg=True,
#                                         n_units_final_br=64,
#                                         n_units_final_avg=64,
#                                         n_merge_and_table_layer_units_br=64,
#                                         n_merge_and_table_layer_units_avg=64,
#                                         rnn_units_br=64,
#                                         rnn_units_avg=64,
#                                         n_cards_state_units_br=128,
#                                         n_cards_state_units_avg=128,
#                                         
#                                         cir_buf_size_each_la=6e5 / N_WORKERS,
#                                         res_buf_size_each_la=2e6,
#                                         n_envs=128,
#                                         n_steps_per_iter_per_la=128,
#
#                                         lr_br=0.1,
#                                         lr_avg=0.01,
#
#                                         mini_batch_size_br_per_la=64,
#                                         mini_batch_size_avg_per_la=64,
#                                         n_br_updates_per_iter=1,
#                                         n_avg_updates_per_iter=1,
#
#                                         eps_start=0.08,
#                                         eps_const=0.007,
#                                         eps_exponent=0.5,
#                                         eps_min=0.0,

                                         lbr_args=LBRArgs(
                                             lbr_bet_set=bet_sets.B_5,
                                             n_lbr_hands_per_seat=8,
                                             lbr_check_to_round=Poker.TURN,
#                                             lbr_check_to_round=None,
                                             n_parallel_lbr_workers=N_LBR_WORKERS,
                                             use_gpu_for_batch_eval=False,
                                             DISTRIBUTED=True,
                                         )
                                         ),
Example #3
        t_prof=TrainingProfile(
            name="NLH_1.5m_10mX2-b2048-last-patience200-Leaky-lr0.004",
            nn_type="feedforward",
            DISTRIBUTED=False,
            CLUSTER=False,
            n_learner_actor_workers=1,
            max_buffer_size_adv=1500000,  # 1.5e6
            export_each_net=False,
            # path_strategy_nets="",
            checkpoint_freq=5,  # produces A SHITLOAD of Gbs!
            eval_agent_export_freq=1,  # produces GBs!

            # How many of the legal actions at the current step are branched into randomly = action breadth limit
            n_actions_traverser_samples=4,
            # 3 is the default, 4 is the current max for b_2
            # the number of traversals determines how many outcomes the network is trained on
            # mult = 1...4; the buffer is appended at every step with new data
            n_traversals_per_iter=3500,
            # number of mini_batch fetches and model updates on each step
            n_batches_adv_training=6000,  # 5000
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,  # 64
            n_units_final_adv=64,  # 64
            dropout_adv=0.0,
            lr_patience_adv=750,  # halve the LR (in PSWorker) if no improvement after this many batches
            lr_adv=0.004,

            # size of the batch fed to the NN at once, sampled randomly from the buffer.
            mini_batch_size_adv=10000,  # 512
            init_adv_model="last",  # last, random
            game_cls=DiscretizedNLHoldem,  # PLO or DiscretizedNLHoldem
            env_bldr_cls=VanillaEnvBuilder,
            agent_bet_set=bet_sets.PL_2,
            n_seats=2,
            start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
            ),

            # enables simplified obs. Default works also for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=1,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=False,
            ),
        ),
Example #4
    ctrl = Driver(t_prof=TrainingProfile(name="MO_LEDUC_BigLeduc_LBR",
                                         nn_type="feedforward",

                                         eval_agent_export_freq=3,
                                         checkpoint_freq=3,
                                         n_learner_actor_workers=5,

                                         max_buffer_size_adv=1e6,
                                         n_traversals_per_iter=500,
                                         n_batches_adv_training=250,
                                         mini_batch_size_adv=2048,

                                         game_cls=BigLeduc,

                                         n_units_final_adv=64,
                                         n_merge_and_table_layer_units_adv=64,
                                         init_adv_model="random",  # warm start neural weights with init from last iter
                                         use_pre_layers_adv=False,  # shallower nets
                                         use_pre_layers_avrg=False,  # shallower nets

                                         # You can specify one or both modes. Choosing both is useful to compare them.
                                         eval_modes_of_algo=(
                                             EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                                         ),

                                         DISTRIBUTED=True,
                                         log_verbose=True,
                                         rl_br_args=RLBRArgs(rlbr_bet_set=None,
                                                             n_hands_each_seat=200,
                                                             n_workers=1,
                                                             # Training
                                                             DISTRIBUTED=False,
                                                             n_iterations=100,
                                                             play_n_games_per_iter=50,
                                                             # The DDQN
                                                             batch_size=512,
                                                             ),
                                         lbr_args=LBRArgs(n_lbr_hands_per_seat=30000,
                                                          n_parallel_lbr_workers=10,
                                                          DISTRIBUTED=True,
                                                          ),
                                         ),
Example #5
if __name__ == '__main__':
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="SD-CFR_LEDUC_EXAMPLE",
            nn_type="feedforward",
            max_buffer_size_adv=3e6,
            eval_agent_export_freq=20,  # export API to play against the agent
            n_traversals_per_iter=1500,
            n_batches_adv_training=750,
            n_batches_avrg_training=2000,
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            mini_batch_size_adv=2048,
            mini_batch_size_avrg=2048,
            init_adv_model="last",
            init_avrg_model="last",
            use_pre_layers_adv=False,
            use_pre_layers_avrg=False,
            game_cls=StandardLeduc,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
            ),
            DISTRIBUTED=False,
        ),
        eval_methods={
            "br": 3,
Example #6
if __name__ == '__main__':
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="SD-CFR_LEDUC_BUF_500",
            nn_type="feedforward",
            max_buffer_size_adv=1e6,
            max_buffer_size_avrg=1e6,
            eval_agent_export_freq=999999,
            n_traversals_per_iter=1500,
            n_batches_adv_training=750,
            n_batches_avrg_training=5000,
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            mini_batch_size_adv=2048,
            mini_batch_size_avrg=2048,
            init_adv_model="last",
            init_avrg_model="random",
            use_pre_layers_adv=False,
            use_pre_layers_avrg=False,
            eval_agent_max_strat_buf_size=500,
            game_cls=StandardLeduc,
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            ),
            DISTRIBUTED=False,
            log_verbose=False,
        ),
        eval_methods={
            "br": 15,
Example #7
    def __init__(self):
        t_prof = TrainingProfile(
            name="NLH_EXPLOITABILITY_PLO",
            nn_type="feedforward",
            DISTRIBUTED=False,
            CLUSTER=False,
            n_learner_actor_workers=2,
            max_buffer_size_adv=1e6,
            max_buffer_size_avrg=1e6,
            export_each_net=False,
            checkpoint_freq=8,
            eval_agent_export_freq=4,  # produces GBs!

            # How many of the legal actions at the current step are branched into randomly = action breadth limit
            n_actions_traverser_samples=4,  # 3 is the default, 4 is the current max for b_2
            # the number of traversals determines how many outcomes the network is trained on
            # mult = 1...4; the buffer is appended at every step with new data
            n_traversals_per_iter=30,
            # number of mini_batch fetches and model updates on each step
            n_batches_adv_training=801,  # 1024
            n_batches_avrg_training=2048,  # 2048
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,
            n_units_final_adv=64,

            # size of the batch fed to the NN at once, sampled randomly from the buffer.
            mini_batch_size_adv=512,  # 256
            mini_batch_size_avrg=512,  # 512
            init_adv_model="random",  # use "last" to warm-start from the previous iteration's weights
            init_avrg_model="random",
            # use_pre_layers_avrg=False,  # shallower nets
            lr_avrg=0.001,
            game_cls=DiscretizedNLHoldem,  # PLO or DiscretizedNLHoldem
            env_bldr_cls=VanillaEnvBuilder,
            agent_bet_set=bet_sets.PL_2,
            n_seats=2,
            start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                # EvalAgentDeepCFR.EVAL_MODE_AVRG_NET
            ),

            # enables simplified obs. Default works also for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=100,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=True,
            ),
        )

        self._eval_env_bldr = _util.get_env_builder_lbr(t_prof=t_prof)
        stk = [10000, 10000]
        self._env = self._eval_env_bldr.get_new_env(is_evaluating=True,
                                                    stack_size=stk)
        self.t_prof = t_prof
Example #8
        t_prof=TrainingProfile(
            name="BIGLEDUC_EXPLOITABILITY",
            DISTRIBUTED=True,
            n_learner_actor_workers=11,
            eval_agent_export_freq=9999999,  # Don't export
            nn_type="feedforward",
            max_buffer_size_adv=3.636e5,  # 364k * 11 = ~4M
            max_buffer_size_avrg=3.636e5,  # 364k * 11 = ~4M

            # longer action sequences than FHP -> more samples/iter because external sampling.
            n_traversals_per_iter=800,  # 800 * 11 = 8,800
            n_batches_adv_training=1200,
            n_batches_avrg_training=10000,  # trained far more than necessary
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            n_cards_state_units_adv=64,
            n_cards_state_units_avrg=64,
            mini_batch_size_adv=256,  # 256 * 11 = 2,816
            mini_batch_size_avrg=512,  # 512 * 11 = 5,632
            init_adv_model="last",  # warm start neural weights with init from last iter
            init_avrg_model="random",
            use_pre_layers_adv=True,
            use_pre_layers_avrg=True,
            game_cls=BigLeduc,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
            ),
            h2h_args=H2HArgs(n_hands=500000),
            log_verbose=False,
        ),
Example #9
        t_prof=TrainingProfile(
            name="EXPERIMENT_SD-CFR_vs_Deep-CFR_FHP",
            nn_type="feedforward",  # We also support RNNs, but the paper uses FF
            DISTRIBUTED=True,
            CLUSTER=False,
            n_learner_actor_workers=20,  # 20 workers

            # regulate exports
            export_each_net=False,
            checkpoint_freq=99999999,
            eval_agent_export_freq=1,  # produces around 15GB over 150 iterations!
            n_actions_traverser_samples=3,  # = external sampling in FHP
            n_traversals_per_iter=15000,
            n_batches_adv_training=4000,
            mini_batch_size_adv=512,  # *20=10240
            init_adv_model="random",
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,
            n_units_final_adv=64,
            max_buffer_size_adv=2e6,  # *20 LAs = 40M
            lr_adv=0.001,
            lr_patience_adv=99999999,  # No lr decay
            n_batches_avrg_training=20000,
            mini_batch_size_avrg=1024,  # *20=20480
            init_avrg_model="random",
            use_pre_layers_avrg=True,
            n_cards_state_units_avrg=192,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_avrg=64,
            max_buffer_size_avrg=2e6,
            lr_avrg=0.001,
            lr_patience_avrg=99999999,  # No lr decay

            # With the H2H evaluator, these two are evaluated against each other.
            eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,
                                EvalAgentDeepCFR.EVAL_MODE_SINGLE),
            log_verbose=True,
            game_cls=Flop5Holdem,

            # enables simplified obs. Default works also for 3+ players
            use_simplified_headsup_obs=True,
            h2h_args=H2HArgs(
                n_hands=1500000,  # this is per seat; so in total 3M hands per eval
            ),
        ),
Example #10
        t_prof=TrainingProfile(
            name="MO_HULH_1",
            nn_type="feedforward",  # We also support RNNs, but the paper uses FF
            DISTRIBUTED=True,
            CLUSTER=False,
            n_learner_actor_workers=40,

            # regulate exports
            export_each_net=False,
            checkpoint_freq=99999999,
            eval_agent_export_freq=5,
            n_actions_traverser_samples=3,  # = external sampling in FHP
            n_traversals_per_iter=500,
            n_batches_adv_training=2000,
            mini_batch_size_adv=512,  # *20=10240
            init_adv_model="random",
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,
            n_units_final_adv=64,
            max_buffer_size_adv=2e6,  # *20 LAs = 40M
            lr_adv=0.001,
            lr_patience_adv=99999999,  # No lr decay

            # With the H2H evaluator, these two are evaluated against each other.
            eval_modes_of_algo=(EvalAgentDeepCFR.EVAL_MODE_SINGLE, ),
            log_verbose=True,
            game_cls=LimitHoldem,

            # enables simplified obs. Default works also for 3+ players
            use_simplified_headsup_obs=True,
        ),
Example #11
        t_prof=TrainingProfile(
            name="Hanul_EXAMPLE",
            nn_type="recurrent",
            max_buffer_size_adv=3e6,
            eval_agent_export_freq=20,  # export API to play against the agent
            n_traversals_per_iter=200,
            n_batches_adv_training=8,
            n_batches_avrg_training=2000,
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            mini_batch_size_adv=16,
            mini_batch_size_avrg=16,
            init_adv_model="last",
            init_avrg_model="last",
            use_pre_layers_adv=False,
            use_pre_layers_avrg=False,
            game_cls=DiscretizedNLHoldem,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.B_5,
                n_lbr_hands_per_seat=80,
                lbr_check_to_round=Poker.TURN,
                n_parallel_lbr_workers=N_LBR_WORKERS,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=True,
            ),

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
            ),
            DISTRIBUTED=False,
        ),
Example #12
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="LEDUC_EXPLOITABILITY",
            nn_type="feedforward",
            max_buffer_size_adv=1e6,
            max_buffer_size_avrg=1e6,
            eval_agent_export_freq=999999,  # Don't export
            n_traversals_per_iter=1500,
            n_batches_adv_training=750,
            n_batches_avrg_training=5000,
            n_merge_and_table_layer_units_adv=64,
            n_merge_and_table_layer_units_avrg=64,
            n_units_final_adv=64,
            n_units_final_avrg=64,
            mini_batch_size_adv=2048,
            mini_batch_size_avrg=2048,
            init_adv_model="last",  # warm start neural weights with init from last iter
            init_avrg_model="random",
            use_pre_layers_adv=False,  # shallower nets
            use_pre_layers_avrg=False,  # shallower nets
            game_cls=StandardLeduc,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
                EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
            ),
            DISTRIBUTED=False,
            log_verbose=False,
        ),
        eval_methods={
Example #13
        t_prof=TrainingProfile(
            name="PLO_3m_7.5mX14-b5000-last-patience350-Leaky-lr0.004-dense_residual",
            nn_type="dense_residual",
            DISTRIBUTED=True,
            CLUSTER=False,
            n_learner_actor_workers=14,  # 14 workers
            max_buffer_size_adv=3000000,  # 3e6
            export_each_net=False,
            # path_strategy_nets="",
            checkpoint_freq=9999,  # produces A SHITLOAD of Gbs!
            eval_agent_export_freq=1,

            # How many actions out of all legal on current step to branch randomly
            # = action breadth limit
            # 3 is the default, 4 is the current max for b_2
            n_actions_traverser_samples=4,
            # the number of traversals equals the number of entries that will be added
            # to the adv buffer
            n_traversals_per_iter=150000,
            # number of mini_batch fetches and model updates on each iteration
            n_batches_adv_training=1500,
            max_n_las_sync_simultaneously=20,
            use_pre_layers_adv=True,
            n_cards_state_units_adv=192,
            n_merge_and_table_layer_units_adv=64,  # 64
            n_units_final_adv=64,  # 64
            lr_patience_adv=350,  # decrease LR by a factor of 0.5 (in PSWorker)
            lr_adv=0.004,

            # size of batch to feed to NN at once, fetched from buffer randomly.
            mini_batch_size_adv=5000,
            init_adv_model="last",  # last, random
            game_cls=PLO,  # PLO or DiscretizedNLHoldem
            env_bldr_cls=VanillaEnvBuilder,
            agent_bet_set=bet_sets.PL_2,
            n_seats=2,
            start_chips=10000,

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            ),

            # enables simplified obs. Default works also for 3+ players
            use_simplified_headsup_obs=True,
            log_verbose=True,
            lbr_args=LBRArgs(
                lbr_bet_set=bet_sets.PL_2,
                n_lbr_hands_per_seat=1,
                lbr_check_to_round=Poker.TURN,
                # recommended to set to Poker.TURN for 4-round games.
                n_parallel_lbr_workers=1,
                use_gpu_for_batch_eval=False,
                DISTRIBUTED=False,
            ),
        ),
Example #14
import pdb
if __name__ == '__main__':
    ctrl = Driver(
        t_prof=TrainingProfile(
            name="MO_LEDUC_EXPLOITABILITY",
            nn_type="feedforward",
            n_learner_actor_workers=5,
            eval_agent_export_freq=3,
            checkpoint_freq=1,
            max_buffer_size_adv=1e6,
            n_traversals_per_iter=500,
            n_batches_adv_training=250,
            mini_batch_size_adv=2048,
            game_cls=StandardLeduc,
            n_units_final_adv=64,
            n_merge_and_table_layer_units_adv=64,
            init_adv_model="random",  # use "last" to warm-start from the previous iteration's weights
            use_pre_layers_adv=False,  # shallower nets
            use_pre_layers_avrg=False,  # shallower nets

            # You can specify one or both modes. Choosing both is useful to compare them.
            eval_modes_of_algo=(
                EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
            ),
            DISTRIBUTED=False,
            log_verbose=True,
        ),
        eval_methods={},
        n_iterations=4,
        name_to_import='MO_LEDUC_EXPLOITABILITY',
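
The listing cuts Example #14 off after the name_to_import argument; closing the call and starting the run would presumably look like the following (ctrl.run() is an assumption about the Driver API, not part of the scraped snippet).

        # ... (any remaining Driver arguments are cut off in the listing)
    )
    ctrl.run()  # assumed: starts the training / evaluation loop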