# NOTE(review): this chunk begins mid-call; the `ray.init(` opener and any
# leading arguments are outside the visible source -- TODO confirm against
# the original script.
ray.init(
    ignore_reinit_error=True,
    logging_level=logging.INFO,
    # NOTE(review): os.getenv returns a *string* when the var is set, so any
    # non-empty value (even "0") is truthy here -- presumably intentional,
    # verify.
    log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


def select_policy(agent_id):
    """Map an agent id to its policy id.

    The best-response player trains; every other agent plays the frozen
    average policy.
    """
    if agent_id == br_player:
        return "best_response"
    return "average_policy"


# Model config reused so the loaded average-policy checkpoint matches the
# architecture it was trained with.
avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
    tmp_env)["model"]

# Frozen average-policy checkpoint for player 0 (1000-move Oshi-Zumo NFSP run).
player_0_avg_pol_spec = StrategySpec.from_json_file(
    "/home/jblanier/git/grl/grl/data/1000_oshi_zumo_nfsp_larger_dqn_larger_sparse_10.56.11PM_Mar-24-20217lav0isx/avg_policy_checkpoint_specs/average_policy_player_0_iter_53000.json"
)


class HyperParamSearchCallbacks(DefaultCallbacks):
    """RLlib callbacks that lazily load the frozen average policy onto each
    rollout worker the first time it starts an episode."""

    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                         policies: Dict[PolicyID, Policy],
                         episode: MultiAgentEpisode, env_index: int,
                         **kwargs):
        super().on_episode_start(worker=worker, base_env=base_env,
                                 policies=policies, episode=episode,
                                 env_index=env_index, **kwargs)
        # NOTE(review): the source is truncated after `if not hasattr(worker,`;
        # the continuation below is reconstructed from the complete parallel
        # variants of this chunk elsewhere in this file -- verify against the
        # original script.
        if not hasattr(worker, "avg_pol_loaded") or not worker.avg_pol_loaded:
            # Load once per worker process, then flag so we don't reload
            # every episode.
            avg_policy = worker.policy_map["average_policy"]
            load_pure_strat(policy=avg_policy,
                            pure_strat_spec=player_0_avg_pol_spec)
            worker.avg_pol_loaded = True
# NOTE(review): this chunk begins mid-call; the `ray.init(` opener and any
# leading arguments are outside the visible source -- TODO confirm against
# the original script.
ray.init(
    dashboard_port=find_free_port(),
    ignore_reinit_error=True,
    logging_level=logging.INFO,
    # NOTE(review): os.getenv returns a *string* when the var is set, so any
    # non-empty value (even "0") is truthy here -- presumably intentional,
    # verify.
    log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


def select_policy(agent_id):
    """Map an agent id to its policy id.

    The best-response player trains; every other agent plays the frozen
    average policy.
    """
    if agent_id == br_player:
        return "best_response"
    return "average_policy"


# Model config reused so the loaded average-policy checkpoint matches the
# architecture it was trained with.
avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
    tmp_env)["model"]

# Frozen average-policy checkpoint for player 0 (tiny Oshi-Zumo NFSP run).
player_0_avg_pol_spec = StrategySpec.from_json_file(
    "/home/jblanier/git/grl/grl/data/oshi_zumo_tiny_nfsp_dqn_sparse_01.54.50PM_Apr-08-20218z_hf4wq/avg_policy_checkpoint_specs/average_policy_player_0_iter_214000.json")


class HyperParamSearchCallbacks(DefaultCallbacks):
    """RLlib callbacks that lazily load the frozen average policy onto each
    rollout worker the first time it starts an episode."""

    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                         policies: Dict[PolicyID, Policy],
                         episode: MultiAgentEpisode, env_index: int,
                         **kwargs):
        super().on_episode_start(worker=worker, base_env=base_env,
                                 policies=policies, episode=episode,
                                 env_index=env_index, **kwargs)
        if not hasattr(worker, "avg_pol_loaded") or not worker.avg_pol_loaded:
            # Load once per worker process, then flag so we don't reload
            # every episode.
            avg_policy = worker.policy_map["average_policy"]
            load_pure_strat(policy=avg_policy,
                            pure_strat_spec=player_0_avg_pol_spec)
            worker.avg_pol_loaded = True

    def on_train_result(self, *, trainer, result: dict, **kwargs):
        # NOTE(review): the source for this chunk ends right after this super()
        # call; any further body is outside the visible source.
        super().on_train_result(trainer=trainer, result=result, **kwargs)
# NOTE(review): this chunk begins mid-call; the `ray.init(` opener and any
# leading arguments are outside the visible source -- TODO confirm against
# the original script.
ray.init(
    ignore_reinit_error=True,
    logging_level=logging.INFO,
    # NOTE(review): os.getenv returns a *string* when the var is set, so any
    # non-empty value (even "0") is truthy here -- presumably intentional,
    # verify.
    log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


def select_policy(agent_id):
    """Map an agent id to its policy id.

    The best-response player trains; every other agent plays the frozen
    average policy.
    """
    if agent_id == br_player:
        return "best_response"
    return "average_policy"


# Model config reused so the loaded average-policy checkpoint matches the
# architecture it was trained with.
avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
    tmp_env)["model"]

# Frozen average-policy checkpoint for player 0 (medium Oshi-Zumo NFSP run).
player_0_avg_pol_spec = StrategySpec.from_json_file(
    "/home/jblanier/git/grl/grl/data/oshi_zumo_medium_nfsp_dqn_sparse_01.55.05PM_Apr-08-2021ta6arraq/avg_policy_checkpoint_specs/average_policy_player_0_iter_221000.json"
)


class HyperParamSearchCallbacks(DefaultCallbacks):
    """RLlib callbacks that lazily load the frozen average policy onto each
    rollout worker the first time it starts an episode."""

    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                         policies: Dict[PolicyID, Policy],
                         episode: MultiAgentEpisode, env_index: int,
                         **kwargs):
        super().on_episode_start(worker=worker, base_env=base_env,
                                 policies=policies, episode=episode,
                                 env_index=env_index, **kwargs)
        # NOTE(review): the source is truncated after `if not hasattr(worker,`;
        # the continuation below is reconstructed from the complete parallel
        # variants of this chunk elsewhere in this file -- verify against the
        # original script.
        if not hasattr(worker, "avg_pol_loaded") or not worker.avg_pol_loaded:
            # Load once per worker process, then flag so we don't reload
            # every episode.
            avg_policy = worker.policy_map["average_policy"]
            load_pure_strat(policy=avg_policy,
                            pure_strat_spec=player_0_avg_pol_spec)
            worker.avg_pol_loaded = True
# NOTE(review): this chunk begins mid-call; the `ray.init(` opener and any
# leading arguments are outside the visible source -- TODO confirm against
# the original script.
ray.init(
    ignore_reinit_error=True,
    logging_level=logging.INFO,
    # NOTE(review): os.getenv returns a *string* when the var is set, so any
    # non-empty value (even "0") is truthy here -- presumably intentional,
    # verify.
    log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


def select_policy(agent_id):
    """Map an agent id to its policy id.

    The best-response player trains; every other agent plays the frozen
    average policy.
    """
    if agent_id == br_player:
        return "best_response"
    return "average_policy"


# Model config reused so the loaded average-policy checkpoint matches the
# architecture it was trained with.
avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
    tmp_env)["model"]

# Frozen average-policy checkpoint for player 0 (Leduc poker NFSP run).
player_0_avg_pol_spec = StrategySpec.from_json_file(
    "/home/jblanier/git/grl/grl/data/leduc_nfsp_dqn_sparse_02.34.06PM_Apr-08-2021bt5ym0l8/avg_policy_checkpoint_specs/average_policy_player_0_iter_263000.json"
)


class HyperParamSearchCallbacks(DefaultCallbacks):
    """RLlib callbacks that lazily load the frozen average policy onto each
    rollout worker the first time it starts an episode."""

    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                         policies: Dict[PolicyID, Policy],
                         episode: MultiAgentEpisode, env_index: int,
                         **kwargs):
        super().on_episode_start(worker=worker, base_env=base_env,
                                 policies=policies, episode=episode,
                                 env_index=env_index, **kwargs)
        # NOTE(review): the source is truncated after `if not hasattr(worker,`;
        # the continuation below is reconstructed from the complete parallel
        # variants of this chunk elsewhere in this file -- verify against the
        # original script.
        if not hasattr(worker, "avg_pol_loaded") or not worker.avg_pol_loaded:
            # Load once per worker process, then flag so we don't reload
            # every episode.
            avg_policy = worker.policy_map["average_policy"]
            load_pure_strat(policy=avg_policy,
                            pure_strat_spec=player_0_avg_pol_spec)
            worker.avg_pol_loaded = True
# NOTE(review): this chunk begins mid-call; the `ray.init(` opener and any
# leading arguments are outside the visible source -- TODO confirm against
# the original script.
ray.init(
    ignore_reinit_error=True,
    logging_level=logging.INFO,
    # NOTE(review): os.getenv returns a *string* when the var is set, so any
    # non-empty value (even "0") is truthy here -- presumably intentional,
    # verify.
    log_to_driver=os.getenv("RAY_LOG_TO_DRIVER", False))


def select_policy(agent_id):
    """Map an agent id to its policy id.

    The best-response player trains; every other agent plays the frozen
    average policy.
    """
    if agent_id == br_player:
        return "best_response"
    return "average_policy"


# Model config reused so the loaded average-policy checkpoint matches the
# architecture it was trained with.
avg_policy_model_config = avg_pol_scenario.get_avg_trainer_config(
    tmp_env)["model"]

# Frozen average-policy checkpoint for player 0 (loss-game NFSP run,
# 10 moves, alpha 2.9).
player_0_avg_pol_spec = StrategySpec.from_json_file(
    "/home/jblanier/git/grl/grl/data/loss_game_nfsp_10_moves_alpha_2.9_sparse_12.07.15AM_May-18-202120bfveou/avg_policy_checkpoint_specs/average_policy_player_0_iter_30000.json"
)


class HyperParamSearchCallbacks(DefaultCallbacks):
    """RLlib callbacks that lazily load the frozen average policy onto each
    rollout worker the first time it starts an episode."""

    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv,
                         policies: Dict[PolicyID, Policy],
                         episode: MultiAgentEpisode, env_index: int,
                         **kwargs):
        super().on_episode_start(worker=worker, base_env=base_env,
                                 policies=policies, episode=episode,
                                 env_index=env_index, **kwargs)
        if not hasattr(worker, "avg_pol_loaded") or not worker.avg_pol_loaded:
            # Load once per worker process, then flag so we don't reload
            # every episode.
            avg_policy = worker.policy_map["average_policy"]
            load_pure_strat(policy=avg_policy,
                            pure_strat_spec=player_0_avg_pol_spec)
            worker.avg_pol_loaded = True

    def on_train_result(self, *, trainer, result: dict, **kwargs):
        # NOTE(review): the source is truncated right after this method's
        # signature; the super() delegation below is reconstructed from the
        # complete parallel variant of this chunk elsewhere in this file --
        # verify against the original script.
        super().on_train_result(trainer=trainer, result=result, **kwargs)