def _init_evaluator():
    rllib_config, stop_config = get_rllib_config(seeds=get_random_seeds(1))
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name="testing_amTFT",
    )
    evaluator.define_the_experiment_to_run(
        evaluation_config=rllib_config,
        stop_config=stop_config,
        TrainerClass=PGTrainer,
    )
    return evaluator
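# `get_random_seeds` comes from elsewhere in the repo. A minimal sketch of
# such a helper, assuming it simply returns one integer seed per replicate
# (hypothetical, not the repo's actual implementation):
import random

def get_random_seeds(n_seeds):
    # One independent seed per training/evaluation replicate.
    return [random.randint(0, 2**32 - 1) for _ in range(n_seeds)]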
def _evaluate_self_and_cross_perf(
    rllib_hp,
    rllib_config_eval,
    policies_to_load,
    trainable_class,
    stop,
    env_config,
    tune_analysis_per_exp,
):
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name=rllib_hp["exp_name"],
        local_mode=rllib_hp["debug"],
        use_wandb=not rllib_hp["debug"],
    )
    analysis_metrics_per_mode = evaluator.perform_evaluation_or_load_data(
        evaluation_config=rllib_config_eval,
        stop_config=stop,
        policies_to_load_from_checkpoint=policies_to_load,
        tune_analysis_per_exp=tune_analysis_per_exp,
        TuneTrainerClass=trainable_class,
        # Each checkpoint can meet at most train_n_replicates - 1 partners.
        n_cross_play_per_checkpoint=min(5, rllib_hp["train_n_replicates"] - 1),
        to_load_path=rllib_hp["load_plot_data"],
    )

    # Matrix games expose a payoff matrix that can be drawn as the plot
    # background; other envs get no background area.
    if issubclass(
        rllib_hp["env_class"],
        matrix_sequential_social_dilemma.MatrixSequentialSocialDilemma,
    ):
        background_area_coord = rllib_hp["env_class"].PAYOUT_MATRIX
    else:
        background_area_coord = None

    plot_config = PlotConfig(
        xlim=rllib_hp["x_limits"],
        ylim=rllib_hp["y_limits"],
        markersize=5,
        jitter=rllib_hp["jitter"],
        xlabel="player 1 payoffs",
        ylabel="player 2 payoffs",
        plot_max_n_points=rllib_hp["train_n_replicates"],
        x_scale_multiplier=rllib_hp["scale_multipliers"][0],
        y_scale_multiplier=rllib_hp["scale_multipliers"][1],
        background_area_coord=background_area_coord,
    )
    evaluator.plot_results(
        analysis_metrics_per_mode,
        plot_config=plot_config,
        x_axis_metric=f"policy_reward_mean/{env_config['players_ids'][0]}",
        y_axis_metric=f"policy_reward_mean/{env_config['players_ids'][1]}",
    )
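# A minimal sketch of the `rllib_hp` dict consumed by
# `_evaluate_self_and_cross_perf`; the keys are the ones the function reads,
# while every value (and the env subclass) is an illustrative assumption:
example_rllib_hp = {
    "exp_name": "amTFT/eval",              # experiment / logging directory
    "debug": False,                        # True => local_mode, wandb disabled
    "env_class": matrix_sequential_social_dilemma.IteratedPrisonersDilemma,
    "train_n_replicates": 10,              # caps cross-play at min(5, 10 - 1)
    "load_plot_data": None,                # or a path to previously saved data
    "x_limits": (-3.5, 0.5),
    "y_limits": (-3.5, 0.5),
    "jitter": 0.05,
    "scale_multipliers": (1 / 20, 1 / 20),  # per-axis rescaling of raw returns
}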
def _init_evaluator():
    exp_name, _ = log.log_in_current_day_dir("testing")
    rllib_config, stop_config = get_rllib_config(seeds=get_random_seeds(1))
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name=exp_name,
    )
    evaluator.define_the_experiment_to_run(
        evaluation_config=rllib_config,
        stop_config=stop_config,
        TrainerClass=PGTrainer,
    )
    return evaluator
def evaluate_self_play_cross_play(
    tune_analysis_per_welfare, config_eval, env_config, stop, hp_eval
):
    exp_name = os.path.join(hp_eval["exp_name"], "eval")
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name=exp_name,
        local_mode=hp_eval["debug"],
    )
    analysis_metrics_per_mode = evaluator.perform_evaluation_or_load_data(
        evaluation_config=config_eval,
        stop_config=stop,
        policies_to_load_from_checkpoint=copy.deepcopy(env_config["players_ids"]),
        tune_analysis_per_exp=tune_analysis_per_welfare,
        TrainerClass=dqn.DQNTrainer,
        n_self_play_per_checkpoint=hp_eval["n_self_play_per_checkpoint"],
        n_cross_play_per_checkpoint=hp_eval["n_cross_play_per_checkpoint"],
        to_load_path=hp_eval["load_plot_data"],
    )

    # Coin game variants have no payoff matrix to draw as the plot background.
    if "CoinGame" in hp_eval["env_name"]:
        background_area_coord = None
    else:
        background_area_coord = hp_eval["env_class"].PAYOUT_MATRIX

    plot_config = plot.PlotConfig(
        xlim=hp_eval["x_limits"],
        ylim=hp_eval["y_limits"],
        markersize=5,
        alpha=1.0,
        jitter=hp_eval["jitter"],
        xlabel="player 1 payoffs",
        ylabel="player 2 payoffs",
        plot_max_n_points=hp_eval["train_n_replicates"],
        x_scale_multiplier=hp_eval["plot_axis_scale_multipliers"][0],
        y_scale_multiplier=hp_eval["plot_axis_scale_multipliers"][1],
        background_area_coord=background_area_coord,
    )
    evaluator.plot_results(
        analysis_metrics_per_mode,
        plot_config=plot_config,
        x_axis_metric=f"policy_reward_mean/{env_config['players_ids'][0]}",
        y_axis_metric=f"policy_reward_mean/{env_config['players_ids'][1]}",
    )
    print_inequity_aversion_welfare(env_config, analysis_metrics_per_mode)
    return analysis_metrics_per_mode
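# Conceptually, self-play pairs a checkpoint's policies with themselves while
# cross-play pairs them with policies from other training runs. A toy sketch
# of that pairing logic (illustrative only, independent of the evaluator's
# real implementation):
def toy_pairings(checkpoints, n_self_play_per_checkpoint, n_cross_play_per_checkpoint):
    self_play = [
        (ckpt, ckpt)
        for ckpt in checkpoints
        for _ in range(n_self_play_per_checkpoint)
    ]
    cross_play = []
    for ckpt in checkpoints:
        others = [other for other in checkpoints if other != ckpt]
        # Each checkpoint can meet at most len(checkpoints) - 1 partners,
        # hence the min(...) caps used by the callers in this section.
        cross_play.extend(
            (ckpt, other) for other in others[:n_cross_play_per_checkpoint]
        )
    return self_play, cross_play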
def evaluate_self_and_cross_perf(tune_analysis_per_welfare, hp):
    config_eval, env_config, stop, hp_eval = generate_eval_config(hp)
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name=hp_eval["exp_name"],
    )
    analysis_metrics_per_mode = evaluator.perform_evaluation_or_load_data(
        evaluation_config=config_eval,
        stop_config=stop,
        policies_to_load_from_checkpoint=copy.deepcopy(env_config["players_ids"]),
        tune_analysis_per_exp=tune_analysis_per_welfare,
        TrainerClass=dqn.DQNTrainer,
        # The checkpoint pool spans every welfare function, so each checkpoint
        # can meet at most (train_n_replicates * n_welfare_fn) - 1 partners.
        n_cross_play_per_checkpoint=min(
            5,
            (hp_eval["train_n_replicates"] * len(hp_eval["welfare_functions"])) - 1,
        ),
        to_load_path=hp_eval["load_plot_data"],
    )

    # Coin game variants have no payoff matrix to draw as the plot background.
    if hp["env"] in [coin_game.CoinGame, coin_game.AsymCoinGame]:
        background_area_coord = None
    else:
        background_area_coord = hp["env"].PAYOUT_MATRIX

    plot_config = PlotConfig(
        xlim=hp_eval["x_limits"],
        ylim=hp_eval["y_limits"],
        markersize=5,
        alpha=1.0,
        jitter=hp_eval["jitter"],
        xlabel="player 1 payoffs",
        ylabel="player 2 payoffs",
        plot_max_n_points=hp_eval["train_n_replicates"],
        # title="cross and same-play performances: " + hp_eval["env"].NAME,
        x_scale_multiplier=hp_eval["plot_axis_scale_multipliers"][0],
        y_scale_multiplier=hp_eval["plot_axis_scale_multipliers"][1],
        background_area_coord=background_area_coord,
    )
    evaluator.plot_results(
        analysis_metrics_per_mode,
        plot_config=plot_config,
        x_axis_metric=f"policy_reward_mean/{env_config['players_ids'][0]}",
        y_axis_metric=f"policy_reward_mean/{env_config['players_ids'][1]}",
    )
    return analysis_metrics_per_mode
def evaluate_self_and_cross_perf(
    rllib_hp,
    rllib_config_eval,
    policies_to_load,
    trainable_class,
    stop,
    env_config,
    tune_analysis_per_exp,
):
    evaluator = self_and_cross_perf.SelfAndCrossPlayEvaluator(
        exp_name=rllib_hp["exp_name"],
    )
    analysis_metrics_per_mode = evaluator.perform_evaluation_or_load_data(
        evaluation_config=rllib_config_eval,
        stop_config=stop,
        policies_to_load_from_checkpoint=policies_to_load,
        tune_analysis_per_exp=tune_analysis_per_exp,
        TuneTrainerClass=trainable_class,
        # Each checkpoint can meet at most train_n_replicates - 1 partners.
        n_cross_play_per_checkpoint=min(5, rllib_hp["train_n_replicates"] - 1),
        to_load_path=rllib_hp["load_plot_data"],
    )

    # Only the matrix-game trainable comes with a payoff matrix to draw as
    # the plot background.
    if trainable_class == LOLAPGMatrice:
        background_area_coord = rllib_hp["env"].PAYOUT_MATRIX
    else:
        background_area_coord = None

    plot_config = PlotConfig(
        xlim=rllib_hp["x_limits"],
        ylim=rllib_hp["y_limits"],
        markersize=5,
        alpha=1.0,
        jitter=rllib_hp["jitter"],
        xlabel="player 1 payoffs",
        ylabel="player 2 payoffs",
        plot_max_n_points=rllib_hp["train_n_replicates"],
        # title="cross and same-play performances: " + rllib_hp["env"].NAME,
        x_scale_multiplier=rllib_hp["scale_multipliers"][0],
        y_scale_multiplier=rllib_hp["scale_multipliers"][1],
        background_area_coord=background_area_coord,
    )
    evaluator.plot_results(
        analysis_metrics_per_mode,
        plot_config=plot_config,
        x_axis_metric=f"policy_reward_mean/{env_config['players_ids'][0]}",
        y_axis_metric=f"policy_reward_mean/{env_config['players_ids'][1]}",
    )
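# The axis metrics above follow the flattened RLlib/Tune result naming, where
# each policy's mean episode reward is reported under
# "policy_reward_mean/<policy_id>". A toy lookup with assumed player ids and
# made-up values:
example_players_ids = ["player_row", "player_col"]
example_result = {
    "policy_reward_mean/player_row": -1.2,
    "policy_reward_mean/player_col": -0.8,
}
x_metric = f"policy_reward_mean/{example_players_ids[0]}"  # player 1 payoffs
y_metric = f"policy_reward_mean/{example_players_ids[1]}"  # player 2 payoffs
point = (example_result[x_metric], example_result[y_metric])  # (-1.2, -0.8)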