# pip install wrapt import collections import warnings import numpy as np import tensorflow as tf import sonnet as snt import plotnine as gg from typing import Any, Callable, Dict, List, Tuple # Plotnine themes gg.theme_set(gg.theme_bw(base_size=16, base_family='serif')) gg.theme_update(figure_size=(12, 8), panel_spacing_x=0.5, panel_spacing_y=0.5) # Filter meaningless pandas warnings warnings.filterwarnings(action="ignore", category=UserWarning) warnings.filterwarnings(action="ignore", category=ImportWarning) # @title (CODE) Define the _Deep Sea_ environment TimeStep = collections.namedtuple('TimeStep', ['observation', 'reward', 'pcont']) class DeepSea(object): def __init__(self, size: int, seed: int = None, randomize: bool = True): self._size = size
def upper_interval(x):
    """Return the mean of `x` plus twice its standard deviation.

    Args:
        x: Values accepted by `np.mean` and `np.std`.

    Returns:
        `np.mean(x) + 2 * np.std(x)`.
    """
    centre = np.mean(x)
    spread = np.std(x)
    return centre + 2 * spread


#############################################################################
# Collating data with Pandas

# Join the concatenated per-run results with the sweep parameters on the
# shared 'unique_id' column, then average instant regret per (agent, t).
params_df = config_lib.get_params_df(config)
df = pd.merge(
    pd.concat(results),
    params_df,
    on='unique_id',
)
plt_df = (
    df.groupby(['agent', 't'])
    .agg({'instant_regret': np.mean})
    .reset_index()
)

#############################################################################
# Plotting and analysis (uses plotnine by default)

gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8))

# One line per agent: instant regret against t.
p = gg.ggplot(plt_df)
p = p + gg.aes('t', 'instant_regret', colour='agent')
p = p + gg.geom_line()
print(p)

#############################################################################
# Collating data with Pandas

# Same join as above, this time averaging cumulative reward per (agent, t).
params_df = config_lib.get_params_df(config)
df = pd.merge(
    pd.concat(results),
    params_df,
    on='unique_id',
)
plt_df = (
    df.groupby(['agent', 't'])
    .agg({'cum_reward': np.mean})
    .reset_index()
)

#############################################################################
# Plotting and analysis (uses plotnine by default)
# NOTE(review): no plotting statements follow this header in the visible
# source; the fragment appears to end here.
def make_plots():
    """Load bsuite results, score them, and save summary and bandit plots.

    Results for the 'Random' and 'TRPO' experiments are read through
    `csv_load.load_bsuite`, scored with `summary_analysis`, and every figure
    is written to a file whose path is `PLOTS_PATH` + a fixed file name.

    Relies on names defined outside this function: `RANDOM_RESULTS_PATH`,
    `TRPO_RESULTS_PATH`, `PLOTS_PATH`, `csv_load` and `summary_analysis`.

    Returns:
        None. The function is run for its side effects (files written,
        plotting theme and warning filters changed).
    """
    # Setup plotting
    import pandas as pd
    import plotnine as gg
    import warnings

    pd.options.mode.chained_assignment = None
    gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
    gg.theme_update(
        figure_size=(12, 8), panel_spacing_x=0.5, panel_spacing_y=0.5)
    # Installs a process-wide "ignore everything" warning filter; it stays in
    # effect after this function returns.
    warnings.filterwarnings('ignore')

    # Load Results
    experiments = {
        'Random': RANDOM_RESULTS_PATH,
        'TRPO': TRPO_RESULTS_PATH,
    }
    data_frame, sweep_vars = csv_load.load_bsuite(experiments)
    bsuite_score = summary_analysis.bsuite_score(data_frame, sweep_vars)
    bsuite_summary = summary_analysis.ave_score_by_tag(
        bsuite_score, sweep_vars)

    # Generate general plots
    # The radar figure is written with `savefig`, the others with `save`
    # (presumably a matplotlib figure vs. plotnine plots -- confirm).
    radar_fig = summary_analysis.bsuite_radar_plot(bsuite_summary, sweep_vars)
    radar_fig.savefig(PLOTS_PATH + 'radar_fig.png', bbox_inches='tight')
    bar_fig = summary_analysis.bsuite_bar_plot(bsuite_score, sweep_vars)
    bar_fig.save(PLOTS_PATH + 'bar_fig.png')
    compare_bar_fig = summary_analysis.bsuite_bar_plot_compare(
        bsuite_score, sweep_vars)
    compare_bar_fig.save(PLOTS_PATH + 'compare_bar_fig.png')

    # Generate specific analyses
    # Each section imports its analysis module right before use, so earlier
    # figures are already on disk if a later import fails.

    # Learning performance
    from bsuite.experiments.bandit import analysis as bandit_analysis
    bandit_df = data_frame[data_frame.bsuite_env == 'bandit'].copy()
    bandit_scores = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit', sweep_vars)
    bandit_scores.save(PLOTS_PATH + 'bandits_scores.png')
    bandit_convergence = bandit_analysis.plot_learning(bandit_df, sweep_vars)
    bandit_convergence.save(PLOTS_PATH + 'bandits_convergence.png')
    bandit_seeds = bandit_analysis.plot_seeds(bandit_df, sweep_vars)
    bandit_seeds.save(PLOTS_PATH + 'bandits_seeds.png')

    # Robustness to noise
    from bsuite.experiments.bandit_noise import analysis as bandit_noise_analysis
    bandit_noise_df = data_frame[
        data_frame.bsuite_env == 'bandit_noise'].copy()
    bandit_noise_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_noise', sweep_vars)
    bandit_noise_overall.save(PLOTS_PATH + 'bandits_noise_overall.png')
    bandit_noise_avg = bandit_noise_analysis.plot_average(
        bandit_noise_df, sweep_vars)
    bandit_noise_avg.save(PLOTS_PATH + 'bandits_noise_avg.png')
    bandit_noise_regret = bandit_noise_analysis.plot_learning(
        bandit_noise_df, sweep_vars)
    bandit_noise_regret.save(PLOTS_PATH + 'bandits_noise_regret.png')

    # Robustness to reward scaling
    from bsuite.experiments.bandit_scale import analysis as bandit_scale_analysis
    bandit_scale_df = data_frame[
        data_frame.bsuite_env == 'bandit_scale'].copy()
    bandit_scale_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_scale', sweep_vars)
    bandit_scale_overall.save(PLOTS_PATH + 'bandits_scale_overall.png')
    bandit_scale_avg = bandit_scale_analysis.plot_average(
        bandit_scale_df, sweep_vars)
    bandit_scale_avg.save(PLOTS_PATH + 'bandits_scale_avg.png')
    bandit_scale_learn = bandit_scale_analysis.plot_learning(
        bandit_scale_df, sweep_vars)
    bandit_scale_learn.save(PLOTS_PATH + 'bandits_scale_learn.png')
    # NOTE(review): the source text ended with an unmatched closing """ after
    # the statement above. No opening delimiter is visible, so it is omitted
    # here to let the function parse -- confirm it was not closing an
    # enclosing string literal.