def plot_outputs(entries):
    """Plot one smoothed, bounded reward curve per model in *entries*.

    Parameters
    ----------
    entries : pandas.DataFrame
        Must contain the columns 'model' (str), 'ts' (a pandas Series of
        time steps per run), 'rewards' (a pandas Series of rewards per
        run) and 'color' (a matplotlib color spec).

    Notes
    -----
    Runs of different lengths are made comparable by linearly
    interpolating every run onto a common 1000-point time grid.
    Relies on the project helper ``smooth_bounded_curve`` and on
    ``np``/``interp1d`` being importable at module level.
    """
    all_models = np.unique(np.array(entries['model']))
    ax = None

    # NOTE(review): in the original these bounds were created lazily via a
    # ``'name' not in locals()`` check, so they accumulated ACROSS models.
    # That behaviour is preserved by initialising them once, out here.
    # (The original also tracked an unused ``cutoff`` length; removed.)
    ts_min = None
    ts_max = None

    for model in all_models:
        entries_of_model = entries.loc[entries['model'] == model]

        # Tighten [ts_min, ts_max] to the time range covered by every run.
        for i in range(len(entries_of_model['ts'])):
            run_ts = entries_of_model['ts'].iloc[i]
            first_t = int(run_ts.iloc[0])
            last_t = int(run_ts.iloc[-1])
            if ts_min is None or first_t > ts_min:
                ts_min = first_t
            if ts_max is None or last_t < ts_max:
                ts_max = last_t

        # Resample every run onto the shared grid and stack them row-wise.
        # (Replaces the original's exception-driven ``np.vstack`` sentinel.)
        ts = np.linspace(ts_min, ts_max, num=1000)
        rew = None
        for i in range(len(entries_of_model['ts'])):
            rew_f = interp1d(
                entries_of_model['ts'].iloc[i].to_numpy(dtype=float),
                entries_of_model['rewards'].iloc[i].to_numpy(dtype=float),
            )
            resampled = rew_f(ts)
            rew = resampled if rew is None else np.vstack((rew, resampled))

        col = entries_of_model['color'].iloc[0]

        # Interpolation can yield NaN; zero it so the mean/bounds stay finite.
        rew[np.isnan(rew)] = 0

        # A single run comes back 1-D; promote to (1, n) before transposing
        # (replaces the original's bare try/except on ``rew.shape[1]``).
        if rew.ndim == 1:
            rew = np.expand_dims(rew, 0)

        fig, ax = smooth_bounded_curve(
            data=np.transpose(rew),
            time_steps=ts,
            label=model,
            ax=ax,
            color=col,
            alpha=0.1,
        )
# NOTE(review): fragment — the first three statements belong to a loop whose
# header is not visible here (``entry``, ``ws``, ``model`` and ``max_size``
# are bound upstream); the reconstructed indentation should be confirmed.
print(entry.path)
ws_list.append(ws)
model_list.append(model)
plt.show()

plt.figure()
# Pack every run's reward history into one (max_size, n_runs) matrix;
# shorter runs leave trailing zeros — TODO confirm zero-padding is intended.
rewards = np.zeros((max_size, len(ws_list)))
for i, ws in enumerate(ws_list):
    # plt.plot(ws["raw_rew_hist"])
    # plt.figure()
    # print(len(ws["raw_rew_hist"]))
    rewards[:len(ws["raw_rew_hist"]), i] = np.array(ws["raw_rew_hist"])

fig, ax = smooth_bounded_curve(rewards, window=100)
plt.show()

# %%
#ws = ws_list[-1]
#model = model_list[-1]

# Inspect the last-loaded workspace: raw returns, then policy loss.
plt.plot(ws['raw_rew_hist'], 'ko')
plt.title('Return')
plt.show()

plt.plot(ws['pol_loss_hist'], 'k')
plt.title('Policy loss')
plt.show()
# Load every saved workspace under ``directory`` and truncate all runs to the
# shortest one so they can be stacked column-wise.
# NOTE(review): fragment — ``directory``, ``min_length``, ``ws_list`` and
# ``model_list`` are initialised upstream of the visible source; confirm.
for entry in os.scandir(directory):
    model, env, args, ws = load_workspace(entry.path)
    if len(ws["raw_rew_hist"]) < min_length:
        min_length = len(ws["raw_rew_hist"])
    ws_list.append(ws)
    model_list.append(model)

min_length = int(min_length)
rewards = np.zeros((min_length, len(ws_list)))
for i, ws in enumerate(ws_list):
    rewards[:, i] = np.array(ws["raw_rew_hist"][:min_length])

# Final-step mean/std across seeds for the seagul runs.
print("seagul", rewards[-1, :].mean(), rewards[-1, :].std())
fig, ax = smooth_bounded_curve(rewards)
ssac_size = rewards.shape[0]

# Overlay baseline-zoo results, one color per algorithm.
color_iter = iter(['b', 'g', 'y', 'm', 'c'])
log_dir = jup_dir + 'ssac/rl-baselines-zoo/baseline_log2/'
# NOTE(review): fragment ends mid-block — the try/for below continue past
# the visible source.
for algo in os.scandir(log_dir):
    try:
        df_list = []
        min_length = float('inf')
        for entry in os.scandir(algo.path):
            df = load_results(entry.path)
            if len(df['r']) < min_length:
                min_length = len(df['r'])
# Gather per-trial progress CSVs produced by Ray Tune for PPO and plot the
# mean-episode-reward curves across trials.
directory = script_dir + "/data/tune/euler_but_working/PPO"
df_list = []
for i, entry in enumerate(os.scandir(directory)):
    try:
        df_list.append(pd.read_csv(entry.path + "/progress.csv"))
    except FileNotFoundError:
        # Some trial dirs lack a progress.csv (crashed/empty trials); skip.
        pass

# NOTE(review): assumes every CSV has as many rows as the first one — a
# shorter trial would raise on the assignment below; confirm upstream.
rewards = np.zeros((df_list[0]['episode_reward_mean'].shape[0], len(df_list)))
for i, df in enumerate(df_list):
    rewards[:, i] = df['episode_reward_mean']

smooth_bounded_curve(rewards)

# %%

def do_rollout(init_point):
    # Roll out one episode starting from ``init_point`` in a fresh env.
    # NOTE(review): definition continues past the visible source — the
    # rollout loop and return statement are not shown here.
    env = gym.make(env_name, **config['env_config'])
    obs = env.reset(init_point)

    action_hist = []
    m_act_hist = []
    obs_hist = []
    reward_hist = []

    done = False
"init_noise_max": 10, } alg_config = { "env_name": env_name, "model": model, "total_steps": 2e6, "epoch_batch_size": 1024, "sgd_batch_size": 512, "lam": .2, "gamma": .95, "env_config": env_config, "sgd_epochs": 30, "reward_stop": 300 } seeds = np.random.randint(0,2**32,8) pool = Pool(processes=8) # results = run_and_test(seeds[0]) results = pool.map(run_and_test, seeds) results = chop_returns(results) results = np.array(results).transpose(1,0) smooth_bounded_curve(results) plt.show()
        cur_step += 1
    # NOTE(review): fragment — this tail belongs to a rollout function whose
    # definition starts before the visible source; indentation reconstructed.
    # NOTE(review): ``ep_obs2`` is built but not returned — possibly dead
    # code or a missing return value; confirm against the full function.
    ep_obs1 = torch.stack(obs1_list)
    ep_acts = torch.stack(acts_list).reshape(-1, act_size)
    ep_rews = torch.stack(rews_list).reshape(-1, 1)
    ep_obs2 = torch.stack(obs2_list)
    ep_path = torch.tensor(path_list).reshape(-1, 1)
    return ep_obs1, ep_acts, ep_rews, ep_path

# %%
# Load saved trials and plot the reward curve (presumably 50 env steps per
# logged point — confirm against load_trials).
ws_list, model_list, rewards = load_trials(
    "seagul/seagul/notebooks/switching/data_needle/50k_slow_longer")
fig, ax = smooth_bounded_curve(
    rewards, time_steps=[i * 50 for i in range(rewards.shape[0])])
plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
ax.ticklabel_format(axis='x', style='sci')
ax.set_title('Reward Curve')
plt.show()
#fig.savefig('reward_curve.pdf')

ws = ws_list[-1]
model = model_list[-1]

#%%
# Roll out the last trained model and inspect the LQR switching signal.
obs_hist, act_hist, rew_hist, lqr_on = do_rollout()
print(lqr_on)
# Wall-clock-ish time axis at .2 per action — confirm the step period.
t = np.array([i * .2 for i in range(act_hist.shape[0])])
# NOTE(review): fragment — ``ws``/``model``/``min_length`` and the lists are
# populated by a loop upstream of the visible source; confirm indentation.
ws_list.append(ws)
model_list.append(model)

min_length = int(min_length)
rewards = np.zeros((min_length, len(ws_list)))
for i, ws in enumerate(ws_list):
    rewards[:, i] = np.array(ws["raw_rew_hist"][:min_length])

# Final-step mean/std across seeds for the seagul runs.
print("seagul", rewards[-1, :].mean(), rewards[-1, :].std())
ssac_size = rewards.shape[0]

# Shift the seagul curve to start at the 1e6-step mark on a 2e6-step axis
# (51 env steps per logged point) so it lines up with the baseline curves;
# leading/trailing entries stay NaN and are presumably ignored by the
# plotting helper — confirm.
shifted_reward = np.nan * np.ones((int(2e6 / 51), 8))
shifted_reward[int(1e6 / 51):int(1e6 / 51) + ssac_size] = rewards
fig, ax = smooth_bounded_curve(
    shifted_reward,
    time_steps=[51 * i for i in range(shifted_reward.shape[0])])

# Overlay baseline-zoo results, one color per algorithm.
color_iter = iter(['b', 'g', 'y', 'm', 'c'])
log_dir = script_path + '../rl-baselines-zoo/baseline_log2/'
# NOTE(review): fragment ends mid-block — the try/for below continue past
# the visible source.
for algo in os.scandir(log_dir):
    try:
        df_list = []
        min_length = float('inf')
        for entry in os.scandir(algo.path):
            df = load_results(entry.path)
            if len(df['r']) < min_length:
                min_length = len(df['r'])
# %% md # Needle sac can be made to work well ## Observation: Bigger networks and longer runs improve performance (shocking...) ## worth noting the one successful rllib trial from last week was a [256, 256] network, and trying to replicate those results with a [32,32] failed # %% fig, ax = plt.subplots(1, 2, figsize=(16, 6)) ws_list, model_list, rewards = load_trials( "seagul/seagul/notebooks/switching2/data_needle/long_small_strong") smooth_bounded_curve(rewards, ax=ax[0]) ax[0].set_title('Hidden sizes: (32,32)') ws_list, model_list, rewards = load_trials( "seagul/seagul/notebooks/switching2/data_needle/less_hack") smooth_bounded_curve(rewards, ax=ax[1]) ax[1].set_title('Hidden sizes: (256,256)') ws = ws_list[-1] model = model_list[-1] # %% md # Again, reasonably robust to initial conditions ### This time environment is reset normally, but with randomized initial *position* (initial velocity is always zero, learning degrades dramatically with nonzero starting velocities)