def main():
    """Run offline-evaluation experiments for several MBL/offline algorithms
    and seeds, then plot the averaged evaluation curves.

    Launches one `run.py` subprocess per (seed, algorithm) pair via
    `os.system`, then loads the produced logs and saves a figure.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--st_seed', help='start number of seeds', type=int, default=0)
    parser.add_argument('--seeds', help='number of seeds', type=int, default=1)
    parser.add_argument('--num_timesteps', type=str, default="3e4")
    parser.add_argument('--filename', type=str, default='_Offline_Evaluation_nosil.png')
    args = parser.parse_args()

    # MBL planning horizon depends on the environment.
    if args.env == 'Swimmer-v2' or args.env == 'HalfCheetah-v2':
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=10 --eval_freq=10 --mbl_train_freq=10'
    elif args.env == 'Reacher-v2' or args.env == 'Ant-v2':
        # BUG FIX: was `arg.env` (NameError: `arg` is undefined).
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=5 --eval_freq=10 --mbl_train_freq=10'
    else:
        # BUG FIX: `mbl_args` was left unbound for any other environment,
        # crashing later with an UnboundLocalError; fail early instead.
        raise ValueError('Unsupported environment: {}'.format(args.env))

    algo_names = [
        "mbl_ppo2", "ppo2_offline",
        "mbl_copos", "copos_offline",
        "mbl_trpo", "trpo_offline"
    ]
    legend_names = [
        "mbl+ppo2", "ppo2",
        "mbl+copos", "copos",
        "mbl+trpo", "trpo"
    ]
    # Every algorithm gets the same MBL hyper-parameter string.
    argus = [mbl_args for _ in range(len(algo_names))]

    for i in range(args.st_seed, args.st_seed + args.seeds):
        for j in range(len(algo_names)):
            os.system("python ../algos/" + algo_names[j] + "/run.py --alg=" + algo_names[j] +
                      " --num_timestep=" + args.num_timesteps +
                      " --seed=" + str(i) +
                      " --env=" + args.env +
                      " --log_path=~/Desktop/logs/EXP2_nosil/" + args.env + "/" +
                      legend_names[j] + "-" + str(i) + ' ' + argus[j])

    results = pu.load_results('~/Desktop/logs/EXP2_nosil/' + args.env)
    pu.plot_results(results, xy_fn=pu.progress_itermbl_xy_fn, average_group=True, split_fn=lambda _: '')
    plt.xlabel('Evaluation Epochs [-]')
    plt.ylabel('Average Return [-]')
    fig = plt.gcf()
    fig.set_size_inches(9.5, 7.5)
    fig.savefig(args.env + "_" + args.filename)
def main():
    """Load runs for one environment, keep only the 'ppo' ones, and save a
    return-vs-timesteps figure per group."""
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Swimmer-v2')
    parser.add_argument('--dir', type=str, default='EXP_ON_fix_norm')
    parser.add_argument('--thesis', type=str, default='Online_V0')
    args = parser.parse_args()

    dirname = '~/Desktop/ppo_test/' + args.dir + '/' + args.env
    results = pu.load_results(dirname)

    # Only the PPO runs are plotted at the moment.
    groups = {'ppo': filt(results, 'ppo')}
    for name, runs in groups.items():
        pu.plot_results(runs,
                        xy_fn=pu.progress_default_xy_fn,
                        average_group=True,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig("/Users/zsbjltwjj/Desktop/ppo_test/" + args.dir + '-' + name + '.pdf', format="pdf")
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task, resample=100):
    """Load results from *dirs*, expand each result into one entry per monitor
    dataframe, and plot the averaged curves."""
    loaded = plot_util.load_results(dirs)
    # One Result per monitor dataframe, so every run is plotted individually.
    expanded = [
        copy.deepcopy(res)._replace(monitor=frame)
        for res in loaded
        for frame in res.monitor
    ]
    plot_util.plot_results(expanded,
                           xy_fn=lambda r: ts2xy(r.monitor, xaxis, yaxis),
                           group_fn=split_fn,
                           average_group=True,
                           resample=resample)
def main():
    """Plot online-training average return for one environment, plus an extra
    entropy figure for the SIL run."""
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()

    dirname = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP1_FINAL/' + args.extra_dir + args.env
    results = pu.load_results(dirname)

    groups = {
        'copos': filt(results, 'copos'),
        'trpo': filt(results, 'trpo'),
        'ppo': filt(results, 'ppo'),
        'sil_slight': filt(results, 'sil_n2_l0.001'),
    }
    for name, runs in groups.items():
        pu.plot_results(runs,
                        xy_fn=pu.progress_default_xy_fn,
                        average_group=True,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig(
            "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/ONLINE/" + args.env + '/' + name + '.pdf',
            format='pdf')
        if name == 'sil_slight':
            # The SIL run additionally gets an entropy-over-time plot.
            pu.plot_results(runs,
                            xy_fn=pu.progress_default_entropy_xy_fn,
                            average_group=True,
                            split_fn=lambda _: '',
                            shaded_err=True,
                            shaded_std=False,
                            legend_entropy=1)
            plt.xlabel('Number of Timesteps [M]')
            plt.ylabel('Entropy [-]')
            plt.tight_layout()
            fig = plt.gcf()
            fig.set_size_inches(9, 7.5)
            fig.savefig(
                "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/ONLINE/" + args.env + '/' + name + '_entropy.pdf',
                format="pdf")
def test_plot_util():
    """Smoke-test plot_util on short ppo2 CartPole runs and check axis tiling."""
    nruns = 4
    logdirs = [smoketest('--alg=ppo2 --env=CartPole-v0 --num_timesteps=10000')
               for _ in range(nruns)]
    data = pu.load_results(logdirs)
    assert len(data) == 4

    _, axes = pu.plot_results(data[:1])
    assert len(axes) == 1

    _, axes = pu.plot_results(data, tiling='vertical')
    assert axes.shape == (4, 1)

    _, axes = pu.plot_results(data, tiling='horizontal')
    assert axes.shape == (1, 4)

    _, axes = pu.plot_results(data, tiling='symmetric')
    assert axes.shape == (2, 2)

    _, axes = pu.plot_results(data, split_fn=lambda _: '')
    assert len(axes) == 1
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
    """Load results from *dirs* and plot them, averaged per group."""
    loaded = plot_util.load_results(dirs)
    plot_util.plot_results(
        loaded,
        split_fn=split_fn,
        average_group=True,
        resample=int(1e6),
    )
def main(logs='logs-arm', n_arm=3, smooth=True):
    """Plot test success rate per epoch for every run under logs/<n_arm>/."""
    run_dirs = glob.glob(join(logs, str(n_arm), '*'))
    for run_dir in run_dirs:
        run = pu.load_results(run_dir)[0]
        eps = np.cumsum(run[1].epoch).values
        dist = run[1]['test/success_rate'].values
        if smooth:
            dist = pu.smooth(dist, radius=2)
        plt.plot(eps, dist, label=basename(run_dir))
    plt.legend()
    plt.title('Number of arms: {}'.format(n_arm))
    plt.show()
def main():
    """For each environment, tabulate per-algorithm online results, write one
    text file per legend, and write a summary file marking the best algorithm.

    NOTE(review): `filt`, `reconstruct` and `t_test` are defined elsewhere in
    this project; their exact semantics are assumed from usage below.
    """
    parser = arg_parser()
    parser.add_argument('--dir', type=str, default='logs')
    parser.add_argument('--thesis', type=str, default='Online_V0')
    args = parser.parse_args()
    location=args.dir
    thesis_dir=args.thesis
    env_name_list=["Ant-v2", "HalfCheetah-v2", "Reacher-v2", "Swimmer-v2"]
    legend_name_list=["copos1", "copos1+sil_n2_l0.001", "copos1+sil_n10_l0.1", "copos2", "copos2+sil_n2_l0.001", "copos2+sil_n10_l0.1", "ppo", "ppo+sil_n2_l0.001", "ppo+sil_n10_l0.1", "trpo", "trpo+sil_n2_l0.001", "trpo+sil_n10_l0.1"]
    for env_name in env_name_list:
        dirname = '~/Desktop/logs/'+location+'/EXP_ON_V0/'+env_name
        results = pu.load_results(dirname)
        # Per-legend means, stds and final-return lists, collected in legend order.
        mn_ls, sd_ls,last_ls_ls=[],[],[]
        final_txt_name="/Users/zsbjltwjj/Desktop/thesis/img/"+thesis_dir+"/"+env_name+"-final-output.txt"
        for legend in legend_name_list:
            # Trailing '-' keeps e.g. 'ppo-' runs from also matching 'ppo+sil...'.
            result=filt(results,legend+"-")
            mn, sd, last_ls = pu.table_results(result,xy_fn=pu.progress_default_xy_fn,average_group=True,split_fn=lambda _: '', name=result[0].dirname,tp='online',freq=10)
            txt_name="/Users/zsbjltwjj/Desktop/logs/"+location+"/EXP_ON_V0/"+env_name+"/"+legend+"-output.txt"
            # Per-legend file: mean, std, then the individual final returns.
            with open(txt_name, "w") as text_file:
                text_file.write(str(mn)+'\n')
                text_file.write(str(sd)+'\n')
                for i in last_ls:
                    text_file.write(str(i)+' ')
            mn_ls.append(mn)
            sd_ls.append(sd)
            last_ls_ls.append(last_ls)
        # Index of the best-performing legend by mean return.
        max_idx=np.argmax(mn_ls)
        with open(final_txt_name, "w") as txt_file:
            bolds=[]
            new_legends,new_mn_ls,new_sd_ls,new_last_ls_ls=reconstruct(max_idx,legend_name_list,mn_ls,sd_ls,last_ls_ls)
            for i in range(len(new_legends)):
                # presumably a significance test against the best performer's
                # final returns — confirm against t_test's definition.
                bold=t_test(last_ls_ls[max_idx],new_last_ls_ls[i])
                bolds.append(bold)
                txt_file.write(new_legends[i]+": "+str(new_mn_ls[i])+' '+str(new_sd_ls[i])+' '+str(bold)+'\n')
            if any(bolds):
                max_bold=True
            else:
                max_bold=False
            txt_file.write("max alg: "+legend_name_list[max_idx]+": "+str(mn_ls[max_idx])+' '+str(sd_ls[max_idx])+' '+str(max_bold)+'\n')
def plot_monitors():
    """Plot averaged monitor curves for all runs found under LOG_DIRS.

    To average over multiple seeds, LOG_DIRS must contain subfolders named
    <name_exp0>-0, <name_exp0>-1, <name_exp1>-0, ... — experiment name and
    random seed separated by a dash.
    """
    LOG_DIRS = '/home/deep3/logs/Humanoid-v2/'
    # LOG_DIRS = '/home/deep3/logs/Hopper-v2/'
    results = pu.load_results(LOG_DIRS, running_agents=3)
    fig, ax = pu.plot_results(results,
                              average_group=True,
                              split_fn=lambda _: '',
                              shaded_std=False)
def main():
    """Load results from the obstacle-test log directory, plot the averaged
    curves, then drop into the debugger for interactive inspection."""
    # results = pu.load_results('data_the_best')
    results = pu.load_results('data_Test_obstacle_origin/log_data')
    r = results[0]
    # Alternative plots kept for reference:
    # plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
    # plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
    #### plt.plot(r.progress.total_timesteps, r.progress.eprewmean)
    # print('keys:', r.progress.keys())
    # plt.plot(r.progress['epoch'], r.progress['test/success_rate'])
    # plt.plot(r.progress['epoch'], pu.smooth(r.progress['test/success_rate'], radius=5))
    # pu.plot_results(results)
    pu.plot_results(results, average_group=True, split_fn=lambda _: '')
    # Pause here so `results`/`r` can be inspected interactively.
    set_trace()
def plot_data(exp, savefig, ttype):
    """Plot distance-ratio curves for experiment *exp* and save the figure.

    *ttype* selects the train or test xy-extractor. Failures are reported
    rather than raised so batch plotting can continue.
    """
    savefig = abspath(savefig)
    try:
        organized = organize_results(pu.load_results(exp))
        xy = ep_distance_ratio_train if ttype == 'train' else ep_distance_ratio_test
        pu.plot_results(organized,
                        average_group=True,
                        split_fn=lambda _: '',
                        xy_fn=xy,
                        shaded_std=False,
                        shaded_err=True)
        # Remove any stale figure before writing the new one.
        if os.path.isfile(savefig):
            os.remove(savefig)
        plt.savefig(savefig)
        plt.clf()
    except Exception as e:
        print("Plotting failed for {}".format(savefig))
        print("Reason: {}".format(str(e)))
def plot_results(self):
    """Plot averaged reward curves for this run and save the figure under
    the plot directory, then display it."""
    # Create plot directory
    os.makedirs(self.plot_dir, exist_ok=True)
    log_root = os.path.join(self.log_dir, self.env_name.split('-')[0], '')
    results = pu.load_results(log_root)
    pu.plot_results(results,
                    average_group=True,
                    split_fn=lambda _: '',
                    shaded_std=False)
    plt.xlabel('Timestep')
    plt.ylabel('Reward')
    fig = plt.gcf()
    plot_path = os.path.join(self.plot_dir, 'plot_' + self.env_name)
    fig.savefig(plot_path, bbox_inches='tight')
    plt.show()
def main():
    """Plot offline-evaluation best-return curves for one environment, plus an
    entropy figure for the MBL groups.

    NOTE(review): `filt`, `filt_or`, `filt_or_or` are project helpers;
    judging by usage they select runs whose names contain the given
    substring(s) — confirm against their definitions. Most of the groups
    below are computed but currently unused (only 'trpo_research' is plotted).
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()
    # Hard-coded override of the command-line value.
    args.extra_dir='EXP2_IAS_5M_TRPO_OFF/'
    dirname = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP2_FINAL/'+args.extra_dir+args.env
    results = pu.load_results(dirname)
    # Trailing '-' distinguishes plain runs from their '+sil' variants.
    r_copos_nosil,r_trpo_nosil,r_ppo_nosil=filt(results,'copos-'),filt(results,'trpo-'),filt(results,'ppo-')
    r_copos_sil,r_trpo_sil,r_ppo_sil=filt(results,'copos+sil-'),filt(results,'trpo+sil-'),filt(results,'ppo+sil-')
    r_mbl_sil=filt(results,'mbl+','sil-')
    r_mbl_nosil=filt_or_or(results,'mbl+copos-','mbl+trpo-','mbl+ppo-')
    r_copos_comp, r_trpo_comp, r_ppo_comp=filt_or(results,'mbl+copos','copos+sil'),filt_or(results,'mbl+trpo','trpo+sil'),filt_or(results,'mbl+ppo','ppo+sil')
    # Only the all-TRPO comparison is plotted in this configuration.
    r_trpo_all=filt(results,'trpo')
    dt={'trpo_research':r_trpo_all}
    for name in dt:
        pu.plot_results(dt[name],xy_fn=pu.progress_mbl_vbest_xy_fn,average_group=True,name=name,split_fn=lambda _: '',shaded_err=True,shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Best Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig("/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/OFFLINE/"+args.env+'/'+name+'.pdf',format="pdf")
        # MBL groups additionally get an entropy-over-time plot (dead code
        # while dt only contains 'trpo_research').
        if name=='mbl_nosil' or name=='mbl_sil':
            pu.plot_results(dt[name],xy_fn=pu.progress_default_entropy_xy_fn,average_group=True,name=name,split_fn=lambda _: '',shaded_err=True,shaded_std=False,legend_entropy=1)
            plt.xlabel('Number of Timesteps [M]')
            plt.ylabel('Entropy [-]')
            plt.tight_layout()
            fig = plt.gcf()
            fig.set_size_inches(9, 7.5)
            fig.savefig("/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/OFFLINE/"+args.env+'/'+name+'_entropy.pdf',format="pdf")
def plot_training(experiment_no, experiment_dir):
    """Plot smoothed and raw episode rewards for every run in *experiment_dir*
    and save the figure into that directory."""
    results = plot_util.load_results(experiment_dir, verbose=True)
    results.sort(key=lambda x: x.dirname)
    fig, ax = plt.subplots()
    palette = ['b', 'g', 'r', 'm', 'y', 'c']
    ax.grid(color='#dddddd', linestyle='-', linewidth=1)
    # Smoothed curves first so the legend colors map to the visible lines.
    for idx, run in enumerate(results):
        color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l),
                 plot_util.smooth(run.monitor.r, radius=200),
                 color=color)
    # Raw traces drawn faintly underneath in matching colors.
    for idx, run in enumerate(results):
        color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l), run.monitor.r, alpha=0.1, color=color)
    ax.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
    ax.set_xlim(left=0)
    plt.tight_layout(pad=2)
    plt.xlabel('time steps')
    plt.ylabel('episode_reward')
    plt.title('experiment_{}'.format(experiment_no), fontsize=10)
    plt.legend([os.path.basename(run.dirname) for run in results])
    plt.savefig(os.path.join(experiment_dir, 'training_{}.png'.format(experiment_no)))
    plt.show()
def load_data(item, path_a, window):
    """Load the *item* curve from every run under *path_a* and resample all
    runs onto a common time axis.

    Tries the A2C-style progress layout ('total_timesteps') first and falls
    back to the TNPG layout ('TimestepsSoFar'). *window* is the EMA decay
    used when resampling.

    Returns a dict with keys 'ts', 'median', 'q1', 'q3', 'mean', 'std' —
    the resampled timesteps and per-timestep statistics across runs.
    """
    results = pu.load_results(path_a)
    try:
        # A2C-style progress: 'total_timesteps' column (first row skipped).
        xys = [(np.array(res.progress['total_timesteps'][1:]),
                np.array(res.progress[item][1:])) for res in results]
    except KeyError:
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt and unrelated errors. A missing column in a
        # pandas dataframe raises KeyError, which is the case this fallback
        # is meant to handle (TNPG-style progress).
        xys = [(np.array(res.progress['TimestepsSoFar']),
                np.array(res.progress[item])) for res in results]
    origxs = [xy[0] for xy in xys]
    print([x[-1] for x in origxs])
    # Restrict to the time range covered by every run.
    low = max(x[0] for x in origxs)
    high = min(x[-1] for x in origxs)
    resample = len(origxs[0])
    ts = np.linspace(low, high, resample) + 1
    ys = []
    for (x, y) in xys:
        ys.append(symmetric_ema(x, y, low, high, resample, decay_steps=window)[1])
    val = np.array(ys).T  # shape: (resample, n_runs)
    mean_val = np.mean(val, axis=1)
    sd_val = np.std(val, axis=1)
    med_val = np.median(val, axis=1)
    q1_val = np.percentile(val, 25, axis=1)
    q3_val = np.percentile(val, 75, axis=1)
    return {'ts': ts, 'median': med_val, 'q1': q1_val, 'q3': q3_val,
            'mean': mean_val, 'std': sd_val}
from baselines.common import plot_util as pu

# Load HER push-task training logs (MuJoCo, 250k timesteps).
mujoco_results = pu.load_results(
    '/home/developer/logs/her_pyrobot_push_mujoco/250k_v2')
#results = pu.load_results('/home/developer/logs/her_pyrobot_reach/joint_100k_v4')
import matplotlib.pyplot as plt
import numpy as np

mujoco_r = mujoco_results[0]
# Progress table as a plain array; columns are selected by position below.
mujoco_arr = np.array(mujoco_r.progress)
epoch = mujoco_arr[:, 0]
# NOTE(review): columns 7 and 9 are assumed to hold the test/train success
# rates — verify against the progress.csv header.
mujoco_test_success_rate = mujoco_arr[:, 7]
mujoco_train_success_rate = mujoco_arr[:, 9]
plt.suptitle(
    'LocoBot Push Task Trained in MuJoCo with 250k timesteps(100 epoches)~')
plt.title('Training in Mujoco')
# Smooth both success-rate curves before plotting.
plt.plot(epoch, pu.smooth(mujoco_train_success_rate, radius=10), label="mujoco_train")
plt.plot(epoch, pu.smooth(mujoco_test_success_rate, radius=10), label="mujoco_test")
plt.xlabel('epoches')
plt.ylabel('success_rate')
plt.legend()
#plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
plt.show()
# Environments of interest (not referenced in this fragment).
plot_envs = ["Qbert", "SpaceInvaders", 'Breakout', "BeamRider"]
pre = ""
means_cache = {}
# Warm the mean-reward cache from disk if a previous run saved one.
if os.path.exists(pre + "mean_reward_cache.pkl"):
    means_cache = pickle.load(open(pre + "mean_reward_cache.pkl", 'rb'))
# `hps`, `dont_cache` and `rewards` are defined elsewhere in this file.
for i, hp in tqdm(hps):
    log_dir = hp['log_dir']
    if not os.path.exists(log_dir):
        continue
    if log_dir in means_cache:
        r = means_cache[log_dir]
    else:
        try:
            result = pu.load_results(log_dir)
            if pre == "":
                r = result[0].monitor['r'].mean()
            else:
                # Non-empty prefix: average only the last 1000 episodes.
                r = result[0].monitor['r'].tail(1000).mean()
        except Exception as e:
            # NOTE(review): reconstructed control flow — runs listed in
            # dont_cache get their load error printed; either way the run
            # is skipped. Confirm against the original file layout.
            if log_dir in dont_cache:
                print(e, log_dir)
            continue
        if log_dir not in dont_cache:
            means_cache[log_dir] = r
    if not np.isfinite(r):
        #print("reward for",i, log_dir,"?", r)
        continue
    rewards.append(r)
from baselines.common import plot_util as pu results = pu.load_results('~/Documents/openai-results', verbose=True) # results = pu.load_results('/tmp') import matplotlib.pyplot as plt import numpy as np lines = ["-", "--", ":", "-."] markers = ['o', 'x', '+', '^'] colors = ['#000000', '#222222', '#444444', '#666666'] def average_graph(): min_number_episodes = 10000 for i in range(len(results)): result = results[i] rewards = result.monitor.r number_episodes = len(rewards) print('Results {}: number of episodes = {}'.format(i, number_episodes)) if (number_episodes < min_number_episodes): min_number_episodes = number_episodes total_reward_per_episode = [0] * min_number_episodes for i in range(len(results)): result = results[i] rewards = result.monitor.r for j in range(min_number_episodes):
f, ax = pu.plot_results( results, xy_fn=xy_fn, split_fn=lambda _: "", average_group=True, shaded_err=False, ) f.savefig(file) if __name__ == "__main__": # [run_cartpole_dqn(log_dir="logs/dqn-%d" % k, seed=k) for k in range(3)] # # [ # run_cartpole_reinforce( # RLparams(num_games=200, seed=k), log_dir="logs/reinforce-%d" % k # ) # for k in range(3) # ] [ run_cartpole_a2c( A2CParams(lr=0.01, num_rollout_steps=32, num_batches=1000, seed=k), log_dir="logs/a2c-%d" % k, ) for k in range(3) ] results = pu.load_results("logs") plot_save_results(lr_fn, "logs/steps_rewards.png") plot_save_results(tr_fn, "logs/time_rewards.png")
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt

# Load every run under the experiment directory and draw one averaged curve
# (std shading off, standard-error shading on).
results = pu.load_results('path/to/your/experiment01')
pu.plot_results(results,
                average_group=True,
                split_fn=lambda _: '',
                shaded_std=False,
                shaded_err=True,
                figsize=(10, 6),
                smooth_step=10.0)
plt.title('Experiment01', fontsize=30)
plt.tight_layout()
plt.show()
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Data location and experiment selection.
dataDir = 'baselinesData'
envName = 'HalfCheetah-v2'
alg = 'ppo2'  # not used below; kept for reference

results = pu.load_results(dataDir + '/' + envName)
#r = results[0]
#plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
pu.plot_results(results)
plt.show()
def main():
    """Plot the learning curves inside the given folder, overlaying horizontal
    reference lines for behaviour-cloning (BC) and random-agent returns when
    result files for them are present under `args.bcpath`.

    Uses module-level `args`, `COLORS` and `check_last_name` defined elsewhere
    in this file.
    """
    # Derive the environment name from the last non-empty path component.
    splits = args.files[0].split('/')
    if splits[-1] == '':
        splits = splits[-2]
    else:
        splits = splits[-1]
    env = splits
    results = pu.load_results(args.files, )
    fig = pu.plot_results(results,
                          average_group=True,
                          shaded_err=False,
                          shaded_std=True,
                          group_fn=lambda _: check_last_name(_),
                          split_fn=lambda _: '',
                          figsize=(10, 10))
    # Collect BC_*.txt and random_*.txt reward files under the BC path.
    allbcfiles = [args.bcpath]
    allfiles = []
    allrandomfiles = []
    for file in allbcfiles:
        for r, dirs, files in os.walk(file):
            print(files)
            txtfiles = list(filter(lambda x: 'BC_' in x and '.txt' in x, files))
            rndfiles = list(filter(lambda x: 'random_' in x and '.txt' in x, files))
            allfiles.extend(list(map(lambda x: os.path.join(r, x), txtfiles)))
            allrandomfiles.extend(list(map(lambda x: os.path.join(r, x), rndfiles)))
    print(allfiles)
    if allfiles != []:
        # One reward per line; aggregate across all BC files.
        bcreward = []
        for file in allfiles:
            with open(file, 'r') as fi:
                rews = fi.read().split('\n')
                rews = filter(lambda x: x != '', rews)
                rews = list(map(lambda x: float(x), rews))
                bcreward.extend(rews)
        mean = np.mean(bcreward)
        std = np.std(bcreward)
        idxcolor = 10
        # Horizontal mean line with a +/- std band across the x range.
        plt.plot([0, args.max_steps], [mean, mean], label='BC', color=COLORS[idxcolor])
        plt.fill_between([0, args.max_steps],
                         [mean - std, mean - std],
                         [mean + std, mean + std],
                         alpha=0.2,
                         color=COLORS[idxcolor])
    if allrandomfiles != []:
        rndreward = []
        for file in allrandomfiles:
            with open(file, 'r') as fi:
                rews = fi.read().split('\n')
                rews = filter(lambda x: x != '', rews)
                rews = list(map(lambda x: float(x), rews))
                rndreward.extend(rews)
        mean = np.mean(rndreward)
        plt.plot([0, args.max_steps], [mean, mean],
                 label='random', color='gray', linestyle='dashed')
    plt.xlabel('# environment interactions', fontsize=20)
    envnamehere = 'ant'
    if env.lower().startswith(envnamehere):
        plt.ylim(ymin=-5000, ymax=5000)
    if env.lower().startswith(''):
        plt.ylabel('Reward', fontsize=30)
    plt.yscale(args.yscale)
    plt.title(env.replace('BC','').replace('GAIL', '').replace('no', '').replace('alph', ''), \
        fontsize=50)
    if env.lower().startswith(envnamehere):
        if args.legend != []:
            if allfiles != []:
                args.legend.append('BC')
            # BUG FIX: 'bottom right' is not a valid matplotlib legend
            # location (raises ValueError on recent matplotlib); the valid
            # name is 'lower right'.
            plt.legend(args.legend, fontsize=30, loc='lower right')
        else:
            plt.legend().set_visible(False)
    plt.savefig(
        '{}.png'.format(env),
        bbox_inches='tight',
    )
    print("saved ", env)
from baselines.common import plot_util as pu

# Plot averaged curves for the 500-level coinrun experiment and save the figure.
LOG_DIRS = 'logs/coinrun_500_level/'
results = pu.load_results(LOG_DIRS)
smooth_step = 50.0
fig = pu.plot_results(results, average_group=True, split_fn=lambda _: '', shaded_std=False, smooth_step=smooth_step)
# plot_util re-exports matplotlib's pyplot as `pu.plt`.
pu.plt.savefig('coinrun_500_level')
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Parameters
dir_name = 'openai-2018-11-06-21-19-32-539241'
game_name = 'Pong'

results = pu.load_results(dir_name)
r = results[0]
# Smoothed episode reward against cumulative timesteps for the first run.
plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
plt.title(game_name)
plt.xlabel('# of timesteps')
plt.ylabel('Reward')
#plt.show()
plt.savefig(game_name + '_rewards.png', bbox_inches='tight')
# Copyright (C) 2019 Istituto Italiano di Tecnologia (IIT) # This software may be modified and distributed under the terms of the # LGPL-2.1+ license. See the accompanying LICENSE file for details. import os, inspect currentdir = os.path.dirname( os.path.abspath(inspect.getfile(inspect.currentframe()))) print(currentdir) parentdir = os.path.dirname(os.path.dirname(currentdir)) os.sys.path.insert(0, parentdir) import baselines from baselines.common import plot_util as pu import matplotlib.pyplot as plt import numpy as np results = pu.load_results('') r = results[0] print(r.monitor.r) plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10)) plt.show()
import argparse
from baselines.common import plot_util as pu

parser = argparse.ArgumentParser()
# NOTE(review): the help text says "an integer for the accumulator" but the
# value is used as a log-directory suffix below — likely copy-pasted.
parser.add_argument('path', help='an integer for the accumulator')
args = parser.parse_args()
exp_prefix = '/home/murtaza/research/baselines/logs/'
results = pu.load_results(exp_prefix + args.path)
import matplotlib.pyplot as plt
import numpy as np

# Test success rate per epoch for the first run found.
r = results[0]
plt.plot(r.progress.epoch, r.progress['test/success_rate'])
plt.show()
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np
import json
import os

# One smoothed reward figure per run, named after the run's env_id.
if not os.path.exists('pics/'):
    os.makedirs('pics/')
results = pu.load_results('logs/', verbose=True)
for r in results:
    # The first line of a monitor CSV is a '#'-prefixed JSON metadata header.
    with open("{}/0.0.monitor.csv".format(r.dirname), "r") as f:
        info = f.readline()
        meta = json.loads(info[1:])
        game = meta['env_id']
    fig = plt.figure()
    plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
    fig.suptitle(game, fontsize=18)
    plt.savefig('pics/{}.png'.format(game))
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import argparse
import os
"""Create a plot of training data for a given environment using baselines plotting utility Command line arguments: --env: environment name (ex: --env=RoboschoolHalfCheetah-v1) --dir: directory where training data is logged default is ./data/ """
if __name__ == "__main__":
    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='Environment name')
    parser.add_argument('--dir', help='Data directory', default='./data')
    args = parser.parse_args()
    envName = args.env
    # Load results with baselines plot utility
    results = pu.load_results('./'+args.dir+'/'+envName)
    # Plot results, averaged into a single panel
    pu.plot_results(results, average_group=True, split_fn=lambda _: '', shaded_std=False)
    # Save plot as a pdf in a subdirectory of the data directory called plots
    if not os.path.exists('./'+args.dir+'/plots'):
        os.mkdir('./'+args.dir+'/plots')
    plt.savefig('./'+args.dir+'/plots/'+envName+'.pdf')
def save_plot(path, seed):
    """Plot the mean-100-episode reward curve for one run and save it as
    plot.png inside that run's directory (`dirs` is a module-level root)."""
    run = pu.load_results(dirs + '%s/%s/' % (path, seed))[0]
    reward = run.progress["mean 100 episode reward"]
    plt.plot(run.progress.steps, reward)
    plt.savefig(dirs + '%s/%s/plot.png' % (path, seed))
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
    """Load results from *dirs* and plot averaged *yaxis*-vs-*xaxis* curves."""
    loaded = plot_util.load_results(dirs)
    plot_util.plot_results(
        loaded,
        xy_fn=lambda r: ts2xy(r['monitor'], xaxis, yaxis),
        split_fn=split_fn,
        average_group=True,
        resample=int(1e6),
    )
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Load every Hopper comparison run and plot them averaged into one panel.
results = pu.load_results('~/logs/NewHopperCmp/')
print(len(results))
pu.plot_results(results, average_group=True, split_fn=lambda _: '')
#print(np.cumsum(results[0].monitor.l))
#plt.plot(np.cumsum(results[0].monitor.l), pu.smooth(results[0].monitor.r, radius=10))
#plt.show()