def plot_tactical(csv_path='/home/jazz/Documents/openai-results/open-ai-tactical.csv'):
    """Plot the smoothed per-episode reward curve from a tactical-run CSV.

    Args:
        csv_path: Path to a 2-column CSV (step, reward). Defaults to the
            original hard-coded location so existing callers are unaffected.
    """
    tactical_data = np.genfromtxt(csv_path, delimiter=',')
    steps = [row[0] for row in tactical_data]
    # radius=1 gives only light smoothing so individual episodes stay visible.
    rewards = pu.smooth([row[1] for row in tactical_data], radius=1)
    plt.plot(steps, rewards, linewidth=1, linestyle=lines[0], color=colors[0])
    plt.xlabel('Number of steps')
    plt.ylabel('Reward per episode')
    plt.show()
def main(logs='logs-arm', n_arm=3, smooth=True):
    """Plot test success rate against cumulative epochs for every run under
    ``logs/<n_arm>/``, one labelled curve per run directory.

    Args:
        logs: Root directory containing per-arm-count subdirectories.
        n_arm: Subdirectory name (number of arms) to plot.
        smooth: If True, smooth each success-rate curve (radius 2).
    """
    for log in glob.glob(join(logs, str(n_arm), '*')):
        r = pu.load_results(log)[0]
        eps = np.cumsum(r[1].epoch).values
        dist = r[1]['test/success_rate'].values
        if smooth:
            dist = pu.smooth(dist, radius=2)
        plt.plot(eps, dist, label=basename(log))
    plt.legend()
    plt.title('Number of arms: {}'.format(n_arm))
    plt.show()
def plot_training(experiment_no, experiment_dir):
    """Plot smoothed episode rewards for every run in *experiment_dir*,
    overlay the raw (unsmoothed) curves at low alpha, and save the figure
    as ``training_<experiment_no>.png`` inside the same directory.
    """
    results = plot_util.load_results(experiment_dir, verbose=True)
    results.sort(key=lambda run: run.dirname)

    palette = ['b', 'g', 'r', 'm', 'y', 'c']
    fig, ax = plt.subplots()
    ax.grid(color='#dddddd', linestyle='-', linewidth=1)

    # Heavily smoothed curves (radius 200), one color per run, cycling.
    for idx, run in enumerate(results):
        color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l),
                 plot_util.smooth(run.monitor.r, radius=200),
                 color=color)

    # Raw reward traces in the same colors, faded so they read as context.
    for idx, run in enumerate(results):
        color = palette[idx % len(palette)]
        plt.plot(np.cumsum(run.monitor.l), run.monitor.r,
                 alpha=0.1, color=color)

    ax.ticklabel_format(axis='x', style='sci', scilimits=(0, 0))
    ax.set_xlim(left=0)
    plt.tight_layout(pad=2)
    plt.xlabel('time steps')
    plt.ylabel('episode_reward')
    plt.title('experiment_{}'.format(experiment_no), fontsize=10)
    plt.legend([os.path.basename(run.dirname) for run in results])
    plt.savefig(os.path.join(experiment_dir,
                             'training_{}.png'.format(experiment_no)))
    plt.show()
def individual_graph():
    """Plot one lightly smoothed reward curve per run in the module-level
    ``results`` list, each with its own line style and color.
    """
    # Smoothing is a windowed convolution:
    # https://stackoverflow.com/a/20036959/7308982
    for idx, run in enumerate(results):
        plt.plot(np.cumsum(run.monitor.l),
                 pu.smooth(run.monitor.r, radius=1),
                 linewidth=1, linestyle=lines[idx], color=colors[idx])
    plt.xlim(right=500)
    plt.xlabel('Number of steps')
    plt.ylabel('Average reward per episode')
    plt.show()
def average_graph():
    """Average the per-episode rewards across all runs in the module-level
    ``results`` list (truncated to the shortest run, capped at 10000
    episodes) and plot the smoothed mean curve.
    """
    episode_counts = []
    for idx, run in enumerate(results):
        count = len(run.monitor.r)
        print('Results {}: number of episodes = {}'.format(idx, count))
        episode_counts.append(count)

    # The original logic caps the common length at 10000 even when every
    # run is longer, so keep 10000 in the min().
    n_common = min([10000] + episode_counts)

    totals = [0] * n_common
    for run in results:
        rewards = run.monitor.r
        for j in range(n_common):
            totals[j] += rewards[j]

    averages = [total / len(results) for total in totals]

    print(n_common)
    plt.plot(range(n_common),
             pu.smooth(averages, radius=1),
             linewidth=1, linestyle=lines[0], color=colors[0])
    plt.xlabel('Number of episodes')
    plt.ylabel('Average reward per episode')
    plt.show()
def tr_fn(r):
    """Return (wall-clock times, smoothed episode rewards) for run *r*."""
    return r.monitor.t, smooth(r.monitor.r, radius=10)
def lr_fn(r):
    """Return (cumulative episode lengths, smoothed episode rewards) for run *r*."""
    return np.cumsum(r.monitor.l), smooth(r.monitor.r, radius=10)
def xy_fn(r):
    """Return (steps in thousands, smoothed mean-100-episode reward) for run *r*."""
    # Alternative axes kept from an earlier experiment layout:
    #   x = np.divide(r.progress.total_timesteps, 1000)
    #   y = smooth(r.progress.eprewmean, radius=1)
    steps_k = np.divide(r.progress.steps, 1000)
    reward = smooth(r.progress["mean 100 episode reward"], radius=1)
    return steps_k, reward
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Plot train/test success rates for the MuJoCo-trained LocoBot push task.
mujoco_results = pu.load_results(
    '/home/developer/logs/her_pyrobot_push_mujoco/250k_v2')
#results = pu.load_results('/home/developer/logs/her_pyrobot_reach/joint_100k_v4')

run = mujoco_results[0]
progress = np.array(run.progress)

# Column layout of the progress table — presumably epoch / test / train
# success rates at indices 0, 7 and 9; TODO(review): confirm against the
# progress.csv header.
epoch = progress[:, 0]
mujoco_test_success_rate = progress[:, 7]
mujoco_train_success_rate = progress[:, 9]

plt.suptitle(
    'LocoBot Push Task Trained in MuJoCo with 250k timesteps(100 epoches)~')
plt.title('Training in Mujoco')
plt.plot(epoch, pu.smooth(mujoco_train_success_rate, radius=10),
         label="mujoco_train")
plt.plot(epoch, pu.smooth(mujoco_test_success_rate, radius=10),
         label="mujoco_test")
plt.xlabel('epoches')
plt.ylabel('success_rate')
plt.legend()
#plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
plt.show()
# Persist the trained policy and release the monitored environment.
model.save('berater-ppo-v8.pkl')
monitored_env.close()

##################################################
# Plot: smoothed episode reward over cumulative timesteps from the run logs.
print("--- Plot ---")
from baselines.common import plot_util as pu
results = pu.load_results(log_dir)
import matplotlib.pyplot as plt
import numpy as np
r = results[0]
plt.ylim(0, .75)
# plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=100))

##################################################
import numpy as np
# Rollout ("enjoy") phase: repeatedly reset the env and query the policy.
print("--- Enjoy ---")
BeraterEnv.BeraterEnv.showStep = True
# NOTE(review): the inner loop reuses the loop variable `c`, shadowing the
# outer one, and `env.step(actions)` is never called in this visible span —
# presumably the rollout continues past this chunk; confirm against the
# full script.
for c in range(1000):
    observation = env.reset()
    for c in range(1000):
        actions, _, state, _ = model.step(observation)
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Waits for the user to press Enter before plotting; the value is unused.
num = input()

# Plot smoothed episode rewards over cumulative timesteps for one PPO run.
run = pu.load_results('logs/single_ppo_092901')[0]
print(run.monitor.r)
plt.plot(np.cumsum(run.monitor['l']),
         pu.smooth(run.monitor['r'], radius=10))
plt.show()
import json
import numpy as np
import matplotlib.pyplot as plt
from baselines.common import plot_util as pu

# Plot train/test success rate per epoch for one HER experiment and save
# the figure under plots/.
exp_id = 'payload_empty_her100_demo'
results = pu.load_results('../policies/{}'.format(exp_id))[0]

epoch = results.progress["epoch"]
trainscs = results.progress["train/success_rate"]
testscs = results.progress["test/success_rate"]

plt.plot(epoch, pu.smooth(trainscs, radius=1), label="Train")
plt.plot(epoch, pu.smooth(testscs, radius=1), label="Test")
# plt.plot(epoch,trainscs,label="Train")
# plt.plot(epoch,testscs,label="Test")
plt.xlabel('Epoch')
plt.ylabel('Success Rate')

# Fix: the original opened params.json without ever closing it (leaked
# file handle); a context manager guarantees the close.
with open('../policies/{}/params.json'.format(exp_id)) as f:
    params = json.load(f)
plt.title(params['env_name'])

plt.legend()
#plt.show()
#print(results.progress)
plt.savefig('plots/exp_{}_results.png'.format(exp_id))
def plot():
    """Plot heavily smoothed episode rewards over cumulative timesteps for
    the 50M-step PPO baseline run.
    """
    run = pu.read_csv('results/baselines/ppo_50m.monitor.csv')
    xs = np.cumsum(run.l)
    ys = pu.smooth(run.r, radius=100)
    plt.plot(xs, ys)
    plt.show()
from baselines.common import plot_util as plot
import matplotlib.pyplot as plt
import numpy as np

# Plot smoothed reward over cumulative timesteps for the HER run logs.
results = plot.load_results('~/logs/HER')
print(results)

run = results[0]
# Larger radius -> smoother curve.
plt.plot(np.cumsum(run.monitor.l), plot.smooth(run.monitor.r, radius=10))
plt.xlabel('Iterations')
plt.ylabel('Reward')
plt.show()