def main():
    """Draw a bar chart of the final 'data_used' value for several DART partitions.

    Command-line flags: --envname, --t, --iters and --update are required;
    --save and --normalize are switches.  With --save the figure is written
    to images/ as PDF and SVG, otherwise it is shown interactively.

    Partitions whose results cannot be loaded (utils.extract_data raises
    IOError) are left out of the chart together with their label, so the
    labels and bar heights handed to plt.bar always have the same length.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # Both switches are stripped before params is passed to utils.extract_data;
    # --normalize is accepted but has no effect on this chart.
    should_save = params.pop('save')
    params.pop('normalize')

    plt.style.use('ggplot')

    iters = params['iters']
    title = 'test_dart'
    ptype = 'data_used'

    labels = []
    data = []
    errs = []
    for part in [450, 50, 10, 5]:
        params_dart = params.copy()
        params_dart['partition'] = part
        try:
            means, sems = utils.extract_data(params_dart, iters, title, sub_dir, ptype)
        except IOError:
            # No results for this partition: skip its bar *and* its label.
            continue
        labels.append('DART ' + str(part))
        data.append(means[-1])
        errs.append(sems[-1])

    plt.bar(labels, data, yerr=errs)
    # Drops the last three characters of the environment name
    # (presumably a version suffix such as "-v1").
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_data.pdf")
        plt.savefig(save_path + "svg_" + str(params['envname']) + "_data.svg")
    else:
        plt.show()
def main():
    """Plot supervisor and surrogate loss for the random-noise runs and for DART.

    For every run the 'sup_loss' series is drawn dashed and the 'surr_loss'
    series is drawn solid with a shaded +/- sems band, both in the same
    colour taken from the shared ``color`` iterator.  Command-line flags:
    --envname, --t, --iters and --update are required; --save writes the
    figure to images/ instead of showing it.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--iters', required=True, type=int, nargs='+')
    parser.add_argument('--update', required=True, nargs='+', type=int)
    parser.add_argument('--save', action='store_true', default=False)

    params = vars(parser.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100
    should_save = params.pop('save')

    plt.style.use('ggplot')
    iters = params['iters']

    def draw_loss_pair(run_params, title, label):
        """Draw the dashed 'sup_loss' line and the labelled, shaded 'surr_loss' line."""
        shade = next(color)
        sup_means, _ = utils.extract_data(run_params, iters, title, sub_dir, 'sup_loss')
        plt.plot(iters, sup_means, color=shade, linestyle='--')
        surr_means, surr_sems = utils.extract_data(run_params, iters, title, sub_dir, 'surr_loss')
        plt.plot(iters, surr_means, label=label, color=shade)
        plt.fill_between(iters, (surr_means - surr_sems), (surr_means + surr_sems),
                         alpha=.3, color=shade)
        # A 'sim_err' series (dotted) was plotted here in an earlier revision.

    # Rand -- the largest trace value differs for Humanoid-v1.
    largest_trace = 10.0 if params['envname'] == 'Humanoid-v1' else 5.0
    for trace in [0.005, 0.5, largest_trace]:
        params_rand = params.copy()
        params_rand['trace'] = trace  # You may adjust the trace to whatever you chose.
        del params_rand['update']
        draw_loss_pair(params_rand, 'test_rand', 'Rand Loss, p = ' + str(trace))

    # DART -- unlike the Rand runs, 'update' stays in the parameters.
    draw_loss_pair(params.copy(), 'test_dart', 'DART')

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss_rand.pdf")
    else:
        plt.show()
def main():
    """Plot reward curves for Behavior Cloning, DAgger, isotropic noise and DART.

    Command-line flags: --envname, --t, --num_evals, --max_data and --config
    are required; --save and --normalize are switches.  With --normalize every
    curve is divided by the 'sup_reward' series of the 'test_bc' run and the
    y axis is fixed to [0, 1.05].  With --save the figure is written to
    images/ as PDF and SVG (without a legend); otherwise it is shown with a
    legend.  Runs whose data raise IOError are logged and skipped.
    """
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--save', action='store_true', default=False)
    parser.add_argument('--normalize', action='store_true', default=False)
    parser.add_argument('--num_evals', required=True, type=int)   # number of evaluations
    parser.add_argument('--max_data', required=True, type=int)    # maximum amount of data
    parser.add_argument('--config', required=True, type=str)

    params = load_config(vars(parser.parse_args()))
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    # Humanoid-v1 uses longer update periods than the other environments.
    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # Best supervisor reward: the reference used for normalisation.  This
    # lookup is not guarded, so a missing file aborts the script.
    sup_means, sup_sems = utils.extract_data(params.copy(), 'test_bc', sub_dir, 'sup_reward')
    # (An earlier revision also plotted this series as 'Supervisor', and a
    # 'DART Noisy Supervisor' curve; both are disabled.)

    def normalize(means, sems):
        """Scale by the supervisor means when --normalize is set, else pass through."""
        if not should_normalize:
            return means, sems
        return means / sup_means, sems / sup_means

    def shaded_curve(run_params, title, ptype, label):
        """Plot one mean curve with its +/- sems band; log and skip on IOError."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir, ptype)
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=line[0].get_color())
        except IOError:
            log("Not found.")

    # BC
    shaded_curve(params.copy(), 'test_bc', 'sup_reward', 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods_dagger:
        params_dagger['update_period'] = update_period
        shaded_curve(params_dagger, 'test_dagger', 'reward', 'DAgger ' + str(update_period))

    # Isotropic noise (scales 10.0 and 20.0 were also tried at some point)
    for scale in [1.0]:
        params_iso = params.copy()
        params_iso['scale'] = scale
        shaded_curve(params_iso, 'test_iso', 'sup_reward', 'Iso ' + str(scale))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods_dart:
        params_dart['update_period'] = update_period
        shaded_curve(params_dart, 'test_dart', 'sup_reward',
                     'DART part: ' + str(partition) + ", per: " + str(update_period))

    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
        plt.savefig(save_path + str(params['envname']) + "_reward.svg")
    else:
        plt.legend()
        plt.show()
def main():
    """Draw a bar chart comparing the 'data_used' of DAgger-B and DART.

    The DAgger-B bar is the sum, over every beta whose results could be
    loaded, of the final 'data_used' mean; its error bar is the square root
    of the summed squared final sems.  Each DART partition gets its own bar.

    Command-line flags: --envname, --t, --iters and --update are required;
    --save and --normalize are switches.  With --save the figure is written
    to images/ as PDF and SVG, otherwise it is shown interactively.

    Runs whose results cannot be loaded (utils.extract_data raises IOError)
    are skipped.  If no DAgger-B run is available the DAgger-B bar is omitted
    instead of failing, and a skipped DART partition also drops its label so
    labels and heights stay aligned.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # Both switches are stripped before params is passed to utils.extract_data;
    # --normalize is accepted but has no effect on this chart.
    should_save = params.pop('save')
    params.pop('normalize')

    plt.style.use('ggplot')

    iters = params['iters']
    ptype = 'data_used'

    # DAgger B: gather the last mean / sem of every beta that has results.
    final_means = []
    final_sems = []
    for beta in [.1, .3, .5, .7, .9]:
        params_dagger_b = params.copy()
        params_dagger_b['beta'] = beta  # You may adjust the prior to whatever you chose.
        try:
            means, sems = utils.extract_data(params_dagger_b, iters, 'test_dagger_b', sub_dir, ptype)
        except IOError:
            continue
        final_means.append(means[-1])
        final_sems.append(sems[-1])

    labels = []
    data = []
    errs = []
    if final_means:
        labels.append('Dagger-B')
        data.append(np.sum(final_means))
        # Standard errors of the summed terms are combined in quadrature.
        errs.append(np.sqrt(np.sum(np.asarray(final_sems) ** 2.0)))

    # DART
    for part in [10]:
        params_dart = params.copy()
        params_dart['partition'] = part
        try:
            means, sems = utils.extract_data(params_dart, iters, 'test_dart', sub_dir, ptype)
        except IOError:
            continue
        labels.append('DART ' + str(part))
        data.append(means[-1])
        errs.append(sems[-1])

    plt.bar(labels, data, yerr=errs)
    # Drops the last three characters of the environment name
    # (presumably a version suffix such as "-v1").
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_data2.pdf")
        plt.savefig(save_path + "svg_" + str(params['envname']) + "_data2.svg")
    else:
        plt.show()
def main():
    """Plot learner bias and variance curves for BC, DAgger, the switch method and DART.

    For every method the 'biases_learner' series is drawn dashed and the
    'variances_learner' series is drawn solid with a shaded +/- sems band,
    both in one colour taken from the shared ``color`` iterator.
    Command-line flags: --envname, --t, --iters and --update are required;
    --save writes the figure to images/ instead of showing it.

    Only the DART run tolerates missing data (IOError is swallowed); the
    other three lookups are unguarded.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--iters', required=True, type=int, nargs='+')
    parser.add_argument('--update', required=True, nargs='+', type=int)
    parser.add_argument('--save', action='store_true', default=False)

    params = vars(parser.parse_args())
    params['arch'] = [64]
    params['lr'] = .01
    params['epochs'] = 100
    should_save = params.pop('save')

    plt.style.use('ggplot')
    iters = params['iters']

    def run_params(mode, extra=None, keep_update=False):
        """Record `mode` in params, then return a copy tailored for one method."""
        params['mode'] = mode
        tailored = params.copy()
        if extra:
            tailored.update(extra)
        if not keep_update:
            del tailored['update']
        return tailored

    def draw_bias_variance(tailored, title, label):
        """Draw the dashed bias line and the labelled, shaded variance line."""
        shade = next(color)
        bias_means, _ = utils.extract_data(tailored, iters, title, sub_dir, 'biases_learner')
        plt.plot(iters, bias_means, color=shade, linestyle='--')
        var_means, var_sems = utils.extract_data(tailored, iters, title, sub_dir, 'variances_learner')
        plt.plot(iters, var_means, label=label, color=shade)
        plt.fill_between(iters, (var_means - var_sems), (var_means + var_sems),
                         alpha=.3, color=shade)

    # Behavior Cloning ('update' is removed from its parameters)
    draw_bias_variance(run_params('bc'), 'test_bc', 'Behavior Cloning')

    # DAgger -- you may adjust the prior (beta) to whatever you chose.
    draw_bias_variance(run_params('dagger', {'beta': .5}), 'test_dagger', 'DAgger')

    # Bias Variance Thresh
    draw_bias_variance(run_params('bias_variance_switch'),
                       'test_bias_variance_switch', 'Switch')

    # Earlier revisions also plotted DAgger-B, isotropic noise (scales 0.5,
    # 1.0, 2.0), DART with a regularisation penalty (0.3, 0.7) and the
    # 'mixed' runs; those curves are currently disabled.

    # DART -- keeps 'update'; missing results are silently ignored.
    partition = 450
    params_dart = run_params('dart', {'partition': partition}, keep_update=True)
    try:
        draw_bias_variance(params_dart, 'test_dart', 'DART ' + str(partition))
    except IOError:
        pass

    plt.title("Learner Bias/Variance on " + str(params['envname']))
    plt.legend()
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_bias_variance_learner.pdf")
    else:
        plt.show()
def main():
    """Plot reward curves for polynomial Behavior Cloning, DAgger and DART.

    Command-line flags: --envname, --t, --num_evals, --max_data and --config
    are required; --save and --normalize are switches.  With --normalize every
    curve is divided by the 'sup_reward' series of the 'test_bc' run, the raw
    supervisor curve is not drawn, and the y axis is fixed to [0, 1.05].
    With --save the figure is written to images/ as PDF, otherwise it is
    shown.  Runs whose data raise IOError are logged and skipped.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    # NOTE: this palette is built but never consumed below; line colours come
    # from matplotlib's own cycle.
    color = itertools.cycle(("#FCB716", "#2D3956", "#A0B2D8", "#988ED5", "#F68B20"))
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--save', action='store_true', default=False)
    parser.add_argument('--normalize', action='store_true', default=False)
    parser.add_argument('--num_evals', required=True, type=int)   # number of evaluations
    parser.add_argument('--max_data', required=True, type=int)    # maximum amount of data
    parser.add_argument('--config', required=True, type=str)

    params = load_config(vars(parser.parse_args()))
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    # Best supervisor reward: the reference used for normalisation.  This
    # lookup is not guarded, so a missing file aborts the script.
    sup_means, sup_sems = utils.extract_data(params.copy(), 'test_bc', sub_dir, 'sup_reward')
    if not should_normalize:
        plt.plot(snapshot_ranges, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Scale by the supervisor means when --normalize is set, else pass through."""
        if not should_normalize:
            return means, sems
        return means / sup_means, sems / sup_means

    def shaded_curve(run_params, title, label):
        """Plot one 'reward' curve with its +/- sems band; log and skip on IOError."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir, 'reward')
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=line[0].get_color())
        except IOError:
            log("Not found.")

    # Noisy supervisor reward using DART (dashed green, no error band)
    try:
        means, sems = utils.extract_data(params.copy(), 'test_dart', sub_dir, 'sup_reward')
        means, sems = normalize(means, sems)
        plt.plot(snapshot_ranges, means, label='DART Noisy Supervisor',
                 color='green', linestyle='--')
    except IOError:
        log("Not found.")

    # BC: one curve per polynomial degree, using the matching 'poly<d>' config
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        shaded_curve(params_bc, 'test_bc', 'Behavior Cloning deg: ' + str(degree))

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in [2, 4, 8]:
        params_dagger['update_period'] = update_period
        shaded_curve(params_dagger, 'test_dagger', 'DAgger ' + str(update_period))

    # (Isotropic-noise curves for scales 1.0, 10.0 and 20.0 are disabled.)

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in [2, 4, 8]:
        params_dart['update_period'] = update_period
        shaded_curve(params_dart, 'test_dart',
                     'DART part: ' + str(partition) + ", per: " + str(update_period))

    plt.title("Reward on " + str(params['envname']))
    plt.legend()
    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    # This second title replaces the one set above.
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        plt.legend()
        plt.show()
def main():
    """Plot supervisor and surrogate loss for polynomial BC, DAgger and DART.

    For every run the 'sup_loss' series is drawn dashed and the 'surr_loss'
    series is drawn solid (labelled) with a shaded +/- sems band in the same
    colour.  The y axis runs from 0 to the largest surrogate mean seen,
    capped at 20.

    Command-line flags: --envname, --t, --num_evals, --max_data and --config
    are required; --save writes the figure to images/ instead of showing it.
    Runs whose data raise IOError are logged and skipped.

    The loss type is passed explicitly for each lookup.  Previously a shared
    ``ptype`` variable was switched to 'surr_loss' inside the loop and never
    reset, so from the second run of each method onwards the dashed
    "supervisor" line actually showed surrogate loss.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)   # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)    # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    should_save = params.pop('save')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    def plot_loss_pair(run_params, title, label):
        """Draw both loss curves for one run.

        Returns the largest surrogate-loss mean, or 0 when the data could not
        be loaded.  If only the surrogate lookup fails, the already drawn
        dashed supervisor line is kept, as before.
        """
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir, 'sup_loss')
            line = plt.plot(snapshot_ranges, means, linestyle='--')
            shade = line[0].get_color()

            means, sems = utils.extract_data(run_params, title, sub_dir, 'surr_loss')
            plt.plot(snapshot_ranges, means, label=label, color=shade)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=shade)
            return np.max(means)
        except IOError:
            log("Not found.")
            return 0

    upper_bound = 0

    # Behavior Cloning loss on sup distr: one run per polynomial degree,
    # using the matching 'poly<d>' config.
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        peak = plot_loss_pair(params_bc, 'test_bc',
                              'Behavior Cloning degree: ' + str(degree))
        upper_bound = max(peak, upper_bound)

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in [2, 4, 8]:
        params_dagger['update_period'] = update_period
        peak = plot_loss_pair(params_dagger, 'test_dagger',
                              'DAgger per: ' + str(update_period))
        upper_bound = max(peak, upper_bound)

    # (Isotropic-noise curves for scales 1.0, 10.0 and 20.0 are disabled.)

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in [2, 4, 8]:
        params_dart['update_period'] = update_period
        peak = plot_loss_pair(params_dart, 'test_dart',
                              'DART part: ' + str(partition) + ", per: " + str(update_period))
        upper_bound = max(peak, upper_bound)

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(snapshot_ranges)
    plt.legend(loc='upper right')

    upper_bound = min(20, upper_bound)
    # Leave the automatic limits alone when nothing was plotted; ylim(0, 0)
    # would request a zero-height axis.
    if upper_bound > 0:
        plt.ylim(0, upper_bound)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()
def main():
    """Plot reward curves for the supervisor, BC, DAgger, DAgger-B, isotropic noise and DART.

    Command-line flags: --envname, --t, --iters and --update are required;
    --save and --normalize are switches.  With --normalize every learner
    curve (and the noisy DART supervisor) is divided by the 'sup_reward'
    series of the 'test_bc' run and the y axis is fixed to [0, 1.05]; the
    'Supervisor' curve itself is always drawn from the raw values.
    With --save the figure is written to images/ as PDF, otherwise it is
    shown.  None of the lookups is guarded against missing data.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--iters', required=True, type=int, nargs='+')
    parser.add_argument('--update', required=True, nargs='+', type=int)
    parser.add_argument('--save', action='store_true', default=False)
    parser.add_argument('--normalize', action='store_true', default=False)

    params = vars(parser.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 50
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    plt.style.use('ggplot')
    iters = params['iters']

    def variant(extra=None, keep_update=False):
        """Return a copy of params with `extra` merged in and, by default, 'update' removed."""
        tailored = params.copy()
        if extra:
            tailored.update(extra)
        if not keep_update:
            del tailored['update']
        return tailored

    # Best supervisor reward (the behavior-cloning lookup carries no 'update')
    sup_means, sup_sems = utils.extract_data(variant(), iters, 'test_bc', sub_dir, 'sup_reward')
    plt.plot(iters, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Scale by the supervisor means when --normalize is set, else pass through."""
        if not should_normalize:
            return means, sems
        return means / sup_means, sems / sup_means

    def reward_band(tailored, title, label):
        """Plot one 'reward' curve with its +/- sems band in the next shared colour."""
        shade = next(color)
        means, sems = utils.extract_data(tailored, iters, title, sub_dir, 'reward')
        means, sems = normalize(means, sems)
        plt.plot(iters, means, label=label, color=shade)
        plt.fill_between(iters, (means - sems), (means + sems), alpha=.3, color=shade)

    # Noisy supervisor reward using DART (dashed green, no error band)
    means, sems = utils.extract_data(variant(keep_update=True), iters, 'test_dart',
                                     sub_dir, 'sup_reward')
    means, sems = normalize(means, sems)
    plt.plot(iters, means, label='DART Noisy Supervisor', color='green', linestyle='--')

    # BC
    reward_band(variant(), 'test_bc', 'Behavior Cloning')

    # DAgger
    reward_band(variant({'beta': .5}), 'test_dagger', 'DAgger')

    # DAgger B -- keeps 'update'; you may adjust the prior to whatever you chose.
    reward_band(variant({'beta': .5}, keep_update=True), 'test_dagger_b', 'DAgger-B')

    # Isotropic noise
    reward_band(variant({'scale': 1.0}), 'test_iso', 'Isotropic')

    # DART -- keeps 'update'
    reward_band(variant(keep_update=True), 'test_dart', 'DART')

    plt.title("Reward on " + str(params['envname']))
    plt.legend()
    plt.xticks(iters)
    plt.legend()
    if should_normalize:
        plt.ylim(0, 1.05)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        plt.show()
def main():
    """Draw one bar per method showing its ``total_time`` statistic.

    Command-line flags: ``--envname``, ``--t``, ``--save``, ``--normalize``,
    ``--num_evals``, ``--max_data`` and ``--config``.  The parsed flags are
    passed through ``load_config`` and the resulting dict (minus ``save`` and
    ``normalize``) is handed to ``utils.extract_data`` for each method; the
    first element of the returned means is the bar height.

    Bars are laid out left to right as: DAgger (one per update period),
    isotropic noise (one per scale), behavior cloning, DART (one per update
    period).  A method whose results raise ``IOError`` is logged and its slot
    is left empty.  With ``--save`` the figure is written to
    ``images/<envname>_time.pdf`` and ``.svg``; otherwise it is shown with a
    legend identifying each bar.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)  # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # Script-only flags: removed so they are not part of the dict passed to
    # utils.extract_data.  '--normalize' is accepted but has no effect here.
    should_save = params.pop('save')
    del params['normalize']

    # NOTE(review): the result is not used by this plot; the call is retained
    # in case it validates or adjusts params -- confirm and remove if it is pure.
    utils.compute_snapshot_ranges(params)

    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # scales = [1.0, 10.0, 20.0]
    scales = [1.0]
    partition = .1

    # Each series owns a fixed x slot, decided up front, so that a missing
    # result file leaves a gap instead of shifting (or breaking) the layout.
    first_iso_slot = len(update_periods_dagger)
    bc_slot = first_iso_slot + len(scales)
    first_dart_slot = bc_slot + 1

    bars = []  # (x position, legend label, height), in drawing order

    def collect(title, method_params, position, label):
        """Record one bar for a method, or log and skip it if data is missing."""
        try:
            means, sems = utils.extract_data(
                method_params, title, sub_dir, 'total_time')
        except IOError:
            log("Not found.")
            return
        bars.append((position, label, means[0]))

    # BC
    collect('test_bc', params.copy(), bc_slot, 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for offset, update_period in enumerate(update_periods_dagger):
        params_dagger['update_period'] = update_period
        collect('test_dagger', params_dagger, offset,
                'DAgger ' + str(update_period))

    # Isotropic noise
    for offset, scale in enumerate(scales):
        params_iso = params.copy()
        params_iso['scale'] = scale
        collect('test_iso', params_iso, first_iso_slot + offset,
                'Iso ' + str(scale))

    # DART
    params_dart = params.copy()
    params_dart['partition'] = partition
    for offset, update_period in enumerate(update_periods_dart):
        params_dart['update_period'] = update_period
        collect('test_dart', params_dart, first_dart_slot + offset,
                'DART part: ' + str(partition) + ", per: " + str(update_period))

    # One plt.bar call per series so each bar takes the next color of the
    # style's cycle; the label is what the legend displays in the show branch.
    for position, label, height in bars:
        plt.bar([position], [height], label=label)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    plt.xticks([])

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_time.pdf")
        plt.savefig(save_path + str(params['envname']) + "_time.svg")
    else:
        plt.legend()
        plt.show()
def main():
    """Plot supervisor-distribution and surrogate loss curves for each method.

    Command-line flags: ``--envname``, ``--t``, ``--iters`` (one or more ints),
    ``--update`` (one or more ints) and ``--save``.  The parsed flags plus the
    fixed ``arch``/``lr``/``epochs`` entries set below form the parameter dict
    handed to ``utils.extract_data``, which returns a ``(means, sems)`` pair.

    For every method the ``sup_loss`` curve is drawn dashed and the
    ``surr_loss`` curve solid with a +/- one SEM band, both in the same color.
    A method whose results raise ``IOError`` is reported on stdout and skipped.
    With ``--save`` the figure is written to ``images/<envname>_loss.pdf``;
    otherwise it is shown interactively.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # '--save' only steers this script, so it is removed from the dict that is
    # passed on to utils.extract_data.
    should_save = params.pop('save')

    plt.style.use('ggplot')
    iters = params['iters']

    def plot_losses(title, method_params, label):
        """Draw the dashed sup_loss and solid surr_loss curves of one method."""
        # The color is drawn before the lookup so that a skipped method still
        # consumes its color and the remaining methods keep theirs.
        c = next(color)
        try:
            means, sems = utils.extract_data(
                method_params, iters, title, sub_dir, 'sup_loss')
            plt.plot(iters, means, color=c, linestyle='--')

            means, sems = utils.extract_data(
                method_params, iters, title, sub_dir, 'surr_loss')
            plt.plot(iters, means, label=label, color=c)
            plt.fill_between(iters, (means - sems), (means + sems), alpha=.3,
                             color=c)
        except IOError as err:
            # Best effort: keep plotting the other methods, but say what is missing.
            print('Skipping %s (%s): %s' % (label, title, err))

    # Behavior Cloning loss on sup distr
    params_bc = params.copy()
    del params_bc['update']  # Updates are not used in behavior cloning
    plot_losses('test_bc', params_bc, 'Behavior Cloning')

    # DAgger. You may adjust the prior to whatever you chose.
    beta = .5
    params_dagger = params.copy()
    params_dagger['beta'] = beta
    del params_dagger['update']
    plot_losses('test_dagger', params_dagger, 'DAgger')

    # DAgger B (keeps 'update')
    params_dagger_b = params.copy()
    params_dagger_b['beta'] = beta
    plot_losses('test_dagger_b', params_dagger_b, 'DAgger-B')

    # Isotropic noise
    params_iso = params.copy()
    params_iso['scale'] = 1.0
    del params_iso['update']
    plot_losses('test_iso', params_iso, 'Isotropic Noise')

    # DART (keeps 'update')
    partition = 450
    params_dart = params.copy()
    params_dart['partition'] = partition
    plot_losses('test_dart', params_dart, 'DART ' + str(partition))

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()