def main(): parser = argparse.ArgumentParser("Self-Paced Learning experiment runner") parser.add_argument("--base_log_dir", type=str, default="logs") parser.add_argument("--type", type=str, default="default", choices=[ "default", "random", "self_paced", "self_paced_v2", "alp_gmm", "goal_gan" ]) parser.add_argument("--learner", type=str, choices=["trpo", "ppo", "sac"]) parser.add_argument( "--env", type=str, default="point_mass", choices=["point_mass", "point_mass_2d", "ball_catching"]) parser.add_argument("--seed", type=int, default=1) parser.add_argument("--true_rewards", action="store_true", default=False) args, remainder = parser.parse_known_args() parameters = parse_parameters(remainder) if args.type == "self_paced": import torch torch.set_num_threads(1) if args.env == "point_mass": from deep_sprl.experiments import PointMassExperiment exp = PointMassExperiment(args.base_log_dir, args.type, args.learner, parameters, args.seed, use_true_rew=args.true_rewards) elif args.env == "point_mass_2d": from deep_sprl.experiments import PointMass2DExperiment exp = PointMass2DExperiment(args.base_log_dir, args.type, args.learner, parameters, args.seed, use_true_rew=args.true_rewards) else: from deep_sprl.experiments import BallCatchingExperiment exp = BallCatchingExperiment(args.base_log_dir, args.type, args.learner, parameters, args.seed, use_true_rew=args.true_rewards) exp.train() exp.evaluate()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--learner", type=str, required=True,
                        choices=["trpo", "ppo", "sac"])
    parser.add_argument("--save_dir", type=str, default=None)
    args = parser.parse_args()

    # Create the evaluation environment
    exp = PointMassExperiment(args.base_log_dir, "default", args.learner, {}, 1)

    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           "point_mass")
    types = [d for d in os.listdir(log_dir)
             if os.path.isdir(os.path.join(log_dir, d))]

    for cur_type in types:
        f = plt.figure(figsize=(1.4, 1.4))
        ax = f.gca()
        # Wall segments (the gap in between is the gate) ...
        ax.plot([-5., 2.], [-0.1, -0.1], linewidth=5, color="black")
        ax.plot([3., 5.], [-0.1, -0.1], linewidth=5, color="black")
        # ... and the goal position, drawn as a red cross
        ax.plot([-0.25, 0.25], [-3.25, -2.75], linewidth=3, color="red")
        ax.plot([-0.25, 0.25], [-2.75, -3.25], linewidth=3, color="red")

        exp.curriculum = CurriculumType.from_string(cur_type)
        type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                    os.path.dirname(exp.get_log_dir()))
        seeds = [int(d.split("-")[1]) for d in os.listdir(type_log_dir)
                 if os.path.isdir(os.path.join(type_log_dir, d))]

        for seed in seeds:
            model_dir = os.path.join(type_log_dir, "seed-" + str(seed),
                                     "iteration-995")
            if os.path.exists(model_dir):
                model = exp.learner.load_for_evaluation(
                    os.path.join(model_dir, "model"), exp.vec_eval_env)

                trajectory = []
                done = False
                obs = exp.vec_eval_env.reset()
                trajectory.append(obs[0][[0, 2]])
                while not done:
                    action = model.step(obs, state=None, deterministic=False)
                    obs, reward, done, info = exp.vec_eval_env.step(action)
                    # We need this check because the vectorized environment
                    # automatically resets everything on done, so the final
                    # observation already belongs to the next episode
                    if not done:
                        trajectory.append(obs[0][[0, 2]])

                trajectory = np.array(trajectory)
                ax.plot(trajectory[:, 0], trajectory[:, 1], color="C0",
                        alpha=0.5, linewidth=3)

        ax.set_xlim([-4, 4])
        ax.set_ylim([-4, 4])
        ax.set_xticks([])
        ax.set_yticks([])

        if args.save_dir is None:
            plt.title(labels[cur_type])
            plt.tight_layout()
            plt.show()
        else:
            plt.tight_layout()
            plt.savefig(os.path.join(args.save_dir,
                                     "point_mass_%s_%s_trajs.pdf"
                                     % (args.learner, cur_type)),
                        bbox_inches='tight', pad_inches=0)
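
# The rollout loop above could be factored into a reusable helper. A minimal
# sketch, assuming the stable-baselines-style step() interface used above;
# collect_trajectory is a hypothetical name, not part of the repository:
import numpy as np


def collect_trajectory(model, vec_env, dims=(0, 2)):
    obs = vec_env.reset()
    trajectory = [obs[0][list(dims)]]
    done = False
    while not done:
        action = model.step(obs, state=None, deterministic=False)
        obs, reward, done, info = vec_env.step(action)
        # Vectorized environments reset automatically on done, so the final
        # observation already belongs to the next episode and is discarded
        if not done:
            trajectory.append(obs[0][list(dims)])
    return np.array(trajectory)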
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--env", type=str, default="point_mass",
                        choices=["point_mass", "point_mass_2d",
                                 "ball_catching", "ant"])
    parser.add_argument("--learner", type=str, default="trpo",
                        choices=["trpo", "ppo", "sac"])
    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)

    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    elif args.env == "ant":
        from deep_sprl.experiments import AntExperiment
        exp = AntExperiment(args.base_log_dir, "default", args.learner,
                            parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                     parameters, 1)

    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           args.env)
    types = [d for d in os.listdir(log_dir)
             if os.path.isdir(os.path.join(log_dir, d))]

    if args.env == "ball_catching":
        exps = [exp,
                BallCatchingExperiment(args.base_log_dir, "default",
                                       args.learner, {"INIT_CONTEXT": False}, 1),
                BallCatchingExperiment(args.base_log_dir, "default",
                                       args.learner, {"INIT_POLICY": False}, 1)]
        appendices = ["", "*", "*"]
    else:
        exps = [exp]
        appendices = [""]

    performances = {}
    for cur_type in types:
        for exp, appendix in zip(exps, appendices):
            exp.curriculum = CurriculumType.from_string(cur_type)
            type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                        os.path.dirname(exp.get_log_dir()))
            if os.path.exists(type_log_dir):
                seeds = [int(d.split("-")[1]) for d in os.listdir(type_log_dir)
                         if os.path.isdir(os.path.join(type_log_dir, d))]
                if len(seeds) != 0:
                    type_perf = []
                    for seed in seeds:
                        seed_log_dir = os.path.join(type_log_dir,
                                                    "seed-" + str(seed))
                        with open(os.path.join(seed_log_dir, "performance.pkl"),
                                  "rb") as f:
                            type_perf.append(pickle.load(f)[-1])
                    performances[cur_type + appendix] = np.array(type_perf)

    best_type = None
    best_mean_perf = -np.inf
    best_se = None
    for key, value in performances.items():
        if np.mean(value) > best_mean_perf:
            best_mean_perf = np.mean(value)
            best_se = np.std(value) / np.sqrt(len(value))
            best_type = key

    # The reported spread is the standard error of the mean, not the std
    print("Best Type: %s, Best performance: %.2f, std. err.: %.2f"
          % (best_type, best_mean_perf, best_se))
    for key in sorted(performances.keys()):
        if key != best_type:
            mean_perf = np.mean(performances[key])
            se = np.std(performances[key], axis=0) / np.sqrt(
                len(performances[key]))
            pvalue = ttest_ind(performances[best_type], performances[key])[1]
            print("Type: %s, performance: %.2f, std. err.: %.2f, P-Value: %.3e"
                  % (key, mean_perf, se, pvalue))
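
# ttest_ind above defaults to Student's t-test, which assumes equal variances
# between the two seed populations. A minimal, self-contained usage sketch
# (the synthetic numbers below are illustrative only):
import numpy as np
from scipy.stats import ttest_ind

rng = np.random.default_rng(0)
perf_a = rng.normal(0.0, 1.0, size=20)  # e.g. per-seed final returns, method A
perf_b = rng.normal(0.5, 1.0, size=20)  # e.g. per-seed final returns, method B
stat, pvalue = ttest_ind(perf_a, perf_b)                       # Student's t-test
stat_w, pvalue_w = ttest_ind(perf_a, perf_b, equal_var=False)  # Welch's t-test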
def main():
    global LABEL_DICT
    global COLOR_DICT
    global MARKER_DICT
    global METHODS

    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--env", type=str, default=["point_mass"], nargs="*",
                        choices=["point_mass", "point_mass_2d", "ball_catching"])
    parser.add_argument("--learner", type=str, default=["trpo"], nargs="*",
                        choices=["trpo", "ppo", "sac"])
    parser.add_argument("--dist_vis", required=False, type=str)
    parser.add_argument("--methods", nargs="*", type=str,
                        choices=["self_paced", "self_paced_v2", "alp_gmm",
                                 "random", "default", "goal_gan"])
    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)

    if len(args.env) != len(args.learner):
        raise RuntimeError("Number of envs and learners must be equal")

    if args.methods is not None and len(args.methods) != 0:
        METHODS = args.methods

    n_envs = len(args.env)
    if n_envs > 2:
        # The grid layout below only supports one or two environments
        raise RuntimeError("At most two envs are allowed!")

    f = plt.figure(figsize=(WIDTH, MUL * WIDTH))
    n_rows = 110
    n_cols = 300
    gs = f.add_gridspec(n_rows, n_cols)
    if n_envs == 1:
        axs_top = [f.add_subplot(gs[0:50, :])]
    else:
        axs_top = [f.add_subplot(gs[0:50, 0:140]),
                   f.add_subplot(gs[0:50, 160:])]
    axs_bottom = [f.add_subplot(gs[77:105, idx[0]:idx[1]])
                  for idx in compute_indices(N_DIST_ITERS, n_cols, 11)]

    for i in range(0, len(axs_bottom)):
        axs_bottom[i].tick_params(axis='both', which='major',
                                  labelsize=FONT_SIZE)
        axs_bottom[i].tick_params(axis='both', which='minor',
                                  labelsize=FONT_SIZE)

    lines = []
    kl_lines = []
    labels = []
    kl_labels = []
    for k in range(0, len(args.env)):
        axs_top[k].tick_params(axis='both', which='major', labelsize=FONT_SIZE)
        axs_top[k].tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
        axs_top[k].set_xlabel(r"Iteration", fontsize=FONT_SIZE)
        axs_top[k].set_title(ENV_NAMES[args.env[k]], fontsize=FONT_SIZE)

        if args.env[k] == "point_mass":
            from deep_sprl.experiments import PointMassExperiment
            exp = PointMassExperiment(args.base_log_dir, "default",
                                      args.learner[k], parameters, 1)
        elif args.env[k] == "point_mass_2d":
            from deep_sprl.experiments import PointMass2DExperiment
            exp = PointMass2DExperiment(args.base_log_dir, "default",
                                        args.learner[k], parameters, 1)
        else:
            from deep_sprl.experiments import BallCatchingExperiment
            exp = BallCatchingExperiment(args.base_log_dir, "default",
                                         args.learner[k], parameters, 1)

        new_lines = []
        new_kl_lines = []
        new_labels = []
        new_kl_labels = []
        if args.env[k] != "ball_catching":
            add_plots(exp, axs_top[k], axs_bottom, new_lines, new_labels,
                      dist_vis=args.dist_vis)
            if args.env[k].startswith("point_mass"):
                add_sprl_plot(exp, axs_top[k], new_lines, new_labels,
                              args.base_log_dir, COLOR_DICT["sprl"])
        else:
            # First pass: default ball-catching configuration, plotted under
            # starred labels
            LABEL_DICT["self_paced"] = r"SPDL*"
            LABEL_DICT["self_paced_v2"] = r"SPDL2*"
            LABEL_DICT["goal_gan"] = r"GoalGAN*"
            COLOR_DICT["self_paced"] = "C5"
            COLOR_DICT["self_paced_v2"] = "C6"
            COLOR_DICT["goal_gan"] = "C8"
            MARKER_DICT["self_paced"] = "v"
            MARKER_DICT["goal_gan"] = "d"
            add_plots(exp, axs_top[k], axs_bottom, new_lines, new_labels,
                      dist_vis=None)

            # Second pass: uninitialized context distribution, plotted under
            # the unstarred labels
            exp = BallCatchingExperiment(args.base_log_dir, "default",
                                         args.learner[k],
                                         {**parameters, "INIT_CONTEXT": False},
                                         1)
            LABEL_DICT = {"self_paced": r"SPDL", "goal_gan": r"GoalGAN",
                          "self_paced_v2": r"SPDL2"}
            COLOR_DICT = {"self_paced": "C0", "goal_gan": "C4",
                          "self_paced_v2": "C1"}
            MARKER_DICT = {"self_paced": "^", "goal_gan": "D",
                           "self_paced_v2": "x"}
            add_plots(exp, axs_top[k], axs_bottom, new_lines, new_labels,
                      dist_vis=args.dist_vis)

            # Third pass: default curriculum without policy initialization
            exp = BallCatchingExperiment(args.base_log_dir, "default",
                                         args.learner[k],
                                         {**parameters, "INIT_POLICY": False},
                                         1)
            LABEL_DICT = {"default": r"Default*"}
            COLOR_DICT = {"default": "C7"}
            MARKER_DICT = {"default": "."}
            add_plots(exp, axs_top[k], axs_bottom, new_lines, new_labels,
                      dist_vis=None)

        # Only add lines whose labels have not been seen yet
        for new_line, new_label in zip(new_lines, new_labels):
            if new_label not in labels:
                lines.append(new_line)
                labels.append(new_label)
        for new_kl_line, new_kl_label in zip(new_kl_lines, new_kl_labels):
            if new_kl_label not in kl_labels:
                kl_lines.append(new_kl_line)
                kl_labels.append(new_kl_label)

        axs_top[k].grid()

    for i in range(0, len(axs_bottom)):
        axs_bottom[i].grid()

    lgd = f.legend(lines, labels, loc='lower center',
                   bbox_to_anchor=(0.48, 0.95), ncol=9, fontsize=FONT_SIZE,
                   handlelength=1.0, labelspacing=0., handletextpad=0.5,
                   columnspacing=1.0)
    axs_top[0].set_ylabel(r"Reward", fontsize=FONT_SIZE)

    filename = ""
    for env, learner in zip(args.env, args.learner):
        if len(filename) == 0:
            filename += env + "_" + learner
        else:
            filename += "_" + env + "_" + learner

    key = "+".join(args.env)
    bbox = BBOXES[key] if key in BBOXES else None
    plt.savefig(filename + ".pdf", bbox_extra_artists=(lgd,), bbox_inches=bbox)
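
# compute_indices is used above but not shown in this section. A plausible
# minimal sketch (an assumption, not the repository's actual code) that splits
# n_cols gridspec columns into n equally wide panels separated by a fixed gap:
def compute_indices(n, n_cols, gap):
    width = (n_cols - (n - 1) * gap) // n
    return [(i * (width + gap), i * (width + gap) + width) for i in range(n)]

# For example, compute_indices(3, 300, 11) yields
# [(0, 92), (103, 195), (206, 298)]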
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--env", type=str, default="point_mass",
                        choices=["point_mass", "point_mass_2d", "ball_catching"])
    parser.add_argument("--learner", type=str, default="trpo",
                        choices=["trpo", "ppo", "sac"])
    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)

    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                     parameters, 1)

    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           args.env)
    types = [d for d in os.listdir(log_dir)
             if os.path.isdir(os.path.join(log_dir, d))]

    if args.env == "ball_catching":
        # Wrap the variants in lambdas because the ZETA values are class
        # fields and hence would be altered as soon as the other experiments
        # are created
        exps = [exp,
                lambda: BallCatchingExperiment(args.base_log_dir, "default",
                                               args.learner,
                                               {"INIT_CONTEXT": False}, 1),
                lambda: BallCatchingExperiment(args.base_log_dir, "default",
                                               args.learner,
                                               {"INIT_POLICY": False}, 1)]
        appendices = ["", " (no_init_con)", " (no_init_pol)"]
    else:
        exps = [exp]
        appendices = [""]

    performances = {}
    for cur_type in types:
        for exp, appendix in zip(exps, appendices):
            if cur_type != "sprl":
                if callable(exp):
                    exp = exp()
                exp.curriculum = CurriculumType.from_string(cur_type)
                exp.use_true_rew = (args.learner == "sac"
                                    and cur_type == "self_paced_v2")
                type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                            os.path.dirname(exp.get_log_dir()))
                if os.path.exists(type_log_dir):
                    seeds = [int(d.split("-")[1])
                             for d in os.listdir(type_log_dir)
                             if os.path.isdir(os.path.join(type_log_dir, d))]
                    if len(seeds) != 0:
                        type_perf = []
                        for seed in seeds:
                            seed_log_dir = os.path.join(type_log_dir,
                                                        "seed-" + str(seed))
                            perf_file = os.path.join(seed_log_dir,
                                                     "performance.pkl")
                            if os.path.exists(perf_file):
                                with open(perf_file, "rb") as f:
                                    type_perf.append(pickle.load(f)[-1])
                            else:
                                print("Warning! Seed %d has not been evaluated."
                                      " Maybe there was a problem with this"
                                      " run!" % seed)
                        performances[cur_type + appendix] = np.array(type_perf)

    best_type = None
    best_mean_perf = -np.inf
    best_se = None
    for key, value in performances.items():
        if np.mean(value) > best_mean_perf:
            best_mean_perf = np.mean(value)
            best_se = np.std(value) / np.sqrt(len(value))
            best_type = key

    # The reported spread is the standard error of the mean, not the std
    print("Best Type: %s, Best performance: %.2f, std. err.: %.2f"
          % (best_type, best_mean_perf, best_se))
    for key in sorted(performances.keys()):
        if key != best_type:
            mean_perf = np.mean(performances[key])
            se = np.std(performances[key], axis=0) / np.sqrt(
                len(performances[key]))
            # Welch's t-test (unequal variances) against the best type
            pvalue = ttest_ind(performances[best_type], performances[key],
                               equal_var=False)[1]
            print("Type: %s, performance: %.2f, std. err.: %.2f, P-Value: %.3e"
                  % (key, mean_perf, se, pvalue))
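
# Why the lambdas above are necessary: a toy illustration of the aliasing that
# class-level fields introduce (Cfg is a made-up class, not repository code).
class Cfg:
    ZETA = 1.0  # class-level field, shared by every instance

    def __init__(self, zeta=None):
        if zeta is not None:
            Cfg.ZETA = zeta  # writes to the class, not to the instance

a = Cfg()
b = Cfg(zeta=2.0)
print(a.ZETA)  # 2.0 -- constructing b changed what a observes
# Wrapping construction in a lambda defers this mutation until the variant is
# actually evaluated, so earlier experiments keep their intended settings.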
def main():
    global LABEL_DICT
    global COLOR_DICT

    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--env", type=str, default="point_mass",
                        choices=["point_mass", "point_mass_2d",
                                 "ball_catching", "ant"])
    parser.add_argument("--learner", type=str, default="trpo",
                        choices=["trpo", "ppo", "sac"])
    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)

    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    elif args.env == "ant":
        from deep_sprl.experiments import AntExperiment
        exp = AntExperiment(args.base_log_dir, "default", args.learner,
                            parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                     parameters, 1)

    f, axs = plt.subplots(2, 1, sharex=True,
                          gridspec_kw={"height_ratios": [2.8, 1.2]},
                          figsize=(3, 2.5))
    axs[0].set_ylabel(r"Performance", fontsize=FONT_SIZE)
    axs[1].set_ylabel(r"KL-Divergence", fontsize=FONT_SIZE)
    axs[1].set_xlabel(r"Iteration", fontsize=FONT_SIZE)
    for i in range(0, 2):
        axs[i].tick_params(axis='both', which='major', labelsize=FONT_SIZE)
        axs[i].tick_params(axis='both', which='minor', labelsize=FONT_SIZE)

    lines = []
    kl_lines = []
    labels = []
    kl_labels = []
    if args.env != "ball_catching":
        add_plots(exp, axs, lines, kl_lines, labels, kl_labels, args.env,
                  kl_color="C0")
        if args.env.startswith("point_mass"):
            add_sprl_plot(exp, axs, lines, kl_lines, labels, kl_labels,
                          args.base_log_dir, COLOR_DICT["sprl"])
    else:
        LABEL_DICT["self_paced"] = r"SPDL*"
        LABEL_DICT["goal_gan"] = r"GoalGAN*"
        COLOR_DICT["self_paced"] = "C5"
        COLOR_DICT["goal_gan"] = "C8"
        add_plots(exp, axs, lines, kl_lines, labels, kl_labels, args.env,
                  kl_color="C5")

        exp = BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                     {"INIT_CONTEXT": False}, 1)
        LABEL_DICT = {"self_paced": r"SPDL", "goal_gan": r"GoalGAN"}
        COLOR_DICT = {"self_paced": "C0", "goal_gan": "C4"}
        add_plots(exp, axs, lines, kl_lines, labels, kl_labels, args.env,
                  kl_color="C0")

        exp = BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                     {"INIT_POLICY": False}, 1)
        LABEL_DICT = {"default": r"Default*"}
        COLOR_DICT = {"default": "C7"}
        add_plots(exp, axs, lines, kl_lines, labels, kl_labels, args.env)

    axs[0].legend(lines, labels, loc='lower center', bbox_to_anchor=(0.5, 1.05),
                  ncol=3, fontsize=FONT_SIZE, handlelength=1.0, labelspacing=0.,
                  handletextpad=0.5, columnspacing=1.0)
    # axs[1].legend(kl_lines, kl_labels, fontsize=FONT_SIZE, framealpha=0.3)

    if args.env in LIMITS:
        axs[0].set_ylim(LIMITS[args.env])
    axs[0].grid()
    axs[1].grid()
    plt.tight_layout()
    plt.show()
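
# add_plots and add_sprl_plot are defined elsewhere in the repository. As a
# rough sketch of the kind of aggregation such a function might perform (an
# assumption, not the actual implementation), the seed-wise curves could be
# reduced to a mean line with a standard-error band:
import numpy as np


def plot_mean_se(ax, curves, color, label):
    curves = np.asarray(curves)  # shape: (n_seeds, n_iterations)
    mean = curves.mean(axis=0)
    se = curves.std(axis=0) / np.sqrt(curves.shape[0])
    iterations = np.arange(mean.shape[0])
    line, = ax.plot(iterations, mean, color=color, label=label)
    ax.fill_between(iterations, mean - se, mean + se, color=color, alpha=0.3)
    return line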