# Example 1
def main():
    """Parse command-line options and launch a single curriculum-learning run.

    Selects the experiment class matching ``--env``, constructs it with the
    chosen curriculum type, learner and seed, then trains and evaluates it.
    Unrecognized CLI arguments are forwarded to ``parse_parameters``.
    """
    parser = argparse.ArgumentParser("Self-Paced Learning experiment runner")
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--type",
                        type=str,
                        default="default",
                        choices=[
                            "default", "random", "self_paced", "self_paced_v2",
                            "alp_gmm", "goal_gan"
                        ])
    parser.add_argument("--learner", type=str, choices=["trpo", "ppo", "sac"])
    parser.add_argument(
        "--env",
        type=str,
        default="point_mass",
        choices=["point_mass", "point_mass_2d", "ball_catching"])
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--true_rewards", action="store_true", default=False)

    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)

    # Limit torch to one thread for the self-paced curriculum to avoid CPU
    # oversubscription when multiple seeds run in parallel.
    if args.type == "self_paced":
        import torch
        torch.set_num_threads(1)

    # Lazily import only the experiment class that is actually needed, then
    # construct it once with a single shared argument list.
    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment as ExperimentClass
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment as ExperimentClass
    else:
        from deep_sprl.experiments import BallCatchingExperiment as ExperimentClass

    exp = ExperimentClass(args.base_log_dir,
                          args.type,
                          args.learner,
                          parameters,
                          args.seed,
                          use_true_rew=args.true_rewards)

    exp.train()
    exp.evaluate()
# Example 2
def main():
    """Plot evaluation trajectories of trained point-mass agents.

    For every curriculum type found under the point-mass log directory, the
    final model (iteration 995) of each seed is rolled out once in the
    evaluation environment and its x/y trajectory is drawn on a sketch of the
    environment (black walls, red goal cross).  Figures are shown
    interactively, or saved as PDFs when ``--save_dir`` is given.

    Fixes over the previous version: figures are closed after use (they
    previously accumulated, one per curriculum type), and the trajectory list
    no longer shadows the filesystem path variable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument("--learner",
                        type=str,
                        required=True,
                        choices=["trpo", "ppo", "sac"])
    parser.add_argument("--save_dir", type=str, default=None)
    args = parser.parse_args()

    # Create the evaluation environment; the curriculum type is switched per
    # iteration below, so "default" and seed 1 are placeholders here.
    exp = PointMassExperiment(args.base_log_dir, "default", args.learner, {},
                              1)
    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           "point_mass")
    types = [
        d for d in os.listdir(log_dir)
        if os.path.isdir(os.path.join(log_dir, d))
    ]
    for cur_type in types:
        f = plt.figure(figsize=(1.4, 1.4))
        ax = f.gca()
        # Environment sketch: two wall segments and the goal cross.
        ax.plot([-5., 2.], [-0.1, -0.1], linewidth=5, color="black")
        ax.plot([3., 5.], [-0.1, -0.1], linewidth=5, color="black")
        ax.plot([-0.25, 0.25], [-3.25, -2.75], linewidth=3, color="red")
        ax.plot([-0.25, 0.25], [-2.75, -3.25], linewidth=3, color="red")

        exp.curriculum = CurriculumType.from_string(cur_type)
        type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                    os.path.dirname(exp.get_log_dir()))
        seeds = [
            int(d.split("-")[1]) for d in os.listdir(type_log_dir)
            if os.path.isdir(os.path.join(type_log_dir, d))
        ]
        for seed in seeds:
            # Only seeds that reached the final iteration (995) are plotted.
            model_dir = os.path.join(type_log_dir, "seed-" + str(seed),
                                     "iteration-" + str(995))
            if os.path.exists(model_dir):
                model = exp.learner.load_for_evaluation(
                    os.path.join(model_dir, "model"), exp.vec_eval_env)

                trajectory = []
                done = False
                obs = exp.vec_eval_env.reset()
                # Observation indices 0 and 2 hold the x/y position of the
                # point mass - TODO confirm against the env definition.
                trajectory.append(obs[0][[0, 2]])
                while not done:
                    action = model.step(obs, state=None, deterministic=False)
                    obs, reward, done, info = exp.vec_eval_env.step(action)

                    # We need to add this check because the vectorized
                    # environment automatically resets everything on done
                    if not done:
                        trajectory.append(obs[0][[0, 2]])

                trajectory = np.array(trajectory)
                ax.plot(trajectory[:, 0],
                        trajectory[:, 1],
                        color="C0",
                        alpha=0.5,
                        linewidth=3)

        ax.set_xlim([-4, 4])
        ax.set_ylim([-4, 4])
        ax.set_xticks([])
        ax.set_yticks([])

        if args.save_dir is None:
            plt.title(labels[cur_type])
            plt.tight_layout()
            plt.show()
        else:
            plt.tight_layout()
            plt.savefig(os.path.join(
                args.save_dir,
                "point_mass_%s_%s_trajs.pdf" % (args.learner, cur_type)),
                        bbox_inches='tight',
                        pad_inches=0)
        # Close the figure explicitly; otherwise one open figure per
        # curriculum type accumulates in memory.
        plt.close(f)
# Example 3
def main():
    """Aggregate final performances per curriculum type and report the best.

    For the chosen environment/learner, the last entry of each seed's
    ``performance.pkl`` is collected per curriculum type (plus the two
    ball-catching ablations, marked with '*').  The type with the highest mean
    is printed with its standard error; every other type is compared against
    it with a two-sample t-test.

    Fixes over the previous version: a missing ``performance.pkl`` produces a
    warning instead of an unhandled FileNotFoundError, and an empty result set
    no longer crashes the final formatting with ``None`` values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument(
        "--env",
        type=str,
        default="point_mass",
        choices=["point_mass", "point_mass_2d", "ball_catching", "ant"])
    parser.add_argument("--learner",
                        type=str,
                        default="trpo",
                        choices=["trpo", "ppo", "sac"])

    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)
    # Lazily import only the experiment class that is required.
    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    elif args.env == "ant":
        from deep_sprl.experiments import AntExperiment
        exp = AntExperiment(args.base_log_dir, "default", args.learner,
                            parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default",
                                     args.learner, parameters, 1)

    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           args.env)
    types = [
        d for d in os.listdir(log_dir)
        if os.path.isdir(os.path.join(log_dir, d))
    ]
    # Ball catching is additionally evaluated in two ablated configurations;
    # their entries share the '*' suffix in the printed report.
    if args.env == "ball_catching":
        exps = [
            exp,
            BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                   {"INIT_CONTEXT": False}, 1),
            BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                   {"INIT_POLICY": False}, 1)
        ]
        appendices = ["", "*", "*"]
    else:
        exps = [exp]
        appendices = [""]

    performances = {}
    for cur_type in types:
        for exp, appendix in zip(exps, appendices):
            exp.curriculum = CurriculumType.from_string(cur_type)
            type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                        os.path.dirname(exp.get_log_dir()))
            if os.path.exists(type_log_dir):
                seeds = [
                    int(d.split("-")[1]) for d in os.listdir(type_log_dir)
                    if os.path.isdir(os.path.join(type_log_dir, d))
                ]
                if len(seeds) != 0:
                    type_perf = []
                    for seed in seeds:
                        perf_file = os.path.join(type_log_dir,
                                                 "seed-" + str(seed),
                                                 "performance.pkl")
                        # A missing file means the run was never evaluated -
                        # warn and skip instead of crashing.
                        if os.path.exists(perf_file):
                            with open(perf_file, "rb") as f:
                                type_perf.append(pickle.load(f)[-1])
                        else:
                            print("Warning! Seed %d of type %s has not been "
                                  "evaluated. Maybe there was a problem with "
                                  "this run!" % (seed, cur_type))
                    performances[cur_type + appendix] = np.array(type_perf)

    if len(performances) == 0:
        # Nothing to compare; the formatting below would fail on None values.
        print("No evaluated runs found in %s" % log_dir)
        return

    best_type = None
    best_mean_perf = -np.inf
    best_se = None
    for key, value in performances.items():
        if np.mean(value) > best_mean_perf:
            best_mean_perf = np.mean(value)
            best_se = np.std(value) / np.sqrt(len(value))
            best_type = key

    print("Best Type: %s, Best performance: %.2f, std: %.2f" %
          (best_type, best_mean_perf, best_se))
    for key in sorted(performances.keys()):
        if key != best_type:
            mean_perf = np.mean(performances[key])
            # Standard error of the mean (the 1-D arrays make axis=0 moot).
            se = np.std(performances[key]) / np.sqrt(len(performances[key]))
            pvalue = ttest_ind(performances[best_type], performances[key])[1]
            print("Type: %s, performance: %.2f, std: %.2f, P-Value: %.3e" %
                  (key, mean_perf, se, pvalue))
# Example 4
def main() -> None:
    """Render reward curves (top row) and context-distribution snapshots
    (bottom row) for up to two environments side by side, then save the
    combined figure as a PDF.

    NOTE(review): this function rebinds the module-level LABEL_DICT,
    COLOR_DICT, MARKER_DICT and METHODS globals while handling the
    ball-catching ablations, so the plotting order below is significant.
    """
    global LABEL_DICT
    global COLOR_DICT
    global MARKER_DICT
    global METHODS

    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument(
        "--env",
        type=str,
        default=["point_mass"],
        nargs="*",
        choices=["point_mass", "point_mass_2d", "ball_catching"])
    parser.add_argument("--learner",
                        type=str,
                        default=["trpo"],
                        nargs="*",
                        choices=["trpo", "ppo", "sac"])
    parser.add_argument("--dist_vis", required=False, type=str)
    parser.add_argument("--methods",
                        nargs="*",
                        type=str,
                        choices=[
                            "self_paced", "self_paced_v2", "alp_gmm", "random",
                            "default", "goal_gan"
                        ])

    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)
    # Envs and learners are paired positionally, so their counts must match.
    if len(args.env) != len(args.learner):
        raise RuntimeError("Number of envs and learners must be equal")

    # Restrict the plotted methods if an explicit subset was requested.
    if args.methods is not None and len(args.methods) != 0:
        METHODS = args.methods

    n_envs = len(args.env)
    # NOTE(review): only a warning is printed here; a third env would still be
    # processed below and fail when indexing axs_top.
    if n_envs > 2:
        print("At most two envs are allowed!")

    f = plt.figure(figsize=(WIDTH, MUL * WIDTH))

    # Fine-grained grid: rows 0-50 hold the reward curves, rows 77-105 hold
    # the strip of context-distribution snapshot axes.
    n_rows = 110
    n_cols = 300

    gs = f.add_gridspec(n_rows, n_cols)
    if n_envs == 1:
        axs_top = [f.add_subplot(gs[0:50, :])]
    else:
        axs_top = [
            f.add_subplot(gs[0:50, 0:140]),
            f.add_subplot(gs[0:50, 160:])
        ]
    axs_bottom = [
        f.add_subplot(gs[77:105, idx[0]:idx[1]])
        for idx in compute_indices(N_DIST_ITERS, n_cols, 11)
    ]

    for i in range(0, len(axs_bottom)):
        axs_bottom[i].tick_params(axis='both',
                                  which='major',
                                  labelsize=FONT_SIZE)
        axs_bottom[i].tick_params(axis='both',
                                  which='minor',
                                  labelsize=FONT_SIZE)

    # Collected legend handles/labels across all envs (deduplicated below).
    lines = []
    kl_lines = []
    labels = []
    kl_labels = []
    for k in range(0, len(args.env)):
        axs_top[k].tick_params(axis='both', which='major', labelsize=FONT_SIZE)
        axs_top[k].tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
        axs_top[k].set_xlabel(r"Iteration", fontsize=FONT_SIZE)
        axs_top[k].set_title(ENV_NAMES[args.env[k]], fontsize=FONT_SIZE)

        # Import lazily so only the required experiment module is loaded.
        if args.env[k] == "point_mass":
            from deep_sprl.experiments import PointMassExperiment
            exp = PointMassExperiment(args.base_log_dir, "default",
                                      args.learner[k], parameters, 1)
        elif args.env[k] == "point_mass_2d":
            from deep_sprl.experiments import PointMass2DExperiment
            exp = PointMass2DExperiment(args.base_log_dir, "default",
                                        args.learner[k], parameters, 1)
        else:
            from deep_sprl.experiments import BallCatchingExperiment
            exp = BallCatchingExperiment(args.base_log_dir, "default",
                                         args.learner[k], parameters, 1)

        new_lines = []
        new_kl_lines = []
        new_labels = []
        new_kl_labels = []
        if args.env[k] != "ball_catching":
            add_plots(exp,
                      axs_top[k],
                      axs_bottom,
                      new_lines,
                      new_labels,
                      dist_vis=args.dist_vis)
            if args.env[k].startswith("point_mass"):
                add_sprl_plot(exp, axs_top[k], new_lines, new_labels,
                              args.base_log_dir, COLOR_DICT["sprl"])
        else:
            # Ball catching is plotted in three passes: the full setup (labels
            # marked with '*'), then without context initialization, then
            # without policy initialization.  The globals are rebound before
            # each pass to switch labels/colors/markers.
            LABEL_DICT["self_paced"] = r"SPDL*"
            LABEL_DICT["self_paced_v2"] = r"SPDL2*"
            LABEL_DICT["goal_gan"] = r"GoalGAN*"
            COLOR_DICT["self_paced"] = "C5"
            COLOR_DICT["self_paced_v2"] = "C6"
            COLOR_DICT["goal_gan"] = "C8"
            MARKER_DICT["self_paced"] = "v"
            MARKER_DICT["goal_gan"] = "d"
            add_plots(exp,
                      axs_top[k],
                      axs_bottom,
                      new_lines,
                      new_labels,
                      dist_vis=None)

            # Second pass: ablation without context initialization.
            exp = BallCatchingExperiment(
                args.base_log_dir, "default", args.learner[k], {
                    **parameters, "INIT_CONTEXT": False
                }, 1)
            LABEL_DICT = {
                "self_paced": r"SPDL",
                "goal_gan": r"GoalGAN",
                "self_paced_v2": r"SPDL2"
            }
            COLOR_DICT = {
                "self_paced": "C0",
                "goal_gan": "C4",
                "self_paced_v2": "C1"
            }
            MARKER_DICT = {
                "self_paced": "^",
                "goal_gan": "D",
                "self_paced_v2": "x"
            }
            add_plots(exp,
                      axs_top[k],
                      axs_bottom,
                      new_lines,
                      new_labels,
                      dist_vis=args.dist_vis)

            # Third pass: ablation without policy initialization.
            exp = BallCatchingExperiment(args.base_log_dir, "default",
                                         args.learner[k], {
                                             **parameters, "INIT_POLICY": False
                                         }, 1)
            LABEL_DICT = {"default": r"Default*"}
            COLOR_DICT = {"default": "C7"}
            MARKER_DICT = {"default": "."}
            add_plots(exp,
                      axs_top[k],
                      axs_bottom,
                      new_lines,
                      new_labels,
                      dist_vis=None)

        # Only add new lines
        for new_line, new_label in zip(new_lines, new_labels):
            if new_label not in labels:
                lines.append(new_line)
                labels.append(new_label)

        # NOTE(review): new_kl_lines/new_kl_labels are never populated above,
        # so this loop currently collects nothing.
        for new_kl_line, new_kl_label in zip(new_kl_lines, new_kl_labels):
            if new_kl_label not in kl_labels:
                kl_lines.append(new_kl_line)
                kl_labels.append(new_kl_label)

        axs_top[k].grid()

    for i in range(0, len(axs_bottom)):
        axs_bottom[i].grid()

    # Shared legend above the figure for all collected method handles.
    lgd = f.legend(lines,
                   labels,
                   loc='lower center',
                   bbox_to_anchor=(0.48, 0.95),
                   ncol=9,
                   fontsize=FONT_SIZE,
                   handlelength=1.0,
                   labelspacing=0.,
                   handletextpad=0.5,
                   columnspacing=1.0)

    axs_top[0].set_ylabel(r"Reward", fontsize=FONT_SIZE)
    # Output filename is the concatenation of all env/learner pairs.
    filename = ""
    for env, learner in zip(args.env, args.learner):
        if len(filename) == 0:
            filename += env + "_" + learner
        else:
            filename += "_" + env + "_" + learner
    # Per-env-combination bounding boxes, if one was precomputed.
    key = "+".join(args.env)
    bbox = BBOXES[key] if key in BBOXES else None
    plt.savefig(filename + ".pdf",
                bbox_extra_artists=(lgd, ),
                bbox_inches=bbox)
# Example 5
def main() -> None:
    """Report final performances for every curriculum type of one env/learner
    combination (including the ball-catching ablations) and compare each
    against the best-performing type with Welch's t-test.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument(
        "--env",
        type=str,
        default="point_mass",
        choices=["point_mass", "point_mass_2d", "ball_catching"])
    parser.add_argument("--learner",
                        type=str,
                        default="trpo",
                        choices=["trpo", "ppo", "sac"])

    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)
    # Lazily import only the experiment class that is required.
    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default",
                                     args.learner, parameters, 1)

    log_dir = os.path.join(os.path.dirname(__file__), "..", args.base_log_dir,
                           args.env)
    types = [
        d for d in os.listdir(log_dir)
        if os.path.isdir(os.path.join(log_dir, d))
    ]
    if args.env == "ball_catching":
        # Use functions because the ZETA values are class fields and hence are altered when the others are created
        exps = [
            exp, lambda:
            BallCatchingExperiment(args.base_log_dir, "default", args.learner,
                                   {"INIT_CONTEXT": False}, 1),
            lambda: BallCatchingExperiment(args.base_log_dir, "default", args.
                                           learner, {"INIT_POLICY": False}, 1)
        ]
        appendices = ["", " (no_init_con)", " (no_init_pol)"]
    else:
        exps = [exp]
        appendices = [""]

    performances = {}
    for cur_type in types:
        for exp, appendix in zip(exps, appendices):
            # "sprl" log directories are skipped here - presumably handled by
            # a separate plotting path; verify against the callers.
            if cur_type != "sprl":
                # The ablation entries are factories (see comment above); the
                # first entry is a plain instance, hence the callable() check.
                if callable(exp):
                    exp = exp()
                exp.curriculum = CurriculumType.from_string(cur_type)
                # Mirror the training setup: true rewards were only used for
                # SAC with self_paced_v2, so the log-dir lookup must match.
                exp.use_true_rew = args.learner == "sac" and cur_type == "self_paced_v2"
                type_log_dir = os.path.join(os.path.dirname(__file__), "..",
                                            os.path.dirname(exp.get_log_dir()))
                if os.path.exists(type_log_dir):
                    seeds = [
                        int(d.split("-")[1]) for d in os.listdir(type_log_dir)
                        if os.path.isdir(os.path.join(type_log_dir, d))
                    ]
                    if len(seeds) != 0:
                        type_perf = []
                        for seed in seeds:
                            seed_log_dir = os.path.join(
                                type_log_dir, "seed-" + str(seed))
                            # Collect the final performance entry of each
                            # evaluated seed; warn about unevaluated ones.
                            if os.path.exists(
                                    os.path.join(seed_log_dir,
                                                 "performance.pkl")):
                                with open(
                                        os.path.join(seed_log_dir,
                                                     "performance.pkl"),
                                        "rb") as f:
                                    type_perf.append(pickle.load(f)[-1])
                            else:
                                print(
                                    "Warning! Seed %d has not been evaluated. Maybe there was a problem with this run!"
                                    % seed)
                        performances[cur_type + appendix] = np.array(type_perf)

    # Find the curriculum type with the highest mean final performance.
    best_type = None
    best_mean_perf = -np.inf
    best_se = None
    for key, value in performances.items():
        if np.mean(value) > best_mean_perf:
            best_mean_perf = np.mean(value)
            best_se = np.std(value) / np.sqrt(len(value))
            best_type = key

    print("Best Type: %s, Best performance: %.2f, std: %.2f" %
          (best_type, best_mean_perf, best_se))
    for key in sorted(performances.keys()):
        if key != best_type:
            mean_perf = np.mean(performances[key])
            se = np.std(performances[key], axis=0) / np.sqrt(
                len(performances[key]))
            # equal_var=False selects Welch's t-test (unequal variances).
            pvalue = ttest_ind(performances[best_type],
                               performances[key],
                               equal_var=False)[1]
            print("Type: %s, performance: %.2f, std: %.2f, P-Value: %.3e" %
                  (key, mean_perf, se, pvalue))
# Example 6
def main() -> None:
    """Show performance (top axis) and KL-divergence (bottom axis) curves for
    one environment/learner combination.

    NOTE(review): the module-level LABEL_DICT/COLOR_DICT globals are rebound
    between the ball-catching plotting passes, so the pass order below is
    significant.
    """
    global LABEL_DICT
    global COLOR_DICT

    parser = argparse.ArgumentParser()
    parser.add_argument("--base_log_dir", type=str, default="logs")
    parser.add_argument(
        "--env",
        type=str,
        default="point_mass",
        choices=["point_mass", "point_mass_2d", "ball_catching", "ant"])
    parser.add_argument("--learner",
                        type=str,
                        default="trpo",
                        choices=["trpo", "ppo", "sac"])

    args, remainder = parser.parse_known_args()
    parameters = parse_parameters(remainder)
    # Lazily import only the experiment class that is required.
    if args.env == "point_mass":
        from deep_sprl.experiments import PointMassExperiment
        exp = PointMassExperiment(args.base_log_dir, "default", args.learner,
                                  parameters, 1)
    elif args.env == "point_mass_2d":
        from deep_sprl.experiments import PointMass2DExperiment
        exp = PointMass2DExperiment(args.base_log_dir, "default", args.learner,
                                    parameters, 1)
    elif args.env == "ant":
        from deep_sprl.experiments import AntExperiment
        exp = AntExperiment(args.base_log_dir, "default", args.learner,
                            parameters, 1)
    else:
        from deep_sprl.experiments import BallCatchingExperiment
        exp = BallCatchingExperiment(args.base_log_dir, "default",
                                     args.learner, parameters, 1)

    # (3, 2.8)
    # Two stacked axes sharing the x-axis: performance on top, KL below.
    f, axs = plt.subplots(2,
                          1,
                          sharex=True,
                          gridspec_kw={"height_ratios": [2.8, 1.2]},
                          figsize=(3, 2.5))

    axs[0].set_ylabel(r"Performance", fontsize=FONT_SIZE)
    axs[1].set_ylabel(r"KL-Divergence", fontsize=FONT_SIZE)
    axs[1].set_xlabel(r"Iteration", fontsize=FONT_SIZE)

    for i in range(0, 2):
        axs[i].tick_params(axis='both', which='major', labelsize=FONT_SIZE)
        axs[i].tick_params(axis='both', which='minor', labelsize=FONT_SIZE)

    # Legend handles/labels collected by the add_*_plot helpers.
    lines = []
    kl_lines = []
    labels = []
    kl_labels = []
    if args.env != "ball_catching":
        add_plots(exp,
                  axs,
                  lines,
                  kl_lines,
                  labels,
                  kl_labels,
                  args.env,
                  kl_color="C0")
        if args.env.startswith("point_mass"):
            add_sprl_plot(exp, axs, lines, kl_lines, labels, kl_labels,
                          args.base_log_dir, COLOR_DICT["sprl"])
    else:
        # Ball catching is plotted in three passes: full setup (labels marked
        # with '*'), then without context initialization, then without policy
        # initialization.  The globals are rebound before each pass to switch
        # labels and colors.
        LABEL_DICT["self_paced"] = r"SPDL*"
        LABEL_DICT["goal_gan"] = r"GoalGAN*"
        COLOR_DICT["self_paced"] = "C5"
        COLOR_DICT["goal_gan"] = "C8"
        add_plots(exp,
                  axs,
                  lines,
                  kl_lines,
                  labels,
                  kl_labels,
                  args.env,
                  kl_color="C5")
        # Second pass: ablation without context initialization.
        exp = BallCatchingExperiment(args.base_log_dir, "default",
                                     args.learner, {"INIT_CONTEXT": False}, 1)
        LABEL_DICT = {"self_paced": r"SPDL", "goal_gan": r"GoalGAN"}
        COLOR_DICT = {"self_paced": "C0", "goal_gan": "C4"}
        add_plots(exp,
                  axs,
                  lines,
                  kl_lines,
                  labels,
                  kl_labels,
                  args.env,
                  kl_color="C0")

        # Third pass: ablation without policy initialization.
        exp = BallCatchingExperiment(args.base_log_dir, "default",
                                     args.learner, {"INIT_POLICY": False}, 1)
        LABEL_DICT = {"default": r"Default*"}
        COLOR_DICT = {"default": "C7"}
        add_plots(exp, axs, lines, kl_lines, labels, kl_labels, args.env)
    axs[0].legend(lines,
                  labels,
                  loc='lower center',
                  bbox_to_anchor=(0.5, 1.05),
                  ncol=3,
                  fontsize=FONT_SIZE,
                  handlelength=1.0,
                  labelspacing=0.,
                  handletextpad=0.5,
                  columnspacing=1.0)
    # axs[1].legend(kl_lines, kl_labels, fontsize=FONT_SIZE, framealpha=0.3)
    # Clamp the performance axis to precomputed limits where available.
    if args.env in LIMITS:
        axs[0].set_ylim(LIMITS[args.env])
    axs[0].grid()
    axs[1].grid()
    plt.tight_layout()
    plt.show()