Exemplo n.º 1
0
def _rollout_trace(env, fixed_z_policy, args):
    """Roll out one episode and return the per-step samples to be plotted.

    What is recorded depends on ``args``:

    * ``args.use_qpos``: ``env.wrapped_env.env.model.data.qpos[:, 0]`` after
      the reset and after every step.
    * ``args.use_action`` (only consulted when ``use_qpos`` is false): the
      action taken at every step.
    * otherwise: the observation after the reset and after every step.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``.
        fixed_z_policy: Policy exposing ``get_action(obs)``.
        args: Options object; reads ``use_qpos``, ``use_action`` and
            ``max_path_length``.

    Returns:
        ``np.ndarray`` built from the recorded samples, one row per sample.
        It is empty when actions are recorded and ``max_path_length`` is 0.
    """
    def read_qpos():
        return env.wrapped_env.env.model.data.qpos[:, 0]

    obs = env.reset()
    if args.use_qpos:
        samples = [read_qpos()]
    elif args.use_action:
        # No action exists before the first step.  Seeding the trace with the
        # reset observation would mix observation and action vectors in one
        # array (bogus first point, or a ragged array if their sizes differ).
        samples = []
    else:
        samples = [obs]

    # The reward/done/info values returned by env.step are discarded, so the
    # rollout always runs for exactly max_path_length steps.
    for _step in range(args.max_path_length):
        action, _ = fixed_z_policy.get_action(obs)
        obs, _, _, _ = env.step(action)
        if args.use_qpos:
            samples.append(read_qpos())
        elif args.use_action:
            samples.append(action)
        else:
            samples.append(obs)

    return np.array(samples)


def dump_trace(picklefile: str, args) -> None:
    """Plot 2-D rollout traces for the skills of a saved policy and save a PNG.

    Loads ``data['policy']`` and ``data['env']`` from ``picklefile``, rolls
    out ``args.n_paths`` episodes per selected skill, and plots column
    ``args.dim_0`` against column ``args.dim_1`` of each trace, one colour per
    skill.  The image is written next to the pickle as
    ``<picklefile without extension>_<dim_0>_<dim_1>_trace.png``.

    Args:
        picklefile: Path of the snapshot to load.  It is also used as the
            TensorFlow variable-scope name.
        args: Options object (presumably an ``argparse.Namespace`` -- confirm
            against the caller).  Attributes read: ``dim_0``, ``dim_1``,
            ``deterministic``, ``specific_skill``, ``n_paths``, ``use_qpos``,
            ``use_action``, ``max_path_length``.
    """
    filename = '{}_{}_{}_trace.png'.format(os.path.splitext(picklefile)[0],
                                           args.dim_0, args.dim_1)

    with tf.Session(), tf.variable_scope(picklefile):
        # NOTE: joblib.load unpickles arbitrary objects -- only pass snapshot
        # files that come from a trusted source.
        data = joblib.load(picklefile)
        policy = data['policy']
        env = data['env']
        # Number of skills = extra observation dimensions the policy takes on
        # top of the environment's own observation.
        num_skills = (policy.observation_space.flat_dim
                      - env.spec.observation_space.flat_dim)

        if args.specific_skill == _use_all_skills:
            skills = range(num_skills)
        else:
            skills = [args.specific_skill]

        # The observation-space bounds do not change between rollouts, so the
        # finiteness check and conversion are done once, not once per path.
        # NOTE(review): the limits come from the observation space even when
        # qpos or actions are plotted, and unpacking them below assumes
        # exactly two observation dimensions -- confirm this is intended.
        bounds = np.asarray(env.observation_space.bounds)
        plot_lims = bounds.T if np.isfinite(bounds).all() else None

        plt.figure(figsize=(6, 6))
        try:
            palette = sns.color_palette('hls', num_skills)
            with policy.deterministic(args.deterministic):
                for z in skills:
                    fixed_z_policy = FixedOptionPolicy(policy, num_skills, z)
                    for _path_index in range(args.n_paths):
                        trace = _rollout_trace(env, fixed_z_policy, args)
                        if trace.size == 0:
                            # Nothing was recorded (action trace, zero steps).
                            continue
                        plt.plot(trace[:, args.dim_0], trace[:, args.dim_1],
                                 c=palette[z])

                        if plot_lims is not None:
                            xlim, ylim = plot_lims
                            plt.xlim(xlim)
                            plt.ylim(ylim)

            plt.savefig(filename)
        finally:
            # Release the figure even if a rollout, the plotting or the save
            # fails, so repeated calls do not accumulate open figures.
            plt.close()
            'env'].spec.observation_space.flat_dim

        plt.figure(figsize=(6, 6))
        palette = sns.color_palette('hls', num_skills)
        with policy.deterministic(args.deterministic):
            for z in range(num_skills):
                fixed_z_policy = FixedOptionPolicy(policy, num_skills, z)
                for path_index in range(args.n_paths):
                    obs = env.reset()
                    if args.use_qpos:
                        qpos = env.wrapped_env.env.model.data.qpos[:, 0]
                        obs_vec = [qpos]
                    else:
                        obs_vec = [obs]
                    for t in range(args.max_path_length):
                        action, _ = fixed_z_policy.get_action(obs)
                        (obs, _, _, _) = env.step(action)
                        if args.use_qpos:
                            qpos = env.wrapped_env.env.model.data.qpos[:, 0]
                            obs_vec.append(qpos)
                        elif args.use_action:
                            obs_vec.append(action)
                        else:
                            obs_vec.append(obs)

                    obs_vec = np.array(obs_vec)
                    x = obs_vec[:, args.dim_0]
                    y = obs_vec[:, args.dim_1]
                    plt.plot(x, y, c=palette[z])

        plt.savefig(filename)