Example #1
    def __init__(self, policy, env, n_envs, replay_pool_size, max_path_length, sampling_method,
                 save_rollouts=False, save_rollouts_observations=True, save_env_infos=False, env_str=None,
                 replay_pool_params=None):
        # avoid the shared-mutable-default pitfall: build the dict per call
        replay_pool_params = replay_pool_params if replay_pool_params is not None else {}
        self._policy = policy
        self._n_envs = n_envs

        assert self._n_envs == 1  # the policy reset logic assumes a single environment

        self._replay_pools = [ReplayPool(env.spec,
                                         env.horizon,
                                         policy.N,
                                         policy.gamma,
                                         replay_pool_size // n_envs,
                                         obs_history_len=policy.obs_history_len,
                                         sampling_method=sampling_method,
                                         save_rollouts=save_rollouts,
                                         save_rollouts_observations=save_rollouts_observations,
                                         save_env_infos=save_env_infos,
                                         replay_pool_params=replay_pool_params)
                              for _ in range(n_envs)]

        # NOTE: with the assert above, only the single-env branch is currently reachable
        if self._n_envs == 1:
            envs = [env]
        else:
            try:
                # deep-copy the environment once per parallel worker via a pickle round-trip
                envs = [pickle.loads(pickle.dumps(env)) for _ in range(self._n_envs)]
            except Exception:
                # fall back to constructing fresh environments if env is not picklable
                envs = [create_env(env_str) for _ in range(self._n_envs)]
        ### need to seed each environment if it is GymEnv
        # TODO: set seed
        self._vec_env = VecEnvExecutor(
            envs=envs,
            max_path_length=max_path_length
        )
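
The pickle round-trip above is a common way to deep-copy objects whose classes do not implement __deepcopy__. A minimal self-contained sketch of the idiom (the Env class here is a hypothetical stand-in):

import pickle

class Env(object):
    """Hypothetical environment with per-instance state."""
    def __init__(self, seed=0):
        self.seed = seed

env = Env(seed=42)
# serializing and immediately deserializing yields independent copies
copies = [pickle.loads(pickle.dumps(env)) for _ in range(4)]
assert all(c is not env and c.seed == env.seed for c in copies)
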
Example #2
    def _load_data(self, folder):
        """
        Loads all .pkl files that can be found recursively from this folder
        """
        assert os.path.exists(folder)

        rollouts = []
        num_load_success, num_load_fail = 0, 0
        for fname in glob.iglob('{0}/**/*.pkl'.format(folder), recursive=True):
            try:
                rollouts += mypickle.load(fname)['rollouts']
                num_load_success += 1
            except Exception:
                num_load_fail += 1
        num_total = num_load_success + num_load_fail
        logger.info('Files successfully loaded: {0:.2f}%'.format(
            100. * num_load_success / max(num_total, 1)))

        replay_pool = ReplayPool(
            env_spec=self._env.spec,
            env_horizon=self._env.horizon,
            N=self._model.N,
            gamma=self._model.gamma,
            size=int(1.1 * sum(len(r['dones']) for r in rollouts)),  # 10% headroom over the exact transition count
            obs_history_len=self._model.obs_history_len,
            sampling_method='uniform',
            save_rollouts=False,
            save_rollouts_observations=False,
            save_env_infos=True,
            replay_pool_params={}
        )

        # insert the rollouts back-to-back; curr_len is the pool index where each rollout starts
        curr_len = 0
        for rollout in rollouts:
            replay_pool.store_rollout(curr_len, rollout)
            curr_len += len(rollout['dones'])

        return replay_pool
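
Note that glob.iglob only descends into subdirectories when the pattern contains ** and recursive=True is passed. A quick standalone check (the folder path is illustrative):

import glob
import os

folder = '/tmp/rollouts'  # illustrative path
# yields folder/a.pkl, folder/sub/b.pkl, ... lazily, without building a list
for fname in glob.iglob(os.path.join(folder, '**', '*.pkl'), recursive=True):
    print(fname)
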
Example #3
def plot_rw_rccar_var001_var016(ckpt_itr=None):
    label_params = [['exp', ('exp_name',)]]

    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(i)),
                        label_params=label_params,
                        plot={}) for i in [19]
    ]

    mec = MultiExperimentComparison(experiment_groups)
    exps = mec.list

    ### plot length of the rollouts
    # f, axes = plt.subplots(1, len(exps), figsize=(32, 6), sharex=True, sharey=True)
    # for i, (exp, ax) in enumerate(zip(exps, axes)):
    #     rollouts = list(itertools.chain(*exp.train_rollouts))
    #     lengths = [len(r['dones']) for r in rollouts][:16]
    #     assert (len(lengths) == 16)
    #     steps = np.arange(len(lengths))
    #
    #     label = '{0}, height: {1}, color: {2}, H: {3}'.format(
    #         exp.name,
    #         exp.params['alg']['env'].split("'obs_shape': (")[1].split(',')[0],
    #         exp.params['alg']['env'].split(',')[-1].split(')})')[0],
    #         exp.params['policy']['H']
    #     )
    #
    #     ax.scatter(steps, lengths, color=cm.magma(i / len(exps)))
    #     ax.set_title(label)
    #     ax.legend()
    #
    # plt.tight_layout()
    # f.savefig('plots/rw-rccar/var001_016.png', bbox_inches='tight', dpi=100)

    ### plot policy on the rollouts

    # candidate actions
    for exp in exps:
        rollouts = mypickle.load(os.path.join(exp.folder, 'rosbag_rollouts_00.pkl'))['rollouts']
        rollouts = rollouts[::len(rollouts) // 16]  # keep ~16 evenly spaced rollouts
        # rollouts = list(itertools.chain(*exp.train_rollouts))[:16]

        tf_sess, tf_graph = GCGPolicy.create_session_and_graph(gpu_device=1, gpu_frac=0.6)

        with tf_sess.as_default(), tf_graph.as_default():
            exp.create_env()
            exp.create_policy()
            exp_ckpt_itr = exp.restore_policy(itr=ckpt_itr)

            K = 2048
            actions = np.random.uniform(*exp.env.action_space.bounds,
                                        size=(K, exp.policy.H + 1, exp.env.action_space.flat_dim))

            replay_pool = ReplayPool(
                env_spec=exp.env.spec,
                env_horizon=exp.env.horizon,
                N=exp.policy.N,
                gamma=1,
                size=int(1.1 * sum(len(r['dones']) for r in rollouts)),
                obs_history_len=exp.policy.obs_history_len,
                sampling_method='uniform'
            )

            step = 0
            outputs = []
            for i, r in enumerate(rollouts):
                r_len = len(r['dones'])
                outputs_i = []
                for j in range(r_len):
                    # evaluate and get output
                    observation = (r['observations'][j][0], np.empty([exp.policy.obs_history_len, 0]))
                    replay_pool.store_observation(step, observation)

                    encoded_observation = replay_pool.encode_recent_observation()

                    observation_im, observation_vec = encoded_observation
                    observations = (np.tile(observation_im, (K, 1, 1)), np.tile(observation_vec, (K, 1, 1)))

                    probcolls = exp.policy.get_model_outputs(observations, actions)
                    outputs_i.append(probcolls)

                    step += 1
                    replay_pool.store_effect(
                        r['actions'][j],
                        r['rewards'][j],
                        r['dones'][j],
                        None,
                        r['est_values'][j],
                        r['logprobs'][j]
                    )

                outputs.append(outputs_i)

        f, axes = plt.subplots(1, 2, figsize=(12, 8))
        imshow = None

        plot_folder = os.path.join(exp.folder, 'plot', 'ckpt_{0:03d}'.format(exp_ckpt_itr))
        os.makedirs(plot_folder, exist_ok=True)

        for i, (r_i, output_i) in enumerate(zip(rollouts, outputs)):
            for j, (obs, cost) in enumerate(zip(r_i['observations'], output_i)):
                obs_im, obs_vec = obs
                probcoll = -cost

                # plot image
                im = np.reshape(obs_im, exp.env.observation_im_space.shape)
                is_gray = (im.shape[-1] == 1)
                if is_gray:
                    im = im[:, :, 0]
                    color = 'Greys_r'
                else:
                    color = None

                if imshow is None:
                    imshow = axes[0].imshow(im, cmap=color)
                else:
                    imshow.set_data(im)

                # plot probcolls
                # integrate per-step steering into (x, y) arcs for visualization
                steers = actions[:, :-1, 0]
                angle_const = 0.5 * np.pi / 2.
                angles = angle_const * steers
                ys = np.cumsum(np.cos(angles), axis=1)
                xs = np.cumsum(-np.sin(angles), axis=1)
                sort_idxs = np.argsort(probcoll)

                xlim = (min(xs.min(), 0), max(xs.max(), 0))
                ylim = (min(ys.min(), -0.5), max(ys.max(), 0.5))
                min_probcoll = probcoll.min()
                max_probcoll = probcoll.max()

                # subsample ~keep trajectories, evenly spaced through the sorted ordering
                keep = 10
                ys = ys[sort_idxs][::K // keep]
                xs = xs[sort_idxs][::K // keep]
                probcoll = probcoll[sort_idxs][::K // keep]
                steers = steers[sort_idxs][::K // keep]

                ys = np.hstack((np.zeros((len(ys), 1)), ys))
                xs = np.hstack((np.zeros((len(xs), 1)), xs))

                # if lines is None:
                axes[1].cla()
                axes[1].plot(0, 0, 'rx', markersize=10)
                # lines = axes[1].plot(np.expand_dims(xs[:,-1], 0), np.expand_dims(ys[:,-1], 0),
                #                      marker='o', linestyle='', markersize=2)
                lines = axes[1].plot(xs.T, ys.T)
                axes[1].plot(xs[0,:], ys[0,:], 'b^', linestyle='', markersize=5)
                axes[1].arrow(0, 0, -2*np.sin(0.5*np.pi * steers[0,0]), 2*np.cos(0.5*np.pi * steers[0,0]), fc='b', ec='b')

                #normalize for color reasons
                # probcoll -= probcoll.min()
                # probcoll /= probcoll.max()
                for l, p in zip(lines, probcoll):
                    l.set_color(cm.viridis(1 - p))
                    l.set_markerfacecolor(cm.viridis(1 - p))

                axes[1].set_xlim(xlim)
                axes[1].set_ylim(ylim)
                axes[1].set_aspect('equal')

                axes[1].set_title('steer {0:.3f}, probcoll in [{1:.2f}, {2:.2f}]'.format(-steers[0, 0], min_probcoll, max_probcoll))

                f.savefig(os.path.join(plot_folder, 'rollout_{0:03d}_t_{1:03d}.png'.format(i, j)),
                          bbox_inches='tight', dpi=200)
                # break
            # break

        plt.close(f)

        tf_sess.close()
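
The plotting code above turns per-step steering angles into (x, y) arcs via cumulative sums of cos/sin. A minimal standalone sketch of that geometry, assuming unit distance per step:

import numpy as np

K, H = 4, 10
steers = np.random.uniform(-1., 1., size=(K, H))  # normalized steering commands
angles = (0.5 * np.pi / 2.) * steers              # map commands to heading angles
ys = np.cumsum(np.cos(angles), axis=1)            # forward progress per trajectory
xs = np.cumsum(-np.sin(angles), axis=1)           # lateral displacement per trajectory
assert xs.shape == ys.shape == (K, H)
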
Example #4
    def statistics(self):
        return ReplayPool.statistics_pools(self._replay_pools)
Example #5
    def get_recent_paths(self):
        return ReplayPool.get_recent_paths_pools(self._replay_pools)
Example #6
    def log(self, prefix=''):
        ReplayPool.log_pools(self._replay_pools, prefix=prefix)
Example #7
    def sample(self, batch_size):
        return ReplayPool.sample_pools(self._replay_pools, batch_size,
                                       only_completed_episodes=self._policy.only_completed_episodes)
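
Examples #4 through #7 are thin wrappers that delegate per-pool work to ReplayPool class methods operating on the list of pools, so callers only ever touch the sampler. A hypothetical usage sketch (sampler construction elided):

batch = sampler.sample(batch_size=32)   # delegates to ReplayPool.sample_pools
sampler.log(prefix='train/')            # delegates to ReplayPool.log_pools
paths = sampler.get_recent_paths()      # delegates to ReplayPool.get_recent_paths_pools
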
Example #8
class GatherMPCData(object):
    def __init__(self, env, steps, save_file, H, K):
        self._env = env
        self._steps = steps
        self._save_file = save_file
        self._H = H
        self._K = K

        # gather random rollouts once, purely to harvest feasible starting poses
        grd = GatherRandomData(env, int(100 * steps), '/tmp/tmp.pkl')
        grd.run()

        rollouts = mypickle.load('/tmp/tmp.pkl')['rollouts']
        self._start_poses = []
        while len(self._start_poses) < steps:
            rollout = random.choice(rollouts)
            env_info = random.choice(rollout['env_infos'])
            if 'pos' not in env_info:
                # fall back to the rollout's first env_info, which carries the pose
                env_info = rollout['env_infos'][0]
            self._start_poses.append((env_info['pos'], env_info['hpr']))

        self._replay_pool = ReplayPool(self._env.spec,
                                       self._env.horizon,
                                       1,
                                       1,
                                       int(1e5),
                                       obs_history_len=1,
                                       sampling_method='uniform',
                                       save_rollouts=True,
                                       save_rollouts_observations=True,
                                       save_env_infos=True,
                                       replay_pool_params={})

        # K candidate action sequences, each H steps long, sampled uniformly at random
        self._action_sequences = np.array([
            [self._env.action_space.sample() for _ in range(self._H)]
            for _ in range(self._K)
        ])

    def run(self):
        rollouts = []
        step = 0
        for start_pose in self._start_poses:
            if len(rollouts) > self._steps:
                break

            rollouts_i = []
            for action_sequence in self._action_sequences:
                curr_obs, curr_goals = self._env.reset(start_pose[0], start_pose[1])
                for action in action_sequence:
                    self._replay_pool.store_observation(step, curr_obs)
                    next_obs, goal, reward, done, env_info = self._env.step(action)
                    self._replay_pool.store_effect(action, reward, done, env_info, np.nan, np.nan)
                    step += 1
                    if done:
                        break
                    curr_obs = next_obs
                else:
                    # for/else: runs only if the loop never hit `break`,
                    # i.e. the env never reported done, so close the episode manually
                    self._replay_pool.force_done()

                rollout = self._replay_pool.get_recent_paths()
                assert len(rollout) == 1
                rollout = rollout[0]
                rollouts_i.append(rollout)

            lengths = [len(r['dones']) for r in rollouts_i]
            print('length: min {0:.1f}, mean {1:.1f}, max {2:.1f}'.format(np.min(lengths), np.mean(lengths), np.max(lengths)))
            rollouts.append(rollouts_i)

        mypickle.dump({'rollouts': rollouts}, self._save_file)
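
As noted in the comment above, the else clause of a for loop executes only when the loop completes without break. A tiny illustration:

for i in range(3):
    if i > 10:  # never true, so no break fires
        break
else:
    print('loop completed without break')  # this line runs
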
Example #9
    def __init__(self, num_bootstraps, **kwargs):
        self._num_bootstraps = num_bootstraps
        # one independent replay pool per bootstrap head
        self._replay_pools = [
            ReplayPool(**kwargs) for _ in range(self._num_bootstraps)
        ]
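
These per-bootstrap pools pair naturally with the pooled class methods shown in Examples #4 through #7, e.g. (replay_pool_kwargs is a hypothetical placeholder for the constructor arguments):

pools = [ReplayPool(**replay_pool_kwargs) for _ in range(5)]
batch = ReplayPool.sample_pools(pools, 32, only_completed_episodes=False)
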