def __init__(self, policy, env, n_envs, replay_pool_size, max_path_length, sampling_method,
             save_rollouts=False, save_rollouts_observations=True, save_env_infos=False,
             env_str=None, replay_pool_params=None):
    """Create one ReplayPool per environment and a vectorized env executor.

    :param policy: policy object; N, gamma, obs_history_len and
        only_completed_episodes are read from it
    :param env: environment instance (replicated when n_envs > 1)
    :param n_envs: number of parallel environments; currently must be 1
        (the policy reset logic assumes a single env)
    :param replay_pool_size: total replay capacity, split evenly across envs
    :param max_path_length: rollout length cap enforced by VecEnvExecutor
    :param sampling_method: sampling strategy name forwarded to ReplayPool
    :param env_str: spec string used to re-create the env when it cannot
        be deep-copied via pickle
    :param replay_pool_params: extra kwargs forwarded to each ReplayPool
        (defaults to {}; was a mutable default argument before)
    """
    # Fix the shared-mutable-default pitfall: build a fresh dict per call.
    if replay_pool_params is None:
        replay_pool_params = {}

    self._policy = policy
    self._n_envs = n_envs
    assert(self._n_envs == 1) # b/c policy reset

    self._replay_pools = [ReplayPool(env.spec,
                                     env.horizon,
                                     policy.N,
                                     policy.gamma,
                                     replay_pool_size // n_envs,
                                     obs_history_len=policy.obs_history_len,
                                     sampling_method=sampling_method,
                                     save_rollouts=save_rollouts,
                                     save_rollouts_observations=save_rollouts_observations,
                                     save_env_infos=save_env_infos,
                                     replay_pool_params=replay_pool_params)
                          for _ in range(n_envs)]

    if self._n_envs == 1:
        envs = [env]
    else:
        # NOTE(review): unreachable while the n_envs == 1 assert above holds;
        # kept for when multi-env support returns. The old inline
        # "if self._n_envs > 1 else [env]" ternaries were always True here
        # and have been removed.
        try:
            # Deep-copy the env via pickle round-trip.
            envs = [pickle.loads(pickle.dumps(env)) for _ in range(self._n_envs)]
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt still propagates;
            # fall back to re-creating the env from its spec string.
            envs = [create_env(env_str) for _ in range(self._n_envs)]
    ### need to seed each environment if it is GymEnv
    # TODO: set seed
    self._vec_env = VecEnvExecutor(
        envs=envs,
        max_path_length=max_path_length
    )
def _load_data(self, folder):
    """Recursively load every rollout .pkl under *folder* into a new ReplayPool.

    Loading is best-effort: files that fail to parse are skipped and counted
    so one corrupt file does not abort the whole import.

    :param folder: existing directory searched recursively for '*.pkl' files
    :return: a ReplayPool sized (with 10% headroom) to hold all loaded rollouts
    """
    assert(os.path.exists(folder))

    rollouts = []
    num_load_success, num_load_fail = 0, 0
    for fname in glob.iglob('{0}/**/*.pkl'.format(folder), recursive=True):
        try:
            rollouts += mypickle.load(fname)['rollouts']
            num_load_success += 1
        except Exception:
            # Narrowed from a bare except: keep best-effort loading, but let
            # KeyboardInterrupt/SystemExit propagate.
            num_load_fail += 1
    num_total = num_load_success + num_load_fail
    if num_total > 0:
        logger.info('Files successfully loaded: {0:.2f}%'.format(
            100. * num_load_success / float(num_total)))
    else:
        # Previously raised ZeroDivisionError when no .pkl files were found.
        logger.info('Files successfully loaded: no .pkl files found in {0}'.format(folder))

    # Size the pool with 10% headroom over the exact number of stored timesteps.
    replay_pool = ReplayPool(
        env_spec=self._env.spec,
        env_horizon=self._env.horizon,
        N=self._model.N,
        gamma=self._model.gamma,
        size=int(1.1 * sum([len(r['dones']) for r in rollouts])),
        obs_history_len=self._model.obs_history_len,
        sampling_method='uniform',
        save_rollouts=False,
        save_rollouts_observations=False,
        save_env_infos=True,
        replay_pool_params={}
    )
    # Rollouts are stored back-to-back; curr_len tracks the running offset.
    curr_len = 0
    for rollout in rollouts:
        replay_pool.store_rollout(curr_len, rollout)
        curr_len += len(rollout['dones'])

    return replay_pool
def __init__(self, env, steps, save_file, H, K):
    """Set up start poses, a replay pool, and K random H-step action sequences.

    :param env: environment used for data gathering
    :param steps: number of start poses to collect
    :param save_file: path where gathered rollouts will be saved
    :param H: planning horizon (actions per sequence)
    :param K: number of candidate action sequences
    """
    self._env = env
    self._steps = steps
    self._save_file = save_file
    self._H = H
    self._K = K

    # Gather random rollouts once, then draw (pos, hpr) start poses from
    # their env_infos; fall back to the first env_info when 'pos' is missing.
    grd = GatherRandomData(env, int(100*steps), '/tmp/tmp.pkl')
    grd.run()
    rollouts = mypickle.load('/tmp/tmp.pkl')['rollouts']
    self._start_poses = []
    while len(self._start_poses) < steps:
        rollout = random.choice(rollouts)
        env_info = random.choice(rollout['env_infos'])
        if 'pos' not in env_info:
            env_info = rollout['env_infos'][0]
        self._start_poses.append((env_info['pos'], env_info['hpr']))

    # Pool that records full rollouts (observations and env infos included).
    self._replay_pool = ReplayPool(self._env.spec,
                                   self._env.horizon,
                                   1,
                                   1,
                                   int(1e5),
                                   obs_history_len=1,
                                   sampling_method='uniform',
                                   save_rollouts=True,
                                   save_rollouts_observations=True,
                                   save_env_infos=True,
                                   replay_pool_params={})

    # K fixed candidate sequences of H randomly sampled actions each.
    self._action_sequences = np.array(
        [[self._env.action_space.sample() for _ in range(self._H)]
         for _ in range(self._K)])
def plot_rw_rccar_var001_var016(ckpt_itr=None):
    """Visualize policy collision predictions along saved RC-car rollouts.

    For each experiment in the group: loads rosbag rollouts, replays each
    recorded observation through the restored policy against K random
    candidate action sequences, then saves one figure per timestep
    (camera image + candidate trajectories colored by predicted score)
    under <exp.folder>/plot/ckpt_<itr>/.

    :param ckpt_itr: checkpoint iteration to restore; None lets
        restore_policy pick its default (presumably the latest — TODO confirm)
    """
    label_params = [['exp', ('exp_name',)]]
    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(i)),
                        label_params=label_params,
                        plot={ })
        for i in [19]
    ]
    mec = MultiExperimentComparison(experiment_groups)
    exps = mec.list

    ### plot length of the rollouts
    # f, axes = plt.subplots(1, len(exps), figsize=(32, 6), sharex=True, sharey=True)
    # for i, (exp, ax) in enumerate(zip(exps, axes)):
    #     rollouts = list(itertools.chain(*exp.train_rollouts))
    #     lengths = [len(r['dones']) for r in rollouts][:16]
    #     assert (len(lengths) == 16)
    #     steps = np.arange(len(lengths))
    #
    #     label = '{0}, height: {1}, color: {2}, H: {3}'.format(
    #         exp.name,
    #         exp.params['alg']['env'].split("'obs_shape': (")[1].split(',')[0],
    #         exp.params['alg']['env'].split(',')[-1].split(')})')[0],
    #         exp.params['policy']['H']
    #     )
    #
    #     ax.scatter(steps, lengths, color=cm.magma(i / len(exps)))
    #     ax.set_title(label)
    #     ax.legend()
    #
    # plt.tight_layout()
    # f.savefig('plots/rw-rccar/var001_016.png', bbox_inches='tight', dpi=100)

    ### plot policy on the rollouts
    # candidate actions
    for exp in exps:
        rollouts = mypickle.load(os.path.join(exp.folder, 'rosbag_rollouts_00.pkl'))['rollouts']
        # Subsample to roughly 16 evenly spaced rollouts.
        rollouts = rollouts[::len(rollouts) // 16]
        # rollouts = list(itertools.chain(*exp.train_rollouts))[:16]

        tf_sess, tf_graph = GCGPolicy.create_session_and_graph(gpu_device=1, gpu_frac=0.6)
        with tf_sess.as_default(), tf_graph.as_default():
            exp.create_env()
            exp.create_policy()
            exp_ckpt_itr = exp.restore_policy(itr=ckpt_itr)

            # K random candidate action sequences, each of length H+1,
            # sampled uniformly within the action-space bounds.
            K = 2048
            actions = np.random.uniform(*exp.env.action_space.bounds,
                                        size=(K, exp.policy.H + 1, exp.env.action_space.flat_dim))

            # Replay pool used only to build observation histories
            # (encode_recent_observation) — never sampled from.
            replay_pool = ReplayPool(
                env_spec=exp.env.spec,
                env_horizon=exp.env.horizon,
                N=exp.policy.N,
                gamma=1,
                size=int(1.1 * sum([len(r['dones']) for r in rollouts])),
                obs_history_len=exp.policy.obs_history_len,
                sampling_method='uniform'
            )

            # First pass: run the model on every recorded timestep.
            step = 0
            outputs = []
            for i, r in enumerate(rollouts):
                r_len = len(r['dones'])
                outputs_i = []
                for j in range(r_len):
                    # evaluate and get output
                    # NOTE(review): the vector part of the observation is an
                    # empty placeholder — only the image appears to be used.
                    observation = (r['observations'][j][0], np.empty([exp.policy.obs_history_len, 0]))
                    replay_pool.store_observation(step, observation)
                    encoded_observation = replay_pool.encode_recent_observation()
                    observation_im, observation_vec = encoded_observation
                    # Tile the single observation across all K candidates.
                    observations = (np.tile(observation_im, (K, 1, 1)),
                                    np.tile(observation_vec, (K, 1, 1)))
                    probcolls = exp.policy.get_model_outputs(observations, actions)
                    outputs_i.append(probcolls)

                    step += 1
                    # est_values/logprobs come from the recorded rollout;
                    # goals slot is None.
                    replay_pool.store_effect(
                        r['actions'][j],
                        r['rewards'][j],
                        r['dones'][j],
                        None,
                        r['est_values'][j],
                        r['logprobs'][j]
                    )
                outputs.append(outputs_i)

            # Second pass: render one figure per (rollout, timestep).
            f, axes = plt.subplots(1, 2, figsize=(12, 8))
            imshow = None  # reuse the AxesImage across frames for speed
            plot_folder = os.path.join(exp.folder, 'plot', 'ckpt_{0:03d}'.format(exp_ckpt_itr))
            os.makedirs(plot_folder, exist_ok=True)
            for i, (r_i, output_i) in enumerate(zip(rollouts, outputs)):
                for j, (obs, cost) in enumerate(zip(r_i['observations'], output_i)):
                    obs_im, obs_vec = obs
                    # NOTE(review): model output is negated here — presumably
                    # it is a value/reward so the negation ranks by collision
                    # likelihood; confirm against get_model_outputs.
                    probcoll = -cost

                    # plot image
                    im = np.reshape(obs_im, exp.env.observation_im_space.shape)
                    is_gray = (im.shape[-1] == 1)
                    if is_gray:
                        im = im[:, :, 0]
                        color = 'Greys_r'
                    else:
                        color = None
                    if imshow is None:
                        imshow = axes[0].imshow(im, cmap=color)
                    else:
                        imshow.set_data(im)

                    # plot probcolls
                    # Integrate steering angles into (x, y) trajectories;
                    # last action of each sequence is dropped ([:, :-1, 0]).
                    steers = actions[:, :-1, 0]
                    angle_const = 0.5 * np.pi / 2.
                    angles = angle_const * steers
                    ys = np.cumsum(np.cos(angles), axis=1)
                    xs = np.cumsum(-np.sin(angles), axis=1)
                    sort_idxs = np.argsort(probcoll)
                    xlim = (min(xs.min(), 0), max(xs.max(), 0))
                    ylim = (min(ys.min(), -0.5), max(ys.max(), 0.5))
                    min_probcoll = probcoll.min()
                    max_probcoll = probcoll.max()
                    # Keep only every (K // keep)-th candidate, ordered by score.
                    keep = 10
                    ys = ys[sort_idxs][::K//keep]
                    xs = xs[sort_idxs][::K//keep]
                    probcoll = probcoll[sort_idxs][::K//keep]
                    steers = steers[sort_idxs][::K//keep]
                    # Prepend the origin so every trajectory starts at (0, 0).
                    ys = np.hstack((np.zeros((len(ys), 1)), ys))
                    xs = np.hstack((np.zeros((len(xs), 1)), xs))
                    # if lines is None:
                    axes[1].cla()
                    axes[1].plot(0, 0, 'rx', markersize=10)
                    # lines = axes[1].plot(np.expand_dims(xs[:,-1], 0), np.expand_dims(ys[:,-1], 0),
                    #                      marker='o', linestyle='', markersize=2)
                    lines = axes[1].plot(xs.T, ys.T)
                    # Best-scoring trajectory highlighted, plus an arrow for
                    # its first steering command.
                    axes[1].plot(xs[0,:], ys[0,:], 'b^', linestyle='', markersize=5)
                    axes[1].arrow(0, 0,
                                  -2*np.sin(0.5*np.pi * steers[0,0]),
                                  2*np.cos(0.5*np.pi * steers[0,0]),
                                  fc='b', ec='b')
                    #normalize for color reasons
                    # probcoll -= probcoll.min()
                    # probcoll /= probcoll.max()
                    for l, p in zip(lines, probcoll):
                        l.set_color(cm.viridis(1 - p))
                        l.set_markerfacecolor(cm.viridis(1 - p))
                    axes[1].set_xlim(xlim)
                    axes[1].set_ylim(ylim)
                    axes[1].set_aspect('equal')
                    axes[1].set_title('steer {0:.3f}, probcoll in [{1:.2f}, {2:.2f}]'.format(
                        -steers[0, 0], min_probcoll, max_probcoll))

                    f.savefig(os.path.join(plot_folder, 'rollout_{0:03d}_t_{1:03d}.png'.format(i, j)),
                              bbox_inches='tight', dpi=200)
                    # break
                # break
            plt.close(f)

        tf_sess.close()
def statistics(self):
    """Return aggregate statistics computed over all managed replay pools."""
    pools = self._replay_pools
    return ReplayPool.statistics_pools(pools)
def get_recent_paths(self):
    """Return the recently completed paths gathered across all replay pools."""
    pools = self._replay_pools
    return ReplayPool.get_recent_paths_pools(pools)
def log(self, prefix=''):
    """Log statistics for all replay pools, prefixing each entry with *prefix*."""
    pools = self._replay_pools
    ReplayPool.log_pools(pools, prefix=prefix)
def sample(self, batch_size):
    """Draw a batch of size *batch_size* across the replay pools.

    Whether only completed episodes may be sampled is delegated to the policy.
    """
    only_completed = self._policy.only_completed_episodes
    return ReplayPool.sample_pools(self._replay_pools,
                                   batch_size,
                                   only_completed_episodes=only_completed)
class GatherMPCData(object):
    """Gathers rollouts by replaying K fixed random action sequences from
    sampled start poses, MPC-style, and saves them with mypickle."""

    def __init__(self, env, steps, save_file, H, K):
        """
        :param env: environment to gather data in; must support
            reset(pos, hpr) and step(action)
        :param steps: number of start poses to collect / rough rollout budget
        :param save_file: path the gathered rollouts are dumped to
        :param H: planning horizon (actions per sequence)
        :param K: number of candidate action sequences
        """
        self._env = env
        self._steps = steps
        self._save_file = save_file
        self._H = H
        self._K = K

        # get starting positions: gather random rollouts once, then sample
        # (pos, hpr) pairs from their env infos. Falls back to the rollout's
        # first env_info when the sampled one lacks 'pos'.
        # NOTE(review): assumes env_infos[0] always contains 'pos' — confirm.
        grd = GatherRandomData(env, int(100*steps), '/tmp/tmp.pkl')
        grd.run()
        rollouts = mypickle.load('/tmp/tmp.pkl')['rollouts']
        self._start_poses = []
        while len(self._start_poses) < steps:
            rollout = random.choice(rollouts)
            env_info = random.choice(rollout['env_infos'])
            if 'pos' not in env_info:
                env_info = rollout['env_infos'][0]
            self._start_poses.append((env_info['pos'], env_info['hpr']))

        # Pool that records full rollouts (observations + env infos).
        self._replay_pool = ReplayPool(self._env.spec,
                                       self._env.horizon,
                                       1,
                                       1,
                                       int(1e5),
                                       obs_history_len=1,
                                       sampling_method='uniform',
                                       save_rollouts=True,
                                       save_rollouts_observations=True,
                                       save_env_infos=True,
                                       replay_pool_params={})

        # K fixed candidate sequences of H randomly sampled actions each.
        self._action_sequences = []
        for _ in range(self._K):
            self._action_sequences.append([self._env.action_space.sample() for _ in range(self._H)])
        self._action_sequences = np.array(self._action_sequences)

    def run(self):
        """Execute every action sequence from every start pose, recording each
        execution as one rollout, then dump all rollouts to save_file."""
        rollouts = []
        step = 0
        for start_pose in self._start_poses:
            if len(rollouts) > self._steps:
                break
            rollouts_i = []
            for action_sequence in self._action_sequences:
                # Reset to the same start pose before each candidate sequence.
                curr_obs, curr_goals = self._env.reset(start_pose[0], start_pose[1])
                for action in action_sequence:
                    self._replay_pool.store_observation(step, curr_obs)
                    next_obs, goal, reward, done, env_info = self._env.step(action)
                    # est_value / logprob are not available here -> NaN.
                    self._replay_pool.store_effect(action, reward, done, env_info, np.nan, np.nan)
                    step += 1
                    if done:
                        break
                    curr_obs = next_obs
                else:
                    # for/else: the sequence ran to completion without the env
                    # terminating, so close the episode manually.
                    self._replay_pool.force_done()
                rollout = self._replay_pool.get_recent_paths()
                assert(len(rollout) == 1)
                rollout = rollout[0]
                rollouts_i.append(rollout)
            lengths = [len(r['dones']) for r in rollouts_i]
            print('length: min {0:.1f}, mean {1:.1f}, max {2:.1f}'.format(np.min(lengths), np.mean(lengths), np.max(lengths)))
            rollouts.append(rollouts_i)

        mypickle.dump({'rollouts': rollouts}, self._save_file)
def __init__(self, num_bootstraps, **kwargs):
    """Create one independent ReplayPool per bootstrap head.

    :param num_bootstraps: number of bootstrap heads (and pools)
    :param kwargs: forwarded unchanged to every ReplayPool constructor
    """
    self._num_bootstraps = num_bootstraps
    self._replay_pools = [ReplayPool(**kwargs)
                          for _ in range(self._num_bootstraps)]