def _forward_sample_open_loop(self): """ Sampling method. Open loop forward sampling from demonstrations. Starts by sampling from states near the beginning of the demonstrations. Increases the window forwards as the number of calls to this sampling method increases at a fixed rate. """ # get a random episode index ep_ind = random.choice(self.demo_list) # sample uniformly in a window that grows forwards from the beginning of the demos states = self.demo_file["data/{}/states".format(ep_ind)].value eps_len = states.shape[0] index = np.random.randint(0, min(self.open_loop_window_size, eps_len)) state = states[index] # increase window size at a fixed frequency (open loop) self.demo_sampled += 1 if self.demo_sampled >= self.open_loop_increment_freq: if self.open_loop_window_size < eps_len: self.open_loop_window_size += self.open_loop_window_increment self.demo_sampled = 0 if self.need_xml: model_xml = self._xml_for_episode_index(ep_ind) xml = postprocess_model_xml(model_xml) return state, xml return state
def load_episode_xml(self, demo_num): """ Loads demo episode with specified @demo_num into the simulator. Args: demo_num (int): Demonstration number to load """ # Grab raw xml file ep = self.demos[demo_num] model_xml = self.f[f"data/{ep}"].attrs["model_file"] # Reset environment self.env.reset() xml = postprocess_model_xml(model_xml) xml = self.modify_xml_for_camera_movement(xml, camera_name=self.camera) self.env.reset_from_xml_string(xml) self.env.sim.reset() # Update camera info self.camera_id = self.env.sim.model.camera_name2id(self.camera) # Load states and actions self.states = self.f[f"data/{ep}/states"].value self.actions = np.array(self.f[f"data/{ep}/actions"].value) # Set initial state self.env.sim.set_state_from_flattened(self.states[0]) # Reset step count and set current episode number self.step = 0 self.n_steps = len(self.actions) self.current_ep = demo_num # Notify user of loaded episode print(f"Loaded episode {demo_num}.")
def render(args, f, env): demos = list(f["data"].keys()) for key in tqdm.tqdm(demos): # read the model xml, using the metadata stored in the attribute for this episode model_file = f["data/{}".format(key)].attrs["model_file"] model_path = os.path.join(args.demo_folder, "models", model_file) with open(model_path, "r") as model_f: model_xml = model_f.read() env.reset() xml = postprocess_model_xml(model_xml) env.reset_from_xml_string(xml) env.sim.reset() # load + subsample data states, _ = FixedFreqSubsampler(n_skip=args.skip_frame)(f["data/{}/states".format(key)].value) d_pos, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=SumAggregator()) \ (f["data/{}/right_dpos".format(key)].value, aggregate=True) d_quat, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=QuaternionAggregator()) \ (f["data/{}/right_dquat".format(key)].value, aggregate=True) gripper_actuation, _ = FixedFreqSubsampler(n_skip=args.skip_frame)(f["data/{}/gripper_actuations".format(key)].value) joint_velocities, _ = FixedFreqSubsampler(n_skip=args.skip_frame, aggregator=SumAggregator()) \ (f["data/{}/joint_velocities".format(key)].value, aggregate=True) n_steps = states.shape[0] if args.target_length is not None and n_steps > args.target_length: continue # force the sequence of internal mujoco states one by one frames = [] for i, state in enumerate(states): env.sim.set_state_from_flattened(state) env.sim.forward() obs = env._get_observation() frame = obs["image"][::-1] frames.append(frame) frames = np.stack(frames, axis=0) actions = np.concatenate((d_pos, d_quat, gripper_actuation), axis=-1) pad_mask = np.ones((n_steps,)) if n_steps == args.target_length \ else np.concatenate((np.ones((n_steps,)), np.zeros((args.target_length - n_steps,)))) h5_path = os.path.join(args.output_path, "seq_{}.h5".format(key)) with h5py.File(h5_path, 'w') as F: F['traj_per_file'] = 1 F["traj0/images"] = frames F["traj0/actions"] = actions F["traj0/states"] = states F["traj0/pad_mask"] = pad_mask F["traj0/joint_velocities"] = joint_velocities
def _uniform_sample_half(self): """ ## Uniformly sample states from first half of demo. """ # get a random episode index ep_ind = random.choice(self.demo_list) # select a flattened mujoco state uniformly from this episode states = self.demo_file["data/{}/states".format(ep_ind)].value state = random.choice(states[0:len(states) // 2]) if self.need_xml: model_xml = self._xml_for_episode_index(ep_ind) xml = postprocess_model_xml(model_xml) return state, xml return state
def _uniform_sample(self): """ Sampling method. First uniformly sample a demonstration from the set of demonstrations. Then uniformly sample a state from the selected demonstration. """ # get a random episode index ep_ind = random.choice(self.demo_list) # select a flattened mujoco state uniformly from this episode states = self.demo_file["data/{}/states".format(ep_ind)].value state = random.choice(states) if self.need_xml: model_xml = self._xml_for_episode_index(ep_ind) xml = postprocess_model_xml(model_xml) return state, xml return state
def gen_gifs(args, f, env, target_length=None): demos = list(f["data"].keys()) for key in tqdm.tqdm(demos): # load the flattened mujoco states states = f["data/{}/states".format(key)].value seq_length = steps2length(states.shape[0]) if target_length is not None and np.abs(seq_length - target_length) > 1: continue # read the model xml, using the metadata stored in the attribute for this episode model_file = f["data/{}".format(key)].attrs["model_file"] model_path = os.path.join(args.demo_folder, "models", model_file) with open(model_path, "r") as model_f: model_xml = model_f.read() env.reset() xml = postprocess_model_xml(model_xml) env.reset_from_xml_string(xml) env.sim.reset() # force the sequence of internal mujoco states one by one frames = [] for i in tqdm.tqdm(range(states.shape[0])): if i % args.skip_frame == 0: env.sim.set_state_from_flattened(states[i]) env.sim.forward() obs = env._get_observation() frame = obs["image"][::-1] frames.append(frame) frames = np.stack(frames, axis=0) fig_file_name = os.path.join(args.output_path, "seq_{}".format(key)) if target_length is not None: fig_file_name += "_len_{}".format(target_length) save_gif(fig_file_name + ".gif", frames, fps=15) if target_length is not None: return True return False if target_length is not None else True
def _reverse_sample_open_loop(self): """ Sampling method. Open loop reverse sampling from demonstrations. Starts by sampling from states near the end of the demonstrations. Increases the window backwards as the number of calls to this sampling method increases at a fixed rate. Returns: np.array or 2-tuple: If np.array, is the state sampled from a demo file. If 2-tuple, additionally includes the model xml file """ # get a random episode index ep_ind = random.choice(self.demo_list) # sample uniformly in a window that grows backwards from the end of the demos states = self.demo_file["data/{}/states".format(ep_ind)][()] eps_len = states.shape[0] index = np.random.randint(max(eps_len - self.open_loop_window_size, 0), eps_len) state = states[index] # increase window size at a fixed frequency (open loop) self.demo_sampled += 1 if self.demo_sampled >= self.open_loop_increment_freq: if self.open_loop_window_size < eps_len: self.open_loop_window_size += self.open_loop_window_increment self.demo_sampled = 0 if self.need_xml: model_xml = self._xml_for_episode_index(ep_ind) xml = postprocess_model_xml(model_xml) return state, xml return state
def _uniform_sample(self): """ Sampling method. First uniformly sample a demonstration from the set of demonstrations. Then uniformly sample a state from the selected demonstration. Returns: np.array or 2-tuple: If np.array, is the state sampled from a demo file. If 2-tuple, additionally includes the model xml file """ # get a random episode index ep_ind = random.choice(self.demo_list) # select a flattened mujoco state uniformly from this episode states = self.demo_file["data/{}/states".format(ep_ind)][()] state = random.choice(states) if self.need_xml: model_xml = self._xml_for_episode_index(ep_ind) xml = postprocess_model_xml(model_xml) return state, xml return state
def _load_raw_data(self, itr): """ doing the reverse of save_raw_data :param itr: :return: """ traj = self._start_goal_confs + 'itr.hdf5' self._demo_images = demo_images.astype(np.float32) / 255. self._goal_image = self._demo_images[-1] with open('{}/obs_dict.pkl'.format(traj), 'rb') as file: obs_dict.update(pkl.load(file)) self._goal = self.env.get_goal_from_obs(obs_dict) reset_state = self.get_reset_state(obs_dict) if os.path.exists(traj + '/robosuite.xml'): with open(traj + '/robosuite.xml', "r") as model_f: model_xml = model_f.read() xml = postprocess_model_xml(model_xml) reset_state['robosuite_xml'] = xml return reset_state
writer = imageio.get_writer('./test.mp4', fps=20) while True: print("Playing back random episode... (press ESC to quit)") # # select an episode randomly ep = random.choice(demos) # read the model xml, using the metadata stored in the attribute for this episode model_file = f["data/{}".format(ep)].attrs["model_file"] model_path = os.path.join(demo_path, "models", model_file) with open(model_path, "r") as model_f: model_xml = model_f.read() env.reset() xml = postprocess_model_xml(model_xml) env.reset_from_xml_string(xml) env.sim.reset() env.viewer.set_camera(0) # load the flattened mujoco states states = f["data/{}/states".format(ep)].value if args.use_actions: # load the initial state env.sim.set_state_from_flattened(states[0]) if not args.visualize_gripper: # We make the gripper site invisible robot = env.robots[0] env.sim.model.site_rgba[robot.eef_site_id] = np.zeros(4)
def _load_raw_data(self, itr): """ doing the reverse of save_raw_data :param itr: :return: """ if 'robot_name' in self._hp.env[ 1]: # robot experiments don't have a reset state return None if 'iex' in self._hp: itr = self._hp.iex ngroup = 1000 igrp = itr // ngroup group_folder = '{}/traj_group{}'.format(self._start_goal_confs, igrp) traj_folder = group_folder + '/traj{}'.format(itr) print('reading from: ', traj_folder) num_files = len(glob.glob("{}/images0/*.png".format(traj_folder))) assert num_files > 0, " no files found!" obs_dict = {} demo_images = np.zeros([ num_files, self.ncam, self._hp.image_height, self._hp.image_width, 3 ]) for t in [0, num_files - 1]: #range(num_files): for c in range(self.ncam): image_file = '{}/images{}/im_{}.png'.format(traj_folder, c, t) if not os.path.isfile(image_file): raise ValueError( "Can't find goal image: {}".format(image_file)) img = cv2.imread(image_file)[..., ::-1] if img.shape[0] != self._hp.image_height or img.shape[ 1] != self._hp.image_width: img = Image.fromarray(img) img = img.resize( (self._hp.image_height, self._hp.image_width), PIL.Image.BILINEAR) img = np.asarray(img, dtype=np.uint8) demo_images[t, c] = img self._demo_images = demo_images.astype(np.float32) / 255. self._goal_image = self._demo_images[-1] with open('{}/obs_dict.pkl'.format(traj_folder), 'rb') as file: obs_dict.update(pkl.load(file)) self._goal = self.env.get_goal_from_obs(obs_dict) reset_state = self.get_reset_state(obs_dict) if os.path.exists(traj_folder + '/robosuite.xml'): with open(traj_folder + '/robosuite.xml', "r") as model_f: model_xml = model_f.read() from robosuite.utils.mjcf_utils import postprocess_model_xml xml = postprocess_model_xml(model_xml) reset_state['robosuite_xml'] = xml return reset_state
def extract_states_for_episode_id(self, demo_id): """ Extracts states and actions from a particular demonstration episode and writes it to the output hdf5 file. Args: demo_id (int): An episode index to extract states and actions for. """ ep = self.demos[demo_id] ep_data_grp = self.f_sars_grp.create_group("demo_{}".format(demo_id)) # read the model xml, using the metadata stored in the attribute for this episode model_file = self.f["data/{}".format(ep)].attrs["model_file"] model_path = os.path.join(self.demo_path, "models", model_file) with open(model_path, "r") as model_f: model_xml = model_f.read() # also store the location of model xml in the new hdf5 file ep_data_grp.attrs["model_file"] = model_file self.env.reset() xml = postprocess_model_xml(model_xml) self.env.reset_from_xml_string(xml) self.env.sim.reset() # load the flattened mujoco states and the actions in the environment states = self.f["data/{}/states".format(ep)][()] jvels = self.f["data/{}/joint_velocities".format(ep)][()] gripper_acts = self.f["data/{}/gripper_actuations".format(ep)][()] goal = np.array(self.f["data/{}".format(ep)].attrs["goal"]) # assert gripper_acts.shape[1] == 1, 'gripper_acts shape: {}'.format(gripper_acts.shape) prev_obs = None prev_ac = None prev_rew = None prev_done = None prev_state = None ep_obs = [] ep_acts = [] ep_rews = [] ep_next_obs = [] ep_dones = [] ep_states = [] if self.use_images: prev_image = None ep_images = [] ep_next_images = [] # prev_eef_pos = None # prev_eef_rot = None # force the sequence of internal mujoco states one by one for t in range(len(states)): # self.env.sim.reset() self.env.sim.set_state_from_flattened(states[t]) self.env.sim.forward() # make teleop visualization site colors transparent self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) obs_dict = self.env._get_observation() if self.use_images or self.robot_only: obs = np.array(obs_dict["robot-state"]) elif self.object_only: obs = np.array(obs_dict["object-state"]) else: obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]]) action = np.concatenate([jvels[t], gripper_acts[t]]) if self.use_images: image = np.array(obs_dict["image"])[::-1] # NOTE: ours tasks use reward r(s'), reward AFTER transition, so this is # the reward for the previous timestep prev_rew = self.env.reward(None) prev_done = 0 if self.env._check_success(): prev_done = 1 # skips the first iteration if prev_obs is not None: # record (s, a) from last iteration and s' from this one ep_obs.append(prev_obs) ep_acts.append(prev_ac) ep_rews.append(prev_rew) ep_next_obs.append(obs) ep_dones.append(prev_done) ep_states.append(prev_state) if self.use_images: ep_images.append(prev_image) ep_next_images.append(image) # im = Image.fromarray(prev_image) # path = pjoin("tmp", "img%06d.png"%t) # im.save(path) prev_obs = np.array(obs) prev_ac = np.array(action) prev_state = np.array(states[t]) if self.use_images: prev_image = np.array(image) # if len(states) == len(jvels): assert(len(states) == len(jvels)) # play the last action to get one more additional data point. # this might be critical if only the last state got a reward in # the sparse reward setting. # self.env.sim.reset() self.env.sim.set_state_from_flattened(states[-1]) self.env.sim.forward() # make teleop visualization site colors transparent self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) obs_dict = self.env._get_observation() if self.use_images or self.robot_only: obs = np.array(obs_dict["robot-state"]) elif self.object_only: obs = np.array(obs_dict["object-state"]) else: obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]]) action = np.concatenate([jvels[-1], gripper_acts[-1]]) if self.use_images: image = np.array(obs_dict["image"])[::-1] self.env.step(action) reward = self.env.reward(None) done = 0 if self.env._check_success(): done = 1 # ensure consistency from loop above assert(np.array_equal(prev_obs, obs)) if not self.use_eef_actions: assert(np.array_equal(prev_ac, action)) # assert(done == 1) obs_dict = self.env._get_observation() if self.use_images or self.robot_only: next_obs = np.array(obs_dict["robot-state"]) elif self.object_only: next_obs = np.array(obs_dict["object-state"]) else: next_obs = np.concatenate([obs_dict["robot-state"], obs_dict["object-state"]]) if self.use_images: next_image = np.array(obs_dict["image"])[::-1] ep_obs.append(obs) ep_acts.append(action) ep_rews.append(reward) ep_next_obs.append(next_obs) ep_dones.append(done) ep_states.append(states[-1]) if self.use_images: ep_images.append(image) ep_next_images.append(next_image) if self.use_eef_actions: # re-run through transitions to rewrite actions as end effector actions, and # downsample the frames ep_eef_obs = [] ep_eef_acts = [] ep_eef_rews = [] ep_eef_next_obs = [] ep_eef_dones = [] ep_eef_states = [] if self.use_images: ep_eef_images = [] ep_eef_next_images = [] self.env.sim.reset() self.env.sim.set_state_from_flattened(ep_states[0]) self.env.sim.forward() # make teleop visualization site colors transparent self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) obs_dict = self.env._get_observation() prev_eef_pos = np.array(obs_dict["eef_pos"]) prev_eef_rot = T.quat2mat(obs_dict["eef_quat"]) prev_obs = np.array(ep_obs[0]) prev_done = ep_dones[0] if self.use_images: prev_image = np.array(obs_dict["image"])[::-1] loop_cnt = self.eef_downsample num_transitions = len(ep_obs) while True: #self.env.sim.reset() self.env.sim.set_state_from_flattened(ep_states[loop_cnt]) self.env.sim.forward() # make teleop visualization site colors transparent self.env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) self.env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) obs_dict = self.env._get_observation() cur_eef_pos = np.array(obs_dict["eef_pos"]) cur_eef_rot = T.quat2mat(obs_dict["eef_quat"]) dpos = cur_eef_pos - prev_eef_pos drotation = prev_eef_rot.T.dot(cur_eef_rot) dquat = T.mat2quat(drotation) # current timestep lets us compute delta pose from last downsampled timestep to get action for last timestep # but take gripper action from last downsapled timestep prev_eef_action = np.concatenate([dpos, dquat, ep_acts[loop_cnt - self.eef_downsample][-1:]]) # take reward from the immediate prior timestep, since rewards at s depend on s' prev_rew = ep_rews[loop_cnt - 1] cur_obs = np.array(ep_obs[loop_cnt]) if self.use_images: cur_image = np.array(ep_images[loop_cnt]) ep_eef_obs.append(prev_obs) ep_eef_acts.append(prev_eef_action) ep_eef_rews.append(prev_rew) ep_eef_next_obs.append(cur_obs) ep_eef_dones.append(prev_done) if self.use_images: ep_eef_images.append(prev_image) ep_eef_next_images.append(cur_image) prev_obs = np.array(cur_obs) prev_done = ep_dones[loop_cnt] prev_eef_pos = np.array(cur_eef_pos) prev_eef_rot = np.array(cur_eef_rot) if self.use_images: prev_image = np.array(cur_image) loop_cnt += self.eef_downsample if loop_cnt > num_transitions - 1: if loop_cnt - self.eef_downsample != num_transitions - 1: # back track to the last point to make sure we add it loop_cnt = num_transitions - 1 else: break # overwrite prior variables ep_obs = ep_eef_obs ep_acts = ep_eef_acts ep_rews = ep_eef_rews ep_next_obs = ep_eef_next_obs ep_dones = ep_eef_dones if self.use_images: ep_images = ep_eef_images ep_next_images = ep_eef_next_images # write datasets for states and actions ep_data_grp.create_dataset("obs", data=np.array(ep_obs)) ep_data_grp.create_dataset("actions", data=np.array(ep_acts)) ep_data_grp.create_dataset("rewards", data=np.array(ep_rews)) ep_data_grp.create_dataset("next_obs", data=np.array(ep_next_obs)) ep_data_grp.create_dataset("dones", data=np.array(ep_dones)) ep_data_grp.create_dataset("states", data=np.array(ep_states)) ep_data_grp.attrs["goal"] = goal if self.use_images: ep_data_grp.create_dataset("images", data=np.array(ep_images)) ep_data_grp.create_dataset("next_images", data=np.array(ep_next_images)) # write some metadata ep_data_grp.attrs["num_samples"] = len(ep_obs) # number of transitions in this episode print("ep {}: wrote {} transitions".format(demo_id, ep_data_grp.attrs["num_samples"])) return ep_data_grp.attrs["num_samples"]