def cam_poses_from_states(self, states):
    """Build camera poses from raw state vectors, optionally adding noise.

    Slices the camera position (columns 9:12) and rotation quaternion
    (columns 12:16) out of the batched state tensor and wraps them in a
    Pose. When position and/or rotation noise is enabled on the model,
    the pose is perturbed with the configured variances.

    :param states: batched state tensor; columns 9:16 hold camera pose
        (assumed layout — confirm against the state encoding upstream)
    :return: a Pose (noisy if any noise flag is set)
    """
    position = states[:, 9:12]
    rotation = states[:, 12:16]
    pose = Pose(position, rotation)

    # Clean path: no noise requested, return the pose as-is.
    if not (self.use_pos_noise or self.use_rot_noise):
        return pose

    # A variance of 0 disables that noise component.
    pos_var = self.params["noisy_pos_variance"] if self.use_pos_noise else 0
    rot_var = self.params["noisy_rot_variance"] if self.use_rot_noise else 0
    return get_noisy_poses_torch(pose, pos_var, rot_var,
                                 cuda=self.is_cuda,
                                 cuda_device=self.cuda_device)
def unbatch(self, batch, halfway=False):
    """Unpack a single-rollout training batch into model inputs and labels.

    Moves inputs to the model device, and (unless ``halfway``) converts
    landmark/goal labels from world meters to map pixels, uploads them,
    and stashes label tensors in ``self.tensor_store`` for auxiliary
    losses. NOTE(review): every ``batch[...][0]`` takes the first (only)
    element — this assumes batch size 1; confirm against the data loader.

    :param batch: dict of batched tensors/lists from the data loader
    :param halfway: if True, skip label processing; the label-related
        return values come back as None
    :return: tuple (images, states, instructions, instr_lengths,
        plan_mask, firstseg_mask, start_poses, noisy_start_poses, metadata)
    """
    # Inputs
    images = self.maybe_cuda(batch["images"][0])
    seq_len = len(images)
    # Instructions are truncated to the image sequence length.
    instructions = self.maybe_cuda(batch["instr"][0][:seq_len])
    instr_lengths = batch["instr_len"][0][:seq_len]
    states = self.maybe_cuda(batch["states"][0])

    if not halfway:
        plan_mask = batch["plan_mask"][0]  # True for every timestep that we do visitation prediction
        firstseg_mask = batch["firstseg_mask"][0]  # True for every timestep that is a new instruction segment

        # Labels (including for auxiliary losses)
        lm_pos_fpv = batch["lm_pos_fpv"][0]  # All object 2D coordinates in the first-person image
        lm_pos_map_m = batch["lm_pos_map"][0]  # All object 2D coordinates in the semantic map
        lm_indices = batch["lm_indices"][0]  # All object class indices
        goal_pos_map_m = batch["goal_loc"][0]  # Goal location in the world in meters_and_metrics
        lm_mentioned = batch["lm_mentioned"][0]  # 1/0 labels whether object was mentioned/not mentioned in template instruction
        # TODO: We're taking the FIRST label here. SINGLE SEGMENT ASSUMPTION
        lang_lm_mentioned = batch["lang_lm_mentioned"][0][0]  # integer labes as to which object was mentioned

        start_poses = batch["start_poses"][0]
        # Perturb start poses on CPU with the configured variances
        # (training-time pose-noise augmentation).
        noisy_start_poses = get_noisy_poses_torch(
            start_poses.numpy(),
            self.params["pos_variance"],
            self.params["rot_variance"],
            cuda=False, cuda_device=None)

        # Ground truth visitation distributions (in start and global frames)
        v_dist_w_ground_truth_select = self.maybe_cuda(
            batch["traj_ground_truth"][0])
        # Keep only the start poses for timesteps where a plan is made.
        start_poses_select = self.batch_select.one(
            start_poses, plan_mask, v_dist_w_ground_truth_select.device)
        # Transform world-frame ground truth into the start frame.
        v_dist_s_ground_truth_select, poses_s = self.map_transform_w_to_s(
            v_dist_w_ground_truth_select, None, start_poses_select)
        #self.tensor_store.keep_inputs("v_dist_w_ground_truth_select", v_dist_w_ground_truth_select)
        self.tensor_store.keep_inputs("v_dist_s_ground_truth_select",
                                      v_dist_s_ground_truth_select)
        #Presenter().show_image(v_dist_s_ground_truth_select.detach().cpu()[0,0], "v_dist_s_ground_truth_select", waitkey=1, scale=4)
        #Presenter().show_image(v_dist_w_ground_truth_select.detach().cpu()[0,0], "v_dist_w_ground_truth_select", waitkey=1, scale=4)

        # Convert landmark/goal positions from world meters to global-map
        # pixels. Entries may be None (no landmark at that timestep).
        lm_pos_map_px = [
            torch.from_numpy(
                transformations.pos_m_to_px(p.numpy(),
                                            self.params["global_map_size"],
                                            self.params["world_size_m"],
                                            self.params["world_size_px"]))
            if p is not None else None for p in lm_pos_map_m
        ]
        goal_pos_map_px = torch.from_numpy(
            transformations.pos_m_to_px(goal_pos_map_m.numpy(),
                                        self.params["global_map_size"],
                                        self.params["world_size_m"],
                                        self.params["world_size_px"]))

        # First-person landmark coordinates are scaled down to the ResNet
        # feature-map resolution before being used as labels.
        resnet_factor = self.img_to_features_w.img_to_features.get_downscale_factor()
        lm_pos_fpv = [
            self.cuda_var((s / resnet_factor).long())
            if s is not None else None for s in lm_pos_fpv
        ]
        lm_indices = [
            self.cuda_var(s) if s is not None else None for s in lm_indices
        ]
        lm_mentioned = [
            self.cuda_var(s) if s is not None else None for s in lm_mentioned
        ]
        lang_lm_mentioned = self.cuda_var(lang_lm_mentioned)
        lm_pos_map_px = [
            self.cuda_var(s.long()) if s is not None else None
            for s in lm_pos_map_px
        ]
        goal_pos_map_px = self.cuda_var(goal_pos_map_px)

        # Stash label tensors for the auxiliary objectives.
        self.tensor_store.keep_inputs("lm_pos_fpv", lm_pos_fpv)
        self.tensor_store.keep_inputs("lm_pos_map", lm_pos_map_px)
        self.tensor_store.keep_inputs("lm_indices", lm_indices)
        self.tensor_store.keep_inputs("lm_mentioned", lm_mentioned)
        self.tensor_store.keep_inputs("lang_lm_mentioned", lang_lm_mentioned)
        self.tensor_store.keep_inputs("goal_pos_map", goal_pos_map_px)

        # Subset the labels to the plan timesteps only.
        lm_pos_map_select = [
            lm_pos for i, lm_pos in enumerate(lm_pos_map_px) if plan_mask[i]
        ]
        lm_indices_select = [
            lm_idx for i, lm_idx in enumerate(lm_indices) if plan_mask[i]
        ]
        lm_mentioned_select = [
            lm_m for i, lm_m in enumerate(lm_mentioned) if plan_mask[i]
        ]
        goal_pos_map_select = [
            pos for i, pos in enumerate(goal_pos_map_px) if plan_mask[i]
        ]
        self.tensor_store.keep_inputs("lm_pos_map_select", lm_pos_map_select)
        self.tensor_store.keep_inputs("lm_indices_select", lm_indices_select)
        self.tensor_store.keep_inputs("lm_mentioned_select", lm_mentioned_select)
        self.tensor_store.keep_inputs("goal_pos_map_select", goal_pos_map_select)

    # We won't need this extra information
    else:
        noisy_poses, start_poses, noisy_start_poses = None, None, None
        plan_mask, firstseg_mask = None, None

    metadata = batch["md"][0][0]
    env_id = metadata["env_id"]
    self.tensor_store.set_flag("env_id", env_id)

    return images, states, instructions, instr_lengths, plan_mask, firstseg_mask, start_poses, noisy_start_poses, metadata