Example #1
    def _compute_relative_local_goals(self, agent_world_pose, M, s):
        """
        Converts a local goal (x, y) position in the map to egocentric coordinates
        relative to the agent's current pose.
        """
        local_map_goals = self.states["curr_local_goals"]
        local_world_goals = convert_map2world(local_map_goals, (M, M), s)  # (bs, 2)
        # Concatenate dummy directions to goal
        local_world_goals = torch.cat(
            [
                local_world_goals,
                torch.zeros(self.nplanners, 1).to(agent_world_pose.device),
            ],
            dim=1,
        )
        relative_goals = subtract_pose(agent_world_pose, local_world_goals)[:, :2]

        return relative_goals
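
Example #1 leans on a subtract_pose helper (also used in the other examples) to express a world-frame target in the agent's egocentric frame. Below is a minimal sketch of what such a helper could look like, assuming poses are (bs, 3) tensors of (x, y, heading); the name subtract_pose_sketch and the exact sign and angle conventions are illustrative assumptions, not the repository's implementation.

import torch

def subtract_pose_sketch(pose_a, pose_b):
    """Relative pose of pose_b expressed in pose_a's frame (illustrative only)."""
    x_a, y_a, th_a = pose_a[:, 0], pose_a[:, 1], pose_a[:, 2]
    x_b, y_b, th_b = pose_b[:, 0], pose_b[:, 1], pose_b[:, 2]
    dx_world = x_b - x_a
    dy_world = y_b - y_a
    # Rotate the world-frame displacement into pose_a's local frame
    dx = dx_world * torch.cos(th_a) + dy_world * torch.sin(th_a)
    dy = -dx_world * torch.sin(th_a) + dy_world * torch.cos(th_a)
    # Wrap the heading difference to (-pi, pi]
    dth = torch.atan2(torch.sin(th_b - th_a), torch.cos(th_b - th_a))
    return torch.stack([dx, dy, dth], dim=1)  # (bs, 3)
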
Example #2
def map_update_fn(ps_args):
    # Unpack args
    (
        mapper,
        mapper_rollouts,
        optimizer,
        num_update_batches,
        batch_size,
        freeze_projection_unit,
        bias_factor,
        occupancy_anticipator_type,
        pose_loss_coef,
        max_grad_norm,
        label_id,
    ) = ps_args

    # Perform update
    losses = {
        "total_loss": 0,
        "mapping_loss": 0,
        "trans_loss": 0,
        "rot_loss": 0,
    }

    if isinstance(mapper, nn.DataParallel):
        mapper_config = mapper.module.config
    else:
        mapper_config = mapper.config

    img_mean = mapper_config.NORMALIZATION.img_mean
    img_std = mapper_config.NORMALIZATION.img_std
    start_time = time.time()
    # Debugging
    map_update_profile = {"data_sampling": 0.0, "pytorch_update": 0.0}
    for i in range(num_update_batches):
        start_time_sample = time.time()
        observations = mapper_rollouts.sample(batch_size)
        map_update_profile["data_sampling"] += time.time() - start_time_sample
        # Labels
        # Pose labels
        start_time_pyt = time.time()
        device = observations["pose_gt_at_t_1"].device

        pose_gt_at_t_1 = observations["pose_gt_at_t_1"]
        pose_gt_at_t = observations["pose_gt_at_t"]
        pose_at_t_1 = observations["pose_at_t_1"]
        pose_at_t = observations["pose_at_t"]
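        # dpose_gt and dpose_noisy are the ground-truth and sensor/odometry-based
        # egomotion deltas; ddpose_gt is the residual correction the pose
        # estimator is trained to predict (it becomes pose_label below).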
        dpose_gt = subtract_pose(pose_gt_at_t_1, pose_gt_at_t)  # (bs, 3)
        dpose_noisy = subtract_pose(pose_at_t_1, pose_at_t)  # (bs, 3)
        ddpose_gt = subtract_pose(dpose_noisy, dpose_gt)

        # Map labels
        pt_gt = observations[f"{label_id}_at_t"]  # (bs, V, V, 2)
        pt_gt = rearrange(pt_gt, "b h w c -> b c h w")  # (bs, 2, V, V)

        # Forward pass
        mapper_inputs = observations
        mapper_outputs = mapper(mapper_inputs, method_name="predict_deltas")
        pt_hat = mapper_outputs["pt"]

        # Compute losses
        # -------- mapping loss ---------
        mapping_loss = simple_mapping_loss_fn(pt_hat, pt_gt)
        if freeze_projection_unit:
            mapping_loss = mapping_loss.detach()

        if occupancy_anticipator_type == "rgb_model_v2":
            ego_map_gt = observations["ego_map_gt_at_t"]  # (bs, V, V, 2)
            ego_map_gt = rearrange(ego_map_gt, "b h w c -> b c h w")
            ego_map_hat = mapper_outputs["all_pu_outputs"]["depth_proj_estimate"]
            mapping_loss = mapping_loss + simple_mapping_loss_fn(
                ego_map_hat, ego_map_gt
            )

        all_pose_outputs = mapper_outputs["all_pose_outputs"]
        if all_pose_outputs is None:
            pose_estimation_loss = torch.zeros([0]).to(device).sum()
            trans_loss = torch.zeros([0]).to(device).sum()
            rot_loss = torch.zeros([0]).to(device).sum()
        else:
            pose_outputs = all_pose_outputs["pose_outputs"]
            pose_estimation_loss, trans_loss, rot_loss = 0, 0, 0
            n_outputs = len(list(pose_outputs.keys()))
            # Pose predictions are made separately for each input modality and
            # then weighted-averaged by an ensemble MLP. The loss is computed
            # for each modality as well as for the ensemble output, and finally
            # averaged across them.
            pose_label = ddpose_gt
            for _, dpose_hat in pose_outputs.items():
                curr_pose_losses = pose_loss_fn(dpose_hat, pose_label)
                pose_estimation_loss = pose_estimation_loss + curr_pose_losses[0]
                trans_loss = trans_loss + curr_pose_losses[1]
                rot_loss = rot_loss + curr_pose_losses[2]
            pose_estimation_loss = pose_estimation_loss / n_outputs
            trans_loss = trans_loss / n_outputs
            rot_loss = rot_loss / n_outputs

        optimizer.zero_grad()
        total_loss = mapping_loss + pose_estimation_loss * pose_loss_coef

        # Backward pass
        total_loss.backward()

        # Update
        nn.utils.clip_grad_norm_(mapper.parameters(), max_grad_norm)
        optimizer.step()

        losses["total_loss"] += total_loss.item()
        losses["mapping_loss"] += mapping_loss.item()
        losses["trans_loss"] += trans_loss.item()
        losses["rot_loss"] += rot_loss.item()

        map_update_profile["pytorch_update"] += time.time() - start_time_pyt
        time_per_step = (time.time() - start_time) / (60 * (i + 1))

    losses["pose_loss"] = losses["trans_loss"] + losses["rot_loss"]
    for k in losses.keys():
        losses[k] /= num_update_batches

    return losses
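
The update above expects pose_loss_fn to return a tuple of (total pose loss, translation loss, rotation loss). A minimal sketch of such a function is shown below, assuming (bs, 3) pose tensors of (x, y, heading); pose_loss_fn_sketch, the plain L2 terms, and the equal weighting are illustrative assumptions and may differ from the loss actually used in the source code.

import torch
import torch.nn.functional as F

def pose_loss_fn_sketch(dpose_hat, dpose_gt):
    """Illustrative pose loss: L2 on (x, y) translation plus L2 on heading."""
    trans_loss = F.mse_loss(dpose_hat[:, :2], dpose_gt[:, :2])
    rot_loss = F.mse_loss(dpose_hat[:, 2], dpose_gt[:, 2])
    total_loss = trans_loss + rot_loss
    return total_loss, trans_loss, rot_loss
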
Example #3
    def predict_deltas(self, x, masks=None):
        # Transpose multichannel inputs
        st_1 = process_image(x["rgb_at_t_1"], self.img_mean_t, self.img_std_t)
        dt_1 = transpose_image(x["depth_at_t_1"])
        ego_map_gt_at_t_1 = transpose_image(x["ego_map_gt_at_t_1"])
        st = process_image(x["rgb_at_t"], self.img_mean_t, self.img_std_t)
        dt = transpose_image(x["depth_at_t"])
        ego_map_gt_at_t = transpose_image(x["ego_map_gt_at_t"])
        # This happens only for a baseline
        if (
            "ego_map_gt_anticipated_at_t_1" in x
            and x["ego_map_gt_anticipated_at_t_1"] is not None
        ):
            ego_map_gt_anticipated_at_t_1 = transpose_image(
                x["ego_map_gt_anticipated_at_t_1"]
            )
            ego_map_gt_anticipated_at_t = transpose_image(
                x["ego_map_gt_anticipated_at_t"]
            )
        else:
            ego_map_gt_anticipated_at_t_1 = None
            ego_map_gt_anticipated_at_t = None
        # Compute past and current egocentric maps
        bs = st_1.size(0)
        pu_inputs_t_1 = {
            "rgb": st_1,
            "depth": dt_1,
            "ego_map_gt": ego_map_gt_at_t_1,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t_1,
        }
        pu_inputs_t = {
            "rgb": st,
            "depth": dt,
            "ego_map_gt": ego_map_gt_at_t,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t,
        }
        pu_inputs = self._safe_cat(pu_inputs_t_1, pu_inputs_t)
        pu_outputs = self.projection_unit(pu_inputs)
        pu_outputs_t = {k: v[bs:] for k, v in pu_outputs.items()}
        pt_1, pt = pu_outputs["occ_estimate"][:bs], pu_outputs["occ_estimate"][bs:]
        # Compute relative pose
        dx = subtract_pose(x["pose_at_t_1"], x["pose_at_t"])
        # Estimate pose
        dx_hat = dx
        xt_hat = x["pose_at_t"]
        all_pose_outputs = None
        if not self.config.ignore_pose_estimator:
            all_pose_outputs = {}
            pose_inputs = {}
            if "rgb" in self.config.pose_predictor_inputs:
                pose_inputs["rgb_t_1"] = st_1
                pose_inputs["rgb_t"] = st
            if "depth" in self.config.pose_predictor_inputs:
                pose_inputs["depth_t_1"] = dt_1
                pose_inputs["depth_t"] = dt
            if "ego_map" in self.config.pose_predictor_inputs:
                pose_inputs["ego_map_t_1"] = pt_1
                pose_inputs["ego_map_t"] = pt
            if self.config.detach_map:
                for k in pose_inputs.keys():
                    pose_inputs[k] = pose_inputs[k].detach()
            n_pose_inputs = self._transform_observations(pose_inputs, dx)
            pose_outputs = self.pose_estimator(n_pose_inputs)
            dx_hat = add_pose(dx, pose_outputs["pose"])
            all_pose_outputs["pose_outputs"] = pose_outputs
            # Estimate global pose
            xt_hat = add_pose(x["pose_hat_at_t_1"], dx_hat)
        # Zero out pose prediction based on the mask
        if masks is not None:
            xt_hat = xt_hat * masks
            dx_hat = dx_hat * masks
        outputs = {
            "pt": pt,
            "dx_hat": dx_hat,
            "xt_hat": xt_hat,
            "all_pu_outputs": pu_outputs_t,
            "all_pose_outputs": all_pose_outputs,
        }
        if "ego_map_hat" in pu_outputs_t:
            outputs["ego_map_hat_at_t"] = pu_outputs_t["ego_map_hat"]
        return outputs
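
predict_deltas batches the t-1 and t observations together with _safe_cat so that the projection unit runs a single forward pass, then splits its outputs at index bs. A plausible sketch of such a concatenation helper is shown below; safe_cat_sketch is an assumed implementation (None entries, e.g. the anticipated ego-map for non-baseline models, are passed through unchanged), not the original _safe_cat.

import torch

def safe_cat_sketch(inputs_t_1, inputs_t):
    """Concatenate two dicts of tensors along the batch dim, preserving Nones."""
    out = {}
    for k in inputs_t_1:
        if inputs_t_1[k] is None or inputs_t[k] is None:
            out[k] = None
        else:
            out[k] = torch.cat([inputs_t_1[k], inputs_t[k]], dim=0)
    return out
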
Example #4
    def _get_wall_occupancy(self, episode, agent_state):
        episode_id = (episode.episode_id, episode.scene_id)
        # Load the episode specific maps only if the episode has changed
        if self.current_wall_episode_id != episode_id:
            self.current_wall_episode_id = episode_id
            if self.config.GT_TYPE == "wall_occupancy":
                scene_id = episode.scene_id.split("/")[-1]
                self._scene_maps_info = self._all_maps_info[scene_id]
                # Load the maps per floor
                seen_maps, wall_maps = self._load_transformed_wall_maps(
                    self._scene_maps_info, episode,
                )
                self._scene_maps = {}
                self._scene_maps["seen_maps"] = seen_maps
                self._scene_maps["wall_maps"] = wall_maps

        agent_state = self._sim.get_agent_state()
        current_height = agent_state.position[1]
        best_floor_idx = None
        best_floor_dist = math.inf
        for floor_idx, floor_data in enumerate(self._scene_maps_info):
            floor_height = floor_data["floor_height"]
            if abs(current_height - floor_height) < best_floor_dist:
                best_floor_idx = floor_idx
                best_floor_dist = abs(current_height - floor_height)
        assert best_floor_idx is not None
        current_wall_map = self._scene_maps["wall_maps"][best_floor_idx]
        # Take only channel 0 since both channels have the same values
        current_wall_map = current_wall_map[..., 0]

        # ========= Get local egocentric crop of the current wall map =========
        # Compute relative pose of agent from start location
        start_position = episode.start_position  # (X, Y, Z)
        start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
        start_heading = compute_heading_from_quaternion(start_rotation)
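        # Habitat world coordinates are (X, Y, Z) with Y up; the planar poses
        # below are assembled as (-Z, X, heading).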
        start_pose = torch.Tensor(
            [[-start_position[2], start_position[0], start_heading]]
        )
        agent_position = agent_state.position
        agent_heading = compute_heading_from_quaternion(agent_state.rotation)
        agent_pose = torch.Tensor(
            [[-agent_position[2], agent_position[0], agent_heading]]
        )
        rel_pose = subtract_pose(start_pose, agent_pose)[0]  # (3,)

        # Compute agent position on the map image
        map_scale = self.config.MAP_SCALE

        H, W = current_wall_map.shape[:2]
        Hby2, Wby2 = (H + 1) // 2, (W + 1) // 2
        agent_map_x = int(rel_pose[1].item() / map_scale + Wby2)
        agent_map_y = int(-rel_pose[0].item() / map_scale + Hby2)

        # Crop the region around the agent.
        mrange = int(1.5 * self.map_size)

        # Add extra padding if the crop coordinates go out of bounds
        y_start = agent_map_y - mrange
        y_end = agent_map_y + mrange
        x_start = agent_map_x - mrange
        x_end = agent_map_x + mrange

        x_l_pad, y_l_pad, x_r_pad, y_r_pad = 0, 0, 0, 0

        H, W = current_wall_map.shape
        if x_start < 0:
            x_l_pad = int(-x_start)
            x_start += x_l_pad
            x_end += x_l_pad
        if x_end >= W:
            x_r_pad = int(x_end - W + 1)
        if y_start < 0:
            y_l_pad = int(-y_start)
            y_start += y_l_pad
            y_end += y_l_pad
        if y_end >= H:
            y_r_pad = int(y_end - H + 1)

        ego_map = np.pad(current_wall_map, ((y_l_pad, y_r_pad), (x_l_pad, x_r_pad)))
        ego_map = ego_map[y_start: (y_end + 1), x_start: (x_end + 1)]

        agent_heading = rel_pose[2].item()
        agent_heading = math.degrees(agent_heading)

        half_size = ego_map.shape[0] // 2
        center = (half_size, half_size)
        M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

        ego_map = cv2.warpAffine(
            ego_map,
            M,
            (ego_map.shape[1], ego_map.shape[0]),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(1,),
        )

        ego_map = ego_map.astype(np.float32)
        mrange = int(self.map_size)
        ego_map = ego_map[
            (half_size - mrange): (half_size + mrange),
            (half_size - mrange): (half_size + mrange),
        ]

        # Get the forward region in front of the agent
        half_size = ego_map.shape[0] // 2
        quarter_size = ego_map.shape[0] // 4
        center = (half_size, half_size)

        ego_map = ego_map[0:half_size, quarter_size: (quarter_size + half_size)]

        # Append explored status in the 2nd channel
        ego_map = np.stack([ego_map, ego_map], axis=2)

        return ego_map
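
The x_l_pad / y_l_pad bookkeeping in Example #4 is a common pattern: take a fixed-size window around the agent's map cell and pad the map wherever the window crosses its border. A compact, self-contained sketch of the same idea is given below; crop_around_sketch and its signature are illustrative assumptions, not code from the source repository.

import numpy as np

def crop_around_sketch(grid, cy, cx, radius, pad_value=0):
    """Extract a (2*radius + 1)-sized square window of a 2D grid centered at
    (cy, cx), padding with pad_value where the window leaves the grid."""
    H, W = grid.shape
    y0, y1 = cy - radius, cy + radius
    x0, x1 = cx - radius, cx + radius
    # Padding needed on each side so the slice stays within bounds
    top, bottom = max(0, -y0), max(0, y1 - H + 1)
    left, right = max(0, -x0), max(0, x1 - W + 1)
    padded = np.pad(grid, ((top, bottom), (left, right)), constant_values=pad_value)
    return padded[y0 + top: y1 + top + 1, x0 + left: x1 + left + 1]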