def _prepare_batch(self, observations, device=None, actions=None):
        imH, imW = self.config.RL.ANS.image_scale_hw
        device = self.device if device is None else device
        batch = batch_obs(observations, device=device)
        if batch["rgb"].size(1) != imH or batch["rgb"].size(2) != imW:
            rgb = rearrange(batch["rgb"], "b h w c -> b c h w")
            rgb = F.interpolate(rgb, (imH, imW), mode="bilinear")
            batch["rgb"] = rearrange(rgb, "b c h w -> b h w c")
        if batch["depth"].size(1) != imH or batch["depth"].size(2) != imW:
            depth = rearrange(batch["depth"], "b h w c -> b c h w")
            depth = F.interpolate(depth, (imH, imW), mode="nearest")
            batch["depth"] = rearrange(depth, "b c h w -> b h w c")
        # Compute ego_map_gt from depth
        ego_map_gt_b = self.depth_projection_net(
            rearrange(batch["depth"], "b h w c -> b c h w")
        )
        batch["ego_map_gt"] = rearrange(ego_map_gt_b, "b c h w -> b h w c")
        # Add previous action to batch as well
        batch["prev_actions"] = self.prev_actions
        # Add a rough pose estimate if GT pose is not available
        if "pose" not in batch:
            if self.prev_batch is None:
                # Set initial pose estimate to zero
                batch["pose"] = torch.zeros(self.envs.num_envs, 3).to(self.device)
            else:
                actions_delta = self._convert_actions_to_delta(self.prev_actions)
                batch["pose"] = add_pose(self.prev_batch["pose"], actions_delta)

        return batch
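
A minimal standalone sketch of the resize pattern used above (rearrange to NCHW, interpolate, rearrange back), with bilinear resampling for RGB and nearest-neighbour for depth so that depth discontinuities are not blended across; the helper name is illustrative, not part of the project API:

import torch
import torch.nn.functional as F
from einops import rearrange

def resize_observation(img, out_hw, mode):
    # img: (bs, H, W, C) channel-last tensor, as produced by batch_obs.
    x = rearrange(img, "b h w c -> b c h w")
    x = F.interpolate(x, out_hw, mode=mode)
    return rearrange(x, "b c h w -> b h w c")

# Example usage (shapes/keys assumed to mirror the method above):
# rgb = resize_observation(batch["rgb"].float(), (imH, imW), "bilinear")
# depth = resize_observation(batch["depth"], (imH, imW), "nearest")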
Example #2
File: ans.py  Project: aimagelab/LoCoNav
    def act(
            self,
            observations,
            prev_observations,
            prev_state_estimates,
            ep_time,
            masks,
            deterministic=False,
    ):
        # ============================ Set useful variables ===========================
        ep_step = ep_time[0].item()
        M = prev_state_estimates["map_states"].shape[2]
        s = self.map_scale
        device = observations['rgb'].device
        assert M % 2 == 1, "The code is tested only for odd map sizes!"
        # =================== Update states from current observation ==================
        # Update map, pose and visitation map
        mapper_inputs = self._create_mapper_inputs(
            observations, prev_observations, prev_state_estimates
        )
        mapper_outputs = self.mapper(mapper_inputs)
        global_map = mapper_outputs["mt"]
        global_pose = mapper_outputs["xt_hat"]
        if self.config.MAPPER.debug_log:
            print(f"Global Pose: {global_pose}")
        map_xy = convert_world2map(global_pose[:, :2], (M, M), s)
        map_xy = torch.clamp(map_xy, 0, M - 1)
        visited_states = self._update_state_visitation(
            prev_state_estimates["visited_states"], map_xy
        )  # (bs, 1, M, M)
        # Update local ANM state variables
        curr_map_position = map_xy
        if ep_step > 0:
            # Compute state updates
            prev_dist2localgoal = self.states["curr_dist2localgoal"]
            curr_dist2localgoal = self._compute_dist2localgoal(
                global_map, map_xy, self.states["curr_local_goals"],
            )
            prev_map_position = self.states["curr_map_position"]
            prev_step_size = (
                    torch.norm(curr_map_position - prev_map_position, dim=1).unsqueeze(1)
                    * s
            )
            # Update the state variables
            self.states["prev_dist2localgoal"] = prev_dist2localgoal
            self.states["curr_dist2localgoal"] = curr_dist2localgoal
            self.states["prev_map_position"] = prev_map_position
            self.states["local_path_length"] += prev_step_size
        self.states["curr_map_position"] = curr_map_position
        # Initialize collision and visited maps at t=0
        if ep_step == 0:
            self.states["collision_map"] = torch.zeros(self.nplanners, M, M).to(device)
            self.states["visited_map"] = torch.zeros(self.nplanners, M, M).to(device)
            self.states["col_width"] = torch.ones(self.nplanners)
            # Monitors number of steps elapsed since last call to sample random explored
            self.states["sample_random_explored_timer"] = torch.zeros(self.nplanners)
        if ep_step > 0:
            # Update collision maps
            forward_step = self.config.LOCAL_POLICY.AGENT_DYNAMICS.forward_step
            for i in range(self.nplanners):
                prev_action_i = observations["prev_actions"][i, 0].item()
                # If not forward action, skip
                if prev_action_i != 0:
                    continue
                x1, y1 = asnumpy(self.states["prev_map_position"][i]).tolist()
                x2, y2 = asnumpy(self.states["curr_map_position"][i]).tolist()
                t2 = global_pose[i, 2].item() - math.pi / 2
                if abs(x1 - x2) < 1 and abs(y1 - y2) < 1:
                    self.states["col_width"][i] += 2
                    self.states["col_width"][i] = min(self.states["col_width"][i], 9)
                else:
                    self.states["col_width"][i] = 1
                dist_trav_i = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) * s
                # Add an obstacle in front of the agent if a collision happens
                if dist_trav_i < 0.7 * forward_step:  # Collision
                    length = 2
                    width = int(self.states["col_width"][i].item())
                    buf = 3
                    cmH, cmW = self.states["collision_map"][i].shape
                    for j in range(length):
                        for k in range(width):
                            wx = (
                                    x2
                                    + ((j + buf) * math.cos(t2))
                                    + ((k - width / 2) * math.sin(t2))
                            )
                            wy = (
                                    y2
                                    + ((j + buf) * math.sin(t2))
                                    - ((k - width / 2) * math.cos(t2))
                            )
                            wx, wy = int(wx), int(wy)
                            if wx < 0 or wx >= cmW or wy < 0 or wy >= cmH:
                                continue
                            self.states["collision_map"][i, wy, wx] = 1
        # Update visitation maps
        for i in range(self.nplanners):
            mx, my = asnumpy(self.states["curr_map_position"][i]).tolist()
            mx, my = int(mx), int(my)
            self.states["visited_map"][i, my - 2: my + 3, mx - 2: mx + 3] = 1
        # ===================== Compute rewards for previous action ===================
        local_rewards = self._compute_local_rewards(ep_step, s).to(device)
        # ====================== Global policy action selection =======================
        SAMPLE_GLOBAL_GOAL_FLAG = ep_step % self.goal_interval == 0
        # Sample global goal if needed
        if SAMPLE_GLOBAL_GOAL_FLAG:
            global_policy_inputs = self._create_global_policy_inputs(
                global_map, visited_states, self.states["curr_map_position"]
            )
            # diff_policy baseline add
            if hasattr(self.config.GLOBAL_POLICY, "difference_baseline_explorer"):
                if self.config.GLOBAL_POLICY.difference_baseline_explorer:
                    update_dict = {"diff_scores": prev_state_estimates.pop("diff_scores")}
                    global_policy_inputs.update(update_dict)

            (
                global_value,
                global_action,
                global_action_log_probs,
                _,
            ) = self.global_policy.act(global_policy_inputs, None, None, None)
            # Convert action to location (row-major format)
            G = self.global_policy.G
            global_action_map_x = torch.fmod(
                global_action.squeeze(1), G
            ).float()  # (bs, )
            global_action_map_y = torch.div(
                global_action.squeeze(1), G, rounding_mode="floor"
            ).float()  # (bs, ) row index in the row-major decode
            # Convert to MxM map coordinates
            global_action_map_x = global_action_map_x * M / G
            global_action_map_y = global_action_map_y * M / G

            if self.config.fixed_global_goal:
                starting_point = (M - 1) // 2
                global_action_map_x[0] = starting_point + (self.config.fixed_delta_x / s)
                global_action_map_y[0] = starting_point - (self.config.fixed_delta_y / s)

            global_action_map_xy = torch.stack(
                [global_action_map_x, global_action_map_y], dim=1
            )
            # Set the goal (bs, 2) --- (x, y) in map image coordinates
            self.states["curr_global_goals"] = global_action_map_xy
            # Update the current map to prev_map_states in order to facilitate
            # future reward computation.
            self.states["prev_map_states"] = global_map.detach()
        else:
            global_policy_inputs = None
            global_value = None
            global_action = None
            global_action_log_probs = None
        # ======================= Local policy action selection =======================
        # Initialize states at t=0
        if ep_step == 0:
            self.states["curr_local_goals"] = torch.zeros(self.nplanners, 2).to(device)
            self.states["local_shortest_path_length"] = torch.zeros(
                self.nplanners, 1
            ).to(device)
            self.states["local_path_length"] = torch.zeros(self.nplanners, 1).to(device)
        # Should local goals be sampled now?
        if SAMPLE_GLOBAL_GOAL_FLAG:
            if self.config.MAPPER.debug_log:
                print('NEW LOCAL GOAL SAMPLED!')
            # Condition 1: A new global goal was sampled
            SAMPLE_LOCAL_GOAL_FLAGS = [1 for _ in range(self.nplanners)]
        else:
            if self.config.MAPPER.debug_log:
                print('REACHING LOCAL GOAL:')
                print('Distance to Goal: {}'.format(self.states["curr_dist2localgoal"]))
                print('Goal Position: {}'.format(self.states["curr_local_goals"]))
                print('Agent Position: {}'.format(map_xy))
            # Condition 2 (a): The previous local goal was reached
            prev_goal_reached = (
                    self.states["curr_dist2localgoal"] < self.goal_success_radius
            )
            # Condition 2 (b): The previous local goal is occupied.
            goals = self.states["curr_local_goals"].long().to(device)
            prev_gcells = global_map[
                torch.arange(0, goals.shape[0]).long(), :, goals[:, 1], goals[:, 0]
            ]
            prev_goal_occupied = (prev_gcells[:, 0] > self.config.thresh_obstacle) & (
                    prev_gcells[:, 1] > self.config.thresh_explored
            )
            SAMPLE_LOCAL_GOAL_FLAGS = asnumpy(
                (prev_goal_reached | prev_goal_occupied).float()
            ).tolist()
        # Execute planner and compute local goals
        self._compute_plans_and_local_goals(
            global_map, self.states["curr_map_position"], SAMPLE_LOCAL_GOAL_FLAGS
        )
        # Update state variables to account for new local goals
        self.states["curr_dist2localgoal"] = self._compute_dist2localgoal(
            global_map,
            self.states["curr_map_position"],
            self.states["curr_local_goals"],
        )

        # Sample action with local policy
        local_masks = 1 - torch.Tensor(SAMPLE_LOCAL_GOAL_FLAGS).to(device).unsqueeze(1)
        recurrent_hidden_states = prev_state_estimates["recurrent_hidden_states"]
        relative_goals = self._compute_relative_local_goals(global_pose, M, s)
        local_policy_inputs = {
            "rgb_at_t": observations["rgb"],
            "goal_at_t": relative_goals,
            "t": ep_time,
        }
        outputs = self.local_policy.act(
            local_policy_inputs,
            recurrent_hidden_states,
            None,
            local_masks,
            deterministic=deterministic,
        )
        (
            local_value,
            local_action,
            local_action_log_probs,
            recurrent_hidden_states,
        ) = outputs
        # If imitation learning is used, also sample the ground-truth action to take
        if "gt_global_map" in observations.keys():
            gt_global_map = observations["gt_global_map"]  # (bs, 2, M, M)
            gt_global_pose = observations["pose_gt"]  # (bs, 3)
            relative_goals_aug = torch.cat(
                [relative_goals, torch.zeros_like(relative_goals[:, 0:1])], dim=1
            )
            gt_goals = add_pose(gt_global_pose, relative_goals_aug)  # (bs, 3)
            gt_actions = self._compute_gt_local_action(
                gt_global_map, gt_global_pose, gt_goals, M, s
            )  # (bs, 1)
            gt_actions = gt_actions.to(device)
        else:
            gt_actions = torch.zeros_like(local_action)
        # ============================== Create output dicts ==========================
        state_estimates = {
            "recurrent_hidden_states": recurrent_hidden_states,
            "map_states": mapper_outputs["mt"],
            "visited_states": visited_states,
            "pose_estimates": global_pose,
        }
        local_policy_outputs = {
            "values": local_value,
            "actions": local_action,
            "action_log_probs": local_action_log_probs,
            "local_masks": local_masks,
            "gt_actions": gt_actions,
        }
        global_policy_outputs = {
            "values": global_value,
            "actions": global_action,
            "action_log_probs": global_action_log_probs,
        }
        rewards = {
            "local_rewards": local_rewards,
        }
        mapper_outputs.update({'curr_map_position': curr_map_position})
        return (
            mapper_inputs,
            local_policy_inputs,
            global_policy_inputs,
            mapper_outputs,
            local_policy_outputs,
            global_policy_outputs,
            state_estimates,
            rewards,
        )
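
The global goal above is decoded from a flat action index over a G x G grid and then rescaled to the M x M map. A self-contained sketch of that arithmetic, assuming row-major indexing (decode_global_action is a hypothetical helper, not part of the project):

import torch

def decode_global_action(global_action, G, M):
    # global_action: (bs, 1) flat indices into a G x G action grid, row-major.
    # Returns (bs, 2) (x, y) goal locations in M x M map coordinates.
    a = global_action.squeeze(1)
    gx = torch.fmod(a, G).float()                        # column within the grid
    gy = torch.div(a, G, rounding_mode="floor").float()  # row within the grid
    return torch.stack([gx * M / G, gy * M / G], dim=1)

# For example, with G = 10 and M = 961, action index 37 corresponds to grid
# cell (x=7, y=3) and map location (x=672.7, y=288.3).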
Example #3
    def predict_deltas(self, x, masks=None):
        # Transpose multichannel inputs
        st_1 = process_image(x["rgb_at_t_1"], self.img_mean_t, self.img_std_t)
        dt_1 = transpose_image(x["depth_at_t_1"])
        ego_map_gt_at_t_1 = transpose_image(x["ego_map_gt_at_t_1"])
        st = process_image(x["rgb_at_t"], self.img_mean_t, self.img_std_t)
        dt = transpose_image(x["depth_at_t"])
        ego_map_gt_at_t = transpose_image(x["ego_map_gt_at_t"])
        # This happens only for a baseline
        if (
            "ego_map_gt_anticipated_at_t_1" in x
            and x["ego_map_gt_anticipated_at_t_1"] is not None
        ):
            ego_map_gt_anticipated_at_t_1 = transpose_image(
                x["ego_map_gt_anticipated_at_t_1"]
            )
            ego_map_gt_anticipated_at_t = transpose_image(
                x["ego_map_gt_anticipated_at_t"]
            )
        else:
            ego_map_gt_anticipated_at_t_1 = None
            ego_map_gt_anticipated_at_t = None
        # Compute past and current egocentric maps
        bs = st_1.size(0)
        pu_inputs_t_1 = {
            "rgb": st_1,
            "depth": dt_1,
            "ego_map_gt": ego_map_gt_at_t_1,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t_1,
        }
        pu_inputs_t = {
            "rgb": st,
            "depth": dt,
            "ego_map_gt": ego_map_gt_at_t,
            "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t,
        }
        pu_inputs = self._safe_cat(pu_inputs_t_1, pu_inputs_t)
        pu_outputs = self.projection_unit(pu_inputs)
        pu_outputs_t = {k: v[bs:] for k, v in pu_outputs.items()}
        pt_1, pt = pu_outputs["occ_estimate"][:bs], pu_outputs["occ_estimate"][bs:]
        # Compute relative pose
        dx = subtract_pose(x["pose_at_t_1"], x["pose_at_t"])
        # Estimate pose
        dx_hat = dx
        xt_hat = x["pose_at_t"]
        all_pose_outputs = None
        if not self.config.ignore_pose_estimator:
            all_pose_outputs = {}
            pose_inputs = {}
            if "rgb" in self.config.pose_predictor_inputs:
                pose_inputs["rgb_t_1"] = st_1
                pose_inputs["rgb_t"] = st
            if "depth" in self.config.pose_predictor_inputs:
                pose_inputs["depth_t_1"] = dt_1
                pose_inputs["depth_t"] = dt
            if "ego_map" in self.config.pose_predictor_inputs:
                pose_inputs["ego_map_t_1"] = pt_1
                pose_inputs["ego_map_t"] = pt
            if self.config.detach_map:
                for k in pose_inputs.keys():
                    pose_inputs[k] = pose_inputs[k].detach()
            n_pose_inputs = self._transform_observations(pose_inputs, dx)
            pose_outputs = self.pose_estimator(n_pose_inputs)
            dx_hat = add_pose(dx, pose_outputs["pose"])
            all_pose_outputs["pose_outputs"] = pose_outputs
            # Estimate global pose
            xt_hat = add_pose(x["pose_hat_at_t_1"], dx_hat)
        # Zero out pose prediction based on the mask
        if masks is not None:
            xt_hat = xt_hat * masks
            dx_hat = dx_hat * masks
        outputs = {
            "pt": pt,
            "dx_hat": dx_hat,
            "xt_hat": xt_hat,
            "all_pu_outputs": pu_outputs_t,
            "all_pose_outputs": all_pose_outputs,
        }
        if "ego_map_hat" in pu_outputs_t:
            outputs["ego_map_hat_at_t"] = pu_outputs_t["ego_map_hat"]
        return outputs
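
predict_deltas composes poses with add_pose and subtract_pose. For reference, a minimal SE(2) sketch of what such helpers typically compute, assuming (x, y, theta) poses with the delta expressed in the first pose's local frame (the project's actual helpers may use different axis or angle conventions):

import torch

def add_pose_sketch(pose, delta):
    # Compose: apply a local-frame delta (dx, dy, dtheta) to a global pose (x, y, theta).
    x, y, t = pose[:, 0], pose[:, 1], pose[:, 2]
    dx, dy, dt = delta[:, 0], delta[:, 1], delta[:, 2]
    xn = x + dx * torch.cos(t) - dy * torch.sin(t)
    yn = y + dx * torch.sin(t) + dy * torch.cos(t)
    tn = torch.atan2(torch.sin(t + dt), torch.cos(t + dt))
    return torch.stack([xn, yn, tn], dim=1)

def subtract_pose_sketch(pose_a, pose_b):
    # Inverse composition: express pose_b in pose_a's local frame, so that
    # add_pose_sketch(pose_a, subtract_pose_sketch(pose_a, pose_b)) recovers pose_b.
    xa, ya, ta = pose_a[:, 0], pose_a[:, 1], pose_a[:, 2]
    xb, yb, tb = pose_b[:, 0], pose_b[:, 1], pose_b[:, 2]
    dxw, dyw = xb - xa, yb - ya
    dx = dxw * torch.cos(ta) + dyw * torch.sin(ta)
    dy = -dxw * torch.sin(ta) + dyw * torch.cos(ta)
    dt = torch.atan2(torch.sin(tb - ta), torch.cos(tb - ta))
    return torch.stack([dx, dy, dt], dim=1)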