def _prepare_batch(self, observations, device=None, actions=None):
    imH, imW = self.config.RL.ANS.image_scale_hw
    device = self.device if device is None else device
    batch = batch_obs(observations, device=device)
    if batch["rgb"].size(1) != imH or batch["rgb"].size(2) != imW:
        rgb = rearrange(batch["rgb"], "b h w c -> b c h w")
        rgb = F.interpolate(rgb, (imH, imW), mode="bilinear")
        batch["rgb"] = rearrange(rgb, "b c h w -> b h w c")
    if batch["depth"].size(1) != imH or batch["depth"].size(2) != imW:
        depth = rearrange(batch["depth"], "b h w c -> b c h w")
        depth = F.interpolate(depth, (imH, imW), mode="nearest")
        batch["depth"] = rearrange(depth, "b c h w -> b h w c")
    # Compute ego_map_gt from depth
    ego_map_gt_b = self.depth_projection_net(
        rearrange(batch["depth"], "b h w c -> b c h w")
    )
    batch["ego_map_gt"] = rearrange(ego_map_gt_b, "b c h w -> b h w c")
    # Add previous action to batch as well
    batch["prev_actions"] = self.prev_actions
    # Add a rough pose estimate if GT pose is not available
    if "pose" not in batch:
        if self.prev_batch is None:
            # Set initial pose estimate to zero
            batch["pose"] = torch.zeros(self.envs.num_envs, 3).to(self.device)
        else:
            actions_delta = self._convert_actions_to_delta(self.prev_actions)
            batch["pose"] = add_pose(self.prev_batch["pose"], actions_delta)
    return batch
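# --- Illustrative sketch (not part of the original code) -------------------------
# The rough pose estimate above chains per-step motion deltas through `add_pose`.
# Assuming poses are (bs, 3) tensors of (x, y, theta) and the delta is expressed in
# the frame of the previous pose, a minimal stand-in for `add_pose` could look like
# the sketch below; the actual helper defined elsewhere in the repo may differ in
# its conventions (e.g. angle wrapping). torch is already imported in this module.
def _add_pose_sketch(pose, delta):
    x, y, theta = pose.unbind(dim=1)
    dx, dy, dtheta = delta.unbind(dim=1)
    # Rotate the body-frame translation into the world frame before accumulating.
    new_x = x + dx * torch.cos(theta) - dy * torch.sin(theta)
    new_y = y + dx * torch.sin(theta) + dy * torch.cos(theta)
    # Wrap the resulting heading to (-pi, pi].
    new_theta = torch.atan2(torch.sin(theta + dtheta), torch.cos(theta + dtheta))
    return torch.stack([new_x, new_y, new_theta], dim=1)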
def act(
    self,
    observations,
    prev_observations,
    prev_state_estimates,
    ep_time,
    masks,
    deterministic=False,
):
    # ============================ Set useful variables ===========================
    ep_step = ep_time[0].item()
    M = prev_state_estimates["map_states"].shape[2]
    s = self.map_scale
    device = observations["rgb"].device
    assert M % 2 == 1, "The code is tested only for odd map sizes!"
    # =================== Update states from current observation ==================
    # Update map, pose and visitation map
    mapper_inputs = self._create_mapper_inputs(
        observations, prev_observations, prev_state_estimates
    )
    mapper_outputs = self.mapper(mapper_inputs)
    global_map = mapper_outputs["mt"]
    global_pose = mapper_outputs["xt_hat"]
    if self.config.MAPPER.debug_log:
        print(f"Global Pose: {global_pose}")
    map_xy = convert_world2map(global_pose[:, :2], (M, M), s)
    map_xy = torch.clamp(map_xy, 0, M - 1)
    visited_states = self._update_state_visitation(
        prev_state_estimates["visited_states"], map_xy
    )  # (bs, 1, M, M)
    # Update local ANM state variables
    curr_map_position = map_xy
    if ep_step > 0:
        # Compute state updates
        prev_dist2localgoal = self.states["curr_dist2localgoal"]
        curr_dist2localgoal = self._compute_dist2localgoal(
            global_map, map_xy, self.states["curr_local_goals"],
        )
        prev_map_position = self.states["curr_map_position"]
        prev_step_size = (
            torch.norm(curr_map_position - prev_map_position, dim=1).unsqueeze(1) * s
        )
        # Update the state variables
        self.states["prev_dist2localgoal"] = prev_dist2localgoal
        self.states["curr_dist2localgoal"] = curr_dist2localgoal
        self.states["prev_map_position"] = prev_map_position
        self.states["local_path_length"] += prev_step_size
    self.states["curr_map_position"] = curr_map_position
    # Initialize collision and visited maps at t=0
    if ep_step == 0:
        self.states["collision_map"] = torch.zeros(self.nplanners, M, M).to(device)
        self.states["visited_map"] = torch.zeros(self.nplanners, M, M).to(device)
        self.states["col_width"] = torch.ones(self.nplanners)
        # Monitors number of steps elapsed since last call to sample random explored
        self.states["sample_random_explored_timer"] = torch.zeros(self.nplanners)
    if ep_step > 0:
        # Update collision maps
        forward_step = self.config.LOCAL_POLICY.AGENT_DYNAMICS.forward_step
        for i in range(self.nplanners):
            prev_action_i = observations["prev_actions"][i, 0].item()
            # Skip unless the previous action was the forward action (index 0 here)
            if prev_action_i != 0:
                continue
            x1, y1 = asnumpy(self.states["prev_map_position"][i]).tolist()
            x2, y2 = asnumpy(self.states["curr_map_position"][i]).tolist()
            t2 = global_pose[i, 2].item() - math.pi / 2
            if abs(x1 - x2) < 1 and abs(y1 - y2) < 1:
                self.states["col_width"][i] += 2
                self.states["col_width"][i] = min(self.states["col_width"][i], 9)
            else:
                self.states["col_width"][i] = 1
            dist_trav_i = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2) * s
            # Add an obstacle in front of the agent if a collision happens
            if dist_trav_i < 0.7 * forward_step:  # Collision
                length = 2
                width = int(self.states["col_width"][i].item())
                buf = 3
                cmH, cmW = self.states["collision_map"][i].shape
                for j in range(length):
                    for k in range(width):
                        wx = (
                            x2
                            + ((j + buf) * math.cos(t2))
                            + ((k - width / 2) * math.sin(t2))
                        )
                        wy = (
                            y2
                            + ((j + buf) * math.sin(t2))
                            - ((k - width / 2) * math.cos(t2))
                        )
                        wx, wy = int(wx), int(wy)
                        if wx < 0 or wx >= cmW or wy < 0 or wy >= cmH:
                            continue
                        self.states["collision_map"][i, wy, wx] = 1
    # Update visitation maps
    for i in range(self.nplanners):
        mx, my = asnumpy(self.states["curr_map_position"][i]).tolist()
        mx, my = int(mx), int(my)
        self.states["visited_map"][i, my - 2 : my + 3, mx - 2 : mx + 3] = 1
    # ===================== Compute rewards for previous action ===================
    local_rewards = self._compute_local_rewards(ep_step, s).to(device)
    # ====================== Global policy action selection =======================
    SAMPLE_GLOBAL_GOAL_FLAG = ep_step % self.goal_interval == 0
    # Sample global goal if needed
    if SAMPLE_GLOBAL_GOAL_FLAG:
        global_policy_inputs = self._create_global_policy_inputs(
            global_map, visited_states, self.states["curr_map_position"]
        )
        # diff_policy baseline addition
        if hasattr(self.config.GLOBAL_POLICY, "difference_baseline_explorer"):
            if self.config.GLOBAL_POLICY.difference_baseline_explorer:
                update_dict = {"diff_scores": prev_state_estimates.pop("diff_scores")}
                global_policy_inputs.update(update_dict)
        (
            global_value,
            global_action,
            global_action_log_probs,
            _,
        ) = self.global_policy.act(global_policy_inputs, None, None, None)
        # Convert action to location (row-major format)
        G = self.global_policy.G
        global_action_map_x = torch.fmod(
            global_action.squeeze(1), G
        ).float()  # (bs, ) column index on the G x G grid
        # Integer division recovers the row index of a row-major flat index
        global_action_map_y = (global_action.squeeze(1) // G).float()  # (bs, )
        # Convert to MxM map coordinates
        global_action_map_x = global_action_map_x * M / G
        global_action_map_y = global_action_map_y * M / G
        if self.config.fixed_global_goal:
            starting_point = (M - 1) // 2
            global_action_map_x[0] = starting_point + (self.config.fixed_delta_x / s)
            global_action_map_y[0] = starting_point - (self.config.fixed_delta_y / s)
        global_action_map_xy = torch.stack(
            [global_action_map_x, global_action_map_y], dim=1
        )
        # Set the goal (bs, 2) --- (x, y) in map image coordinates
        self.states["curr_global_goals"] = global_action_map_xy
        # Update the current map to prev_map_states in order to facilitate
        # future reward computation.
        self.states["prev_map_states"] = global_map.detach()
    else:
        global_policy_inputs = None
        global_value = None
        global_action = None
        global_action_log_probs = None
    # ======================= Local policy action selection =======================
    # Initialize states at t=0
    if ep_step == 0:
        self.states["curr_local_goals"] = torch.zeros(self.nplanners, 2).to(device)
        self.states["local_shortest_path_length"] = torch.zeros(
            self.nplanners, 1
        ).to(device)
        self.states["local_path_length"] = torch.zeros(self.nplanners, 1).to(device)
    # Should local goals be sampled now?
    if SAMPLE_GLOBAL_GOAL_FLAG:
        if self.config.MAPPER.debug_log:
            print("NEW LOCAL GOAL SAMPLED!")
        # Condition 1: A new global goal was sampled
        SAMPLE_LOCAL_GOAL_FLAGS = [1 for _ in range(self.nplanners)]
    else:
        if self.config.MAPPER.debug_log:
            print("REACHING LOCAL GOAL:")
            print("Distance to Goal: {}".format(self.states["curr_dist2localgoal"]))
            print("Goal Position: {}".format(self.states["curr_local_goals"]))
            print("Agent Position: {}".format(map_xy))
        # Condition 2 (a): The previous local goal was reached
        prev_goal_reached = (
            self.states["curr_dist2localgoal"] < self.goal_success_radius
        )
        # Condition 2 (b): The previous local goal is occupied
        goals = self.states["curr_local_goals"].long().to(device)
        prev_gcells = global_map[
            torch.arange(0, goals.shape[0]).long(), :, goals[:, 1], goals[:, 0]
        ]
        prev_goal_occupied = (prev_gcells[:, 0] > self.config.thresh_obstacle) & (
            prev_gcells[:, 1] > self.config.thresh_explored
        )
        SAMPLE_LOCAL_GOAL_FLAGS = asnumpy(
            (prev_goal_reached | prev_goal_occupied).float()
        ).tolist()
    # Execute planner and compute local goals
    self._compute_plans_and_local_goals(
        global_map, self.states["curr_map_position"], SAMPLE_LOCAL_GOAL_FLAGS
    )
    # Update state variables to account for new local goals
    self.states["curr_dist2localgoal"] = self._compute_dist2localgoal(
        global_map,
        self.states["curr_map_position"],
        self.states["curr_local_goals"],
    )
    # Sample action with local policy
    local_masks = 1 - torch.Tensor(SAMPLE_LOCAL_GOAL_FLAGS).to(device).unsqueeze(1)
    recurrent_hidden_states = prev_state_estimates["recurrent_hidden_states"]
    relative_goals = self._compute_relative_local_goals(global_pose, M, s)
    local_policy_inputs = {
        "rgb_at_t": observations["rgb"],
        "goal_at_t": relative_goals,
        "t": ep_time,
    }
    outputs = self.local_policy.act(
        local_policy_inputs,
        recurrent_hidden_states,
        None,
        local_masks,
        deterministic=deterministic,
    )
    (
        local_value,
        local_action,
        local_action_log_probs,
        recurrent_hidden_states,
    ) = outputs
    # If imitation learning is used, also sample the ground-truth action to take
    if "gt_global_map" in observations.keys():
        gt_global_map = observations["gt_global_map"]  # (bs, 2, M, M)
        gt_global_pose = observations["pose_gt"]  # (bs, 3)
        relative_goals_aug = torch.cat(
            [relative_goals, torch.zeros_like(relative_goals[:, 0:1])], dim=1
        )
        gt_goals = add_pose(gt_global_pose, relative_goals_aug)  # (bs, 3)
        gt_actions = self._compute_gt_local_action(
            gt_global_map, gt_global_pose, gt_goals, M, s
        )  # (bs, 1)
        gt_actions = gt_actions.to(device)
    else:
        gt_actions = torch.zeros_like(local_action)
    # ============================== Create output dicts ==========================
    state_estimates = {
        "recurrent_hidden_states": recurrent_hidden_states,
        "map_states": mapper_outputs["mt"],
        "visited_states": visited_states,
        "pose_estimates": global_pose,
    }
    local_policy_outputs = {
        "values": local_value,
        "actions": local_action,
        "action_log_probs": local_action_log_probs,
        "local_masks": local_masks,
        "gt_actions": gt_actions,
    }
    global_policy_outputs = {
        "values": global_value,
        "actions": global_action,
        "action_log_probs": global_action_log_probs,
    }
    rewards = {
        "local_rewards": local_rewards,
    }
    mapper_outputs.update({"curr_map_position": curr_map_position})
    return (
        mapper_inputs,
        local_policy_inputs,
        global_policy_inputs,
        mapper_outputs,
        local_policy_outputs,
        global_policy_outputs,
        state_estimates,
        rewards,
    )
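# --- Illustrative sketch (not part of the original code) -------------------------
# Worked example of the global-goal conversion in `act`: the global policy emits a
# flat index on a G x G action grid in row-major order, which is split into a
# (column, row) pair and rescaled to the M x M map. Names and values here are
# hypothetical and only mirror the arithmetic used above.
def _global_action_to_map_xy_sketch(global_action, G, M):
    # global_action: (bs, 1) tensor of flat indices in [0, G * G)
    a = global_action.squeeze(1)
    gx = torch.fmod(a, G).float()  # column on the G x G grid
    gy = (a // G).float()          # row on the G x G grid (row-major)
    # Rescale grid cells to map-pixel coordinates
    return torch.stack([gx * M / G, gy * M / G], dim=1)


# Example: with G = 2 and M = 101, flat index 3 corresponds to grid cell (1, 1),
# which rescales to map coordinates (50.5, 50.5).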
def predict_deltas(self, x, masks=None):
    # Transpose multichannel inputs
    st_1 = process_image(x["rgb_at_t_1"], self.img_mean_t, self.img_std_t)
    dt_1 = transpose_image(x["depth_at_t_1"])
    ego_map_gt_at_t_1 = transpose_image(x["ego_map_gt_at_t_1"])
    st = process_image(x["rgb_at_t"], self.img_mean_t, self.img_std_t)
    dt = transpose_image(x["depth_at_t"])
    ego_map_gt_at_t = transpose_image(x["ego_map_gt_at_t"])
    # This happens only for a baseline
    if (
        "ego_map_gt_anticipated_at_t_1" in x
        and x["ego_map_gt_anticipated_at_t_1"] is not None
    ):
        ego_map_gt_anticipated_at_t_1 = transpose_image(
            x["ego_map_gt_anticipated_at_t_1"]
        )
        ego_map_gt_anticipated_at_t = transpose_image(
            x["ego_map_gt_anticipated_at_t"]
        )
    else:
        ego_map_gt_anticipated_at_t_1 = None
        ego_map_gt_anticipated_at_t = None
    # Compute past and current egocentric maps
    bs = st_1.size(0)
    pu_inputs_t_1 = {
        "rgb": st_1,
        "depth": dt_1,
        "ego_map_gt": ego_map_gt_at_t_1,
        "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t_1,
    }
    pu_inputs_t = {
        "rgb": st,
        "depth": dt,
        "ego_map_gt": ego_map_gt_at_t,
        "ego_map_gt_anticipated": ego_map_gt_anticipated_at_t,
    }
    pu_inputs = self._safe_cat(pu_inputs_t_1, pu_inputs_t)
    pu_outputs = self.projection_unit(pu_inputs)
    pu_outputs_t = {k: v[bs:] for k, v in pu_outputs.items()}
    pt_1, pt = pu_outputs["occ_estimate"][:bs], pu_outputs["occ_estimate"][bs:]
    # Compute relative pose
    dx = subtract_pose(x["pose_at_t_1"], x["pose_at_t"])
    # Estimate pose
    dx_hat = dx
    xt_hat = x["pose_at_t"]
    all_pose_outputs = None
    if not self.config.ignore_pose_estimator:
        all_pose_outputs = {}
        pose_inputs = {}
        if "rgb" in self.config.pose_predictor_inputs:
            pose_inputs["rgb_t_1"] = st_1
            pose_inputs["rgb_t"] = st
        if "depth" in self.config.pose_predictor_inputs:
            pose_inputs["depth_t_1"] = dt_1
            pose_inputs["depth_t"] = dt
        if "ego_map" in self.config.pose_predictor_inputs:
            pose_inputs["ego_map_t_1"] = pt_1
            pose_inputs["ego_map_t"] = pt
        if self.config.detach_map:
            for k in pose_inputs.keys():
                pose_inputs[k] = pose_inputs[k].detach()
        n_pose_inputs = self._transform_observations(pose_inputs, dx)
        pose_outputs = self.pose_estimator(n_pose_inputs)
        dx_hat = add_pose(dx, pose_outputs["pose"])
        all_pose_outputs["pose_outputs"] = pose_outputs
    # Estimate global pose
    xt_hat = add_pose(x["pose_hat_at_t_1"], dx_hat)
    # Zero out pose prediction based on the mask
    if masks is not None:
        xt_hat = xt_hat * masks
        dx_hat = dx_hat * masks
    outputs = {
        "pt": pt,
        "dx_hat": dx_hat,
        "xt_hat": xt_hat,
        "all_pu_outputs": pu_outputs_t,
        "all_pose_outputs": all_pose_outputs,
    }
    if "ego_map_hat" in pu_outputs_t:
        outputs["ego_map_hat_at_t"] = pu_outputs_t["ego_map_hat"]
    return outputs
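# --- Illustrative sketch (not part of the original code) -------------------------
# `predict_deltas` uses `subtract_pose(p_a, p_b)` to express pose p_b relative to
# pose p_a before the pose estimator refines it. Assuming (bs, 3) poses of
# (x, y, theta), a minimal stand-in consistent with that reading is sketched below;
# the actual helper used above may differ in sign or angle conventions.
def _subtract_pose_sketch(pose_a, pose_b):
    xa, ya, ta = pose_a.unbind(dim=1)
    xb, yb, tb = pose_b.unbind(dim=1)
    # Translate to pose_a's origin, then rotate into pose_a's heading frame.
    dx_world = xb - xa
    dy_world = yb - ya
    dx = dx_world * torch.cos(ta) + dy_world * torch.sin(ta)
    dy = -dx_world * torch.sin(ta) + dy_world * torch.cos(ta)
    dtheta = torch.atan2(torch.sin(tb - ta), torch.cos(tb - ta))
    return torch.stack([dx, dy, dtheta], dim=1)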