def metrics(self) -> Dict[str, Any]:
    """Return the episode's metrics once the task is done; {} while running.

    On completion, sums and clears the accumulated per-step rewards and
    reports success, episode length, distance to target and SPL. Returns {}
    when success or SPL cannot be determined.
    """
    if not self.is_done():
        return {}

    # Fold the per-step rewards into a single scalar and reset the buffer.
    total_reward = float(np.sum(self._rewards))
    self._rewards = []

    if self._success is None:
        return {}

    if self.distance_cache:
        dist2tget = get_distance(
            self.distance_cache,
            self.env.agent_state(),
            self.task_info["target"],
        )
        spl = self.spl()
        if spl is None:
            return {}
    else:
        # TODO
        dist2tget = -1  # self._get_distance_to_target()
        spl = self.spl() if len(self.episode_optimal_corners) > 1 else 0.0

    if dist2tget is None:
        return {}

    return {
        "success": self._success,  # False also if no path to target
        "ep_length": self.num_steps_taken(),
        "total_reward": total_reward,
        "dist_to_target": dist2tget,
        "spl": spl,
        "task_info": self.task_info,
    }
def __init__(
    self,
    env: RoboThorEnvironment,
    sensors: List[Sensor],
    task_info: Dict[str, Any],
    max_steps: int,
    reward_configs: Dict[str, Any],
    distance_cache: Optional[Dict[str, Any]] = None,
    episode_info: Optional[Dict[str, Any]] = None,
    **kwargs
) -> None:
    """Initialize a navigation episode.

    Computes (or loads from ``episode_info``) the optimal path to the target,
    records the initial geodesic distance for reward shaping, and resets all
    per-episode bookkeeping (rewards, path, counters).

    :param env: the RoboThor environment the episode runs in.
    :param sensors: sensors producing observations for the agent.
    :param task_info: episode specification; must contain "target".
    :param max_steps: maximum number of steps before the episode ends.
    :param reward_configs: reward parameters (e.g. "shaping_weight").
    :param distance_cache: optional precomputed geodesic-distance cache;
        when set, distances are looked up via ``get_distance`` instead of
        queried from the environment.
    :param episode_info: optional precomputed "shortest_path" and
        "shortest_path_length" for this episode; skips the env queries.
    """
    super().__init__(
        env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
    )
    self.reward_configs = reward_configs
    # Set when the agent explicitly ends the episode.
    self._took_end_action: bool = False
    # Episode success; None would mean "undetermined" in metrics().
    self._success: Optional[bool] = False
    self.distance_cache = distance_cache
    if episode_info:
        # Reuse the precomputed optimal path and its length.
        self.episode_optimal_corners = episode_info["shortest_path"]
        dist = episode_info["shortest_path_length"]
    else:
        self.episode_optimal_corners = self.env.path_corners(
            task_info["target"]
        )  # assume it's valid (sampler must take care)!
        dist = self.env.path_corners_to_dist(self.episode_optimal_corners)
    if dist == float("inf"):
        dist = -1.0  # -1.0 for unreachable
        get_logger().warning(
            "No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), task_info["target"]
            )
        )
    # NOTE(review): `dist` is not stored after the check above — only the
    # geodesic distance below is kept; confirm `dist` is intentionally unused.
    if self.distance_cache:
        self.last_geodesic_distance = get_distance(
            self.distance_cache, self.env.agent_state(), self.task_info["target"]
        )
    else:
        self.last_geodesic_distance = self.env.dist_to_point(
            self.task_info["target"]
        )
    # Starting distance, kept for SPL-style metrics.
    self.optimal_distance = self.last_geodesic_distance
    self._rewards: List[float] = []
    self._distance_to_goal: List[float] = []
    self._metrics = None
    self.path: List[Any] = (
        []
    )  # the initial coordinate will be directly taken from the optimal path
    self.task_info["followed_path"] = [self.env.agent_state()]
    self.task_info["action_names"] = self.action_names()
    self.num_moves_made = 0
def _is_goal_in_range(self) -> Optional[bool]:
    """Return True when the agent is within success range of the target,
    False when it is farther away, and None when the target is unreachable
    (negative sentinel distance)."""
    target = self.task_info["target"]

    # Prefer the precomputed cache; fall back to an env distance query.
    if self.distance_cache:
        dist = get_distance(
            self.distance_cache, self.env.agent_state(), self.task_info["target"]
        )
    else:
        dist = self.dist_to_target()

    if -0.5 < dist <= 0.2:
        return True
    if dist > 0.2:
        return False

    # dist <= -0.5: the "no path" sentinel — log and report undetermined.
    get_logger().warning(
        "No path for {} from {} to {}".format(
            self.env.scene_name, self.env.agent_state(), target
        )
    )
    return None
def shaping(self) -> float:
    """Return the distance-based shaping reward: the decrease in geodesic
    distance to the target since the previous step, scaled by the configured
    "shaping_weight" (0.0 disables shaping entirely)."""
    weight = self.reward_configs["shaping_weight"]
    if weight == 0.0:
        return 0.0

    # Current geodesic distance: cache lookup if available, else env query.
    if self.distance_cache:
        geodesic_distance = get_distance(
            self.distance_cache, self.env.agent_state(), self.task_info["target"]
        )
    else:
        geodesic_distance = self.dist_to_target()

    # -1.0 is the "no path" sentinel; reuse the last valid measurement.
    if geodesic_distance == -1.0:
        geodesic_distance = self.last_geodesic_distance

    reward = 0.0
    if (
        self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5
    ):  # (robothor limits)
        reward = self.last_geodesic_distance - geodesic_distance
    self.last_geodesic_distance = geodesic_distance
    return reward * weight