Example #1
    def metrics(self) -> Dict[str, Any]:
        if not self.is_done():
            return {}
        else:
            total_reward = float(np.sum(self._rewards))
            self._rewards = []

            if self._success is None:
                return {}

            if self.distance_cache:
                dist2tget = get_distance(
                    self.distance_cache,
                    self.env.agent_state(),
                    self.task_info["target"],
                )
                spl = self.spl()
                if spl is None:
                    return {}
            else:
                # TODO: no distance cache available; -1.0 marks an unknown distance
                dist2tget = -1.0  # self._get_distance_to_target()
                spl = self.spl() if len(self.episode_optimal_corners) > 1 else 0.0
            if dist2tget is None:
                return {}

            return {
                "success": self._success,  # False also if no path to target
                "ep_length": self.num_steps_taken(),
                "total_reward": total_reward,
                "dist_to_target": dist2tget,
                "spl": spl,
                "task_info": self.task_info,
            }
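The "spl" entry comes from self.spl(); for reference, a minimal sketch of the standard Success weighted by Path Length term is given below. The helper name compute_spl and its arguments are illustrative assumptions, not the repository's actual API.

# Minimal sketch of the SPL metric ("Success weighted by Path Length");
# compute_spl and its argument names are hypothetical, for illustration only.
def compute_spl(success: bool, optimal_distance: float, travelled_distance: float) -> float:
    if not success or optimal_distance <= 0.0:
        return 0.0
    # 1.0 when the agent followed the shortest path, smaller when it wandered
    return optimal_distance / max(optimal_distance, travelled_distance)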
Example #2
    def __init__(
        self,
        env: RoboThorEnvironment,
        sensors: List[Sensor],
        task_info: Dict[str, Any],
        max_steps: int,
        reward_configs: Dict[str, Any],
        distance_cache: Optional[Dict[str, Any]] = None,
        episode_info: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        super().__init__(
            env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs
        )
        self.reward_configs = reward_configs
        self._took_end_action: bool = False
        self._success: Optional[bool] = False
        self.distance_cache = distance_cache

        if episode_info:
            self.episode_optimal_corners = episode_info["shortest_path"]
            dist = episode_info["shortest_path_length"]
        else:
            self.episode_optimal_corners = self.env.path_corners(
                task_info["target"]
            )  # assumed valid; the task sampler is responsible for providing a reachable target
            dist = self.env.path_corners_to_dist(self.episode_optimal_corners)
        if dist == float("inf"):
            dist = -1.0  # -1.0 for unreachable
            get_logger().warning(
                "No path for {} from {} to {}".format(
                    self.env.scene_name, self.env.agent_state(), task_info["target"]
                )
            )

        if self.distance_cache:
            self.last_geodesic_distance = get_distance(
                self.distance_cache, self.env.agent_state(), self.task_info["target"]
            )
        else:
            self.last_geodesic_distance = self.env.dist_to_point(
                self.task_info["target"]
            )

        self.optimal_distance = self.last_geodesic_distance
        self._rewards: List[float] = []
        self._distance_to_goal: List[float] = []
        self._metrics = None
        # the initial coordinate will be taken directly from the optimal path
        self.path: List[Any] = []

        self.task_info["followed_path"] = [self.env.agent_state()]
        self.task_info["action_names"] = self.action_names()
        self.num_moves_made = 0
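get_distance is used throughout these snippets to read a precomputed geodesic distance out of distance_cache. A hedged sketch of how such a lookup could work is shown below; the key layout, rounding, and the -1.0 fallback are assumptions for illustration, not the repository's actual helper.

from typing import Any, Dict

# Hypothetical sketch of a geodesic-distance cache lookup in the spirit of
# get_distance(); the key format and fallback value are assumptions only.
def cached_geodesic_distance(
    cache: Dict[str, float], agent_state: Dict[str, Any], target: Dict[str, Any]
) -> float:
    key = "{:.2f}_{:.2f}|{:.2f}_{:.2f}".format(
        agent_state["x"], agent_state["z"], target["x"], target["z"]
    )
    return cache.get(key, -1.0)  # -1.0 mirrors the "unreachable" sentinel used above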
Example #3
    def _is_goal_in_range(self) -> Optional[bool]:
        tget = self.task_info["target"]
        if self.distance_cache:
            dist = get_distance(self.distance_cache, self.env.agent_state(), tget)
        else:
            dist = self.dist_to_target()

        if -0.5 < dist <= 0.2:
            return True
        elif dist > 0.2:
            return False
        else:
            get_logger().warning("No path for {} from {} to {}".format(
                self.env.scene_name, self.env.agent_state(), tget))
            return None
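For reference, the same thresholding can be exercised in isolation; the standalone helper below mirrors the 0.2 m success radius and the negative "no path" sentinel used above, with illustrative names only.

from typing import Optional

# Standalone mirror of the check above: 0.2 m success radius, negative
# sentinel (e.g. -1.0) means no valid path; names are illustrative.
def goal_in_range(dist: float, success_radius: float = 0.2) -> Optional[bool]:
    if -0.5 < dist <= success_radius:
        return True
    if dist > success_radius:
        return False
    return None

assert goal_in_range(0.15) is True
assert goal_in_range(1.30) is False
assert goal_in_range(-1.0) is None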
Example #4
    def shaping(self) -> float:
        rew = 0.0

        if self.reward_configs["shaping_weight"] == 0.0:
            return rew

        if self.distance_cache:
            geodesic_distance = get_distance(self.distance_cache,
                                             self.env.agent_state(),
                                             self.task_info["target"])
        else:
            geodesic_distance = self.dist_to_target()

        if geodesic_distance == -1.0:
            # no path found this step; fall back to the previous distance
            geodesic_distance = self.last_geodesic_distance
        if (self.last_geodesic_distance > -0.5
                and geodesic_distance > -0.5):  # both distances valid (RoboThor uses negative values for "no path")
            rew += self.last_geodesic_distance - geodesic_distance
        self.last_geodesic_distance = geodesic_distance

        return rew * self.reward_configs["shaping_weight"]
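A worked example of the shaping term above: with shaping_weight = 1.0, moving from a geodesic distance of 2.0 m down to 1.7 m yields a +0.3 shaping reward, while moving away from the target yields a negative one. The numbers below are illustrative only.

# Illustrative numbers only: progress toward the target is rewarded by the
# decrease in geodesic distance, scaled by the shaping weight.
last_geodesic_distance, geodesic_distance = 2.0, 1.7
shaping_weight = 1.0
reward = (last_geodesic_distance - geodesic_distance) * shaping_weight
assert abs(reward - 0.3) < 1e-9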