示例#1
0
 def compute_reward(self, achieved_goal, desired_goal,
                    info: Dict[str, Any]) -> Union[np.ndarray, float]:
     """Compute the reward from how far the achieved goal is from the target.

     Args:
         achieved_goal: Goal that was actually reached; passed as the first
             argument to ``distance``.
         desired_goal: Goal that should have been reached; passed as the
             second argument to ``distance``.
         info: Extra step information. Not read by this method.

     Returns:
         When ``self.reward_type`` is ``"sparse"``: the negation of a
         float64 array that is 1.0 where the distance exceeds
         ``self.distance_threshold`` and 0.0 elsewhere (so -1.0 or -0.0).
         For any other reward type: the negated distance itself.
     """
     # NOTE(review): `distance` is defined outside this block; presumably it
     # returns a scalar or per-sample array of distances -- confirm.
     gap = distance(achieved_goal, desired_goal)

     # Dense case first: every reward type other than "sparse" lands here.
     if self.reward_type != "sparse":
         return -gap

     # Sparse case: penalise only the goals that are farther than the threshold.
     beyond_threshold = np.array(gap > self.distance_threshold, dtype=np.float64)
     return -beyond_threshold
示例#2
0
 def is_success(self, achieved_goal: np.ndarray, desired_goal: np.ndarray) -> Union[np.ndarray, float]:
     """Report whether the achieved goal is close enough to the desired goal.

     Args:
         achieved_goal: Goal that was actually reached; passed as the first
             argument to ``distance``.
         desired_goal: Goal that should have been reached; passed as the
             second argument to ``distance``.

     Returns:
         A float64 array holding 1.0 where the distance is strictly below
         ``self.distance_threshold`` and 0.0 otherwise.
     """
     # NOTE(review): `distance` is defined outside this block; presumably it
     # returns a scalar or per-sample array of distances -- confirm.
     gap = distance(achieved_goal, desired_goal)
     # Strict comparison: a gap exactly equal to the threshold is not a success.
     within_threshold = gap < self.distance_threshold
     return np.array(within_threshold, dtype=np.float64)