def test_evaluate_state_difficulty_4(self): difficulty = 4 pose_origin = move_cube.Pose() pose_trans = move_cube.Pose(position=[1, 2, 3]) pose_rot = move_cube.Pose( orientation=Rotation.from_euler("z", 0.42).as_quat() ) pose_both = move_cube.Pose( [1, 2, 3], Rotation.from_euler("z", 0.42).as_quat() ) # needs to be zero for exact match cost = move_cube.evaluate_state(pose_origin, pose_origin, difficulty) self.assertEqual(cost, 0) # None-zero if there is translation, rotation or both self.assertNotEqual( move_cube.evaluate_state(pose_origin, pose_trans, difficulty), 0 ) self.assertNotEqual( move_cube.evaluate_state(pose_origin, pose_rot, difficulty), 0 ) self.assertNotEqual( move_cube.evaluate_state(pose_origin, pose_both, difficulty), 0 )
def compute_reward(self, achieved_goal, desired_goal, info): """Compute the reward for the given achieved and desired goal. Args: achieved_goal : Current pose of the object. desired_goal : Goal pose of the object. info : An info dictionary containing a field "difficulty" which specifies the difficulty level. Returns: float: The reward that corresponds to the provided achieved goal w.r.t. to the desired goal. Note that the following should always hold true:: ob, reward, done, info = env.step() assert reward == env.compute_reward( ob['achieved_goal'], ob['desired_goal'], info, ) """ if self.sparse_reward: return -np.float32((move_cube.evaluate_state( move_cube.Pose(desired_goal[0:3], desired_goal[3:7]), move_cube.Pose(achieved_goal[0:3], achieved_goal[3:7]), info['difficulty'], ) > 0.01)) else: return move_cube.evaluate_state( move_cube.Pose.from_dict(desired_goal), move_cube.Pose.from_dict(achieved_goal), info['difficulty'], )
def _competition_reward(observation, difficulty): object_pose = move_cube.Pose( observation['object_position'], observation['object_orientation'] ) goal_pose = move_cube.Pose( observation['goal_object_position'], observation['goal_object_orientation'] ) return -move_cube.evaluate_state(goal_pose, object_pose, difficulty)
def main(): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "--logfile", "-l", required=True, type=str, help="Path to the log file.", ) parser.add_argument( "--difficulty", "-d", required=True, type=int, help="The difficulty level of the goal (for reward computation).", ) parser.add_argument( "--initial-pose", "-i", required=True, type=str, metavar="JSON", help="Initial pose of the cube as JSON string.", ) parser.add_argument( "--goal-pose", "-g", required=True, type=str, metavar="JSON", help="Goal pose of the cube as JSON string.", ) args = parser.parse_args() with open(args.logfile, "rb") as fh: log = pickle.load(fh) initial_object_pose = move_cube.Pose.from_json(args.initial_pose) goal_pose = move_cube.Pose.from_json(args.goal_pose) # verify that the initial object pose matches with the one in the log file try: np.testing.assert_array_almost_equal( initial_object_pose.position, log["initial_object_pose"].position, err_msg=( "Given initial object position does not match with log file.")) np.testing.assert_array_almost_equal( initial_object_pose.orientation, log["initial_object_pose"].orientation, err_msg=( "Given initial object orientation does not match with log file." )) except AssertionError as e: print("Failed.", file=sys.stderr) print(e, file=sys.stderr) sys.exit(1) platform = trifinger_platform.TriFingerPlatform( visualization=False, initial_object_pose=initial_object_pose) # verify that the robot is initialized to the same position as in the log # file initial_robot_position = platform.get_robot_observation(0).position try: np.testing.assert_array_almost_equal( initial_robot_position, log["initial_robot_position"], err_msg=("Initial robot position does not match with log file.")) except AssertionError as e: print("Failed.", file=sys.stderr) print(e, file=sys.stderr) sys.exit(1) # verify that the number of logged actions matches with the episode length n_actions = len(log["actions"]) assert ( n_actions == move_cube.episode_length ), "Number of actions in log does not match with expected episode length." accumulated_reward = 0 for logged_action in log["actions"]: action = logged_action["action"] t = platform.append_desired_action(action) robot_obs = platform.get_robot_observation(t) cube_pose = platform.get_object_pose(t) reward = -move_cube.evaluate_state(goal_pose, cube_pose, args.difficulty) accumulated_reward += reward assert logged_action["t"] == t np.testing.assert_array_almost_equal( robot_obs.position, logged_action["robot_observation"].position, err_msg=("Step %d: Recorded robot position does not match with" " the one achieved by the replay" % t)) np.testing.assert_array_almost_equal( robot_obs.torque, logged_action["robot_observation"].torque, err_msg=("Step %d: Recorded robot torque does not match with" " the one achieved by the replay" % t)) np.testing.assert_array_almost_equal( robot_obs.velocity, logged_action["robot_observation"].velocity, err_msg=("Step %d: Recorded robot velocity does not match with" " the one achieved by the replay" % t)) np.testing.assert_array_almost_equal( cube_pose.position, logged_action["object_pose"].position, err_msg=("Step %d: Recorded object position does not match with" " the one achieved by the replay" % t)) np.testing.assert_array_almost_equal( cube_pose.orientation, logged_action["object_pose"].orientation, err_msg=("Step %d: Recorded object orientation does not match with" " the one achieved by the replay" % t)) cube_pose = platform.get_object_pose(t) final_pose = log["final_object_pose"]["pose"] print("Accumulated Reward:", accumulated_reward) # verify that actual and logged final object pose match try: np.testing.assert_array_almost_equal( cube_pose.position, final_pose.position, decimal=3, err_msg=("Recorded object position does not match with the one" " achieved by the replay")) np.testing.assert_array_almost_equal( cube_pose.orientation, final_pose.orientation, decimal=3, err_msg=("Recorded object orientation does not match with the one" " achieved by the replay")) except AssertionError as e: print("Failed.", file=sys.stderr) print(e, file=sys.stderr) sys.exit(1) print("Passed.")