Example No. 1
    def rotate(self, azimuth, axis=None):
        """Rotate the trackball about the "Up" axis by azimuth radians.

        Parameters
        ----------
        azimuth : float
            The number of radians to rotate.
        axis : (3,) float, optional
            The axis to rotate about. If given, it is used instead of the
            camera's current up (y) axis.
        """
        target = self._target

        y_axis = self._n_T_camera_world.matrix[:3, 1].flatten()
        if axis is not None:
            y_axis = axis
        x_rot_mat = transformations.rotation_matrix(-azimuth, y_axis, target)
        x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                                  x_rot_mat[:3, 3],
                                  from_frame='world',
                                  to_frame='world')
        self._n_T_camera_world = x_rot_tf.dot(self._n_T_camera_world)

        y_axis = self._T_camera_world.matrix[:3, 1].flatten()
        if axis is not None:
            y_axis = axis
        x_rot_mat = transformations.rotation_matrix(-azimuth, y_axis, target)
        x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                                  x_rot_mat[:3, 3],
                                  from_frame='world',
                                  to_frame='world')
        self._T_camera_world = x_rot_tf.dot(self._T_camera_world)
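
The call to transformations.rotation_matrix(angle, direction, point) above returns a 4x4 homogeneous matrix for a rotation about an axis through point, which is what makes the camera orbit the target instead of spinning in place. Below is a minimal pure-NumPy sketch of that construction (an illustration of the math, not the library's code; rotation_about_point is a hypothetical helper):

import numpy as np

def rotation_about_point(angle, axis, point):
    """Rodrigues rotation about an axis through `point`, as a 4x4 matrix."""
    axis = np.asarray(axis, dtype=np.float64)
    axis = axis / np.linalg.norm(axis)
    x, y, z = axis
    c, s = np.cos(angle), np.sin(angle)
    C = 1.0 - c
    R = np.array([[x*x*C + c,   x*y*C - z*s, x*z*C + y*s],
                  [y*x*C + z*s, y*y*C + c,   y*z*C - x*s],
                  [z*x*C - y*s, z*y*C + x*s, z*z*C + c]])
    M = np.eye(4)
    M[:3, :3] = R
    M[:3, 3] = np.asarray(point) - R.dot(point)  # conjugation by T(point)
    return M

# An eye at (0, 0, 2) orbited a quarter turn about the y axis through the
# origin lands at roughly (2, 0, 0).
print(rotation_about_point(np.pi / 2, [0, 1, 0], [0, 0, 0])[:3, :3].dot([0, 0, 2]))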
Example No. 2
    def scroll(self, clicks):
        """Zoom using a mouse scroll wheel motion.

        Parameters
        ----------
        clicks : int
            The number of clicks. Positive numbers indicate forward wheel movement.
        """
        target = self._target
        ratio = 0.90

        z_axis = self._n_T_camera_world.matrix[:3, 2].flatten()
        eye = self._n_T_camera_world.matrix[:3, 3].flatten()
        radius = np.linalg.norm(eye - target)
        translation = clicks * (1 - ratio) * radius * z_axis
        t_tf = RigidTransform(translation=translation,
                              from_frame='world',
                              to_frame='world')
        self._n_T_camera_world = t_tf.dot(self._n_T_camera_world)

        z_axis = self._T_camera_world.matrix[:3, 2].flatten()
        eye = self._T_camera_world.matrix[:3, 3].flatten()
        radius = np.linalg.norm(eye - target)
        translation = clicks * (1 - ratio) * radius * z_axis
        t_tf = RigidTransform(translation=translation,
                              from_frame='world',
                              to_frame='world')
        self._T_camera_world = t_tf.dot(self._T_camera_world)
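
With ratio = 0.90 above, each wheel click translates the eye along the camera z-axis by 10% of the current eye-to-target distance; the distance is sampled once per call, so a scroll event of n clicks moves n times that step. A quick numeric check with hypothetical values (the sign convention depends on how the camera frame's z-axis is oriented):

import numpy as np

eye, target = np.array([0.0, 0.0, 2.0]), np.zeros(3)
z_axis = np.array([0.0, 0.0, 1.0])         # camera z-axis in world coordinates
radius = np.linalg.norm(eye - target)      # 2.0
for clicks in (1, -1, 3):
    step = clicks * (1 - 0.90) * radius    # 0.2 world units per click
    print(clicks, eye + step * z_axis)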
# Imports used below (an assumption about the surrounding script:
# RigidTransform and CameraIntrinsics come from the Berkeley autolab stack;
# CameraIntrinsics lived in the separate `perception` package in older
# releases):
import argparse
import copy
import json
import math
import os
import re
import sys

import numpy as np
from autolab_core import CameraIntrinsics, RigidTransform


def main():
    # ====================================
    # parse arguments
    # ====================================
    parser = argparse.ArgumentParser(
        description='Generate segmentation images and updated json files.')
    parser.add_argument('-d',
                        '--data-dir',
                        default=os.getcwd(),
                        help='directory containing images and json files')
    parser.add_argument(
        '-o',
        '--object-settings',
        help='object_settings file where the fixed_model_transform corresponds '
        'to the poses in the frame annotation jsons. '
        'default: <data_dir>/_object_settings.json')
    parser.add_argument(
        '-t',
        '--target-object-settings',
        help=
        'if given, transform all poses into the fixed_model_transform from this file.'
    )
    args = parser.parse_args()

    if not args.object_settings:
        args.object_settings = os.path.join(args.data_dir,
                                            '_object_settings.json')
    if not args.target_object_settings:
        args.target_object_settings = args.object_settings

    # =====================
    # parse object_settings
    # =====================
    with open(args.object_settings, 'r') as f:
        object_settings_json = json.load(f)
    with open(args.target_object_settings, 'r') as f:
        target_object_settings_json = json.load(f)

    if len(object_settings_json['exported_objects']) != len(
            target_object_settings_json['exported_objects']):
        print("FATAL: object_settings and target_object_settings do not match!")
        sys.exit(-1)
    models = {}
    for model_json, target_model_json in zip(
            object_settings_json['exported_objects'],
            target_object_settings_json['exported_objects']):
        class_name = model_json['class']
        if class_name != target_model_json['class']:
            print("FATAL: object_settings and target_object_settings do not match!")
            sys.exit(-1)
        segmentation_class_id = model_json['segmentation_class_id']
        cuboid_dimensions = np.array(model_json['cuboid_dimensions'])

        # calculate model_transform
        fixed_model_transform_mat = np.transpose(
            np.array(model_json['fixed_model_transform']))
        fixed_model_transform = RigidTransform(
            rotation=fixed_model_transform_mat[:3, :3],
            translation=fixed_model_transform_mat[:3, 3],
            from_frame='ycb_model',
            to_frame='source_model')
        target_fixed_model_transform_mat = np.transpose(
            np.array(target_model_json['fixed_model_transform']))
        target_fixed_model_transform = RigidTransform(
            rotation=target_fixed_model_transform_mat[:3, :3],
            translation=target_fixed_model_transform_mat[:3, 3],
            from_frame='ycb_model',
            to_frame='target_model_original')
        model_transform = fixed_model_transform.dot(
            target_fixed_model_transform.inverse())

        models[class_name] = ModelConfig(class_name, segmentation_class_id,
                                         model_transform, cuboid_dimensions)

    # ==================================================
    # parse camera_settings and set up camera intrinsics
    # ==================================================
    with open(os.path.join(args.data_dir, '_camera_settings.json'), 'r') as f:
        camera_settings_json = json.load(f)['camera_settings'][0]

    camera_intrinsics = CameraIntrinsics(
        frame='camera',
        fx=camera_settings_json['intrinsic_settings']['fx'],
        fy=camera_settings_json['intrinsic_settings']['fy'],
        cx=camera_settings_json['intrinsic_settings']['cx'],
        cy=camera_settings_json['intrinsic_settings']['cy'],
        skew=camera_settings_json['intrinsic_settings']['s'],
        height=camera_settings_json['captured_image_size']['height'],
        width=camera_settings_json['captured_image_size']['width'])

    # ====================================
    # process each frame
    # ====================================
    pattern = re.compile(r'\d{3,}\.json')
    json_files = sorted([
        os.path.join(args.data_dir, f) for f in os.listdir(args.data_dir)
        if pattern.match(f)
    ])
    for json_file in json_files:
        filename_prefix = json_file[:-len('json')]
        print('\n---------------------- {}*'.format(filename_prefix))
        with open(json_file, 'r') as f:
            frame_json = json.load(f)

        updated_frame_json = process_frame(frame_json, camera_intrinsics,
                                           models)
        with open(filename_prefix + 'flipped.json', 'w') as f:
            json.dump(updated_frame_json, f, indent=2, sort_keys=True)
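
The [:3, 3] translation slice taken after np.transpose in main() implies that the json stores fixed_model_transform with the translation in its fourth row. A hypothetical round trip showing why the transpose is needed:

import numpy as np

# Hypothetical fixed_model_transform as it would appear in _object_settings.json:
# identity rotation with translation (10, 20, 30) in the fourth row.
fixed_model_transform_json = [[1, 0, 0, 0],
                              [0, 1, 0, 0],
                              [0, 0, 1, 0],
                              [10, 20, 30, 1]]
mat = np.transpose(np.array(fixed_model_transform_json))
print(mat[:3, :3])   # identity rotation block
print(mat[:3, 3])    # [10 20 30]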
def process_frame(frame_json, camera_intrinsics, models):
    num_objects = len(frame_json['objects'])
    if num_objects == 0:
        print('no objects in frame!')
        return frame_json

    # copy relevant fields of json
    updated_frame_json = {'camera_data': {}, 'objects': []}
    if frame_json['camera_data']:
        updated_frame_json['camera_data'][
            'location_worldframe'] = copy.deepcopy(
                frame_json['camera_data']['location_worldframe'])
        updated_frame_json['camera_data'][
            'quaternion_xyzw_worldframe'] = copy.deepcopy(
                frame_json['camera_data']['quaternion_xyzw_worldframe'])
    else:
        print('Warning: no `camera_data` field!')

    for i, source_object in enumerate(frame_json['objects']):
        updated_frame_json['objects'].append({})
        for key in ['class', 'bounding_box', 'instance_id', 'visibility']:
            updated_frame_json['objects'][i][key] = copy.deepcopy(
                source_object[key])

    # get object poses and flip symmetrical objects if necessary
    object_poses = []
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        scene_object_json = frame_json['objects'][object_index]
        (translation, rotation) = object_pose_from_json(scene_object_json)
        object_pose = RigidTransform(rotation=rotation,
                                     translation=translation,
                                     from_frame='source_model',
                                     to_frame='camera')
        object_pose = object_pose.dot(models[model_name].model_transform)

        # Handle symmetrical objects: if the z axis (= x axis in mesh) points
        # towards the camera, rotate by 180 degrees around x (= z axis in mesh).
        # This always keeps the "green/yellow" short side (in nvdu_viz) pointed
        # towards the camera; the "blue/magenta" short side should never be
        # visible. This is necessary for DOPE training, since the KLT box is
        # symmetrical and both sides look the same.
        symmetry_flip_tf = RigidTransform(from_frame='target_model',
                                          to_frame='target_model_original')
        z_axis = rotation.dot(np.array([0.0, 0.0, 1.0], dtype=np.float64))
        if z_axis.dot(translation) < 0:
            symmetry_flip_tf.rotation = RigidTransform.x_axis_rotation(math.pi)
        object_pose = object_pose.dot(symmetry_flip_tf)

        object_poses.append(object_pose)

        updated_frame_json['objects'][object_index][
            'location'] = object_pose.translation.tolist()
        updated_frame_json['objects'][object_index][
            'quaternion_xyzw'] = np.roll(object_pose.quaternion, -1).tolist()

    # add pose_transform_permuted
    for object_index in range(num_objects):
        ptp_json = pose_transform_permuted_to_json(
            object_poses[object_index].translation,
            object_poses[object_index].rotation)
        updated_frame_json['objects'][object_index].update(ptp_json)

    # compute cuboid, cuboid_centroid, projected_cuboid, projected_cuboid_centroid, bounding_box
    for object_index in range(num_objects):
        model_name = updated_frame_json['objects'][object_index]['class']
        cuboid_json = get_cuboid(object_poses[object_index],
                                 models[model_name].cuboid_dimensions,
                                 camera_intrinsics)
        updated_frame_json['objects'][object_index].update(cuboid_json)

    return updated_frame_json
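
process_frame() writes quaternion_xyzw via np.roll(object_pose.quaternion, -1): autolab_core's RigidTransform exposes quaternions in (w, x, y, z) order, so rolling left by one yields the (x, y, z, w) order the frame jsons use. A one-line check:

import numpy as np

q_wxyz = np.array([1.0, 0.0, 0.0, 0.0])   # identity rotation in (w, x, y, z)
print(np.roll(q_wxyz, -1))                # [0. 0. 0. 1.] -- identity in (x, y, z, w)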
Example No. 5
    def drag(self, point):
        """Update the tracball during a drag.

        Parameters
        ----------
        point : (2,) int
            The current x and y pixel coordinates of the mouse during a drag.
            This will compute a movement for the trackball with the relative motion
            between this point and the one marked by down().
        """
        point = np.array(point, dtype=np.float32)
        dx, dy = point - self._pdown
        mindim = 0.3 * np.min(self._size)

        target = self._target
        x_axis = self._T_camera_world.matrix[:3, 0].flatten()
        y_axis = self._T_camera_world.matrix[:3, 1].flatten()
        z_axis = self._T_camera_world.matrix[:3, 2].flatten()
        eye = self._T_camera_world.matrix[:3, 3].flatten()

        # Interpret drag as a rotation
        if self._state == Trackball.STATE_ROTATE:
            x_angle = dx / mindim
            x_rot_mat = transformations.rotation_matrix(
                x_angle, y_axis, target)
            x_rot_tf = RigidTransform(x_rot_mat[:3, :3],
                                      x_rot_mat[:3, 3],
                                      from_frame='world',
                                      to_frame='world')

            y_angle = dy / mindim
            y_rot_mat = transformations.rotation_matrix(
                y_angle, x_axis, target)
            y_rot_tf = RigidTransform(y_rot_mat[:3, :3],
                                      y_rot_mat[:3, 3],
                                      from_frame='world',
                                      to_frame='world')

            self._n_T_camera_world = y_rot_tf.dot(
                x_rot_tf.dot(self._T_camera_world))

        # Interpret drag as a roll about the camera axis
        elif self._state == Trackball.STATE_ROLL:
            center = self._size / 2.0
            v_init = self._pdown - center
            v_curr = point - center
            v_init = v_init / np.linalg.norm(v_init)
            v_curr = v_curr / np.linalg.norm(v_curr)

            theta = np.arctan2(v_curr[1], v_curr[0]) - np.arctan2(
                v_init[1], v_init[0])

            rot_mat = transformations.rotation_matrix(theta, z_axis, target)
            rot_tf = RigidTransform(rot_mat[:3, :3],
                                    rot_mat[:3, 3],
                                    from_frame='world',
                                    to_frame='world')

            self._n_T_camera_world = rot_tf.dot(self._T_camera_world)

        # Interpret drag as a camera pan in view plane
        elif self._state == Trackball.STATE_PAN:
            dx = -dx / (5.0 * mindim) * self._scale
            dy = dy / (5.0 * mindim) * self._scale

            translation = dx * x_axis + dy * y_axis
            self._n_target = self._target + translation
            t_tf = RigidTransform(translation=translation,
                                  from_frame='world',
                                  to_frame='world')
            self._n_T_camera_world = t_tf.dot(self._T_camera_world)

        # Interpret drag as a zoom motion
        elif self._state == Trackball.STATE_ZOOM:
            radius = np.linalg.norm(eye - target)
            ratio = 0.0
            if dy < 0:
                ratio = np.exp(abs(dy) / (0.5 * self._size[1])) - 1.0
            elif dy > 0:
                ratio = 1.0 - np.exp(-dy / (0.5 * (self._size[1])))
            translation = np.sign(dy) * ratio * radius * z_axis
            t_tf = RigidTransform(translation=translation,
                                  from_frame='world',
                                  to_frame='world')
            self._n_T_camera_world = t_tf.dot(self._T_camera_world)
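
In the STATE_ROLL branch above, theta is the signed angle between the press vector and the current vector, both measured from the window center, obtained by differencing their arctan2 angles. A small check with hypothetical pixel coordinates:

import numpy as np

size = np.array([640.0, 480.0])
center = size / 2.0
pdown = np.array([420.0, 240.0])           # press point: right of center
point = np.array([320.0, 340.0])           # current point: below center
v_init = (pdown - center) / np.linalg.norm(pdown - center)
v_curr = (point - center) / np.linalg.norm(point - center)
theta = np.arctan2(v_curr[1], v_curr[0]) - np.arctan2(v_init[1], v_init[0])
print(np.degrees(theta))                   # 90.0: a quarter-turn roll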
Example No. 6
fixed_model_transform = RigidTransform(
    rotation=fixed_model_transform_mat[:3, :3],
    translation=fixed_model_transform_mat[:3, 3],
    from_frame='ycb_model',
    to_frame='source_model')

fmt = {}
for j in target_model_json['exported_objects']:
    if j['class'] == MODEL_NAME:
        fmt = j['fixed_model_transform']
        break
target_fixed_model_transform_mat = np.transpose(np.array(fmt))
target_fixed_model_transform = RigidTransform(
    rotation=target_fixed_model_transform_mat[:3, :3],
    translation=target_fixed_model_transform_mat[:3, 3],
    from_frame='ycb_model',
    to_frame='target_model')
model_transform = fixed_model_transform.dot(
    target_fixed_model_transform.inverse())

(translation,
 rotation) = object_pose_from_json(reference_source_json['objects'][0])
reference_source_pose = RigidTransform(rotation=rotation,
                                       translation=translation,
                                       from_frame='target_model',
                                       to_frame='camera')

(translation,
 rotation) = object_pose_from_json(reference_target_json['objects'][0])
reference_target_pose = RigidTransform(rotation=rotation,
                                       translation=translation,
                                       from_frame='target_model_fixed',
                                       to_frame='camera')
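
The model_transform composition in this example chains frames through ycb_model: a ycb_model-to-source_model transform composed with the inverse of a ycb_model-to-target_model transform yields a target_model-to-source_model transform. The same bookkeeping with plain 4x4 matrices (a sketch with hypothetical translations, not the library call):

import numpy as np

# source_from_ycb maps ycb_model coordinates into source_model coordinates,
# and likewise for target_from_ycb.
source_from_ycb = np.eye(4)
source_from_ycb[:3, 3] = [0.1, 0.0, 0.0]
target_from_ycb = np.eye(4)
target_from_ycb[:3, 3] = [0.0, 0.2, 0.0]
source_from_target = source_from_ycb @ np.linalg.inv(target_from_ycb)
print(source_from_target[:3, 3])   # [ 0.1 -0.2  0. ]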