Example #1
    def get_future_egomotion(self, rec, index):
        rec_t0 = rec

        # Default to the identity transform (no egomotion), e.g. for the
        # last sample of a scene.
        future_egomotion = np.eye(4, dtype=np.float32)

        if index < len(self.ixes) - 1:
            rec_t1 = self.ixes[index + 1]

            if rec_t0['scene_token'] == rec_t1['scene_token']:
                egopose_t0 = self.nusc.get(
                    'ego_pose',
                    self.nusc.get(
                        'sample_data',
                        rec_t0['data']['LIDAR_TOP'])['ego_pose_token'])
                egopose_t1 = self.nusc.get(
                    'ego_pose',
                    self.nusc.get(
                        'sample_data',
                        rec_t1['data']['LIDAR_TOP'])['ego_pose_token'])

                egopose_t0 = convert_egopose_to_matrix_numpy(egopose_t0)
                egopose_t1 = convert_egopose_to_matrix_numpy(egopose_t1)

                # Relative pose from frame t to frame t+1.
                future_egomotion = invert_matrix_egopose_numpy(egopose_t1).dot(
                    egopose_t0)
                # Re-normalise the homogeneous bottom row to [0, 0, 0, 1].
                future_egomotion[3, :3] = 0.0
                future_egomotion[3, 3] = 1.0

        future_egomotion = torch.Tensor(future_egomotion).float()

        # Convert to 6DoF vector
        future_egomotion = mat2pose_vec(future_egomotion)
        return future_egomotion.unsqueeze(0)
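
The helpers convert_egopose_to_matrix_numpy and invert_matrix_egopose_numpy are not shown in this example. A minimal sketch of what they plausibly look like, assuming the nuScenes ego_pose record layout (a 'translation' vector plus a 'rotation' quaternion) and pyquaternion for the conversion:

import numpy as np
from pyquaternion import Quaternion

def convert_egopose_to_matrix_numpy(egopose):
    # Build a 4x4 homogeneous transform from a nuScenes ego_pose record.
    matrix = np.eye(4, dtype=np.float32)
    matrix[:3, :3] = Quaternion(egopose['rotation']).rotation_matrix
    matrix[:3, 3] = np.array(egopose['translation'])
    return matrix

def invert_matrix_egopose_numpy(egopose_matrix):
    # Invert a rigid transform in closed form: R^T and -R^T t.
    inverse = np.eye(4, dtype=np.float32)
    rotation = egopose_matrix[:3, :3]
    translation = egopose_matrix[:3, 3]
    inverse[:3, :3] = rotation.T
    inverse[:3, 3] = -rotation.T @ translation
    return inverse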
Example #2
    def __getitem__(self, index):
        """
        Returns
        -------
            data: dict with the following keys:
                image: torch.Tensor<float> (T, N, 3, H, W)
                    normalised camera images, with T the sequence length and N the number of cameras.
                intrinsics: torch.Tensor<float> (T, N, 3, 3)
                    intrinsics containing resizing and cropping parameters.
                extrinsics: torch.Tensor<float> (T, N, 4, 4)
                    6 DoF pose from world coordinates to camera coordinates.
                segmentation: torch.Tensor<int64> (T, 1, H_bev, W_bev)
                    (H_bev, W_bev) are the pixel dimensions in bird's-eye view.
                instance: torch.Tensor<int64> (T, 1, H_bev, W_bev)
                centerness: torch.Tensor<float> (T, 1, H_bev, W_bev)
                offset: torch.Tensor<float> (T, 2, H_bev, W_bev)
                flow: torch.Tensor<float> (T, 2, H_bev, W_bev)
                future_egomotion: torch.Tensor<float> (T, 6)
                    6 DoF egomotion t -> t+1
                sample_token: List<str> (T,)
                z_position: z-position (height) labels, one entry per frame.
                attribute: attribute labels, one entry per frame.

        """
        data = {}
        keys = [
            'image', 'intrinsics', 'extrinsics', 'segmentation', 'instance',
            'centerness', 'offset', 'flow', 'future_egomotion', 'sample_token',
            'z_position', 'attribute'
        ]
        for key in keys:
            data[key] = []

        instance_map = {}
        # Loop over all the frames in the sequence.
        for index_t in self.indices[index]:
            rec = self.ixes[index_t]

            images, intrinsics, extrinsics = self.get_input_data(rec)
            segmentation, instance, z_position, instance_map, attribute_label = self.get_label(
                rec, instance_map)

            future_egomotion = self.get_future_egomotion(rec, index_t)

            data['image'].append(images)
            data['intrinsics'].append(intrinsics)
            data['extrinsics'].append(extrinsics)
            data['segmentation'].append(segmentation)
            data['instance'].append(instance)
            data['future_egomotion'].append(future_egomotion)
            data['sample_token'].append(rec['token'])
            data['z_position'].append(z_position)
            data['attribute'].append(attribute_label)

        for key, value in data.items():
            if key in ['sample_token', 'centerness', 'offset', 'flow']:
                continue
            data[key] = torch.cat(value, dim=0)

        # If using Lyft, subsample every other frame and update the future egomotions.
        if self.cfg.MODEL.SUBSAMPLE:
            for key, value in data.items():
                if key in [
                        'future_egomotion', 'sample_token', 'centerness',
                        'offset', 'flow'
                ]:
                    continue
                data[key] = data[key][::2].clone()
            data['sample_token'] = data['sample_token'][::2]

            # Update future egomotions: compose consecutive frame-to-frame
            # motions so each remaining entry spans two original frames.
            future_egomotions_matrix = pose_vec2mat(data['future_egomotion'])
            future_egomotion_accum = torch.zeros_like(future_egomotions_matrix)
            future_egomotion_accum[:-1] = future_egomotions_matrix[:-1] @ future_egomotions_matrix[1:]
            future_egomotion_accum = mat2pose_vec(future_egomotion_accum)
            data['future_egomotion'] = future_egomotion_accum[::2].clone()

        instance_centerness, instance_offset, instance_flow = convert_instance_mask_to_center_and_offset_label(
            data['instance'],
            data['future_egomotion'],
            num_instances=len(instance_map),
            ignore_index=self.cfg.DATASET.IGNORE_INDEX,
            subtract_egomotion=True,
            spatial_extent=self.spatial_extent,
        )
        data['centerness'] = instance_centerness
        data['offset'] = instance_offset
        data['flow'] = instance_flow
        return data
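
A hypothetical usage sketch; the dataset class name and constructor below are assumptions, not shown in this example:

import torch

# Assumed: this __getitem__ belongs to a Dataset subclass, here called
# FuturePredictionDataset, built from a NuScenes handle and a config.
dataset = FuturePredictionDataset(nusc, is_train=True, cfg=cfg)
loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

batch = next(iter(loader))
# Default collation adds a leading batch dimension B to each tensor:
# batch['image'] is (B, T, N, 3, H, W), batch['future_egomotion'] is (B, T, 6).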
Example #3
def convert_instance_mask_to_center_and_offset_label(instance_img, future_egomotion, num_instances, ignore_index=255,
                                                     subtract_egomotion=True, sigma=3, spatial_extent=None):
    seq_len, h, w = instance_img.shape
    center_label = torch.zeros(seq_len, 1, h, w)
    offset_label = ignore_index * torch.ones(seq_len, 2, h, w)
    future_displacement_label = ignore_index * torch.ones(seq_len, 2, h, w)
    # x indexes the vertical axis, y the horizontal axis.
    x, y = torch.meshgrid(torch.arange(h, dtype=torch.float), torch.arange(w, dtype=torch.float), indexing='ij')

    if subtract_egomotion:
        future_egomotion_inv = mat2pose_vec(pose_vec2mat(future_egomotion).inverse())

    # Warp each frame's instance segmentation back into the previous frame's
    # reference, so displacements are measured net of egomotion.
    warped_instance_seg = {}
    for t in range(1, seq_len):
        if subtract_egomotion:
            warped_inst_t = warp_features(instance_img[t].unsqueeze(0).unsqueeze(1).float(),
                                          future_egomotion_inv[t - 1].unsqueeze(0), mode='nearest',
                                          spatial_extent=spatial_extent)
        else:
            # Guard against an undefined future_egomotion_inv when egomotion
            # is not subtracted: use the raw instance map.
            warped_inst_t = instance_img[t].unsqueeze(0).unsqueeze(1).float()
        warped_instance_seg[t] = warped_inst_t[0, 0]

    # Ignore id 0 which is the background
    for instance_id in range(1, num_instances+1):
        prev_xc = None
        prev_yc = None
        prev_mask = None
        for t in range(seq_len):
            instance_mask = (instance_img[t] == instance_id)
            if instance_mask.sum() == 0:
                # this instance is not in this frame
                prev_xc = None
                prev_yc = None
                prev_mask = None
                continue

            xc = x[instance_mask].mean().round().long()
            yc = y[instance_mask].mean().round().long()

            off_x = xc - x
            off_y = yc - y
            g = torch.exp(-(off_x ** 2 + off_y ** 2) / sigma ** 2)
            center_label[t, 0] = torch.maximum(center_label[t, 0], g)
            offset_label[t, 0, instance_mask] = off_x[instance_mask]
            offset_label[t, 1, instance_mask] = off_y[instance_mask]

            if prev_xc is not None:
                # Match the instance in the warped segmentation of frame t and
                # store its displacement from the previous centroid at t-1.
                warped_instance_mask = warped_instance_seg[t] == instance_id
                if warped_instance_mask.sum() > 0:
                    warped_xc = x[warped_instance_mask].mean().round()
                    warped_yc = y[warped_instance_mask].mean().round()

                    delta_x = warped_xc - prev_xc
                    delta_y = warped_yc - prev_yc
                    future_displacement_label[t - 1, 0, prev_mask] = delta_x
                    future_displacement_label[t - 1, 1, prev_mask] = delta_y

            prev_xc = xc
            prev_yc = yc
            prev_mask = instance_mask

    return center_label, offset_label, future_displacement_label
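
A self-contained toy illustration of the centerness/offset encoding used above, with an arbitrary 8x8 grid and the default sigma of 3:

import torch

h, w = 8, 8
instance_mask = torch.zeros(h, w, dtype=torch.bool)
instance_mask[2:5, 3:6] = True  # a single toy instance

x, y = torch.meshgrid(torch.arange(h, dtype=torch.float),
                      torch.arange(w, dtype=torch.float), indexing='ij')
xc = x[instance_mask].mean().round()
yc = y[instance_mask].mean().round()

# The Gaussian centerness map peaks at the instance centroid...
g = torch.exp(-((xc - x) ** 2 + (yc - y) ** 2) / 3 ** 2)
# ...and the offsets point from every instance pixel towards that centroid.
off_x = (xc - x)[instance_mask]
off_y = (yc - y)[instance_mask]
assert g[int(xc), int(yc)] == 1.0  # exact peak at the centroid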