Example #1
    def test_bbox_transform(self):
        # TODO: test correctness
        sizes = [12]
        strides = [8]
        ratios = np.array([1, 2], K.floatx())
        scales = np.array([1, 2], K.floatx())
        anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

        pyramid_levels = [3]
        image_shape = (16, 16)
        anchors = utils.anchors_for_shape(image_shape,
                                          pyramid_levels=pyramid_levels,
                                          anchor_params=anchor_params)

        # test custom std/mean
        targets = utils.bbox_transform(anchors,
                                       np.random.random((1, 4)),
                                       mean=[0],
                                       std=[0.2])

        self.assertTupleEqual(targets.shape, (16, 4))

        # test bad `mean` value
        with self.assertRaises(ValueError):
            utils.bbox_transform(anchors, [1], mean='invalid', std=None)
        # test bad `std` value
        with self.assertRaises(ValueError):
            utils.bbox_transform(anchors, [1], mean=None, std='invalid')
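
The test above only checks output shape and error handling. For reference, here is a minimal sketch of the keras-retinanet-style box parameterization that bbox_transform is commonly built on (an illustrative re-implementation under that assumption, not the library code):

    import numpy as np

    def bbox_transform_sketch(anchors, gt_boxes, mean=0.0, std=0.2):
        # Corner offsets between ground truth and anchors, normalized by
        # anchor width/height, then shifted/scaled by (mean, std) as
        # exercised in the test above.
        widths = anchors[:, 2] - anchors[:, 0]
        heights = anchors[:, 3] - anchors[:, 1]
        targets = np.stack([
            (gt_boxes[:, 0] - anchors[:, 0]) / widths,
            (gt_boxes[:, 1] - anchors[:, 1]) / heights,
            (gt_boxes[:, 2] - anchors[:, 2]) / widths,
            (gt_boxes[:, 3] - anchors[:, 3]) / heights,
        ], axis=1)
        return (targets - mean) / std

With 16 anchors and a single (1, 4) ground-truth box this broadcasts to the (16, 4) shape asserted above.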
Example #2
    def test_anchor_targets_bbox(self):
        # TODO: test correctness
        sizes = [12]
        strides = [8]
        ratios = np.array([1, 2], K.floatx())
        scales = np.array([1, 2], K.floatx())
        anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

        pyramid_levels = [3]
        image_shape = (16, 16)
        anchors = utils.anchors_for_shape(image_shape,
                                          pyramid_levels=pyramid_levels,
                                          anchor_params=anchor_params)

        # test image / annotation size mismatch
        with self.assertRaises(ValueError):
            utils.anchor_targets_bbox(anchors, [1], [1, 2, 3], 1)
        # test image / annotation not empty
        with self.assertRaises(ValueError):
            utils.anchor_targets_bbox(anchors, [], [], 1)
        # test annotation structure
        with self.assertRaises(ValueError):
            utils.anchor_targets_bbox(anchors, [1], [{'labels': 1}], 1)
        with self.assertRaises(ValueError):
            utils.anchor_targets_bbox(anchors, [1], [{'bboxes': 1}], 1)
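
The structure checks above imply that each entry in the annotations list must be a dict carrying both 'bboxes' and 'labels' keys. A minimal well-formed entry (illustrative; the x1, y1, x2, y2 box layout is inferred from the anchor-value tests below):

    import numpy as np

    annotations = {
        'bboxes': np.zeros((0, 4)),  # (N, 4) boxes as x1, y1, x2, y2
        'labels': np.zeros((0,)),    # (N,) class ids
    }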
Example #3
    def test_anchors_for_shape_dimensions(self):
        sizes = [32, 64, 128]
        strides = [8, 16, 32]
        ratios = np.array([0.5, 1, 2, 3], K.floatx())
        scales = np.array([1, 1.2, 1.6], K.floatx())
        anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

        pyramid_levels = [3, 4, 5]
        image_shape = tensor_shape.TensorShape((64, 64))
        all_anchors = utils.anchors_for_shape(image_shape,
                                              pyramid_levels=pyramid_levels,
                                              anchor_params=anchor_params)

        self.assertTupleEqual(all_anchors.shape, (1008, 4))
        self.assertEqual(anchor_params.num_anchors(), 12)
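
The expected count follows directly from the feature-map sizes: a 64x64 input at pyramid levels 3-5 (strides 8, 16, 32) yields 8x8, 4x4 and 2x2 grids, and each location gets len(ratios) * len(scales) anchors:

    locations = 8 * 8 + 4 * 4 + 2 * 2   # 84 positions across levels 3-5
    anchors_per_location = 4 * 3        # len(ratios) * len(scales) == 12
    assert locations * anchors_per_location == 1008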
Example #4
    def test_anchors_for_shape_values(self):
        sizes = [12]
        strides = [8]
        ratios = np.array([1, 2], K.floatx())
        scales = np.array([1, 2], K.floatx())
        anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

        pyramid_levels = [3]
        image_shape = (16, 16)
        all_anchors = utils.anchors_for_shape(image_shape,
                                              pyramid_levels=pyramid_levels,
                                              anchor_params=anchor_params)

        # use assertAllClose to tolerate floating-point imprecision.
        # The 2x2 level-3 grid gives four anchor centers; at each center
        # the anchors enumerate every (ratio, scale) pair, with width
        # size * scale / sqrt(ratio) and height size * scale * sqrt(ratio).
        anchor_centers = [
            (strides[0] / 2, strides[0] / 2),
            (strides[0] * 3 / 2, strides[0] / 2),
            (strides[0] / 2, strides[0] * 3 / 2),
            (strides[0] * 3 / 2, strides[0] * 3 / 2),
        ]
        idx = 0
        for cx, cy in anchor_centers:
            for ratio in ratios:
                for scale in scales:
                    half_w = (sizes[0] * scale / np.sqrt(ratio)) / 2
                    half_h = (sizes[0] * scale * np.sqrt(ratio)) / 2
                    self.assertAllClose(
                        all_anchors[idx, :],
                        [cx - half_w, cy - half_h, cx + half_w, cy + half_h])
                    idx += 1
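
The /np.sqrt(ratio) and *np.sqrt(ratio) factors in the expected values keep the anchor area fixed at (size * scale)**2 while setting the height/width aspect to ratio; a quick sanity check:

    import numpy as np

    size, scale, ratio = 12, 2, 2
    w = size * scale / np.sqrt(ratio)
    h = size * scale * np.sqrt(ratio)
    assert np.isclose(w * h, (size * scale) ** 2)  # area preserved
    assert np.isclose(h / w, ratio)                # aspect equals ratio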
Example #5
    def _get_batches_of_transformed_samples(self, index_array):
        if self.data_format == 'channels_first':
            batch_x = np.zeros(
                (len(index_array), self.x.shape[1], self.frames_per_batch,
                 self.x.shape[3], self.x.shape[4]))
        else:
            batch_x = np.zeros(
                tuple([len(index_array), self.frames_per_batch] +
                      list(self.x.shape)[2:]))

        if self.panoptic:
            if self.data_format == 'channels_first':
                batch_y_semantic_list = [
                    np.zeros(
                        tuple([
                            len(index_array), y_semantic.shape[1],
                            self.frames_per_batch, y_semantic.shape[3],
                            y_semantic.shape[4]
                        ])) for y_semantic in self.y_semantic_list
                ]
            else:
                batch_y_semantic_list = [
                    np.zeros(
                        tuple([len(index_array), self.frames_per_batch] +
                              list(y_semantic.shape[2:])))
                    for y_semantic in self.y_semantic_list
                ]

        annotations_list = [[] for _ in range(self.frames_per_batch)]

        max_shape = []

        for i, j in enumerate(index_array):
            last_frame = self.x.shape[self.time_axis] - self.frames_per_batch
            time_start = np.random.randint(0, high=last_frame)
            time_end = time_start + self.frames_per_batch
            times = list(np.arange(time_start, time_end))

            if self.time_axis == 1:
                x = self.x[j, time_start:time_end, ...]
                y = self.y[j, time_start:time_end, ...]
            elif self.time_axis == 2:
                x = self.x[j, :, time_start:time_end, ...]
                y = self.y[j, :, time_start:time_end, ...]

            if self.panoptic:
                if self.time_axis == 1:
                    y_semantic_list = [
                        y_semantic[j, time_start:time_end, ...]
                        for y_semantic in self.y_semantic_list
                    ]
                elif self.time_axis == 2:
                    y_semantic_list = [
                        y_semantic[j, :, time_start:time_end, ...]
                        for y_semantic in self.y_semantic_list
                    ]

            # Apply transformation
            if self.panoptic:
                x, y_list = self.movie_data_generator.random_transform(
                    x, [y] + y_semantic_list)
                y = y_list[0]
                y_semantic_list = y_list[1:]
            else:
                x, y = self.movie_data_generator.random_transform(x, y)

            x = self.movie_data_generator.standardize(x)

            # Find max shape of image data.  Used for masking.
            if not max_shape:
                max_shape = list(x.shape)
            else:
                for k in range(len(x.shape)):
                    if x.shape[k] > max_shape[k]:
                        max_shape[k] = x.shape[k]

            # Get the bounding boxes from the transformed masks!
            for idx_time, time in enumerate(times):
                if self.time_axis == 1:
                    annotations = self.load_annotations(y[idx_time])
                elif self.time_axis == 2:
                    annotations = self.load_annotations(y[:, idx_time, ...])
                annotations_list[idx_time].append(annotations)

            batch_x[i] = x

            if self.panoptic:
                for k in range(len(y_semantic_list)):
                    batch_y_semantic_list[k][i] = y_semantic_list[k]

        if self.data_format == 'channels_first':
            batch_x_shape = [
                batch_x.shape[1], batch_x.shape[3], batch_x.shape[4]
            ]
        else:
            batch_x_shape = batch_x.shape[2:]

        anchors = anchors_for_shape(batch_x_shape,
                                    pyramid_levels=self.pyramid_levels,
                                    anchor_params=self.anchor_params,
                                    shapes_callback=self.compute_shapes)

        regressions_list = []
        labels_list = []

        if self.data_format == 'channels_first':
            batch_x_frame = batch_x[:, :, 0, ...]
        else:
            batch_x_frame = batch_x[:, 0, ...]
        # iterate over frame indices directly rather than relying on the
        # `times` variable leaked from the per-sample loop above
        for idx in range(self.frames_per_batch):
            regressions, labels = anchor_targets_bbox(anchors, batch_x_frame,
                                                      annotations_list[idx],
                                                      self.num_classes)
            regressions_list.append(regressions)
            labels_list.append(labels)

        regressions = np.stack(regressions_list, axis=self.time_axis)
        labels = np.stack(labels_list, axis=self.time_axis)

        # was a list for max shape indexing
        max_shape = tuple(
            [max_shape[self.row_axis - 1], max_shape[self.col_axis - 1]])

        if self.include_masks:
            # masks_batch has shape: (batch size, frames_per_batch,
            #     max_annotations, bbox_x1 + bbox_y1 + bbox_x2 + bbox_y2 +
            #     label + width + height + max_image_dimension)

            annotations_list_flatten = [
                item for sublist in annotations_list for item in sublist]
            max_annotations = max(
                len(a['masks']) for a in annotations_list_flatten)
            masks_batch_shape = (len(index_array), self.frames_per_batch,
                                 max_annotations,
                                 5 + 2 + max_shape[0] * max_shape[1])
            masks_batch = np.zeros(masks_batch_shape, dtype=K.floatx())

            for idx_time in range(self.frames_per_batch):
                annotations_frame = annotations_list[idx_time]
                for idx_batch, ann in enumerate(annotations_frame):
                    masks_batch[
                        idx_batch,
                        idx_time, :ann['bboxes'].shape[0], :4] = ann['bboxes']
                    masks_batch[idx_batch, idx_time, :ann['labels'].shape[0],
                                4] = ann['labels']
                    masks_batch[idx_batch, idx_time, :,
                                5] = max_shape[1]  # width
                    masks_batch[idx_batch, idx_time, :,
                                6] = max_shape[0]  # height

                    # add flattened mask
                    for idx_mask, mask in enumerate(ann['masks']):
                        masks_batch[idx_batch, idx_time, idx_mask,
                                    7:] = mask.flatten()

        if self.save_to_dir:
            for i, j in enumerate(index_array):
                for frame in range(batch_x.shape[self.time_axis]):
                    if self.time_axis == 2:
                        img = array_to_img(batch_x[i, :, frame],
                                           self.data_format,
                                           scale=True)
                    else:
                        img = array_to_img(batch_x[i, frame],
                                           self.data_format,
                                           scale=True)
                    fname = '{prefix}_{index}_{hash}.{format}'.format(
                        prefix=self.save_prefix,
                        index=j,
                        hash=np.random.randint(1e4),
                        format=self.save_format)
                    img.save(os.path.join(self.save_to_dir, fname))

        batch_outputs = [regressions, labels]
        if self.include_masks:
            batch_outputs.append(masks_batch)
        if self.include_final_detection_layer:
            batch_outputs.append(masks_batch)
        if self.panoptic:
            batch_outputs += batch_y_semantic_list

        return batch_x, batch_outputs
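
Each masks_batch row packs one detection as [x1, y1, x2, y2, label, width, height, flattened mask]. A hypothetical helper (not part of the library) that inverts this packing for a single row:

    import numpy as np

    def unpack_mask_row(row):
        # row: one (5 + 2 + H*W,) vector taken from masks_batch
        bbox = row[:4]
        label = row[4]
        width, height = int(row[5]), int(row[6])
        mask = row[7:7 + height * width].reshape((height, width))
        return bbox, label, mask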
Example #6
    def _get_batches_of_transformed_samples(self, index_array):
        batch_x = np.zeros(tuple([len(index_array)] + list(self.x.shape)[1:]))

        batch_y_semantic_list = []
        for y_sem in self.y_semantic_list:
            shape = tuple([len(index_array)] + list(y_sem.shape[1:]))
            batch_y_semantic_list.append(np.zeros(shape, dtype=y_sem.dtype))

        annotations_list = []

        max_shape = []

        for i, j in enumerate(index_array):
            x = self.x[j]
            y = self.y[j]

            y_semantic_list = [y_sem[j] for y_sem in self.y_semantic_list]

            # Apply transformation
            x, y_list = self.image_data_generator.random_transform(
                x, [y] + y_semantic_list)

            y = y_list[0]
            y_semantic_list = y_list[1:]

            # Find max shape of image data.  Used for masking.
            if not max_shape:
                max_shape = list(x.shape)
            else:
                for k in range(len(x.shape)):
                    if x.shape[k] > max_shape[k]:
                        max_shape[k] = x.shape[k]

            # Get the bounding boxes from the transformed masks!
            annotations = self.load_annotations(y)
            annotations_list.append(annotations)

            x = self.image_data_generator.standardize(x)

            batch_x[i] = x

            for k, y_sem in enumerate(y_semantic_list):
                batch_y_semantic_list[k][i] = y_sem

        anchors = anchors_for_shape(batch_x.shape[1:],
                                    pyramid_levels=self.pyramid_levels,
                                    anchor_params=self.anchor_params,
                                    shapes_callback=self.compute_shapes)

        regressions, labels = anchor_targets_bbox(anchors, batch_x,
                                                  annotations_list,
                                                  self.num_classes)

        max_shape = tuple(max_shape)  # was a list for max shape indexing

        if self.include_masks:
            # masks_batch has shape: (batch size, max_annotations,
            #     bbox_x1 + bbox_y1 + bbox_x2 + bbox_y2 + label +
            #     width + height + max_image_dimension)
            max_annotations = max(len(a['masks']) for a in annotations_list)
            masks_batch_shape = (len(index_array), max_annotations,
                                 5 + 2 + max_shape[0] * max_shape[1])
            masks_batch = np.zeros(masks_batch_shape, dtype=K.floatx())

            for i, ann in enumerate(annotations_list):
                masks_batch[i, :ann['bboxes'].shape[0], :4] = ann['bboxes']
                masks_batch[i, :ann['labels'].shape[0], 4] = ann['labels']
                masks_batch[i, :, 5] = max_shape[1]  # width
                masks_batch[i, :, 6] = max_shape[0]  # height

                # add flattened mask
                for j, mask in enumerate(ann['masks']):
                    masks_batch[i, j, 7:] = mask.flatten()

        if self.save_to_dir:
            for i, j in enumerate(index_array):
                if self.data_format == 'channels_first':
                    img_x = np.expand_dims(batch_x[i, 0, ...], 0)
                else:
                    img_x = np.expand_dims(batch_x[i, ..., 0], -1)
                img = array_to_img(img_x, self.data_format, scale=True)
                fname = '{prefix}_{index}_{hash}.{format}'.format(
                    prefix=self.save_prefix,
                    index=j,
                    hash=np.random.randint(1e4),
                    format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))

        batch_outputs = [regressions, labels]

        if self.include_masks:
            batch_outputs.append(masks_batch)

        batch_outputs.extend(batch_y_semantic_list)

        return batch_x, batch_outputs
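
The ordering of batch_outputs must match the ordering of the model's output heads. Assuming keras-retinanet-style targets (an assumption; the exact shapes depend on this anchor_targets_bbox implementation), the first two outputs commonly look like:

    import numpy as np

    B, N, K = 4, 1008, 2                   # hypothetical batch, anchors, classes
    regressions = np.zeros((B, N, 4 + 1))  # 4 box offsets + anchor-state flag
    labels = np.zeros((B, N, K + 1))       # K class columns + anchor-state flag
    batch_outputs = [regressions, labels]  # + masks / semantic heads as above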