def test_bbox_transform(self):
    # TODO: test correctness
    sizes = [12]
    strides = [8]
    ratios = np.array([1, 2], K.floatx())
    scales = np.array([1, 2], K.floatx())
    anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

    pyramid_levels = [3]
    image_shape = (16, 16)
    anchors = utils.anchors_for_shape(image_shape,
                                      pyramid_levels=pyramid_levels,
                                      anchor_params=anchor_params)

    # test custom std/mean
    targets = utils.bbox_transform(anchors, np.random.random((1, 4)),
                                   mean=[0], std=[0.2])
    self.assertTupleEqual(targets.shape, (16, 4))

    # test bad `mean` value
    with self.assertRaises(ValueError):
        utils.bbox_transform(anchors, [1], mean='invalid', std=None)

    # test bad `std` value
    with self.assertRaises(ValueError):
        utils.bbox_transform(anchors, [1], mean=None, std='invalid')
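# A minimal standalone sketch of the box encoding `bbox_transform` is expected
# to apply, assuming the usual RetinaNet-style parameterization (corner offsets
# normalized by anchor width/height, then shifted by `mean` and scaled by
# `std`). The helper name `_bbox_transform_sketch` is hypothetical and not part
# of `utils`; note that broadcasting a single (1, 4) ground-truth box against
# (16, 4) anchors yields the (16, 4) target shape the test checks.
import numpy as np

def _bbox_transform_sketch(anchors, gt_boxes, mean=0.0, std=0.2):
    anchor_widths = anchors[:, 2] - anchors[:, 0]
    anchor_heights = anchors[:, 3] - anchors[:, 1]
    # Per-corner offsets, normalized by the anchor's width/height.
    targets = np.stack([
        (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths,
        (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights,
        (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths,
        (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights,
    ], axis=-1)
    return (targets - mean) / std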
def test_anchor_targets_bbox(self):
    # TODO: test correctness
    sizes = [12]
    strides = [8]
    ratios = np.array([1, 2], K.floatx())
    scales = np.array([1, 2], K.floatx())
    anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

    pyramid_levels = [3]
    image_shape = (16, 16)
    anchors = utils.anchors_for_shape(image_shape,
                                      pyramid_levels=pyramid_levels,
                                      anchor_params=anchor_params)

    # test image / annotation size mismatch
    with self.assertRaises(ValueError):
        utils.anchor_targets_bbox(anchors, [1], [1, 2, 3], 1)

    # test image / annotation not empty
    with self.assertRaises(ValueError):
        utils.anchor_targets_bbox(anchors, [], [], 1)

    # test annotation structure
    with self.assertRaises(ValueError):
        utils.anchor_targets_bbox(anchors, [1], [{'labels': 1}], 1)
    with self.assertRaises(ValueError):
        utils.anchor_targets_bbox(anchors, [1], [{'bboxes': 1}], 1)
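# For reference, a minimal well-formed annotations entry as implied by the
# structure checks above: `anchor_targets_bbox` expects one dict per image
# containing both 'bboxes' and 'labels'. The values below are illustrative
# only.
import numpy as np

example_annotations = [{
    'bboxes': np.array([[2.0, 2.0, 10.0, 10.0]]),  # (N, 4) corner boxes
    'labels': np.array([0.0]),                     # (N,) class ids
}]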
def test_anchors_for_shape_dimensions(self):
    sizes = [32, 64, 128]
    strides = [8, 16, 32]
    ratios = np.array([0.5, 1, 2, 3], K.floatx())
    scales = np.array([1, 1.2, 1.6], K.floatx())
    anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

    pyramid_levels = [3, 4, 5]
    image_shape = tensor_shape.TensorShape((64, 64))
    all_anchors = utils.anchors_for_shape(image_shape,
                                          pyramid_levels=pyramid_levels,
                                          anchor_params=anchor_params)
    self.assertTupleEqual(all_anchors.shape, (1008, 4))
    self.assertEqual(anchor_params.num_anchors(), 12)
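# Sanity arithmetic behind the expected values above: num_anchors() is
# len(ratios) * len(scales) = 4 * 3 = 12 anchors per feature-map location,
# and a 64x64 image at pyramid levels 3-5 (strides 8, 16, 32) yields
# 8*8 + 4*4 + 2*2 = 84 locations, hence 84 * 12 = 1008 anchors in total.
assert (64 // 8) ** 2 + (64 // 16) ** 2 + (64 // 32) ** 2 == 84
assert 84 * (4 * 3) == 1008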
def test_anchors_for_shape_values(self):
    sizes = [12]
    strides = [8]
    ratios = np.array([1, 2], K.floatx())
    scales = np.array([1, 2], K.floatx())
    anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

    pyramid_levels = [3]
    image_shape = (16, 16)
    all_anchors = utils.anchors_for_shape(image_shape,
                                          pyramid_levels=pyramid_levels,
                                          anchor_params=anchor_params)

    # The 16 anchors are the 4 feature-map centers x 2 ratios x 2 scales,
    # ordered by center, then ratio, then scale. Each anchor is
    # (x1, y1, x2, y2) with width sizes[0] * scale / sqrt(ratio) and
    # height sizes[0] * scale * sqrt(ratio).
    centers = [
        (strides[0] / 2, strides[0] / 2),
        (strides[0] * 3 / 2, strides[0] / 2),
        (strides[0] / 2, strides[0] * 3 / 2),
        (strides[0] * 3 / 2, strides[0] * 3 / 2),
    ]
    combos = [(c, r, s) for c in centers for r in ratios for s in scales]

    # using assertAllClose for floating point imprecisions
    for idx, ((cx, cy), ratio, scale) in enumerate(combos):
        half_w = (sizes[0] * scale / np.sqrt(ratio)) / 2
        half_h = (sizes[0] * scale * np.sqrt(ratio)) / 2
        self.assertAllClose(
            all_anchors[idx, :],
            [cx - half_w, cy - half_h, cx + half_w, cy + half_h])
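# Worked example behind the first expected anchor above, as a standalone
# check in plain numpy (independent of `utils`): scale 1 and ratio 1 give a
# 12x12 box centered at (stride / 2, stride / 2) = (4, 4), so its corners
# land at (-2, -2, 10, 10). Anchors near the border may extend past the
# image edge.
import numpy as np

cx = cy = 8 / 2                    # stride / 2
half = (12 * 1 / np.sqrt(1)) / 2   # (size * scale / sqrt(ratio)) / 2 = 6.0
assert [cx - half, cy - half, cx + half, cy + half] == [-2.0, -2.0, 10.0, 10.0]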
def _get_batches_of_transformed_samples(self, index_array):
    if self.data_format == 'channels_first':
        batch_x = np.zeros((len(index_array),
                            self.x.shape[1],
                            self.frames_per_batch,
                            self.x.shape[3],
                            self.x.shape[4]))
    else:
        batch_x = np.zeros(tuple([len(index_array), self.frames_per_batch] +
                                 list(self.x.shape)[2:]))

    if self.panoptic:
        if self.data_format == 'channels_first':
            batch_y_semantic_list = [
                np.zeros(tuple([len(index_array),
                                y_semantic.shape[1],
                                self.frames_per_batch,
                                y_semantic.shape[3],
                                y_semantic.shape[4]]))
                for y_semantic in self.y_semantic_list
            ]
        else:
            batch_y_semantic_list = [
                np.zeros(tuple([len(index_array), self.frames_per_batch] +
                               list(y_semantic.shape[2:])))
                for y_semantic in self.y_semantic_list
            ]

    annotations_list = [[] for _ in range(self.frames_per_batch)]

    max_shape = []
    for i, j in enumerate(index_array):
        # Sample a random window of frames_per_batch consecutive frames.
        last_frame = self.x.shape[self.time_axis] - self.frames_per_batch
        time_start = np.random.randint(0, high=last_frame)
        time_end = time_start + self.frames_per_batch
        times = list(np.arange(time_start, time_end))

        if self.time_axis == 1:
            x = self.x[j, time_start:time_end, ...]
            y = self.y[j, time_start:time_end, ...]
        elif self.time_axis == 2:
            x = self.x[j, :, time_start:time_end, ...]
            y = self.y[j, :, time_start:time_end, ...]

        if self.panoptic:
            if self.time_axis == 1:
                y_semantic_list = [
                    y_semantic[j, time_start:time_end, ...]
                    for y_semantic in self.y_semantic_list
                ]
            elif self.time_axis == 2:
                y_semantic_list = [
                    y_semantic[j, :, time_start:time_end, ...]
                    for y_semantic in self.y_semantic_list
                ]

        # Apply transformation
        if self.panoptic:
            x, y_list = self.movie_data_generator.random_transform(
                x, [y] + y_semantic_list)
            y = y_list[0]
            y_semantic_list = y_list[1:]
        else:
            x, y = self.movie_data_generator.random_transform(x, y)

        x = self.movie_data_generator.standardize(x)

        # Find max shape of image data. Used for masking.
        if not max_shape:
            max_shape = list(x.shape)
        else:
            for k in range(len(x.shape)):
                if x.shape[k] > max_shape[k]:
                    max_shape[k] = x.shape[k]

        # Get the bounding boxes from the transformed masks!
        for idx_time, time in enumerate(times):
            if self.time_axis == 1:
                annotations = self.load_annotations(y[idx_time])
            elif self.time_axis == 2:
                annotations = self.load_annotations(y[:, idx_time, ...])
            annotations_list[idx_time].append(annotations)

        batch_x[i] = x
        if self.panoptic:
            for k in range(len(y_semantic_list)):
                batch_y_semantic_list[k][i] = y_semantic_list[k]

    if self.data_format == 'channels_first':
        batch_x_shape = [batch_x.shape[1],
                         batch_x.shape[3],
                         batch_x.shape[4]]
    else:
        batch_x_shape = batch_x.shape[2:]

    anchors = anchors_for_shape(batch_x_shape,
                                pyramid_levels=self.pyramid_levels,
                                anchor_params=self.anchor_params,
                                shapes_callback=self.compute_shapes)

    regressions_list = []
    labels_list = []

    if self.data_format == 'channels_first':
        batch_x_frame = batch_x[:, :, 0, ...]
    else:
        batch_x_frame = batch_x[:, 0, ...]

    # `times` is left over from the last sample, but every sample spans
    # exactly frames_per_batch frames, so its length is what matters here.
    for idx, time in enumerate(times):
        regressions, labels = anchor_targets_bbox(anchors,
                                                  batch_x_frame,
                                                  annotations_list[idx],
                                                  self.num_classes)
        regressions_list.append(regressions)
        labels_list.append(labels)

    regressions = np.stack(regressions_list, axis=self.time_axis)
    labels = np.stack(labels_list, axis=self.time_axis)

    # was a list for max shape indexing; reduce to (rows, cols)
    max_shape = tuple([max_shape[self.row_axis - 1],
                       max_shape[self.col_axis - 1]])

    if self.include_masks:
        # masks_batch has shape: (batch size, frames_per_batch,
        # max_annotations, bbox_x1 + bbox_y1 + bbox_x2 + bbox_y2 + label +
        # width + height + max_image_dimension)
        flatten = lambda l: [item for sublist in l for item in sublist]
        annotations_list_flatten = flatten(annotations_list)
        max_annotations = max(
            len(a['masks']) for a in annotations_list_flatten)
        masks_batch_shape = (len(index_array),
                             self.frames_per_batch,
                             max_annotations,
                             5 + 2 + max_shape[0] * max_shape[1])
        masks_batch = np.zeros(masks_batch_shape, dtype=K.floatx())

        for idx_time, time in enumerate(times):
            annotations_frame = annotations_list[idx_time]
            for idx_batch, ann in enumerate(annotations_frame):
                masks_batch[idx_batch, idx_time,
                            :ann['bboxes'].shape[0], :4] = ann['bboxes']
                masks_batch[idx_batch, idx_time,
                            :ann['labels'].shape[0], 4] = ann['labels']
                masks_batch[idx_batch, idx_time, :, 5] = max_shape[1]  # width
                masks_batch[idx_batch, idx_time, :, 6] = max_shape[0]  # height

                # add flattened mask
                for idx_mask, mask in enumerate(ann['masks']):
                    masks_batch[idx_batch, idx_time,
                                idx_mask, 7:] = mask.flatten()

    if self.save_to_dir:
        for i, j in enumerate(index_array):
            for frame in range(batch_x.shape[self.time_axis]):
                if self.time_axis == 2:
                    img = array_to_img(batch_x[i, :, frame],
                                       self.data_format, scale=True)
                else:
                    img = array_to_img(batch_x[i, frame],
                                       self.data_format, scale=True)
                fname = '{prefix}_{index}_{hash}.{format}'.format(
                    prefix=self.save_prefix,
                    index=j,
                    hash=np.random.randint(1e4),
                    format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))

    batch_outputs = [regressions, labels]
    if self.include_masks:
        batch_outputs.append(masks_batch)
    if self.include_final_detection_layer:
        batch_outputs.append(masks_batch)
    if self.panoptic:
        batch_outputs += batch_y_semantic_list

    return batch_x, batch_outputs
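# Usage sketch for the movie generator above, assuming an instance
# `movie_gen` built with include_masks=True, panoptic=False, and
# include_final_detection_layer=False (all names illustrative):
#
#     batch_x, (regressions, labels, masks) = next(movie_gen)
#
# `regressions` and `labels` carry a time axis of length frames_per_batch
# inserted by np.stack, and `masks` has shape
# (batch, frames_per_batch, max_annotations, 5 + 2 + rows * cols).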
def _get_batches_of_transformed_samples(self, index_array):
    batch_x = np.zeros(tuple([len(index_array)] + list(self.x.shape)[1:]))

    batch_y_semantic_list = []
    for y_sem in self.y_semantic_list:
        shape = tuple([len(index_array)] + list(y_sem.shape[1:]))
        batch_y_semantic_list.append(np.zeros(shape, dtype=y_sem.dtype))

    annotations_list = []

    max_shape = []
    for i, j in enumerate(index_array):
        x = self.x[j]
        y = self.y[j]
        y_semantic_list = [y_sem[j] for y_sem in self.y_semantic_list]

        # Apply transformation
        x, y_list = self.image_data_generator.random_transform(
            x, [y] + y_semantic_list)
        y = y_list[0]
        y_semantic_list = y_list[1:]

        # Find max shape of image data. Used for masking.
        if not max_shape:
            max_shape = list(x.shape)
        else:
            for k in range(len(x.shape)):
                if x.shape[k] > max_shape[k]:
                    max_shape[k] = x.shape[k]

        # Get the bounding boxes from the transformed masks!
        annotations = self.load_annotations(y)
        annotations_list.append(annotations)

        x = self.image_data_generator.standardize(x)

        batch_x[i] = x
        for k, y_sem in enumerate(y_semantic_list):
            batch_y_semantic_list[k][i] = y_sem

    anchors = anchors_for_shape(batch_x.shape[1:],
                                pyramid_levels=self.pyramid_levels,
                                anchor_params=self.anchor_params,
                                shapes_callback=self.compute_shapes)

    regressions, labels = anchor_targets_bbox(anchors,
                                              batch_x,
                                              annotations_list,
                                              self.num_classes)

    max_shape = tuple(max_shape)  # was a list for max shape indexing

    if self.include_masks:
        # masks_batch has shape: (batch size, max_annotations,
        # bbox_x1 + bbox_y1 + bbox_x2 + bbox_y2 + label +
        # width + height + max_image_dimension)
        max_annotations = max(len(a['masks']) for a in annotations_list)
        masks_batch_shape = (len(index_array),
                             max_annotations,
                             5 + 2 + max_shape[0] * max_shape[1])
        masks_batch = np.zeros(masks_batch_shape, dtype=K.floatx())

        for i, ann in enumerate(annotations_list):
            masks_batch[i, :ann['bboxes'].shape[0], :4] = ann['bboxes']
            masks_batch[i, :ann['labels'].shape[0], 4] = ann['labels']
            masks_batch[i, :, 5] = max_shape[1]  # width
            masks_batch[i, :, 6] = max_shape[0]  # height

            # add flattened mask
            for j, mask in enumerate(ann['masks']):
                masks_batch[i, j, 7:] = mask.flatten()

    if self.save_to_dir:
        for i, j in enumerate(index_array):
            if self.data_format == 'channels_first':
                img_x = np.expand_dims(batch_x[i, 0, ...], 0)
            else:
                img_x = np.expand_dims(batch_x[i, ..., 0], -1)
            img = array_to_img(img_x, self.data_format, scale=True)
            fname = '{prefix}_{index}_{hash}.{format}'.format(
                prefix=self.save_prefix,
                index=j,
                hash=np.random.randint(1e4),
                format=self.save_format)
            img.save(os.path.join(self.save_to_dir, fname))

    batch_outputs = [regressions, labels]
    if self.include_masks:
        batch_outputs.append(masks_batch)
    batch_outputs.extend(batch_y_semantic_list)

    return batch_x, batch_outputs
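# A minimal sketch of reading one row of `masks_batch` back out, following
# the layout packed above: 4 bbox corner coords, the class label, the padded
# width and height, then the flattened mask. The helper name is hypothetical;
# `row` is one masks_batch[i, j] numpy vector.
def _unpack_mask_row(row):
    bbox = row[:4]                   # (x1, y1, x2, y2)
    label = row[4]
    width, height = int(row[5]), int(row[6])
    mask = row[7:7 + height * width].reshape((height, width))
    return bbox, label, mask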