def prepare_image(self, image_id):
    """Load one image with its instance masks and resize both per the config.

    Instances whose resized mask covers 16 pixels or fewer are dropped.

    Args:
        image_id: Identifier forwarded to ``self.load_image`` and
            ``self.load_mask``.

    Returns:
        ``None`` (after printing a notice) when no instance survives the
        size filter. Otherwise the tuple
        ``(image, class_ids, bbox, mask, gt_cy, gt_cx)``:

        image: the resized image returned by ``cocoutils.resize_image``.
        class_ids: class IDs of the kept instances.
        bbox: result of ``cocoutils.extract_bboxes(mask)``; laid out as
            [num_instances, (y1, x1, y2, x2)]. Some boxes may be all zeros
            if the corresponding mask got cropped out.
        mask: [y, x, num_instance] masks of the kept instances.
        gt_cy, gt_cx: result of ``cocoutils.gravity_center(mask)``.
    """
    # Load image and mask.
    image = self.load_image(image_id=image_id)
    mask, class_ids = self.load_mask(image_id=image_id)

    image, window, scale, padding, crop = cocoutils.resize_image(
        image,
        min_dim=self.config.IMAGE_MIN_DIM,
        min_scale=self.config.IMAGE_MIN_SCALE,
        max_dim=self.config.IMAGE_MAX_DIM,
        mode=self.config.IMAGE_RESIZE_MODE)
    mask = cocoutils.resize_mask(mask, scale, padding, 0, crop)

    # Keep only instances that still cover more than 16 pixels after resizing.
    _idx = np.sum(mask, axis=(0, 1)) > 16
    class_ids = class_ids[_idx]
    if len(class_ids) != 0:
        mask = mask[:, :, _idx]
        bbox = cocoutils.extract_bboxes(mask)
        gt_cy, gt_cx = cocoutils.gravity_center(mask)
        return image, class_ids, bbox, mask, gt_cy, gt_cx

    print("return nothing")
    return None
def generator(self, image_id):
    """Build the normalized image and the dense stride-level ground truth.

    The image and its instances come from ``self.prepare_image``. All
    target maps live on a grid of ``IMAGE_MAX_DIM / STRIDE`` cells per side.

    Args:
        image_id: Identifier forwarded to ``self.prepare_image``.

    Returns:
        ``None`` when ``self.prepare_image`` returns ``None``. Otherwise
        ``(image, gt)`` where ``image`` is scaled to [0, 1] and normalized
        with fixed per-channel mean/std, and ``gt`` is the channel-wise
        concatenation (last axis) of:

        gt_reg: 4 channels, distances (top, left, bottom, right) from each
            cell to the box of the smallest-area instance covering it.
        iou_reduction: 1 channel, max Gaussian response over instances,
            restricted to cells covered by any instance.
        gt_keypoints: ``self.num_classes`` channels, gravity-center counts.
        reduction_gt: ``self.num_classes`` channels, per-class Gaussian.
        heatmap_gt: ``self.num_classes`` channels, per-class occupancy.
    """
    prepared = self.prepare_image(image_id,
                                  augment=self.augment,
                                  augmentation=self.augmentation)
    if prepared is None:
        return None
    image, class_ids, bbox, gt_y, gt_x, gt_mask = prepared

    stride = self.config.STRIDE
    max_dim = self.config.IMAGE_MAX_DIM
    cells = max_dim // int(stride)   # grid side used for the pad widths
    grid_hw = int(max_dim / stride)  # grid side used for the dense maps

    floor_y = np.floor(gt_y / int(stride)).astype(int)
    floor_x = np.floor(gt_x / int(stride)).astype(int)

    mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
    std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
    image = (image / 255. - mean) / std

    # bbox: [num_instances, (y1, x1, y2, x2)]
    gt_top = np.expand_dims((gt_y - bbox[..., 0]), axis=-1)
    gt_bot = np.expand_dims((bbox[..., 2] - gt_y), axis=-1)
    gt_left = np.expand_dims((gt_x - bbox[..., 1]), axis=-1)
    gt_right = np.expand_dims((bbox[..., 3] - gt_x), axis=-1)
    gt_y = np.expand_dims(gt_y, axis=-1)
    gt_x = np.expand_dims(gt_x, axis=-1)
    class_ids = np.expand_dims(class_ids, axis=-1)
    gt = np.concatenate(
        [gt_y, gt_x, gt_top, gt_left, gt_bot, gt_right, class_ids], axis=-1)
    instance_num = np.shape(gt)[0]

    # Per-instance scale maps for the Gaussian below: each map is 1 on the
    # row/column of the gravity center and holds the stride-scaled extents
    # on either side of it.
    # TODO: pshape without 56
    vertical_ones = np.tile([[1]], [cells, 1])
    # np.tile rather than tf.tile: the pipeline is NumPy end to end.
    horizontal_ones = np.tile([[1]], [1, cells])
    scalar_y = []
    scalar_x = []
    for i in range(instance_num):
        pad_l = floor_x[i]
        pad_r = cells - floor_x[i] - 1
        vertical_pads = np.pad(
            vertical_ones, [[0, 0], [pad_l, pad_r]],
            "constant",
            constant_values=(int(gt_left[i][0] / stride),
                             int(gt_right[i][0] / stride)))
        pad_t = floor_y[i]
        pad_b = cells - floor_y[i] - 1
        horizontal_pads = np.pad(
            horizontal_ones, [[pad_t, pad_b], [0, 0]],
            "constant",
            constant_values=(int(gt_top[i][0] / stride),
                             int(gt_bot[i][0] / stride)))
        scalar_y.append(np.expand_dims(horizontal_pads, -1))
        scalar_x.append(np.expand_dims(vertical_pads, -1))
    scalar_y = np.concatenate(scalar_y, axis=-1)  # [y, x, num_g]
    scalar_x = np.concatenate(scalar_x, axis=-1)  # [y, x, num_g]

    padding = [(0, 0), (0, 0), (0, 0)]
    stride_mask = resize_mask(gt_mask, 1 / stride, padding, 0)
    # Builtin float: the np.float alias was removed in NumPy 1.24.
    masks = stride_mask.astype(np.uint8).astype(float)

    class_id = class_ids[..., 0]

    # Gravity centers in grid coordinates.
    gravi_yx = gt[..., 0:2] / stride
    gravi_yx_round_int = np.floor(gravi_yx).astype(int)
    gravity_y = np.reshape(gt_y[..., 0] / stride, [1, 1, -1])
    gravity_x = np.reshape(gt_x[..., 0] / stride, [1, 1, -1])

    # Box edges in grid coordinates, broadcastable against (y, x, num_g).
    gbbox_y1 = np.reshape(bbox[..., 0] / stride, [1, 1, -1])
    gbbox_x1 = np.reshape(bbox[..., 1] / stride, [1, 1, -1])
    gbbox_y2 = np.reshape(bbox[..., 2] / stride, [1, 1, -1])
    gbbox_x2 = np.reshape(bbox[..., 3] / stride, [1, 1, -1])
    num_g = np.shape(gbbox_y1)[-1]

    h = np.arange(max_dim / stride).astype(np.float32)
    w = np.arange(max_dim / stride).astype(np.float32)
    # Coordinate grids of shape [h_y_num, w_x_num].
    grid_x, grid_y = np.meshgrid(w, h)
    grid_y = np.tile(np.expand_dims(grid_y, -1), [1, 1, num_g])  # (y, x, num_g)
    grid_x = np.tile(np.expand_dims(grid_x, -1), [1, 1, num_g])

    dist_l = grid_x - gbbox_x1  # (y, x, num_g)
    dist_r = gbbox_x2 - grid_x
    dist_t = grid_y - gbbox_y1
    dist_b = gbbox_y2 - grid_y
    grid_y_mask = (dist_t > 0.).astype(np.float32) * (dist_b > 0.).astype(np.float32)
    grid_x_mask = (dist_l > 0.).astype(np.float32) * (dist_r > 0.).astype(np.float32)
    # Cells strictly inside the box AND on the instance mask (mask-shaped,
    # not rectangular).
    heatmask = grid_y_mask * grid_x_mask * masks
    dist_l *= heatmask
    dist_r *= heatmask
    dist_t *= heatmask
    dist_b *= heatmask
    loc = np.max(heatmask, axis=-1)  # (y, x) union of all instance masks

    # Where instances overlap, keep the one with the smallest box area.
    dist_area = (dist_l + dist_r) * (dist_t + dist_b)
    dist_area_ = dist_area + (1. - heatmask) * 1e8  # background pushed to 1e8
    dist_area_min = np.min(dist_area_, axis=-1, keepdims=True)
    dist_mask = np.equal(dist_area, dist_area_min).astype(
        np.float32) * np.expand_dims(loc, axis=-1)

    dist_l *= dist_mask
    dist_r *= dist_mask
    dist_t *= dist_mask
    dist_b *= dist_mask
    dist_l = np.expand_dims(np.max(dist_l, axis=-1), -1)
    dist_r = np.expand_dims(np.max(dist_r, axis=-1), -1)
    dist_t = np.expand_dims(np.max(dist_t, axis=-1), -1)
    dist_b = np.expand_dims(np.max(dist_b, axis=-1), -1)
    gt_reg = np.concatenate([dist_t, dist_l, dist_b, dist_r],
                            axis=-1).astype(np.float32)

    # Gaussian centered on the floored gravity center, scaled per axis.
    reduction = np.exp(
        -(((grid_y - gravity_y // 1) / np.sqrt(scalar_y + 1e-8))**2 +
          ((grid_x - gravity_x // 1) / np.sqrt(scalar_x + 1e-8))**2) /
        (2 * 1**2))
    iou_reduction = np.max(reduction, axis=2)  # [y, x, num_g] --> [y, x]
    iou_reduction = np.expand_dims(loc * iou_reduction,
                                   axis=-1).astype(np.float32)

    zero_like = np.zeros((grid_hw, grid_hw, 1), np.float32)
    gt_keypoints = []
    heatmap_gt = []
    reduction_gt = []
    for i in range(self.num_classes):
        exist_i = np.equal(class_id - 1, i)  # skip BG CLASS_ID: 0
        reduce_i = reduction[..., exist_i]
        # [y, x, 1] maps for class i; all zeros when the class is absent.
        if np.shape(reduce_i)[-1] == 0:
            reduce_i = zero_like
            heatmap_i = zero_like
            gt_keypoints_i = zero_like
        else:
            gy = gravi_yx_round_int[..., 0][exist_i]
            gx = gravi_yx_round_int[..., 1][exist_i]
            num_i = np.sum(exist_i.astype(np.int32))
            reduce_i = np.expand_dims(np.max(reduce_i, axis=2), axis=-1)
            heatmap_i = np.expand_dims(
                np.max(dist_mask[..., exist_i], axis=2), axis=-1)
            # Sparse scatter; centers landing on the same cell are summed.
            gt_keypoints_i = csr_matrix(
                (np.ones(num_i), (gy, gx)),
                shape=(grid_hw, grid_hw)).toarray()
            gt_keypoints_i = np.expand_dims(gt_keypoints_i, -1)
        reduction_gt.append(reduce_i)
        heatmap_gt.append(heatmap_i)
        gt_keypoints.append(gt_keypoints_i)
    reduction_gt = np.concatenate(reduction_gt, axis=-1).astype(np.float32)
    heatmap_gt = np.concatenate(heatmap_gt, axis=-1).astype(np.float32)
    gt_keypoints = np.concatenate(gt_keypoints, axis=-1).astype(np.float32)

    gt = np.concatenate(
        [gt_reg, iou_reduction, gt_keypoints, reduction_gt, heatmap_gt],
        axis=-1)
    return image, gt
def prepare_image(self, image_id, augment=False, augmentation=None):
    """Load, resize and optionally augment one image with its instance masks.

    Instances whose mask covers 48 pixels or fewer after resizing and
    augmentation are dropped.

    Args:
        image_id: Identifier forwarded to ``self.load_image`` and
            ``self.load_mask``.
        augment: (deprecated, use ``augmentation``) If true, apply a random
            horizontal flip. Falls back to ``self.augment`` when falsy.
        augmentation: Optional imgaug (https://github.com/aleju/imgaug)
            augmenter, e.g. ``imgaug.augmenters.Fliplr(0.5)``. Falls back to
            ``self.augmentation`` when ``None``.

    Returns:
        ``None`` when no instance survives the size filter. Otherwise the
        tuple ``(image, class_ids, bbox, gt_cy, gt_cx, mask)``:

        image: the resized (and possibly augmented) image.
        class_ids: class IDs of the kept instances.
        bbox: result of ``cocoutils.extract_bboxes(mask)``; laid out as
            [num_instances, (y1, x1, y2, x2)]. Some boxes may be all zeros
            if the corresponding mask got cropped out.
        gt_cy, gt_cx: result of ``cocoutils.gravity_center(mask)``.
        mask: [y, x, num_instance] masks of the kept instances.
    """
    # Honor explicit arguments and fall back to the instance settings. The
    # previous code gated on the instance attributes but then dereferenced
    # the (possibly None) `augmentation` argument.
    augment = augment or self.augment
    if augmentation is None:
        augmentation = self.augmentation

    # Load image and mask.
    image = self.load_image(image_id)
    mask, class_ids = self.load_mask(image_id)
    image, window, scale, padding, crop = cocoutils.resize_image(
        image,
        min_dim=self.config.IMAGE_MIN_DIM,
        min_scale=self.config.IMAGE_MIN_SCALE,
        max_dim=self.config.IMAGE_MAX_DIM,
        mode=self.config.IMAGE_RESIZE_MODE)
    mask = cocoutils.resize_mask(mask, scale, padding, 0, crop)

    # Random horizontal flips.
    # TODO: will be removed in a future update in favor of augmentation
    if augment:
        logging.warning(
            "'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Augmentation.
    # This requires the imgaug lib (https://github.com/aleju/imgaug).
    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks. Some, such as Affine,
        # have settings that make them unsafe, so always test your
        # augmentation on masks.
        MASK_AUGMENTERS = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        # Store shapes before augmentation to compare.
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks.
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support bool.
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change.
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool (the np.bool alias was removed in NumPy 1.24).
        mask = mask.astype(bool)

    # Keep only instances that still cover more than 48 pixels.
    _idx = np.sum(mask, axis=(0, 1)) > 48
    class_ids = class_ids[_idx]
    if len(class_ids) != 0:
        mask = mask[:, :, _idx]
        bbox = cocoutils.extract_bboxes(mask)
        gt_cy, gt_cx = cocoutils.gravity_center(mask)
        return image, class_ids, bbox, gt_cy, gt_cx, mask
    return None
def load_image_gt(dataset,
                  config,
                  image_id,
                  augment=False,
                  augmentation=None,
                  use_mini_mask=False):
    """Load and return ground truth data for an image.

    Args:
        dataset: Object providing ``load_image``, ``load_mask``,
            ``num_classes``, ``source_class_ids`` and ``image_info``.
        config: Object providing ``IMAGE_MIN_DIM``, ``IMAGE_MIN_SCALE``,
            ``IMAGE_MAX_DIM``, ``IMAGE_RESIZE_MODE`` and ``MINI_MASK_SHAPE``.
        image_id: Identifier of the image to load.
        augment: (deprecated, use ``augmentation``) If true, apply a random
            horizontal flip.
        augmentation: Optional imgaug augmenter applied to image and masks.
        use_mini_mask: If true, masks are shrunk with
            ``utils.minimize_mask`` to ``config.MINI_MASK_SHAPE``.

    Returns:
        ``(image, image_meta, class_ids, bbox, mask)``.
    """
    # Load the image and its instance masks.
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    # Shape before resizing.
    original_shape = image.shape
    # Resized image, plus where the original sits inside it (window), the
    # scale factor and the padding/crop that were applied.
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Optional random horizontal flip.
    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    if augmentation:
        import imgaug

        # Augmenters that may also be applied to the masks.
        MASK_AUGMENTERS = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        image_shape = image.shape
        mask_shape = mask.shape
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Back to bool (the np.bool alias was removed in NumPy 1.24).
        mask = mask.astype(bool)

    # Drop instances whose mask ended up empty.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # Boxes that correspond to the masks.
    bbox = utils.extract_bboxes(mask)

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[
        dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Build the image meta vector.
    image_meta = utils.compose_image_meta(image_id, original_shape,
                                          image.shape, window, scale,
                                          active_class_ids)
    return image, image_meta, class_ids, bbox, mask
# Accumulate the per-class instance masks on the stride-level grid, upscale
# them, and display the detections.
# NOTE(review): statement nesting was reconstructed from a collapsed source
# line; the resize/display steps are assumed to run once, after the loop.
final_masks = np.zeros([
    int(Config.IMAGE_MAX_DIM // Config.STRIDE),
    int(Config.IMAGE_MAX_DIM // Config.STRIDE), num_select
], np.float32)
for i in range(Config.NUM_CLASSES):
    exist_i = np.equal(select_class_id, i)  # boolean flag per selection
    exist_int = exist_i.astype(int)
    index = np.where(exist_int > 0)[0]  # positions of class-i selections
    num_i = np.sum(exist_int)
    masks = ccc(Config, num_select, index, select_bbox, exist_i,
                class_seg[..., i], num_i, pic_preg)
    final_masks = final_masks + masks

# TODO: resize masks
padding = [(0, 0), (0, 0), (0, 0)]
stride_mask = resize_mask(final_masks, 4, padding, 0)
stride_mask = cv2.medianBlur(stride_mask, 5)
# Builtin float: the np.float alias was removed in NumPy 1.24.
masks = stride_mask.astype(np.uint8).astype(float)
# `==`, not `is`: identity comparison with an int literal is unreliable.
if len(np.shape(masks)) == 2:
    masks = np.expand_dims(masks, -1)
visualize.display_instances(image,
                            select_center * 4 + 2,
                            select_bbox * 4,
                            masks,
                            select_class_id + 1,
                            class_names,
                            select_scores,
                            show_mask=True)
def generator(self, image_id):
    """Build the normalized image and fixed-size ground truth for one image.

    The image and its instances come from ``self.prepare_image``. The
    instance axis of every output is zero-padded or truncated to
    ``self.config.MAX_GT_INSTANCES``. The grid stride is hard-coded to 8.

    Args:
        image_id: Identifier forwarded to ``self.prepare_image``.

    Returns:
        ``None`` when ``self.prepare_image`` returns ``None``. Otherwise
        ``(image, gt, masks, scalar_y, scalar_x)``:

        image: scaled to [0, 1] and normalized with fixed per-channel
            mean/std.
        gt: [MAX_GT_INSTANCES, 7] rows of (gt_y, gt_x, y1, x1, y2, x2,
            class_id).
        masks: instance masks after ``resize_mask(..., 0.125, ...)``, as
            floats.
        scalar_y, scalar_x: [y, x, MAX_GT_INSTANCES] per-instance scale maps
            on the ``IMAGE_MAX_DIM // 8`` grid.
    """
    prepared = self.prepare_image(image_id,
                                  augment=self.augment,
                                  augmentation=self.augmentation)
    if prepared is None:
        return None
    image, class_ids, bbox, gt_y, gt_x, gt_mask = prepared

    cells = self.config.IMAGE_MAX_DIM // 8
    max_instances = self.config.MAX_GT_INSTANCES

    floor_y = np.floor(gt_y / 8).astype(int)
    floor_x = np.floor(gt_x / 8).astype(int)

    mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
    std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
    image = (image / 255. - mean) / std

    # bbox: [num_instances, (y1, x1, y2, x2)]
    gt_top = np.expand_dims((gt_y - bbox[..., 0]), axis=-1)
    gt_bot = np.expand_dims((bbox[..., 2] - gt_y), axis=-1)
    gt_left = np.expand_dims((gt_x - bbox[..., 1]), axis=-1)
    gt_right = np.expand_dims((bbox[..., 3] - gt_x), axis=-1)
    gt_y = np.expand_dims(gt_y, axis=-1)
    gt_x = np.expand_dims(gt_x, axis=-1)
    class_ids = np.expand_dims(class_ids, axis=-1)
    gt = np.concatenate([gt_y, gt_x, bbox, class_ids], axis=-1)
    instance_num = np.shape(gt)[0]

    # Per-instance scale maps: each map is 1 on the row/column of the
    # gravity center and holds the stride-scaled extents on either side.
    # TODO: pshape without 56
    vertical_ones = np.tile([[1]], [cells, 1])
    # np.tile rather than tf.tile: the pipeline is NumPy end to end.
    horizontal_ones = np.tile([[1]], [1, cells])
    scalar_y = []
    scalar_x = []
    for i in range(instance_num):
        pad_l = floor_x[i]
        pad_r = cells - floor_x[i] - 1
        vertical_pads = np.pad(vertical_ones, [[0, 0], [pad_l, pad_r]],
                               "constant",
                               constant_values=(gt_left[i][0] // 8,
                                                gt_right[i][0] // 8))
        pad_t = floor_y[i]
        pad_b = cells - floor_y[i] - 1
        horizontal_pads = np.pad(horizontal_ones, [[pad_t, pad_b], [0, 0]],
                                 "constant",
                                 constant_values=(gt_top[i][0] // 8,
                                                  gt_bot[i][0] // 8))
        scalar_y.append(np.expand_dims(horizontal_pads, -1))
        scalar_x.append(np.expand_dims(vertical_pads, -1))
    scalar_y = np.concatenate(scalar_y, axis=-1)  # [y, x, num_g]
    scalar_x = np.concatenate(scalar_x, axis=-1)  # [y, x, num_g]

    # Bring the instance axis of every output to exactly max_instances.
    if instance_num <= max_instances:
        missing = max_instances - instance_num
        instance_pad = ((0, 0), (0, 0), (0, missing))
        gt = np.pad(gt, ((0, missing), (0, 0)), mode='constant')
        masks = np.pad(gt_mask, instance_pad, mode='constant')
        scalar_y = np.pad(scalar_y, instance_pad, mode='constant')
        scalar_x = np.pad(scalar_x, instance_pad, mode='constant')
    else:
        gt = gt[:max_instances, ...]
        masks = gt_mask[:, :, 0:max_instances]
        scalar_y = scalar_y[:, :, 0:max_instances]
        scalar_x = scalar_x[:, :, 0:max_instances]

    padding = [(0, 0), (0, 0), (0, 0)]
    stride_mask = resize_mask(masks, 0.125, padding, 0)
    # Builtin float: the np.float alias was removed in NumPy 1.24.
    masks = stride_mask.astype(np.uint8).astype(float)
    # TODO: test and visual for img, mask and gaussian kernel
    return image, gt, masks, scalar_y, scalar_x
def parse_fn(image_id,
             dataset,
             anchors_path,
             augmentation=None,
             dtype=np.float32,
             max_num_boxes_per_image=20,
             image_size=416):
    """Load one image and encode its boxes as flattened YOLO-style targets.

    Args:
        image_id: Identifier forwarded to ``dataset.load_image`` and
            ``dataset.load_mask``.
        dataset: Object providing ``load_image``, ``load_mask`` and
            ``num_classes``.
        anchors_path: Path handed to ``utils.get_anchors``.
        augmentation: Optional imgaug augmenter applied to image and masks.
        dtype: dtype of the returned arrays.
        max_num_boxes_per_image: Boxes are randomly subsampled down to this
            count and the box table is zero-padded up to it.
        image_size: Side length of the square resized image.

    Returns:
        ``(image, yolo_gt)``: the resized image scaled to [0, 1], and a 1-D
        array holding the flattened detector maps of the three scales
        (strides 32, 16, 8) followed by the flattened box tables.
    """
    image = dataset.load_image(image_id)
    image, window, scale, padding, crop = utils.resize_image(
        image, min_dim=0, min_scale=0, max_dim=image_size, mode='square')
    mask, class_ids = dataset.load_mask(image_id)
    mask = utils.resize_mask(mask, scale, padding, crop)

    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks. Some, such as Affine,
        # have settings that make them unsafe, so always test your
        # augmentation on masks.
        MASK_AUGMENTERS = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        # Store shapes before augmentation to compare.
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks.
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support bool.
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change.
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool (the np.bool alias was removed in NumPy 1.24).
        mask = mask.astype(bool)

    # Some masks might be empty if the instance got cropped out; drop them.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)
    if mask.shape[-1] > max_num_boxes_per_image:
        ids = np.random.choice(np.arange(mask.shape[-1]),
                               max_num_boxes_per_image,
                               replace=False)
        class_ids = class_ids[ids]
        bbox = bbox[ids, :]

    # Fixed-size tables; unused rows stay zero.
    gt_class_ids = np.zeros(max_num_boxes_per_image, class_ids.dtype)
    gt_bbox = np.zeros((max_num_boxes_per_image, bbox.shape[1]), bbox.dtype)
    if class_ids.shape[0] > 0:
        gt_class_ids[:class_ids.shape[0]] = class_ids
        gt_bbox[:bbox.shape[0], :] = bbox
    gt_class_ids = np.expand_dims(gt_class_ids, axis=-1).astype(dtype)
    gt_bbox = np.concatenate([gt_bbox, gt_class_ids], axis=-1)

    anchors = utils.get_anchors(anchors_path)
    anchors = np.array(anchors, dtype=np.float32)

    # Convert (y1, x1, y2, x2) to (x, y, w, h) divided by image_size.
    boxes_yx = (gt_bbox[:, 0:2] + gt_bbox[:, 2:4]) // 2
    boxes_hw = gt_bbox[:, 2:4] - gt_bbox[:, 0:2]
    gt_bbox[:, 0] = boxes_yx[..., 1] / image_size
    gt_bbox[:, 1] = boxes_yx[..., 0] / image_size
    gt_bbox[:, 2] = boxes_hw[..., 1] / image_size
    gt_bbox[:, 3] = boxes_hw[..., 0] / image_size

    # IoU between every box and every anchor, both centered at the origin.
    hw = np.expand_dims(boxes_hw, -2)
    anchors_broad = np.expand_dims(anchors, 0)
    anchor_maxes = anchors_broad / 2.
    anchor_mins = -anchor_maxes
    box_maxes = hw / 2.
    box_mins = -box_maxes
    intersect_mins = np.maximum(box_mins, anchor_mins)
    intersect_maxes = np.minimum(box_maxes, anchor_maxes)
    intersect_hw = np.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_hw[..., 0] * intersect_hw[..., 1]
    box_area = hw[..., 0] * hw[..., 1]
    anchor_area = anchors[..., 0] * anchors[..., 1]
    iou = intersect_area / (box_area + anchor_area - intersect_area)
    best_anchors = np.argmax(iou, axis=-1)

    # TODO: write a function to calculate the stride automatically.
    # (stride, anchor indices) for the large, medium and small object heads,
    # in that order.
    scales = ((32, [6, 7, 8]), (16, [3, 4, 5]), (8, [0, 1, 2]))
    detectors = []
    boxes = []
    for stride, anchors_mask in scales:
        grid = image_size // stride
        scale_detectors, scale_boxes = get_detector_heatmap_each_scale(
            gt_bbox,
            best_anchors_=best_anchors,
            anchors_mask=anchors_mask,
            grid_size=(grid, grid),
            num_classes=dataset.num_classes)
        detectors.append(scale_detectors)
        boxes.append(scale_boxes)

    yolo_true_data = np.concatenate(detectors, axis=0).reshape([-1])
    yolo_true_boxes = np.concatenate(boxes, axis=0).reshape([-1])
    yolo_gt = np.concatenate([yolo_true_data, yolo_true_boxes], axis=-1)
    return image.astype(dtype) / 255., yolo_gt.astype(dtype)
# Accumulate the per-class instance masks on the stride-level grid, upscale
# them by Config.STRIDE, and display the detections.
# NOTE(review): statement nesting was reconstructed from a collapsed source
# line; the resize/display steps are assumed to run once, after the loop.
final_masks = np.zeros([
    int(Config.IMAGE_MAX_DIM // Config.STRIDE),
    int(Config.IMAGE_MAX_DIM // Config.STRIDE), num_select
], np.float32)
for i in range(Config.NUM_CLASSES):
    exist_i = np.equal(select_class_id, i)  # boolean flag per selection
    exist_int = exist_i.astype(int)
    index = np.where(exist_int > 0)[0]  # positions of class-i selections
    num_i = np.sum(exist_int)
    masks = ccc(Config, num_select, index, select_bbox, exist_i,
                class_seg[..., i], num_i, pic_preg)
    final_masks = final_masks + masks

# TODO: resize masks
padding = [(0, 0), (0, 0), (0, 0)]
stride_mask = resize_mask(final_masks, Config.STRIDE, padding, 0)
stride_mask = cv2.medianBlur(stride_mask, 5)
# Builtin float: the np.float alias was removed in NumPy 1.24.
masks = stride_mask.astype(np.uint8).astype(float)
# `==`, not `is`: identity comparison with an int literal is unreliable.
if len(np.shape(masks)) == 2:
    masks = np.expand_dims(masks, -1)
class_names = {0: "bg", 1: 'person', 2: "car"}
# NOTE(review): centers and boxes are scaled by a literal 4 while the masks
# use Config.STRIDE -- confirm the two agree.
visualize.display_instances(image,
                            select_center * 4 + 2,
                            select_bbox * 4,
                            masks,
                            select_class_id + 1,
                            class_names,
                            select_scores,
                            show_mask=True)
def load_image_gt(dataset, config, image_id, augment=False,
                  use_mini_mask=False):
    """Fetch one image with its ground truth (masks, boxes, class IDs).

    augment: when true, the image and its masks are mirrored left/right
        with probability one half. No other augmentation is offered.
    use_mini_mask: when false, masks keep the height and width of the
        resized image. These can be big, for example 1024x1024x100 (for
        100 instances). When true, each mask is cut to the bounding box of
        its object and resized to MINI_MASK_SHAPE, which is much smaller
        (typically 224x224).

    Returns:
    image: [height, width, 3]
    image_meta: built by compose_image_meta from the image id, the shape of
        the image before resizing, the window and the active class IDs.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Raw inputs.
    raw_image = dataset.load_image(image_id)
    instance_masks, class_ids = dataset.load_mask(image_id)
    original_shape = raw_image.shape

    # Bring the image and its masks to the training resolution.
    image, window, scale, padding = utils.resize_image(
        raw_image,
        min_dim=config.TRAIN.IMAGE_MIN_DIM,
        max_dim=config.TRAIN.IMAGE_MAX_DIM,
        padding=config.TRAIN.IMAGE_PADDING)
    mask = utils.resize_mask(instance_masks, scale, padding)

    # Coin-flip horizontal mirroring.
    if augment and random.randint(0, 1):
        image = np.fliplr(image)
        mask = np.fliplr(mask)

    # Boxes are taken from the (possibly mirrored) masks. A box may be all
    # zeros when its mask was cropped out.
    bbox = utils.extract_bboxes(mask)

    # Datasets differ in the classes they cover, so flag the classes that
    # the source dataset of this image supports.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_name = dataset.image_info[image_id]["source"]
    active_class_ids[dataset.source_class_ids[source_name]] = 1

    # Shrink the masks to cut memory usage.
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MRCNN.MINI_MASK_SHAPE)

    image_meta = compose_image_meta(image_id, original_shape, window,
                                    active_class_ids)
    return image, image_meta, class_ids, bbox, mask
def load_image_gt(dataset, config, image_id, augment=False,
                  use_mini_mask=False):
    """Load an image with thing/stuff/influence ground truth and a label map.

    Args:
        dataset: Object providing ``image_info``, ``category_info``,
            ``annotation_dir``, ``load_image`` and ``load_mask``.
        config: Object providing ``IMAGE_MIN_DIM``, ``IMAGE_MAX_DIM``,
            ``IMAGE_PADDING`` and ``MINI_MASK_SHAPE``.
        image_id: Identifier of the image; ``str(image_id)`` keys
            ``dataset.image_info``.
        augment: If true, mirror the sample left/right with probability
            one half.
        use_mini_mask: If true, thing and stuff masks are shrunk with
            ``minimize_mask`` to ``config.MINI_MASK_SHAPE``.

    Returns:
        ``(image, image_meta, thing_class_ids, thing_bbox, thing_mask,
        stuff_class_ids, stuff_bbox, stuff_mask, semantic_label,
        segmentation, image_info, influence_class_ids, influence_bbox,
        influence_mask)``. ``semantic_label`` is a 500x500 class-id map
        (nearest-neighbor resized, zero padded); ``segmentation`` is the
        annotation PNG as read from disk (mirrored when the sample is).
    """
    image_info = dataset.image_info[str(image_id)]
    image_name = image_info['image_name']

    # Load and resize the image.
    image = dataset.load_image(image_id)
    shape = image.shape
    image, window, scale, padding = resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    image_meta = compose_image_meta(image_id, shape, window)

    # Load the three mask groups and bring them to the image resolution.
    (thing_mask, thing_class_ids, stuff_mask, stuff_class_ids,
     influence_mask, influence_class_ids) = dataset.load_mask(image_id)
    thing_mask = resize_mask(thing_mask, scale, padding)
    stuff_mask = resize_mask(stuff_mask, scale, padding)
    influence_mask = resize_mask(influence_mask, scale, padding)
    influence_mask = resize_map(influence_mask, 1 / 8)

    # Random horizontal flip. It must happen before the boxes and mini-masks
    # are derived, and must cover every mask; otherwise the boxes and the
    # stuff/influence masks no longer line up with the flipped image.
    flip = bool(augment) and bool(random.randint(0, 1))
    if flip:
        image = np.fliplr(image)
        thing_mask = np.fliplr(thing_mask)
        stuff_mask = np.fliplr(stuff_mask)
        influence_mask = np.fliplr(influence_mask)

    thing_bbox = extract_bboxes(thing_mask)
    stuff_bbox = extract_bboxes(stuff_mask)
    influence_bbox = extract_bboxes(influence_mask)

    # Resize masks to smaller size to reduce memory usage.
    if use_mini_mask:
        thing_mask = minimize_mask(thing_bbox, thing_mask,
                                   config.MINI_MASK_SHAPE)
        stuff_mask = minimize_mask(stuff_bbox, stuff_mask,
                                   config.MINI_MASK_SHAPE)

    # Build the semantic label map: every instance id in the annotation PNG
    # is replaced by the class id of its category.
    segmentation = skimage.io.imread(
        os.path.join(dataset.annotation_dir,
                     image_name.replace("jpg", "png")))
    semantic_label = np.zeros_like(segmentation)
    segmentation_instance_id_map = rgb2id(segmentation)
    for instance_id, instance in image_info['instances'].items():
        instance_mask = segmentation_instance_id_map == int(instance_id)
        semantic_label[instance_mask] = dataset.category_info[
            str(instance['category_id'])]['class_id']
    semantic_label = semantic_label[:, :, 0]

    # Fit the label map into a 500x500 square, keeping the aspect ratio.
    label_size = 500
    semantic_label_h = semantic_label.shape[0]
    semantic_label_w = semantic_label.shape[1]
    semantic_label_scale = min(label_size / semantic_label_h,
                               label_size / semantic_label_w)
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this call
    # needs an older SciPy or a port to another nearest-neighbor resize.
    semantic_label = scipy.misc.imresize(
        semantic_label,
        (round(semantic_label_h * semantic_label_scale),
         round(semantic_label_w * semantic_label_scale)),
        interp="nearest")
    h, w = semantic_label.shape[:2]
    top_pad = (label_size - h) // 2
    bottom_pad = label_size - h - top_pad
    left_pad = (label_size - w) // 2
    right_pad = label_size - w - left_pad
    label_padding = [(top_pad, bottom_pad), (left_pad, right_pad)]
    semantic_label = np.pad(semantic_label, label_padding,
                            mode='constant', constant_values=0)

    if flip:
        semantic_label = np.fliplr(semantic_label)
        segmentation = np.fliplr(segmentation)

    return image, image_meta, thing_class_ids, thing_bbox, thing_mask, stuff_class_ids, stuff_bbox, stuff_mask, \
        semantic_label, segmentation, image_info, influence_class_ids, influence_bbox, influence_mask