def __call__(self, in_data):
    """Augment one training example and encode it into SSD targets.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_lab)``: the augmented image with
        ``self.mean`` subtracted, and the two values produced by
        ``self.coder.encode(bbox, label)``.
    """
    img, bbox, label = in_data
    target_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the 0/1 draw comes up 1
    if np.random.randint(2):
        img, expand_info = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_info["y_offset"],
            x_offset=expand_info["x_offset"])

    # 3. Random cropping; labels are re-indexed with the indices that
    #    crop_bbox reports back
    img, crop_info = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=crop_info["y_slice"],
        x_slice=crop_info["x_slice"],
        allow_outside_center=False,
        return_param=True)
    label = label[kept["index"]]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = img.shape
    img = resize_with_random_interpolation(img, target_size)
    bbox = transforms.resize_bbox(bbox, (crop_h, crop_w), target_size)

    # 5. Transformation for SSD network input
    img -= self.mean
    mb_loc, mb_lab = self.coder.encode(bbox, label)
    return img, mb_loc, mb_lab
def test_translate_bbox(self):
    """Check that ``translate_bbox`` shifts each column by its axis offset."""
    y_shift, x_shift = 5, 3
    bbox = np.random.uniform(low=0., high=32., size=(10, 4))

    out = translate_bbox(bbox, y_offset=y_shift, x_offset=x_shift)

    # Columns 0 and 2 receive the y offset, columns 1 and 3 the x offset.
    expected = np.empty_like(bbox)
    expected[:, 0::2] = bbox[:, 0::2] + y_shift
    expected[:, 1::2] = bbox[:, 1::2] + x_shift

    np.testing.assert_equal(out, expected)
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` example and encode it for SSD.

    The steps run in this order: color distortion, optional random
    expansion, constrained random cropping, resizing to
    ``(self.size, self.size)`` and random horizontal flipping.  Afterwards
    ``self.mean`` is subtracted from the image and the boxes and labels
    are handed to ``self.coder.encode``.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``.
    """
    img, bbox, label = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion (only when the 0/1 draw is 1)
    if np.random.randint(2):
        img, expansion = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expansion['y_offset'],
            x_offset=expansion['x_offset'])

    # 3. Random cropping
    img, crop = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, survivors = transforms.crop_bbox(
        bbox,
        y_slice=crop['y_slice'],
        x_slice=crop['x_slice'],
        allow_outside_center=False,
        return_param=True)
    # Keep labels aligned with the boxes that crop_bbox retained.
    label = label[survivors['index']]

    # 4. Resizing with random interpolation
    _, cropped_h, cropped_w = img.shape
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (cropped_h, cropped_w), out_size)

    # 5. Random horizontal flipping
    img, flip = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=flip['x_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` example and encode it for SSD.

    ``bbox`` is first converted to a float32 array.  When it is empty, a
    ``RuntimeWarning`` is issued and every augmentation except the resize
    is skipped.  Otherwise the steps run in this order: color distortion,
    optional random expansion, constrained random cropping, resizing to
    ``(self.size, self.size)`` and random horizontal flipping.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` where the last two values come
        from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    bbox = np.array(bbox).astype(np.float32)

    # Nothing to augment against: resize only, then encode as-is.
    if len(bbox) == 0:
        warnings.warn("No bounding box detected", RuntimeWarning)
        img = resize_with_random_interpolation(
            img, (self.size, self.size))
        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label

    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion (only when the 0/1 draw is 1)
    if np.random.randint(2):
        img, expand_info = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_info['y_offset'],
            x_offset=expand_info['x_offset'])

    # 3. Random cropping
    img, crop_info = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=crop_info['y_slice'],
        x_slice=crop_info['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = img.shape
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (crop_h, crop_w), out_size)

    # 5. Random horizontal flipping
    img, flip_info = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, out_size, x_flip=flip_info['x_flip'])

    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Run the five-step SSD augmentation and encode the result.

    Steps:
        1. Color augmentation
        2. Random expansion
        3. Random cropping
        4. Resizing with random interpolation
        5. Random horizontal flipping

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``; ``img`` has ``self.mean``
        subtracted and the other two come from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    side = self.size

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, skipped when the 0/1 draw is 0
    do_expand = np.random.randint(2)
    if do_expand:
        img, placed = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=placed['y_offset'], x_offset=placed['x_offset'])

    # 3. Random cropping
    img, window = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, retained = transforms.crop_bbox(
        bbox,
        y_slice=window['y_slice'],
        x_slice=window['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[retained['index']]

    # 4. Resizing with random interpolation
    _, before_h, before_w = img.shape
    img = resize_with_random_interpolation(img, (side, side))
    bbox = transforms.resize_bbox(bbox, (before_h, before_w), (side, side))

    # 5. Random horizontal flipping
    img, flipped = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (side, side), x_flip=flipped['x_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment an ``(img, bbox, label)`` example and encode SSD targets.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``: the augmented, mean-subtracted
        image and the output of ``self.coder.encode(bbox, label)``.
    """
    img, bbox, label = in_data
    net_size = (self.size, self.size)

    # Photometric distortion.
    img = random_distort(img)

    # Place the image on a larger canvas when the 0/1 draw is 1, and
    # shift the boxes by the reported offsets.
    if np.random.randint(2):
        img, canvas = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=canvas['y_offset'], x_offset=canvas['x_offset'])

    # Crop, then bring boxes and labels in line with the cropped region.
    img, region = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, selection = transforms.crop_bbox(
        bbox,
        y_slice=region['y_slice'],
        x_slice=region['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[selection['index']]

    # Resize to the network input size.
    _, h_in, w_in = img.shape
    img = resize_with_random_interpolation(img, net_size)
    bbox = transforms.resize_bbox(bbox, (h_in, w_in), net_size)

    # Random horizontal flip of image and boxes.
    img, flip_state = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, net_size, x_flip=flip_state['x_flip'])

    # Mean subtraction and target encoding.
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Apply the SSD training transform to one example.

    Runs color distortion, optional random expansion, constrained random
    cropping and a resize to ``(self.size, self.size)``, then subtracts
    ``self.mean`` and encodes boxes and labels with ``self.coder``.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_lab)``.
    """
    img, bbox, label = in_data
    size_hw = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion (only when the 0/1 draw is 1)
    if np.random.randint(2):
        img, offsets = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=offsets["y_offset"],
            x_offset=offsets["x_offset"])

    # 3. Random cropping
    img, slices = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, remaining = transforms.crop_bbox(
        bbox,
        y_slice=slices["y_slice"],
        x_slice=slices["x_slice"],
        allow_outside_center=False,
        return_param=True)
    # Drop the labels of boxes that crop_bbox did not keep.
    label = label[remaining["index"]]

    # 4. Resizing with random interpolation
    _, old_h, old_w = img.shape
    img = resize_with_random_interpolation(img, size_hw)
    bbox = transforms.resize_bbox(bbox, (old_h, old_w), size_hw)

    # 5. Transformation for SSD network input
    img -= self.mean
    mb_loc, mb_lab = self.coder.encode(bbox, label)
    return img, mb_loc, mb_lab
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
       tuple of lists:
       This method returns a tuple of three lists,
       :obj:`(bboxes, labels, scores)`.

       * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
           where :math:`R` is the number of bounding boxes in an image. \
           Each bounding box is organized by \
           :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
           in the second axis.
       * **labels** : A list of integer arrays of shape :math:`(R,)`. \
           Each value indicates the class of the bounding box. \
           Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
           number of the foreground classes.
       * **scores** : A list of float arrays of shape :math:`(R,)`. \
           Each value indicates how confident the prediction is.

    """
    in_size = (self.insize, self.insize)

    # Fit every image into the square network input and remember both the
    # original size and the parameters resize_contain reports.
    batch = []
    resize_params = []
    for img in imgs:
        _, H, W = img.shape
        img, param = transforms.resize_contain(
            img / 255, in_size, fill=0.5, return_param=True)
        batch.append(self.xp.array(img))
        param['size'] = (H, W)
        resize_params.append(param)

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        locs, objs, confs = self(self.xp.stack(batch))
    locs, objs, confs = locs.array, objs.array, confs.array

    # Offset that moves the centre of the network input to the origin.
    to_origin = -self.insize / 2

    bboxes, labels, scores = [], [], []
    for loc, obj, conf, param in zip(locs, objs, confs, resize_params):
        bbox, label, score = self._decode(loc, obj, conf)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        score = cuda.to_cpu(score)

        # Map boxes back to the original image: centre on the origin,
        # rescale, then shift to the centre of the original image.
        orig_h, orig_w = param['size'][0], param['size'][1]
        bbox = transforms.translate_bbox(bbox, to_origin, to_origin)
        bbox = transforms.resize_bbox(
            bbox, param['scaled_size'], param['size'])
        bbox = transforms.translate_bbox(bbox, orig_h / 2, orig_w / 2)

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def __call__(self, in_data):
    """Augment one example and build padded YOLOv2 training targets.

    Args:
        in_data: Tuple ``(img, bbox, label)``.  According to the original
            notes: ``img`` has shape (3, H, W) with values in [0, 255];
            ``bbox`` has shape (N, 4) ordered
            (y_min, x_min, y_max, x_max); ``label`` holds the class of
            each bounding box.

    Returns:
        Tuple ``(img, out_bbox, out_label, gmap, num)``:

        * ``img``: the augmented image, clipped to [0, 1].  Padding added
          by ``expand`` is filled with ``self.value``.
        * ``out_bbox``: boxes divided by the network input size,
          zero-padded and truncated to ``self.max_target`` rows.
        * ``out_label``: labels padded and truncated the same way.
        * ``gmap``: the first ``self.max_target`` entries of the value
          returned by ``create_map_anchor_gt``.
        * ``num``: one-element int array holding
          ``min(number of boxes, self.max_target)``.
    """
    # Augmentation steps performed below:
    # 1. Color augmentation
    # 2. Random rescaling with aspect-ratio jitter
    # 3. Expansion onto a canvas (and cropping back to the network size)
    # 4. Random horizontal flipping

    # Switch to the next entry of self.dim whenever the call counter is a
    # non-zero multiple of both 10 and the batch size.
    if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
        self.i += 1
        i = self.i % len(self.dim)
        self.output_shape = (self.dim[i], self.dim[i])
    self.count += 1

    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img, brightness_delta=32,
                         contrast_low=0.5, contrast_high=1.5,
                         saturation_low=0.5, saturation_high=1.5,
                         hue_delta=25)

    # Normalize. range is [0, 1]
    img /= 255.0

    _, H, W = img.shape
    scale = np.random.uniform(0.25, 2)
    # The result is never read below; the draw still advances the global
    # NumPy random state.
    random_expand = np.random.uniform(0.8, 1.2, 2)
    net_h, net_w = self.output_shape
    out_h = net_h * scale  # random_expand[0]
    out_w = net_w * scale  # random_expand[1]
    # Derive the shorter side from the longer one using the source aspect
    # ratio, jittered by a factor drawn from [0.8, 1.2).
    if H > W:
        out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
    elif H < W:
        out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)

    out_h = int(out_h)
    out_w = int(out_w)

    img = resize_with_random_interpolation(img, (out_h, out_w))
    bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

    if out_h < net_h and out_w < net_w:
        # The resized image is smaller than the network input on both
        # axes: expand it to exactly the network size.
        img, param = expand(img, out_h=net_h, out_w=net_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
    else:
        # At least one side reaches the network size: pad each axis up to
        # the network size (or by 5% if it is already large enough), then
        # crop back down to the network size.
        # NOTE(review): the crop stage is placed inside this branch
        # because the other branch already yields a canvas of exactly
        # (net_h, net_w) -- confirm against the upstream layout.
        out_h = net_h if net_h > out_h else int(out_h * 1.05)
        out_w = net_w if net_w > out_w else int(out_w * 1.05)
        img, param = expand(img, out_h=out_h, out_w=out_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        img, param = crop_with_bbox_constraints(
            img, bbox, return_param=True,
            crop_height=net_h, crop_width=net_w)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        # Keep labels aligned with the boxes crop_bbox retained.
        label = label[param['index']]

    # 5. Random horizontal flipping # OK
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, self.output_shape, x_flip=params['x_flip'])

    # Preparation for Yolov2 network. scale=[0, 1]
    bbox[:, ::2] /= self.output_shape[0]  # y
    bbox[:, 1::2] /= self.output_shape[1]  # x

    # Pad boxes and labels with zeros up to max_target rows, then cut off
    # anything beyond max_target.
    num_bbox = len(bbox)
    len_max = max(num_bbox, self.max_target)
    out_bbox = np.zeros((len_max, 4), dtype='f')
    out_bbox[:num_bbox] = bbox[:num_bbox]
    out_label = np.zeros((len_max), dtype='i')
    out_label[:num_bbox] = label
    out_bbox = out_bbox[:self.max_target]
    out_label = out_label[:self.max_target]
    num_array = min(num_bbox, self.max_target)

    gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape,
                                self.downscale, self.n_boxes, len_max)
    gmap = gmap[:self.max_target]

    img = np.clip(img, 0, 1)
    return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` example for Faster R-CNN training.

    The image is color-distorted, rotated by a random multiple of 90
    degrees, optionally expanded, randomly cropped, prepared with
    ``self.faster_rcnn.prepare`` and finally flipped at random along both
    axes.  If cropping removes every bounding box, the untouched input
    triple is used instead of the augmented one before the prepare step.

    Args:
        in_data: Tuple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the ratio
        of the prepared image height to the height before preparation.
    """
    img, bbox, label = in_data
    _, src_h, src_w = img.shape

    # Random brightness and contrast.
    img = random_distort(img)

    # Rotate the image; 'k' in the returned parameters is the number of
    # 90-degree rotations, which is then applied to the boxes as well.
    img, rotation = transforms.random_rotate(img, return_param=True)
    bbox = rotate_bbox(bbox, (src_h, src_w), rotation['k'])

    # Random expansion: place the image on a larger canvas (ratio up to
    # 2) filled with the per-channel mean of the image.
    if np.random.randint(2):
        channel_mean = img.mean(axis=1).mean(axis=1).reshape(-1, 1, 1)
        img, expansion = transforms.random_expand(
            img, max_ratio=2, fill=channel_mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expansion['y_offset'],
            x_offset=expansion['x_offset'])

    # Random crop with bounding-box constraints.
    img, crop = random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.5, max_aspect_ratio=1.5, return_param=True)
    # Translate the boxes into the cropped area; boxes whose centers lie
    # outside of it are removed.
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=crop['y_slice'],
        x_slice=crop['x_slice'],
        allow_outside_center=False,
        return_param=True)
    # Keep only the labels of the surviving boxes.
    label = label[kept['index']]

    # If every box was removed, fall back to the original example.
    no_boxes_left = bbox.shape[0] == 0
    if no_boxes_left:
        img, bbox, label = in_data

    # Size of the image that goes into the prepare step.
    _, t_H, t_W = img.shape

    # Prepare the image for the network and rescale the boxes to match.
    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    bbox = transforms.resize_bbox(bbox, (t_H, t_W), (o_H, o_W))

    # Random horizontal and vertical flips, mirrored onto the boxes.
    img, flips = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W), x_flip=flips['x_flip'], y_flip=flips['y_flip'])

    scale = o_H / t_H
    return img, bbox, label, scale
def __call__(self, in_data):
    """Randomly augment an ``(image, label, bbox, mask)`` example.

    Which steps run is controlled by ``self.options``:

    * ``'x_flip'``: flip horizontally with probability 1/2.
    * ``'rotate90'``: with probability 1/2 rotate by one or three quarter
      turns (height and width must match).
    * ``'zoom'`` (> 1): zoom in or out by a log-uniform factor between
      ``1 / zoom`` and ``zoom``; the output keeps the input size.
    * ``'scale'`` (> 1.0): resize by a log-uniform factor between
      ``1 / scale`` and ``scale``; requires ``'crop'``.
    * ``'crop'``: random crop that makes height and width multiples of
      ``32 * self.downscale``.
    * ``'window_width'`` (> 1.0): stretch intensities around 128.
    * ``'contrast'``: add one random integer from
      ``[-contrast, contrast]`` to every pixel and clip to ``[0, 255]``.

    All arithmetic in this method is done out of place.  ``image`` and
    ``bbox`` can still be the caller's arrays (or views of them) when the
    arithmetic steps are reached, so in-place updates would silently
    corrupt a dataset that keeps its samples in memory.

    Args:
        in_data: Tuple ``(image, label, bbox, mask)``.

    Returns:
        Tuple ``(image, label, bbox, mask)``; ``label`` is passed through
        unchanged.

    Raises:
        AssertionError: If ``'rotate90'`` is set for a non-square image,
            if ``'zoom'`` is combined with ``'scale'`` > 1.0, or if
            ``'scale'`` is set without ``'crop'``.
    """
    image, label, bbox, mask = in_data
    _, H, W = image.shape
    cell_size = 32 * self.downscale

    # Horizontal flip
    if self.options['x_flip']:
        if np.random.randint(2) == 0:
            image = transforms.flip(image, x_flip=True)
            bbox = transforms.flip_bbox(bbox, (H, W), x_flip=True)
            mask = transforms.flip(mask, x_flip=True)

    # Random rotation (90 or 270 degrees)
    if self.options['rotate90']:
        assert H == W, 'Height and width must match when `rotate90` is set.'
        if np.random.randint(2) == 0:  # Rotate?
            # Counter-clockwise (one quarter turn) or three quarter turns.
            turns = 1 if np.random.randint(2) == 0 else 3
            bbox = rotate_bbox(bbox, turns, (H, W))
            image = np.rot90(image, turns, axes=(1, 2))
            mask = np.rot90(mask, turns, axes=(1, 2))
        _, H, W = image.shape

    # Zoom in / zoom out
    if self.options['zoom'] > 1:
        assert self.options[
            'scale'] <= 1.0, "`scale` shouldn't be set if `zoom` is set."
        max_log_zoom = np.log(self.options['zoom'])
        log_zoom = np.random.random() * 2 * max_log_zoom - max_log_zoom
        # A plain Python float keeps the dtype of `bbox` when multiplying.
        zoom = float(np.exp(log_zoom))

        if zoom > 1:
            # Zoom in: crop a random window and resize it back to (H, W).
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(H - y_size + 1)
            x_offset = np.random.randint(W - x_size + 1)
            y_slice = slice(y_offset, y_offset + y_size)
            x_slice = slice(x_offset, x_offset + x_size)

            bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
            bbox = bbox * zoom

            image = transforms.resize(image[:, y_slice, x_slice], (H, W))
            mask = transforms.resize(mask[:, y_slice, x_slice], (H, W),
                                     interpolation=Image.NEAREST)
        elif zoom < 1:
            # Zoom out: paste onto a larger zero canvas at a random
            # position and resize that canvas back to (H, W).
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(y_size - H + 1)
            x_offset = np.random.randint(x_size - W + 1)

            bbox = transforms.translate_bbox(bbox, y_offset, x_offset)

            new_image = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_image[:, y_offset:y_offset + H,
                      x_offset:x_offset + W] = image
            new_mask = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_mask[:, y_offset:y_offset + H,
                     x_offset:x_offset + W] = mask

            bbox = bbox * zoom

            image = transforms.resize(new_image, (H, W))
            mask = transforms.resize(new_mask, (H, W),
                                     interpolation=Image.NEAREST)

    # Random scale
    if self.options['scale'] > 1.0:
        assert self.options[
            'crop'], '`crop` must be set if `scale` is set.'
        max_log_scale = np.log(self.options['scale'])
        log_scale = np.random.random() * 2 * max_log_scale - max_log_scale
        scale = float(np.exp(log_scale))

        image = transforms.resize(image, (int(H * scale), int(W * scale)))
        mask = transforms.resize(mask, (int(H * scale), int(W * scale)),
                                 interpolation=Image.NEAREST)
        _, H, W = image.shape
        # Out of place: when no earlier step ran, `bbox` is still the
        # caller's array.
        bbox = bbox * scale

    # Random crop
    if self.options['crop']:
        # The margin is in [1, cell_size], so the remaining size is a
        # multiple of cell_size.
        y_margin = (H - 1) % cell_size + 1
        x_margin = (W - 1) % cell_size + 1
        y_offset = np.random.randint(y_margin)
        x_offset = np.random.randint(x_margin)
        y_size = H - y_margin
        x_size = W - x_margin
        y_slice = slice(y_offset, y_offset + y_size)
        x_slice = slice(x_offset, x_offset + x_size)

        image = image[:, y_slice, x_slice]
        bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
        mask = mask[:, y_slice, x_slice]

    # Change window width
    if self.options['window_width'] > 1.0:
        image = (image - 128) * self.options['window_width'] + 128

    # Change contrast
    if self.options['contrast']:
        contrast = self.options['contrast']
        # Out of place: `image` may be the caller's array or a view of it
        # (basic slicing and np.rot90 both return views).
        image = image + (np.random.randint(contrast * 2 + 1) - contrast)
        # NOTE(review): clipping is applied on this path only; the
        # window-width output is not clipped on its own -- confirm that
        # this is intended.
        image = np.clip(image, 0, 255)

    return image, label, bbox, mask
def __call__(self, in_data):
    """Augment an ``(img, bbox, label, mask)`` example and encode it for SSD.

    The steps run in this order: color distortion, optional random
    expansion, constrained random cropping, resizing to
    ``(self.size, self.size)`` and random horizontal flipping.  Every
    geometric step is mirrored onto ``mask`` when a mask is given.
    Afterwards ``self.mean`` is subtracted from the image and the boxes
    and labels are passed to ``self.coder.encode``.

    ``mask`` may be ``None``.  In that case all mask steps are skipped
    and a ``(self.size, self.size)`` int32 array filled with -1 is
    returned in its place.

    Args:
        in_data: Tuple ``(img, bbox, label, mask)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label, mask)``; ``mask`` is always an
        int32 array.

    Raises:
        RuntimeError: If the mask is empty after cropping.
    """
    img, bbox, label, mask = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion (only when the 0/1 draw is 1)
    if np.random.randint(2):
        img, param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        if mask is not None:
            # The mask helper also receives the size of the new canvas.
            _, new_height, new_width = img.shape
            param['new_height'] = new_height
            param['new_width'] = new_width
            mask = self._random_expand_mask(mask, param)

    # 3. Random cropping
    img, param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Guarded like every other mask step, because a missing mask is a
    # supported input (see the placeholder built at the end).
    if mask is not None:
        mask = self._fixed_crop_mask(
            mask, param['y_slice'], param['x_slice'])
    bbox, param = transforms.crop_bbox(
        bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    img = resize_with_random_interpolation(img, out_size)
    if mask is not None:
        if mask.size == 0:
            raise RuntimeError('mask is empty after cropping')
        mask = self._resize_with_nearest(mask, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=params['x_flip'])
    if mask is not None:
        mask = self._random_flip_mask(
            mask, x_flip=params['x_flip'], y_flip=params['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    if mask is None:
        # Placeholder for examples that come without a mask.
        mask = np.full(out_size, -1, dtype=np.int32)
    elif mask.dtype != np.int32:
        mask = mask.astype(np.int32)

    return img, mb_loc, mb_label, mask