def __call__(self, in_data):
    """Augment one detection sample and encode it for the SSD network.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width; ``label`` must be
            indexable by the index array returned from ``crop_bbox``.

    Returns:
        A ``(img, mb_loc, mb_lab)`` tuple: the resized, mean-subtracted
        image and the two outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param["y_offset"],
            x_offset=expand_param["x_offset"])

    # 3. Random cropping; boxes are cropped with the same slices and the
    #    labels are filtered down to the boxes that were kept
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param["y_slice"],
        x_slice=crop_param["x_slice"],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param["index"]]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Transformation for SSD network input
    img -= self.mean
    mb_loc, mb_lab = self.coder.encode(bbox, label)

    return img, mb_loc, mb_lab
def __call__(self, in_data):
    """Augment one sample and build per-object target dictionaries.

    Steps: optional random crop, resize to ``self.size``, optional random
    horizontal flip, then normalisation with ``self.mean`` / ``self.std``.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width; ``bbox`` is a 2-D
            array read column-wise as (ymin, xmin, ymax, xmax).

    Returns:
        ``(img, t)`` where ``t`` is a list with one dict per box holding
        the keys ``label``, ``x``, ``w``, ``y``, ``h`` and
        ``one_hot_label``. Coordinates are divided by the image size.
    """
    img, bbox, label = in_data

    # Random cropping (only when enabled, and then for about half the calls)
    if self.random_crop and np.random.rand() > 0.5:
        cropped_img, crop_param = random_crop_with_bbox_constraints(
            img, bbox,
            min_scale=min(self.crop_rate),
            max_scale=max(self.crop_rate),
            return_param=True)
        cropped_bbox, kept_param = transforms.crop_bbox(
            bbox,
            y_slice=crop_param['y_slice'],
            x_slice=crop_param['x_slice'],
            allow_outside_center=False,
            return_param=True)
        surviving = label[kept_param['index']]
        # Accept the crop only if at least one object is still inside it.
        if len(surviving) != 0:
            label = surviving
            img, bbox = cropped_img, cropped_bbox

    # Resizing
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = transforms.resize(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # Random horizontal flipping
    if self.flip:
        img, flip_param = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, out_size, x_flip=flip_param['x_flip'])

    # Normalise in place
    img -= self.mean
    img /= self.std

    _, height, width = img.shape
    ymin, xmin, ymax, xmax = (bbox[:, col] for col in range(4))
    one_hot_label = np.eye(self.n_class)[label]

    box_w = xmax - xmin
    box_h = ymax - ymin
    # Centre = corner + floor(half extent), then scaled by the image size.
    xs = (xmin + box_w // 2) / width
    ws = box_w / width
    ys = (ymin + box_h // 2) / height
    hs = box_h / height

    t = []
    for cls, x, w, y, h, hot in zip(label, xs, ws, ys, hs, one_hot_label):
        t.append({
            'label': cls,
            'x': x,
            'w': w,
            'y': y,
            'h': h,
            'one_hot_label': hot,
        })
    return img, t
def test_random_crop_with_bbox_constraints(self):
    """Check size, aspect-ratio and IoU guarantees of the constrained crop.

    When no constraint was selected the image must come back unchanged;
    otherwise the output must equal the reported slices of the input and
    respect the requested scale, aspect-ratio and IoU limits.
    """
    img = np.random.randint(0, 256, size=(3, 480, 640)).astype(np.float32)
    bbox = generate_random_bbox(10, img.shape[1:], 0.1, 0.9)

    out, param = random_crop_with_bbox_constraints(
        img, bbox,
        min_scale=0.3, max_scale=1,
        max_aspect_ratio=2,
        return_param=True)

    if param['constraint'] is None:
        np.testing.assert_equal(out, img)
        return

    np.testing.assert_equal(
        out, img[:, param['y_slice'], param['x_slice']])

    # Pad each spatial side by 1 so that integer rounding of the crop size
    # cannot push the area just below the theoretical minimum (the same
    # tolerance the aspect-ratio checks below already use).
    self.assertGreaterEqual(
        out.shape[0] * (out.shape[1] + 1) * (out.shape[2] + 1),
        img.size * 0.3 * 0.3)
    self.assertLessEqual(out.size, img.size * 1 * 1)

    # to ignore rounding error, add 1
    self.assertLessEqual(
        out.shape[1] / (out.shape[2] + 1),
        img.shape[1] / img.shape[2] * 2)
    self.assertLessEqual(
        out.shape[2] / (out.shape[1] + 1),
        img.shape[2] / img.shape[1] * 2)

    # Crop window expressed as a single (y_min, x_min, y_max, x_max) box.
    bb = np.array((
        param['y_slice'].start, param['x_slice'].start,
        param['y_slice'].stop, param['x_slice'].stop))
    iou = bbox_iou(bb[np.newaxis], bbox)
    min_iou, max_iou = param['constraint']
    if min_iou:
        self.assertGreaterEqual(iou.min(), min_iou)
    if max_iou:
        self.assertLessEqual(iou.max(), max_iou)
def __call__(self, in_data):
    """Augment one detection sample (six steps) and encode it for SSD.

    Steps: color distortion, random expansion, random crop, resize with
    random interpolation, random horizontal flip, random vertical flip.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width.

    Returns:
        ``(img, mb_loc, mb_label)``: the mean-subtracted image and the
        two outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping; keep only the labels of the surviving boxes
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, h_flip = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=h_flip['x_flip'])

    # 6. Random vertical flipping
    img, v_flip = transforms.random_flip(
        img, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, y_flip=v_flip['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one detection sample in five steps and encode it for SSD.

    Steps: color augmentation, random expansion, random cropping,
    resizing with random interpolation, random horizontal flipping.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width.

    Returns:
        ``(img, mb_loc, mb_label)``: the mean-subtracted image and the
        two outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion: the image is placed on a larger canvas filled
    #    with self.mean and the boxes are shifted by the same offsets
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Crop the boxes with the same slices and keep only the labels of the
    # boxes that survive
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param['index']]

    # 4. Resizing with random interpolation (image and boxes together)
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping (image and boxes together)
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=flip_param['x_flip'])

    # Preparation for the SSD network input
    img -= self.mean
    # Encode boxes and labels into the network targets mb_loc / mb_label
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one detection sample and encode it for the SSD network.

    A sample without any bounding box skips augmentation: a warning is
    issued, the image is only resized, and the targets are encoded as-is.
    This variant returns the image without mean subtraction.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``bbox`` is converted to
            a float32 array; ``img`` is a 3-D array whose last two axes
            are height and width.

    Returns:
        ``(img, mb_loc, mb_label)``: the resized image and the two
        outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data
    bbox = np.array(bbox).astype(np.float32)

    # Nothing to augment against: resize only and encode the empty targets.
    if len(bbox) == 0:
        warnings.warn("No bounding box detected", RuntimeWarning)
        img = resize_with_random_interpolation(img, (self.size, self.size))
        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping; keep only the labels of the surviving boxes
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=flip_param['x_flip'])

    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one detection sample in five steps and encode it for SSD.

    Steps: color augmentation, random expansion, random cropping,
    resizing with random interpolation, random horizontal flipping.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width.

    Returns:
        ``(img, mb_loc, mb_label)``: the mean-subtracted image and the
        two outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping; keep only the labels of the surviving boxes
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=flip_param['x_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one detection sample and encode it for the SSD network.

    Args:
        in_data: A ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are height and width.

    Returns:
        ``(img, mb_loc, mb_label)``: the mean-subtracted image and the
        two outputs of ``self.coder.encode``.
    """
    img, bbox, label = in_data

    # Color augmentation
    img = random_distort(img)

    # Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # Random cropping; keep only the labels of the surviving boxes
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept_param['index']]

    # Resize image and boxes to the square network input size
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=flip_param['x_flip'])

    # Mean subtraction and target encoding
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def test_random_crop_with_bbox_constraints(self):
    """Check size, aspect-ratio and IoU guarantees of the constrained crop.

    When no constraint was selected the image must come back unchanged;
    otherwise the output must equal the reported slices of the input and
    respect the requested scale, aspect-ratio and IoU limits.
    """
    img = np.random.randint(0, 256, size=(3, 480, 640)).astype(np.float32)
    bbox = generate_random_bbox(10, img.shape[1:], 0.1, 0.9)

    out, param = random_crop_with_bbox_constraints(
        img, bbox,
        min_scale=0.3, max_scale=1,
        max_aspect_ratio=2,
        return_param=True)

    # No constraint selected: the image is expected to be returned as-is.
    if param['constraint'] is None:
        np.testing.assert_equal(out, img)
        return

    y_slice = param['y_slice']
    x_slice = param['x_slice']
    np.testing.assert_equal(out, img[:, y_slice, x_slice])

    channels, out_h, out_w = out.shape
    _, img_h, img_w = img.shape

    # to ignore rounding error, add 1
    self.assertGreaterEqual(
        channels * (out_h + 1) * (out_w + 1), img.size * 0.3 * 0.3)
    self.assertLessEqual(out.size, img.size * 1 * 1)
    self.assertLessEqual(out_h / (out_w + 1), img_h / img_w * 2)
    self.assertLessEqual(out_w / (out_h + 1), img_w / img_h * 2)

    # Crop window expressed as a single (y_min, x_min, y_max, x_max) box.
    crop_box = np.array(
        (y_slice.start, x_slice.start, y_slice.stop, x_slice.stop))
    iou = bbox_iou(crop_box[np.newaxis], bbox)

    min_iou, max_iou = param['constraint']
    if min_iou:
        self.assertGreaterEqual(iou.min(), min_iou)
    if max_iou:
        self.assertLessEqual(iou.max(), max_iou)
def __getitem__(self, index):
    """Load, augment, pad and resize one sample.

    The label file is read as rows of five values, used here as
    ``(class, cx, cy, w, h)`` with coordinates relative to the image size.
    The image is colour-distorted, randomly cropped and randomly flipped,
    then padded to a square with the value 128 and resized to
    ``self.img_shape``. The labels are re-expressed relative to the
    padded image.

    Args:
        index: Sample index; wrapped modulo ``len(self.img_files)``.

    Returns:
        ``(img_path, input_img, filled_labels)`` where ``input_img`` is a
        channels-first float tensor and ``filled_labels`` is a
        ``(self.max_objects, 5)`` tensor whose unused rows are zero.

    Raises:
        Exception: If the label file for the image does not exist.
    """
    n_files = len(self.img_files)
    img_path = self.img_files[index % n_files].rstrip()
    img = np.array(Image.open(img_path))
    h, w, _ = img.shape

    label_path = self.label_files[index % n_files].rstrip()
    if not os.path.exists(label_path):
        raise Exception(
            "the label file(.txt) is not found corresponding + " + img_path)
    labels = np.loadtxt(label_path).reshape(-1, 5)

    # --------------------
    #  data augmentation
    # --------------------
    # Relative (cx, cy, w, h) -> absolute corner coordinates.
    lx = w * (labels[:, 1] - labels[:, 3] / 2)
    ly = h * (labels[:, 2] - labels[:, 4] / 2)
    bx = w * (labels[:, 1] + labels[:, 3] / 2)
    by = h * (labels[:, 2] + labels[:, 4] / 2)

    # convert to chainercv format: (ly, lx, by, bx)
    cv_bbox = np.stack([ly, lx, by, bx], axis=1)
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    cv_labels = labels[:, 0].reshape(-1).astype(int)
    cv_img = img.transpose(2, 0, 1)

    # 1. Random distort
    cv_img = random_distort(cv_img)

    # 2. Random cropping; keep only the labels of the surviving boxes
    cv_img, param = random_crop_with_bbox_constraints(
        cv_img, cv_bbox, min_scale=0.3, return_param=True)
    cv_bbox, param = transforms.crop_bbox(
        cv_bbox,
        y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    cv_labels = cv_labels[param['index']]

    # 3. Random horizontal flipping
    _, _h, _w = cv_img.shape
    cv_img, params = transforms.random_flip(
        cv_img, x_random=True, return_param=True)
    cv_bbox = transforms.flip_bbox(
        cv_bbox, (_h, _w), x_flip=params['x_flip'])

    # update params: back to channels-last, with the post-crop size
    img = cv_img.transpose(1, 2, 0)
    h, w, _ = img.shape

    # convert to default format: (cx, cy, w, h)
    labels = np.zeros((cv_labels.size, 5))
    labels[:, 0] = cv_labels
    labels[:, 1] = (cv_bbox[:, 3] + cv_bbox[:, 1]) / 2.0 / w  # cx
    labels[:, 2] = (cv_bbox[:, 2] + cv_bbox[:, 0]) / 2.0 / h  # cy
    labels[:, 3] = (cv_bbox[:, 3] - cv_bbox[:, 1]) / w  # w
    labels[:, 4] = (cv_bbox[:, 2] - cv_bbox[:, 0]) / h  # h

    # ---------
    #  image
    # ---------
    dim_diff = np.abs(h - w)
    # Upper (left) and lower (right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Pad the shorter dimension so the image becomes square
    if h <= w:
        pad = ((pad1, pad2), (0, 0), (0, 0))
    else:
        pad = ((0, 0), (pad1, pad2), (0, 0))
    # Add padding
    input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
    padded_h, padded_w, _ = input_img.shape
    # Resize and normalize
    input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
    # Channels-first
    input_img = np.transpose(input_img, (2, 0, 1))
    # As pytorch tensor
    input_img = torch.from_numpy(input_img).float()

    # ---------
    #  Label
    # ---------
    # Extract coordinates for unpadded + unscaled image
    x1 = w * (labels[:, 1] - labels[:, 3] / 2)
    y1 = h * (labels[:, 2] - labels[:, 4] / 2)
    x2 = w * (labels[:, 1] + labels[:, 3] / 2)
    y2 = h * (labels[:, 2] + labels[:, 4] / 2)
    # Adjust for added padding
    x1 += pad[1][0]
    y1 += pad[0][0]
    x2 += pad[1][0]
    y2 += pad[0][0]
    # Calculate ratios from coordinates
    labels[:, 1] = ((x1 + x2) / 2) / padded_w
    labels[:, 2] = ((y1 + y2) / 2) / padded_h
    labels[:, 3] *= w / padded_w
    labels[:, 4] *= h / padded_h

    # Fill a fixed-size matrix; extra objects beyond max_objects are dropped
    filled_labels = np.zeros((self.max_objects, 5))
    n_kept = min(len(labels), self.max_objects)
    filled_labels[:n_kept] = labels[:n_kept]
    filled_labels = torch.from_numpy(filled_labels)

    return img_path, input_img, filled_labels
def __call__(self, in_data):
    """Augment one sample, keeping an optional mask aligned with the image.

    Steps: color augmentation, random expansion, random cropping,
    resizing (random interpolation for the image, ``_resize_with_nearest``
    for the mask) and random horizontal flipping. Each geometric step is
    applied to the mask only when a mask was supplied.

    Args:
        in_data: A ``(img, bbox, label, mask)`` tuple. ``mask`` may be
            ``None``; otherwise it is used as an array (``.size`` and
            ``.dtype`` are read).

    Returns:
        ``(img, mb_loc, mb_label, mask)``: the mean-subtracted image, the
        two outputs of ``self.coder.encode`` and an int32 mask. When no
        mask was supplied, the mask is a ``(self.size, self.size)`` array
        filled with -1.

    Raises:
        RuntimeError: If the mask is empty after cropping.
    """
    img, bbox, label, mask = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion, taken when the coin flip returns 1
    if np.random.randint(2):
        img, param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        if mask is not None:
            # The mask helper receives the expanded canvas size via param.
            _, new_height, new_width = img.shape
            param['new_height'] = new_height
            param['new_width'] = new_width
            mask = self._random_expand_mask(mask, param)

    # 3. Random cropping
    img, param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Guarded like every other mask step: a missing mask stays None and
    # is replaced by the -1 placeholder at the end.
    if mask is not None:
        mask = self._fixed_crop_mask(
            mask, param['y_slice'], param['x_slice'])
    bbox, param = transforms.crop_bbox(
        bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    out_size = (self.size, self.size)
    img = resize_with_random_interpolation(img, out_size)
    if mask is not None:
        if mask.size == 0:
            raise RuntimeError("mask is empty after cropping")
        mask = self._resize_with_nearest(mask, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=params['x_flip'])
    if mask is not None:
        mask = self._random_flip_mask(
            mask, x_flip=params['x_flip'], y_flip=params['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    if mask is None:
        # Placeholder mask for samples that carry no mask.
        mask = np.full((self.size, self.size), -1, dtype=np.int32)
    elif mask.dtype != np.int32:
        mask = mask.astype(np.int32)

    return img, mb_loc, mb_label, mask