def random_flip(img, bboxes, ratio=0.5):
    """Horizontally flip an image together with its boxes, at random.

    Args:
        img: Image handed to ``mmcv.imflip``.
        bboxes: Boxes handed to ``bbox_flip``. When empty, nothing is flipped.
        ratio: Threshold compared with ``random.random()``; the flip is
            skipped whenever the drawn number is greater than it.

    Returns:
        tuple: ``(img, bboxes)``, either both flipped or both untouched.
    """
    # The random number is drawn first, so the RNG advances on every call.
    skip = random.random() > ratio or len(bboxes) == 0
    if skip:
        return img, bboxes

    flipped_img = mmcv.imflip(img, direction='horizontal')
    flipped_boxes = bbox_flip(bboxes, flipped_img.shape, 'horizontal')
    return flipped_img, flipped_boxes
def random_vflip(img, bboxes, ratio=0.5):
    """Vertically flip an image together with its boxes, at random.

    Args:
        img: Image handed to ``mmcv.imflip``.
        bboxes: Boxes handed to ``bbox_flip``. When empty, nothing is flipped.
        ratio: Threshold compared with ``random.random()``; the flip is
            skipped whenever the drawn number is greater than it.

    Returns:
        tuple: ``(img, bboxes)``, either both flipped or both untouched.
    """
    # The random number is drawn first, so the RNG advances on every call.
    skip = random.random() > ratio or len(bboxes) == 0
    if skip:
        return img, bboxes

    flipped_img = mmcv.imflip(img, direction="vertical")
    flipped_boxes = bbox_flip(bboxes, flipped_img.shape, 'vertical')
    return flipped_img, flipped_boxes
def __call__(self, results):
    """Replace ``results['img_group']`` by fixed-offset crops plus their flips.

    For every offset returned by ``MultiScaleCrop.fill_fix_offset`` each frame
    is cropped to ``self.crop_size`` (unpacked as ``(crop_w, crop_h)``); the
    plain crops of that offset are emitted first, followed by their flipped
    copies. For the ``'Flow'`` modality, flipped frames at even indices are
    additionally passed through ``mmcv.iminvert``.

    Args:
        results (dict): Must provide ``'img_group'`` and ``'modality'``.

    Returns:
        dict: The same dict with ``'img_group'`` replaced, ``'crop_bbox'`` set
        to ``None`` and ``'img_shape'`` set to the shape of the first crop.
    """
    frames = results['img_group']
    img_h, img_w = frames[0].shape[:2]
    crop_w, crop_h = self.crop_size
    offsets = MultiScaleCrop.fill_fix_offset(False, img_w, img_h, crop_w,
                                             crop_h)

    oversampled = []
    for off_w, off_h in offsets:
        right = off_w + crop_w - 1
        bottom = off_h + crop_h - 1
        plain, mirrored = [], []
        for idx, frame in enumerate(frames):
            patch = mmcv.imcrop(frame, np.array([off_w, off_h, right, bottom]))
            plain.append(patch)

            flipped = mmcv.imflip(patch)
            if results['modality'] == 'Flow' and idx % 2 == 0:
                flipped = mmcv.iminvert(flipped)
            mirrored.append(flipped)

        oversampled += plain
        oversampled += mirrored

    results['img_group'] = oversampled
    results['crop_bbox'] = None
    results['img_shape'] = results['img_group'][0].shape
    return results
def __call__(self, results):
    """Apply the recorded image transforms to the semantic segmentation maps.

    ``'gt_semantic_seg'`` is always processed; ``'ref_semantic_seg'`` is
    processed first when present. Each map is resized with nearest-neighbour
    interpolation, flipped, cropped and padded according to the entries
    already stored in ``results`` (``'keep_ratio'``, ``'scale'``, ``'flip'``,
    optional ``'crop_coords'``, ``'pad_shape'``) and finally rescaled by
    ``self.scale_factor``.

    Args:
        results (dict): Pipeline dict holding the maps and the entries above.

    Returns:
        dict: The same dict with each processed map written back, plus a
        ``'<key>_Nx'`` entry per map when ``self.another_scale`` is set.
    """
    keys = ['gt_semantic_seg']
    if 'ref_semantic_seg' in results:
        keys = ['ref_semantic_seg', 'gt_semantic_seg']

    for key in keys:
        resize = mmcv.imrescale if results['keep_ratio'] else mmcv.imresize
        seg = resize(results[key], results['scale'], interpolation='nearest')

        if results['flip']:
            seg = mmcv.imflip(seg).copy()

        if 'crop_coords' in results:
            coords = results['crop_coords']
            # First axis uses coords[0:2], second axis uses coords[2:4].
            seg = seg[coords[0]:coords[1], coords[2]:coords[3]]

        target_shape = results['pad_shape'][:2]
        if seg.shape != target_shape:
            # A mismatch is padded rather than treated as an error.
            seg = mmcv.impad(seg, results['pad_shape'][:2])

        if self.scale_factor != 1:
            seg = mmcv.imrescale(seg, self.scale_factor,
                                 interpolation='nearest')
        results[key] = seg

        if self.another_scale is not None:
            results[key + '_Nx'] = mmcv.imrescale(
                seg, self.another_scale, interpolation='nearest')
    return results
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Rescale, normalize, optionally flip and pad one image.

    Args:
        img: Input image array.
        scale: Target scale forwarded to ``mmcv.imrescale``/``mmcv.imresize``.
        flip (bool): Apply ``mmcv.imflip`` after normalization.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``. ``img`` has its
        last axis moved to the front, ``img_shape`` is the shape right after
        resizing and ``pad_shape`` the shape after optional padding.
        ``scale_factor`` is whatever ``imrescale`` returned, or a float32
        array ``[w, h, w, h]`` when the aspect ratio is not kept.
    """
    if not keep_ratio:
        # Width and height are scaled independently; keep both factors.
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([w_scale, h_scale] * 2, dtype=np.float32)
    else:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)

    # Shape of the rescaled image, taken before any padding.
    img_shape = img.shape
    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        img = mmcv.imflip(img)

    pad_shape = img_shape
    if self.size_divisor is not None:
        img = mmcv.impad_to_multiple(img, self.size_divisor)
        pad_shape = img.shape

    # Reorder the axes so that the last one comes first.
    return img.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True, device='cuda:0'):
    """Preprocess one image and return it as a batched tensor on ``device``.

    Args:
        img: Input image array.
        scale: Target scale forwarded to ``mmcv.imrescale``/``mmcv.imresize``.
        flip (bool): Apply ``mmcv.imflip`` after normalization.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.
        device: Destination passed to ``Tensor.to``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` where ``img`` is
        a tensor with the last array axis moved first and a leading dimension
        of size one added.
    """
    if keep_ratio:
        resized, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        resized, w_scale, h_scale = mmcv.imresize(
            img, scale, return_scale=True)
        scale_factor = np.array((w_scale, h_scale, w_scale, h_scale),
                                dtype=np.float32)

    img_shape = resized.shape
    out = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)
    if flip:
        out = mmcv.imflip(out)

    if self.size_divisor is None:
        pad_shape = img_shape
    else:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
        pad_shape = out.shape

    chw = out.transpose(2, 0, 1)
    tensor = torch.from_numpy(chw).to(device).unsqueeze(0)
    return tensor, img_shape, pad_shape, scale_factor
def img_loading_func(path, do_augment=False):
    """Load one image file and preprocess it into a channels-first array.

    The image is read (``.png`` through OpenCV and converted BGR -> RGB,
    ``.npy`` through ``np.load``), cast to float32, rescaled with
    ``mmcv.imrescale`` using the scale ``(inf, 256)``, cropped by
    ``GroupCenterCrop(224)``, optionally flipped, normalized with a fixed
    mean/std and transposed so the last axis comes first.

    Args:
        path: File location; anything accepted by ``str()``. Must end with
            ``png`` or ``npy``.
        do_augment (bool): When true, the image is flipped with probability
            0.5.

    Returns:
        The preprocessed image array.

    Raises:
        ValueError: If ``path`` has neither of the supported endings.
        IOError: If OpenCV cannot read the PNG file.
    """
    mean = [104, 117, 128]
    std = [1, 1, 1]
    path = str(path)

    if path.endswith('png'):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('failed to read image file: {}'.format(path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    elif path.endswith('npy'):
        img = np.load(path)
    else:
        # Without this branch ``img`` would be unbound further down.
        raise ValueError(
            'unsupported file type (expected png or npy): {}'.format(path))
    img = img.astype(np.float32)

    # ``np.inf`` is the spelling that exists in every NumPy release.
    scale = (np.inf, 256)
    img, _ = mmcv.imrescale(img, scale, return_scale=True)

    op_crop = GroupCenterCrop(224)
    img_group, _ = op_crop((img,), is_flow=False)

    if do_augment and np.random.rand() < 0.5:
        img_group = [mmcv.imflip(frame) for frame in img_group]

    img_group = [
        mmcv.imnormalize(frame, mean, std, to_rgb=False)
        for frame in img_group
    ]
    img_group = [frame.transpose(2, 0, 1) for frame in img_group]
    return img_group[0]
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Rescale, normalize, optionally flip and pad an image.

    :param img: input image
    :type img: numpy array
    :param scale: target scale forwarded to the mmcv resize functions
    :param flip: whether to flip the image, defaults to False
    :type flip: bool, optional
    :param keep_ratio: whether to keep the aspect ratio, defaults to True
    :type keep_ratio: bool, optional
    :return: the transformed image (last axis moved first), the shape after
        resizing, the shape after padding and the scale factor
    :rtype: tuple
    """
    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        factors = [w_scale, h_scale, w_scale, h_scale]
        scale_factor = np.array(factors, dtype=np.float32)

    img_shape = img.shape
    normalized = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        normalized = mmcv.imflip(normalized)

    pad_shape = img_shape
    if self.size_divisor is not None:
        normalized = mmcv.impad_to_multiple(normalized, self.size_divisor)
        pad_shape = normalized.shape

    transposed = normalized.transpose(2, 0, 1)
    return transposed, img_shape, pad_shape, scale_factor
def __call__(self, img_group, is_flow=False):
    """Crop every frame at a set of fixed offsets and add flipped copies.

    Offsets come from ``GroupMultiScaleCrop.fill_fix_offset``. For each
    offset the plain crops are emitted first, then the flipped crops. With
    ``is_flow`` set, flipped frames at even indices are additionally passed
    through ``mmcv.iminvert``.

    Args:
        img_group: Sequence of image arrays; the first one defines the size.
        is_flow (bool): Treat the group as optical-flow frames.

    Returns:
        tuple: ``(oversample_group, None)``.
    """
    first = img_group[0]
    image_h, image_w = first.shape[0], first.shape[1]
    crop_w, crop_h = self.crop_size
    offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h,
                                                  crop_w, crop_h)

    oversampled = []
    for off_w, off_h in offsets:
        right = off_w + crop_w - 1
        bottom = off_h + crop_h - 1
        plain, mirrored = [], []
        for idx, frame in enumerate(img_group):
            patch = mmcv.imcrop(frame, np.array([off_w, off_h, right, bottom]))
            plain.append(patch)

            flipped = mmcv.imflip(patch)
            if is_flow and idx % 2 == 0:
                flipped = mmcv.iminvert(flipped)
            mirrored.append(flipped)

        oversampled += plain
        oversampled += mirrored
    return oversampled, None
def __call__(self, img_group, scale, crop_history=None, flip=False,
             keep_ratio=True, div_255=False, is_flow=False):
    """Run the full preprocessing chain on a group of frames.

    Steps, in order: rescale, crop, flip, optional division by 255,
    normalization, padding, flow-pair stacking, axis transpose and stacking
    into a single array.

    Args:
        img_group: Sequence of image arrays.
        scale: Scale forwarded to ``mmcv.imrescale``/``mmcv.imresize``.
        crop_history: When given, ``self.op_crop`` is replaced by
            ``GroupCrop(crop_history)``; the replacement stays on the
            instance after the call.
        flip (bool): Flip every frame.
        keep_ratio (bool): Choose ``imrescale`` over ``imresize``.
        div_255 (bool): Normalize with mean 0 and std 255 before the main
            normalization.
        is_flow (bool): Frames are flow components; consecutive pairs are
            stacked along a new last axis.

    Returns:
        tuple: ``(img_group, img_shape, pad_shape, scale_factor,
        crop_quadruple)`` with ``img_group`` stacked along axis 0.
    """
    # 1. rescale
    if keep_ratio:
        rescaled = [
            mmcv.imrescale(frame, scale, return_scale=True)
            for frame in img_group
        ]
        img_group, scale_factors = list(zip(*rescaled))
        scale_factor = scale_factors[0]
    else:
        resized = [
            mmcv.imresize(frame, scale, return_scale=True)
            for frame in img_group
        ]
        img_group, w_scales, h_scales = list(zip(*resized))
        first_w, first_h = w_scales[0], h_scales[0]
        scale_factor = np.array([first_w, first_h, first_w, first_h],
                                dtype=np.float32)

    # 2. crop (if necessary)
    if crop_history is not None:
        self.op_crop = GroupCrop(crop_history)
    crop_quadruple = None
    if self.op_crop is not None:
        img_group, crop_quadruple = self.op_crop(img_group, is_flow=is_flow)
    img_shape = img_group[0].shape

    # 3. flip
    if flip:
        img_group = [mmcv.imflip(frame) for frame in img_group]
        if is_flow:
            # Every even-indexed frame is inverted after the flip.
            for idx in range(0, len(img_group), 2):
                img_group[idx] = mmcv.iminvert(img_group[idx])

    # 4a. div_255
    if div_255:
        img_group = [
            mmcv.imnormalize(frame, 0, 255, False) for frame in img_group
        ]

    # 4. normalize
    img_group = [
        mmcv.imnormalize(frame, self.mean, self.std, self.to_rgb)
        for frame in img_group
    ]

    # 5. pad
    pad_shape = img_shape
    if self.size_divisor is not None:
        img_group = [
            mmcv.impad_to_multiple(frame, self.size_divisor)
            for frame in img_group
        ]
        pad_shape = img_group[0].shape

    if is_flow:
        assert len(img_group[0].shape) == 2
        evens, odds = img_group[0::2], img_group[1::2]
        img_group = [
            np.stack((flow_x, flow_y), axis=2)
            for flow_x, flow_y in zip(evens, odds)
        ]

    # 6. transpose, then stack into one numpy array
    img_group = np.stack(
        [frame.transpose(2, 0, 1) for frame in img_group], axis=0)
    return img_group, img_shape, pad_shape, scale_factor, crop_quadruple
def test_imflip(self):
    """Check ``mmcv.imflip`` in all directions on color and grayscale input."""
    # direction must be "horizontal" or "vertical" or "diagonal"
    with pytest.raises(AssertionError):
        mmcv.imflip(np.random.rand(80, 60, 3), direction='random')

    # test horizontal flip (color image): columns are reversed
    img = np.random.rand(80, 60, 3)
    flipped_img = mmcv.imflip(img)
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[:, ::-1, :])

    # test vertical flip (color image): rows are reversed
    flipped_img = mmcv.imflip(img, direction='vertical')
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[::-1, :, :])

    # test diagonal flip (color image): rows and columns are reversed
    flipped_img = mmcv.imflip(img, direction='diagonal')
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[::-1, ::-1, :])

    # test horizontal flip (grayscale image)
    img = np.random.rand(80, 60)
    flipped_img = mmcv.imflip(img)
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[:, ::-1])

    # test vertical flip (grayscale image)
    flipped_img = mmcv.imflip(img, direction='vertical')
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[::-1, :])

    # test diagonal flip (grayscale image)
    flipped_img = mmcv.imflip(img, direction='diagonal')
    assert flipped_img.shape == img.shape
    assert np.array_equal(flipped_img, img[::-1, ::-1])
def __call__(self, results):
    """Randomly flip image, boxes and masks horizontally or vertically.

    ``results['flip']`` is drawn from ``self.flip_ratio`` unless it is
    already present. When a flip happens, the axis is chosen by a fresh
    random integer: 0 flips horizontally, 1 flips vertically. That integer
    is also what ``self.bbox_flip`` receives as its third argument.

    Masks are now flipped along the same axis as the image; previously they
    were always mirrored horizontally, which left them misaligned after a
    vertical flip.

    Args:
        results (dict): Pipeline dict with ``'img'`` and ``'img_shape'`` and
            optional ``'bbox_fields'`` / ``'mask_fields'`` key lists.

    Returns:
        dict: The same dict, updated in place.
    """
    if 'flip' not in results:
        results['flip'] = bool(np.random.rand() < self.flip_ratio)

    if results['flip']:
        seed = np.random.randint(0, 2)
        horizontal = seed == 0

        # flip image
        direction = 'horizontal' if horizontal else 'vertical'
        results['img'] = mmcv.imflip(results['img'], direction)

        # flip bboxes
        for key in results.get('bbox_fields', []):
            results[key] = self.bbox_flip(results[key], results['img_shape'],
                                          seed)

        # flip masks along the axis that was used for the image
        for key in results.get('mask_fields', []):
            if horizontal:
                results[key] = [mask[:, ::-1] for mask in results[key]]
            else:
                results[key] = [mask[::-1, :] for mask in results[key]]
    return results
def __call__(self, results):
    """Randomly flip an image pair with its boxes and masks.

    ``results['flip']`` is drawn from ``self.flip_ratio`` unless already
    set. On a flip, the first two entries of ``results['img']`` are flipped
    with the default ``mmcv.imflip`` direction and stored back as a
    two-element list.

    Args:
        results (dict): Pipeline dict with ``'img'`` (indexable, at least two
            images) and ``'img_shape'``.

    Returns:
        dict: The same dict, updated in place.
    """
    if 'flip' not in results:
        results['flip'] = bool(np.random.rand() < self.flip_ratio)
    if not results['flip']:
        return results

    # flip both images of the pair
    results['img'] = [mmcv.imflip(results['img'][pos]) for pos in (0, 1)]

    # flip bboxes
    for key in results.get('bbox_fields', []):
        results[key] = self.bbox_flip(results[key], results['img_shape'])

    # flip masks by reversing their second axis
    for key in results.get('mask_fields', []):
        results[key] = [mask[:, ::-1] for mask in results[key]]
    return results
def __call__(self, img, scale, flip=False, keep_ratio=True, crop=False):
    """Preprocess one image in either crop mode or fixed 511x511 mode.

    Args:
        img: Image array with three axes.
        scale: Multiplier applied to height and width; only used when
            ``crop`` is true.
        flip (bool): Apply ``mmcv.imflip``.
        keep_ratio (bool): Accepted for interface compatibility; not read.
        crop (bool): Select the crop mode.

    Returns:
        tuple: In crop mode ``(img, border, offset)`` as produced by
        ``crop_image``. Otherwise ``(img, (511, 511, 3), None, ratio)`` with
        ``ratio = 511 / original height``. ``img`` always has its last axis
        moved to the front.
    """
    if crop:
        h, w, _ = img.shape
        new_h = int(h * scale)
        new_w = int(w * scale)
        img = mmcv.imresize(img, (new_w, new_h))

        h, w, _ = img.shape
        # OR-ing with 127 sets the seven lowest bits of each size.
        inp_h = h | 127
        inp_w = w | 127
        center = np.array([h // 2, w // 2])
        if flip:
            img = mmcv.imflip(img)
        img, border, offset = crop_image(img, center, [inp_h, inp_w])
        img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
        return img.transpose(2, 0, 1), border, offset

    h, _, _ = img.shape
    ratio = 511.0 / float(h)
    img = mmcv.imresize(img, (511, 511))
    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        img = mmcv.imflip(img)
    return img.transpose(2, 0, 1), (511, 511, 3), None, ratio
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize with nearest-neighbour interpolation, then flip and pad.

    Args:
        img: Input array.
        scale: Scale forwarded to the selected mmcv resize function.
        flip (bool): Apply ``mmcv.imflip`` after resizing.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.

    Returns:
        The transformed array, padded to a multiple of ``self.size_divisor``
        when that attribute is not ``None``.
    """
    resize = mmcv.imrescale if keep_ratio else mmcv.imresize
    out = resize(img, scale, interpolation='nearest')
    if flip:
        out = mmcv.imflip(out)
    if self.size_divisor is None:
        return out
    return mmcv.impad_to_multiple(out, self.size_divisor)
def random_flip(self, img, boxes):
    """Flip the image and its boxes with probability one half.

    Args:
        img: Image array; its second axis length is used as the width.
        boxes: Array whose last axis stores the two x coordinates at
            positions 0 and 2.

    Returns:
        tuple: ``(img, boxes)``. When no flip is drawn, both inputs are
        returned unchanged; otherwise the boxes are a modified copy.
    """
    if not np.random.randint(2):
        return img, boxes

    img = mmcv.imflip(img)
    width = img.shape[1]
    mirrored = boxes.copy()
    # The two x coordinates swap roles: new x1 = w - x2 - 1, new x2 = w - x1 - 1.
    mirrored[..., 0] = width - boxes[..., 2] - 1
    mirrored[..., 2] = width - boxes[..., 0] - 1
    return img, mirrored
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """Flip an image and mirror the x coordinates of its boxes.

    Args:
        img: Image array with three axes; the second axis is the width.
        img_shape: Passed through unchanged.
        gt_bboxes: Box array; along the last axis every fourth value starting
            at 0 and at 2 is treated as an x coordinate.
        gt_label: Passed through unchanged.
        gt_num: Passed through unchanged.

    Returns:
        tuple: ``(img_data, img_shape, flipped, gt_label, gt_num)``.
    """
    img_data = mmcv.imflip(img)
    _, width, _ = img_data.shape

    flipped = gt_bboxes.copy()
    x_min = gt_bboxes[..., 0::4]
    x_max = gt_bboxes[..., 2::4]
    flipped[..., 0::4] = width - x_max - 1
    flipped[..., 2::4] = width - x_min - 1
    return (img_data, img_shape, flipped, gt_label, gt_num)
def __call__(self, results):
    """Randomly flip the image and its boxes vertically.

    ``results['vflip']`` is drawn from ``self.flip_ratio`` unless it is
    already present in the dict.

    Args:
        results (dict): Pipeline dict with ``'img'`` and ``'img_shape'`` and
            an optional ``'bbox_fields'`` key list.

    Returns:
        dict: The same dict, updated in place.
    """
    if 'vflip' not in results:
        results['vflip'] = bool(np.random.rand() < self.flip_ratio)
    if not results['vflip']:
        return results

    # flip image
    results['img'] = mmcv.imflip(results['img'], direction="vertical")
    # flip bboxes
    for key in results.get('bbox_fields', []):
        results[key] = self.bbox_flip(results[key], results['img_shape'])
    return results
def __call__(self, img, scale, flip=False, pad_val=(0, 0, 0),
             keep_ratio=True):
    """Rescale, normalize, optionally flip and pad an image to ``scale``.

    Args:
        img: Input image array.
        scale: Target size given as ``(w, h)``.
        flip (bool): Apply ``mmcv.imflip`` after normalization.
        pad_val: Fill value forwarded to ``mmcv.impad``.
        keep_ratio (bool): Keep the aspect ratio while resizing.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` with ``img``
        transposed so that its last axis comes first.
    """
    # 1. rescale/resize the image to the expected size
    if keep_ratio:
        # Aspect ratio is preserved by the rescale helper.
        img, scale_factor = imrescale(
            img=img,
            scale=scale,
            return_scale=True,
            interpolation='bilinear',
        )
    else:
        # Aspect ratio is ignored; the image is resized to the given size.
        img, w_scale, h_scale = mmcv.imresize(
            img=img,
            size=scale,
            return_scale=True,
            interpolation='bilinear',
        )
        scale_factor = np.array(
            [w_scale, h_scale, w_scale, h_scale], dtype=np.float32
        )

    # 2. normalize the image
    img_shape = img.shape
    img = imnormalize(img, self.mean, self.std, self.to_rgb)

    # 3. flip the image (if needed)
    if flip:
        img = mmcv.imflip(img)

    # 4. pad the image to the exact scale value
    # NOTE(review): ``img_shape`` has three entries (the transpose below needs
    # three axes), so it differs from any two-element ``scale`` and the
    # padding branch is taken in that case; confirm whether that is intended.
    pad_shape = img_shape
    if img_shape != scale:
        if isinstance(scale, (int, float)):
            target = scale
        else:
            target = (scale[1], scale[0])
        img = mmcv.impad(img=img, shape=target, pad_val=pad_val)
        pad_shape = img.shape

    # 5. transpose so the last axis comes first
    return img.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def flip(self, flip_direction='horizontal'):
    """See :func:`BaseInstanceMasks.flip`.

    Every mask in ``self.masks`` is flipped with ``mmcv.imflip`` and the
    results are stacked into a new ``BitmapMasks`` of the same height and
    width. An empty mask container is reused as is.
    """
    assert flip_direction in ('horizontal', 'vertical', 'diagonal')

    if len(self.masks) == 0:
        return BitmapMasks(self.masks, self.height, self.width)

    flipped = [
        mmcv.imflip(mask, direction=flip_direction) for mask in self.masks
    ]
    return BitmapMasks(np.stack(flipped), self.height, self.width)
def __call__(self, results):
    """Replace ``results['img_group']`` by three crops of every frame.

    The three offsets depend on how ``self.crop_size`` (unpacked as
    ``(crop_w, crop_h)``) relates to the frame size:

    * crop height equals frame height: left, right, middle along the width;
    * crop width equals frame width: top, bottom, middle along the height;
    * otherwise: left, right and center at the vertical middle.

    Only unflipped crops are emitted, exactly as before. Earlier versions
    also flipped (and for the ``'Flow'`` modality inverted) every crop and
    then discarded the result; that dead work has been removed.

    Args:
        results (dict): Must provide ``'img_group'``, a non-empty sequence
            of image arrays.

    Returns:
        dict: The same dict with ``'img_group'`` replaced, ``'crop_bbox'``
        set to ``None`` and ``'img_shape'`` set to the first crop's shape.
    """
    img_group = results['img_group']
    img_h, img_w = img_group[0].shape[:2]
    crop_w, crop_h = self.crop_size

    if crop_h == img_h:
        w_step = (img_w - crop_w) // 2
        offsets = [
            (0, 0),  # left
            (2 * w_step, 0),  # right
            (w_step, 0),  # middle
        ]
    elif crop_w == img_w:
        h_step = (img_h - crop_h) // 2
        offsets = [
            (0, 0),  # top
            (0, 2 * h_step),  # down
            (0, h_step),  # middle
        ]
    else:
        w_step = (img_w - crop_w) // 4
        h_step = (img_h - crop_h) // 4
        offsets = [
            (0 * w_step, 2 * h_step),  # left
            (4 * w_step, 2 * h_step),  # right
            (2 * w_step, 2 * h_step),  # center
        ]

    oversample_group = []
    for o_w, o_h in offsets:
        oversample_group.extend(
            mmcv.imcrop(
                img,
                np.array([o_w, o_h, o_w + crop_w - 1, o_h + crop_h - 1]))
            for img in img_group)

    results['img_group'] = oversample_group
    results['crop_bbox'] = None
    results['img_shape'] = results['img_group'][0].shape
    return results
def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num, gt_mask):
    """Flip an image and mirror its boxes and masks accordingly.

    Args:
        img: Image array with three axes; the second axis is the width.
        img_shape: Passed through unchanged.
        gt_bboxes: Box array; along the last axis every fourth value starting
            at 0 and at 2 is treated as an x coordinate.
        gt_label: Passed through unchanged.
        gt_num: Passed through unchanged.
        gt_mask: Iterable of masks; each has its second axis reversed.

    Returns:
        tuple: ``(img_data, img_shape, flipped, gt_label, gt_num,
        gt_mask_data)``.
    """
    img_data = mmcv.imflip(img)
    _, width, _ = img_data.shape

    flipped = gt_bboxes.copy()
    x_min = gt_bboxes[..., 0::4]
    x_max = gt_bboxes[..., 2::4]
    flipped[..., 0::4] = width - x_max - 1  # x1 = W - x2 - 1
    flipped[..., 2::4] = width - x_min - 1  # x2 = W - x1 - 1

    mirrored_masks = [mask[:, ::-1] for mask in gt_mask]
    gt_mask_data = np.array(mirrored_masks)
    return (img_data, img_shape, flipped, gt_label, gt_num, gt_mask_data)
def __call__(self, img, scale, flip=False):
    """Rescale (keeping the aspect ratio), normalize, flip and pad an image.

    Args:
        img: Input image array.
        scale: Scale forwarded to ``mmcv.imrescale``.
        flip (bool): Apply ``mmcv.imflip`` after normalization.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` with ``img``
        transposed so that its last axis comes first.
    """
    rescaled, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    img_shape = rescaled.shape

    out = mmcv.imnormalize(rescaled, self.mean, self.std, self.to_rgb)
    if flip:
        out = mmcv.imflip(out)

    if self.size_divisor is None:
        pad_shape = img_shape
    else:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
        pad_shape = out.shape

    return out.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, results):
    """Randomly flip image, boxes and masks along a configurable direction.

    ``results['flip']`` is drawn from ``self.flip_ratio`` and
    ``results['flip_direction']`` is taken from ``self.direction`` unless
    the respective key is already present.

    Args:
        results (dict): Pipeline dict with ``'img'`` and ``'img_shape'`` and
            optional ``'bbox_fields'`` / ``'mask_fields'`` key lists.

    Returns:
        dict: The same dict, updated in place.
    """
    if 'flip' not in results:
        results['flip'] = bool(np.random.rand() < self.flip_ratio)
    if 'flip_direction' not in results:
        results['flip_direction'] = self.direction
    if not results['flip']:
        return results

    # flip image
    results['img'] = mmcv.imflip(
        results['img'], direction=results['flip_direction'])

    # flip bboxes
    for key in results.get('bbox_fields', []):
        results[key] = self.bbox_flip(
            results[key], results['img_shape'], results['flip_direction'])

    # flip masks one by one
    for key in results.get('mask_fields', []):
        flipped_masks = []
        for mask in results[key]:
            flipped_masks.append(
                mmcv.imflip(mask, direction=results['flip_direction']))
        results[key] = flipped_masks
    return results
def random_flip_data_3d(self, input_dict, direction='horizontal'):
    """Flip the 3D data stored in ``input_dict`` in place.

    Args:
        input_dict (dict): Result dict from the loading pipeline. Must hold
            ``'bbox3d_fields'``; ``'points'``, ``'centers2d'`` and
            ``'bev_seg_image'`` are updated when present.
        direction (str): Flip direction, ``'horizontal'`` or ``'vertical'``.
            Default: horizontal.

    Nothing is returned; all changes are written into ``input_dict``.
    """
    assert direction in ['horizontal', 'vertical']

    box_fields = input_dict['bbox3d_fields']
    if len(box_fields) == 0:  # test mode
        box_fields.append('empty_box3d')
        input_dict['empty_box3d'] = input_dict['box_type_3d'](np.array(
            [], dtype=np.float32))
    assert len(box_fields) == 1

    for key in box_fields:
        boxes = input_dict[key]
        if 'points' in input_dict:
            input_dict['points'] = boxes.flip(
                direction, points=input_dict['points'])
        else:
            boxes.flip(direction)

    if 'centers2d' in input_dict:
        assert self.sync_2d is True and direction == 'horizontal', \
            'Only support sync_2d=True and horizontal flip with images'
        width = input_dict['img_shape'][1]
        input_dict['centers2d'][..., 0] = \
            width - input_dict['centers2d'][..., 0]

    if 'bev_seg_image' in input_dict:
        assert input_dict['bev_seg_image'] is not None, \
            'bev seg image is None, please check segimage path'
        # The BEV image is flipped along the opposite image direction.
        if direction == 'horizontal':
            image_direction = 'vertical'
        else:
            image_direction = 'horizontal'
        input_dict['bev_seg_image'] = mmcv.imflip(
            input_dict['bev_seg_image'], image_direction)
def __call__(self, results):
    """Randomly flip every frame of ``results['img_group']``.

    A flip decision is drawn from ``self.flip_ratio`` on every call. On a
    flip, all frames are flipped along ``self.direction``; for the
    ``'Flow'`` modality the frames at even indices are additionally passed
    through ``mmcv.iminvert``.

    Args:
        results (dict): Must provide ``'img_group'``; ``'modality'`` is read
            when a flip happens.

    Returns:
        dict: The same dict with ``'flip'``, ``'flip_direction'`` and
        ``'img_group'`` written.
    """
    frames = results['img_group']
    flip = bool(np.random.rand() < self.flip_ratio)

    if flip:
        frames = [mmcv.imflip(frame, self.direction) for frame in frames]
        if results['modality'] == 'Flow':
            frames[0::2] = [mmcv.iminvert(frame) for frame in frames[0::2]]

    results['flip'] = flip
    results['flip_direction'] = self.direction
    results['img_group'] = frames
    return results
def __call__(self, results):
    """Flip images, bounding boxes, masks and semantic segmentation maps.

    Args:
        results (dict): Result dict from the loading pipeline.

    Returns:
        dict: Flipped results; the 'flip' and 'flip_direction' keys are
        added to the dict when they are not already present.
    """
    if 'flip' not in results:
        results['flip'] = bool(np.random.rand() < self.flip_ratio)
    if 'flip_direction' not in results:
        results['flip_direction'] = self.direction
    if not results['flip']:
        return results

    # flip images ('img' only, unless 'img_fields' lists other keys)
    for key in results.get('img_fields', ['img']):
        results[key] = mmcv.imflip(
            results[key], direction=results['flip_direction'])

    # flip bboxes
    for key in results.get('bbox_fields', []):
        results[key] = self.bbox_flip(
            results[key], results['img_shape'], results['flip_direction'])

    # flip masks through their own ``flip`` method
    for key in results.get('mask_fields', []):
        results[key] = results[key].flip(results['flip_direction'])

    # flip segs
    for key in results.get('seg_fields', []):
        results[key] = mmcv.imflip(
            results[key], direction=results['flip_direction'])
    return results
def _prepare_mixup_image(self, idx):
    """Load and preprocess the mix-up image stored at position ``idx``.

    The image is read from ``self.mixup_image_paths[idx].path``, rescaled to
    ``self.img_scale``, cropped by ``self.mixup_op_crop``, flipped with
    probability one half and passed through ``self.convert_color``.

    Args:
        idx: Index into ``self.mixup_image_paths``.

    Returns:
        The processed image cast to float32.
    """
    source = self.mixup_image_paths[idx].path
    rescaled = mmcv.imrescale(mmcv.imread(source), self.img_scale)

    crops, _ = self.mixup_op_crop([rescaled])
    patch = crops[0]
    if np.random.randint(2):
        patch = mmcv.imflip(patch)

    converted = self.convert_color([patch], self.to_grayscale, self.to_rgb)
    return converted[0].astype(np.float32)
def display_shifted_masks(shifted_masks, img_meta=None):
    """Save every mask of a 3-D tensor as a thresholded pseudo-color plot.

    The tensor is binarized once with the threshold 0.3. Each mask is then
    multiplied by 10, flipped vertically and drawn with ``plt.pcolormesh``.
    Thresholding used to be repeated on the whole tensor in every loop
    iteration; it gives the same values when applied once.

    Args:
        shifted_masks: Tensor whose ``size()`` has exactly three entries;
            the first one is the number of masks.
        img_meta (dict, optional): When given, its ``'video_id'`` and
            ``'frame_id'`` entries are used to build one file name per mask.

    Note:
        Without ``img_meta`` every mask is written to the same fallback
        path, so only the last one remains on disk.
    """
    n, _, _ = shifted_masks.size()
    # Loop-invariant: binarize the whole tensor a single time.
    binary_masks = shifted_masks.gt(0.3).float()

    for i in range(n):
        if img_meta is not None:
            path = ''.join(['results/results_1227_1/embedding_map/',
                            str(img_meta['video_id']), '_',
                            str(img_meta['frame_id']), '_', str(i),
                            '_shifted_masks.png'])
        else:
            path = 'results/results_1227_1/fea_ref/0_shifted_mask.png'

        mask = binary_masks[i].cpu().numpy()
        plt.axis('off')
        plt.pcolormesh(mmcv.imflip(mask * 10, direction='vertical'))
        plt.savefig(path)
        plt.clf()
def __call__(self, results):
    """Randomly flip image, boxes and masks with the default direction.

    ``results['flip']`` is drawn from ``self.flip_ratio`` unless it is
    already present in the dict.

    Args:
        results (dict): Pipeline dict with ``'img'`` and ``'img_shape'`` and
            optional ``'bbox_fields'`` / ``'mask_fields'`` key lists.

    Returns:
        dict: The same dict, updated in place.
    """
    if 'flip' not in results:
        results['flip'] = bool(np.random.rand() < self.flip_ratio)
    if not results['flip']:
        return results

    # flip image; no direction is passed, so mmcv's default is used
    results['img'] = mmcv.imflip(results['img'])

    # flip bboxes
    for key in results.get('bbox_fields', []):
        results[key] = self.bbox_flip(results[key], results['img_shape'])

    # flip masks by reversing their second axis
    for key in results.get('mask_fields', []):
        results[key] = [mask[:, ::-1] for mask in results[key]]
    return results