def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize, optionally flip and pad an array, then add a leading axis.

    Args:
        img: Input array to transform.
        scale: Target scale, forwarded unchanged to ``mmcv.imrescale`` or
            ``mmcv.imresize``.
        flip (bool): Apply ``mmcv.imflip`` when true.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.

    Returns:
        The transformed array with one extra axis of length 1 in front.
    """
    # Both branches sample with nearest-neighbour. Previously only the
    # keep_ratio branch asked for it, so the two modes silently used
    # different interpolation for the same kind of input.
    if keep_ratio:
        img = mmcv.imrescale(img, scale, interpolation='nearest')
    else:
        img = mmcv.imresize(img, scale, interpolation='nearest')
    if flip:
        img = mmcv.imflip(img)
    if self.size_divisor is not None:
        img = mmcv.impad_to_multiple(img, self.size_divisor)
    # The scale factor and the shapes are not part of the return value, so
    # they are no longer computed.
    return img[np.newaxis, :, :]
def _resize_masks(self, results):
    """Resize every mask field named in ``results['mask_fields']`` in place.

    Fields whose value is ``None`` are skipped. With ``self.keep_ratio`` the
    masks are rescaled by ``results['scale_factor']``; otherwise they are
    resized to the first two entries of ``results['img_shape']``. Nearest
    interpolation is used in both cases.

    Args:
        results (dict): Pipeline dict; reads ``mask_fields``,
            ``scale_factor`` / ``img_shape`` and the listed mask fields,
            and overwrites each mask field with a stacked array.
    """
    for key in results.get('mask_fields', []):
        if results[key] is None:
            continue
        if self.keep_ratio:
            masks = [
                mmcv.imrescale(
                    mask, results['scale_factor'], interpolation='nearest')
                for mask in results[key]
            ]
        else:
            # The size is passed as (img_shape[1], img_shape[0]).
            mask_size = (results['img_shape'][1], results['img_shape'][0])
            masks = [
                mmcv.imresize(mask, mask_size, interpolation='nearest')
                for mask in results[key]
            ]
        if masks:
            results[key] = np.stack(masks)
        else:
            # np.stack raises ValueError on an empty sequence, so a field
            # without any mask gets an explicit empty (0, H, W) array.
            empty_shape = (0, ) + tuple(results['img_shape'][:2])
            results[key] = np.empty(empty_shape, dtype=np.uint8)
def _resize_img(self, results):
    """Resize ``results['img']`` and, when present, ``results['ref_img']``.

    Each resized array replaces its source entry. ``img_shape``,
    ``pad_shape``, ``scale_factor`` and ``keep_ratio`` are then written from
    the last image processed, which is always ``results['img']``.
    """
    targets = ['img']
    if 'ref_img' in results:
        # The reference image is handled first so 'img' stays last.
        targets.insert(0, 'ref_img')

    for name in targets:
        source = results[name]
        if not self.keep_ratio:
            resized, w_ratio, h_ratio = mmcv.imresize(
                source, results['scale'], return_scale=True)
            factor = np.array([w_ratio, h_ratio, w_ratio, h_ratio],
                              dtype=np.float32)
        else:
            resized, factor = mmcv.imrescale(
                source, results['scale'], return_scale=True)
        results[name] = resized

    results['img_shape'] = resized.shape
    results['pad_shape'] = resized.shape  # no padding has happened yet
    results['scale_factor'] = factor
    results['keep_ratio'] = self.keep_ratio
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize, normalize, optionally flip, pad and transpose an image.

    Args:
        img: Input image; must have three axes after processing, because
            the result is transposed with ``(2, 0, 1)``.
        scale: Target scale (``keep_ratio``) or target size otherwise.
        flip (bool): Whether to flip the image with ``mmcv.imflip``.
        keep_ratio (bool): Rescale keeping the aspect ratio when true;
            resize to ``scale`` ignoring the aspect ratio when false.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` where
        ``img_shape`` is the shape after resizing and ``pad_shape`` the
        shape after padding (equal to ``img_shape`` without padding).
    """
    # Step 1: bring the image to the requested size (bilinear sampling).
    if not keep_ratio:
        # Exact target size, aspect ratio ignored.
        resized, w_ratio, h_ratio = mmcv.imresize(
            img=img,
            size=scale,
            return_scale=True,
            interpolation='bilinear',
        )
        scale_factor = np.array(
            [w_ratio, h_ratio, w_ratio, h_ratio], dtype=np.float32
        )
    else:
        # As large as possible within ``scale`` with the ratio preserved.
        resized, scale_factor = mmcv.imrescale(
            img=img,
            scale=scale,
            return_scale=True,
            interpolation='bilinear',
        )
    img_shape = resized.shape

    # Step 2: normalize.
    out = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)

    # Step 3: optional flip.
    if flip:
        out = mmcv.imflip(out)

    # Step 4: pad each side up to a multiple of size_divisor, if one is set.
    pad_shape = img_shape
    if self.size_divisor is not None:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
        pad_shape = out.shape

    # Step 5: move the last axis to the front, (h, w, c) -> (c, h, w).
    return out.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, crop_info=None, keep_ratio=True):
    """Jitter, resize, normalize, flip, then crop or pad an image.

    Args:
        img: Input image array.
        scale: Target scale forwarded to ``mmcv.imrescale`` /
            ``mmcv.imresize``.
        flip (bool): Whether to flip the image with ``mmcv.imflip``.
        crop_info: Optional ``(cx1, cy1, cx2, cy2)`` crop window applied
            after flipping. When given, no padding is applied.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``. ``img`` is
        transposed with ``(2, 0, 1)``; ``img_shape`` is the shape right
        after resizing; ``pad_shape`` is the shape after cropping or
        padding (or ``img_shape`` when neither happened).
    """
    # Image jittering. Conversion failures are reported and the input is
    # passed on unchanged, as before, but KeyboardInterrupt/SystemExit are
    # no longer swallowed.
    try:
        img = Image.fromarray(img)
    except Exception:
        print(img)
    for jitter_name in ('random_color', 'random_contrast',
                        'random_sharpness'):
        # Each jitter step is optional and only applied when configured.
        if hasattr(self, jitter_name):
            img = getattr(self, jitter_name)(img)
    img = np.array(img)

    if keep_ratio:
        img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        img, w_scale, h_scale = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                dtype=np.float32)
    img_shape = img.shape
    img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
    if flip:
        img = mmcv.imflip(img)

    if crop_info is not None:
        # If cropping, there is no need to pad. The cropped shape is kept
        # as pad_shape; it used to be overwritten with img_shape by the
        # else branch of the padding check that followed.
        cx1, cy1, cx2, cy2 = crop_info
        img = img[cy1:cy2, cx1:cx2]
        pad_shape = img.shape
    elif self.size_divisor is not None:
        img = mmcv.impad_to_multiple(img, self.size_divisor)
        pad_shape = img.shape
    else:
        pad_shape = img_shape

    img = img.transpose(2, 0, 1)
    return img, img_shape, pad_shape, scale_factor
def _resize_img(self, results):
    """Resize ``results['img']`` to ``results['scale']`` in place.

    Writes the resized image back together with ``img_shape``,
    ``pad_shape`` (same as ``img_shape`` at this point), ``scale_factor``
    and ``keep_ratio``.
    """
    source, target = results['img'], results['scale']
    if self.keep_ratio:
        resized, factor = mmcv.imrescale(source, target, return_scale=True)
    else:
        resized, w_ratio, h_ratio = mmcv.imresize(
            source, target, return_scale=True)
        # Stored as [w, h, w, h].
        factor = np.array([w_ratio, h_ratio, w_ratio, h_ratio],
                          dtype=np.float32)

    results['img'] = resized
    results['img_shape'] = resized.shape
    results['pad_shape'] = resized.shape  # in case there is no padding
    results['scale_factor'] = factor
    results['keep_ratio'] = self.keep_ratio
def __call__(self, masks, pad_shape, scale, flip=False, keep_ratio=True):
    """Resize, optionally flip and pad a list of masks, then stack them.

    Args:
        masks: Iterable of mask arrays.
        pad_shape: Shape whose first two entries give the padded size.
        scale: Target scale forwarded to ``mmcv.imrescale`` /
            ``mmcv.imresize``.
        flip (bool): Reverse each mask along its second axis when true.
        keep_ratio (bool): Use ``mmcv.imrescale`` when true, otherwise
            ``mmcv.imresize``.

    Returns:
        numpy.ndarray: Padded masks stacked along a new first axis. For an
        empty ``masks`` input an empty ``uint8`` array of shape
        ``(0, pad_shape[0], pad_shape[1])`` is returned.
    """
    if keep_ratio:
        masks = [
            mmcv.imrescale(mask, scale, interpolation='nearest')
            for mask in masks
        ]
    else:
        masks = [
            mmcv.imresize(mask, scale, interpolation='nearest')
            for mask in masks
        ]
    if flip:
        masks = [mask[:, ::-1] for mask in masks]
    padded_masks = [
        mmcv.impad(mask, shape=pad_shape[:2], pad_val=0) for mask in masks
    ]
    if not padded_masks:
        # np.stack raises ValueError on an empty list, so return an
        # explicit empty array instead.
        return np.empty((0, ) + tuple(pad_shape[:2]), dtype=np.uint8)
    return np.stack(padded_masks, axis=0)
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Resize, normalize, optionally flip, pad and transpose an image.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``; ``img`` is
        transposed with ``(2, 0, 1)``.
    """
    if not keep_ratio:
        # For recognition the ratio need not be preserved (e.g. resize to
        # 224).
        resized, w_ratio, h_ratio = mmcv.imresize(
            img, scale, return_scale=True)
        scale_factor = np.array([w_ratio, h_ratio, w_ratio, h_ratio],
                                dtype=np.float32)
    else:
        # ``scale`` holds one long edge and one short edge; their order
        # does not matter.
        resized, scale_factor = mmcv.imrescale(img, scale, return_scale=True)

    img_shape = resized.shape
    result = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)
    if flip:
        result = mmcv.imflip(result)

    if self.size_divisor is None:
        pad_shape = img_shape
    else:
        # Pad the borders so every side is divisible by the divisor,
        # e.g. 32.
        result = mmcv.impad_to_multiple(result, self.size_divisor)
        pad_shape = result.shape  # image shape after padding

    result = result.transpose(2, 0, 1)
    return result, img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Apply resize, normalization, optional flip, padding and transpose.

    Args:
        img: Input image.
        scale: Target scale forwarded to ``mmcv.imrescale`` /
            ``mmcv.imresize``.
        flip (bool): Flip the image with ``mmcv.imflip`` when true.
        keep_ratio (bool): Selects ``mmcv.imrescale`` (true) or
            ``mmcv.imresize`` (false).

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)``.
    """
    if keep_ratio:
        resized, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        resized, sx, sy = mmcv.imresize(img, scale, return_scale=True)
        scale_factor = np.array([sx, sy, sx, sy], dtype=np.float32)

    img_shape = resized.shape
    out = mmcv.imnormalize(resized, self.mean, self.std, self.to_rgb)
    if flip:
        out = mmcv.imflip(out)

    needs_pad = self.size_divisor is not None
    if needs_pad:
        out = mmcv.impad_to_multiple(out, self.size_divisor)
    # Without padding the padded shape is just the resized shape.
    pad_shape = out.shape if needs_pad else img_shape

    return out.transpose(2, 0, 1), img_shape, pad_shape, scale_factor
def __call__(self, img, scale, flip=False, keep_ratio=True):
    """Run the image through resize, normalize, flip, pad and transpose.

    Returns:
        tuple: ``(img, img_shape, pad_shape, scale_factor)`` with ``img``
        transposed from HWC to CHW.
    """
    # Resize.
    if keep_ratio:
        image, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
    else:
        image, ratio_w, ratio_h = mmcv.imresize(
            img, scale, return_scale=True)
        scale_factor = np.array(
            [ratio_w, ratio_h, ratio_w, ratio_h], dtype=np.float32)
    img_shape = image.shape

    # Normalize with the configured mean/std; to_rgb is forwarded as is.
    image = mmcv.imnormalize(image, self.mean, self.std, self.to_rgb)

    # Flip when requested.
    image = mmcv.imflip(image) if flip else image

    # Pad so the sides are multiples of size_divisor, when one is set.
    if self.size_divisor is None:
        pad_shape = img_shape
    else:
        image = mmcv.impad_to_multiple(image, self.size_divisor)
        pad_shape = image.shape

    chw = image.transpose(2, 0, 1)  # HWC to CHW
    return chw, img_shape, pad_shape, scale_factor
def _resize_img(self, results):
    """Resize images with ``results['scale']``.

    With ``keep_ratio`` and either ``multi_scale_test`` set or the dataset
    being ``'pascal_context'``, ``results['scale']`` is first replaced by a
    size derived from a per-dataset minimum short side. The per-axis scale
    factors are then recomputed from the actual input and output shapes.
    """
    src = results['img']
    if self.keep_ratio:
        if self.multi_scale_test or self.dataset == 'pascal_context':
            # Smallest allowed short side for each supported dataset.
            short_floor = {
                'cityscapes': 768,
                'ade20k': 512,
                'pascal_context': 480,
            }
            floor = short_floor[self.dataset]
            if min(results['scale']) < floor:
                short_side = floor
            else:
                short_side = results['scale'][0]
            src_h, src_w = src.shape[:2]
            # The shorter image side becomes ``short_side``; the other side
            # is scaled by the same ratio.
            if src_h > src_w:
                results['scale'] = (short_side * src_h / src_w, short_side)
            else:
                results['scale'] = (short_side, short_side * src_w / src_h)
        resized, _ = mmcv.imrescale(src, results['scale'], return_scale=True)
        # w_scale and h_scale differ slightly, so they are derived from the
        # real shapes instead of the single factor from mmcv.imrescale.
        out_h, out_w = resized.shape[:2]
        src_h, src_w = src.shape[:2]
        w_scale = out_w / src_w
        h_scale = out_h / src_h
    else:
        resized, w_scale, h_scale = mmcv.imresize(
            src, results['scale'], return_scale=True)

    results['img'] = resized
    results['img_shape'] = resized.shape
    results['pad_shape'] = resized.shape  # in case there is no padding
    results['scale_factor'] = np.array(
        [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    results['keep_ratio'] = self.keep_ratio
def _resize_seg(self, results):
    """Resize semantic segmentation map with ``results['scale']``.

    Every field named in ``results['seg_fields']`` is resized with nearest
    interpolation. With ``keep_ratio`` the result is additionally passed
    through ``self._align``; otherwise its height and width are asserted to
    be multiples of ``self.size_divisor``.
    """
    for key in results.get('seg_fields', []):
        if not self.keep_ratio:
            seg = mmcv.imresize(
                results[key], results['scale'], interpolation='nearest')
            height, width = seg.shape[:2]
            divisor = self.size_divisor
            # A side is aligned when rounding it up to the next multiple of
            # the divisor leaves it unchanged.
            height_ok = int(np.ceil(height / divisor)) * divisor == height
            width_ok = int(np.ceil(width / divisor)) * divisor == width
            assert height_ok and width_ok, \
                "gt_seg size not align. h:{} w:{}".format(height, width)
        else:
            seg = mmcv.imrescale(
                results[key], results['scale'], interpolation='nearest')
            seg = self._align(seg, self.size_divisor,
                              interpolation='nearest')
        results[key] = seg
def _resize_img(self, results):
    """Resize ``results['img']`` using a scale looked up by original width.

    ``results['scale']`` is overwritten with
    ``self.ori_width_2_scale[results['ori_shape'][1]]`` before resizing.
    """
    lookup_key = results['ori_shape'][1]
    results['scale'] = self.ori_width_2_scale[lookup_key]

    if not self.keep_ratio:
        resized, w_ratio, h_ratio = mmcv.imresize(
            results['img'], results['scale'], return_scale=True)
        factor = np.array([w_ratio, h_ratio, w_ratio, h_ratio],
                          dtype=np.float32)
    else:
        resized, factor = mmcv.imrescale(
            results['img'], results['scale'], return_scale=True)

    results['img'] = resized
    new_shape = resized.shape
    results['img_shape'] = new_shape
    results['pad_shape'] = new_shape  # in case there is no padding
    results['scale_factor'] = factor
    results['keep_ratio'] = self.keep_ratio
def _load_semantic_map_from_mask(self, results):
    """Build a 1/8-resolution semantic map from instance masks and labels.

    Each instance mask is rescaled by 1/8 (nearest), multiplied by its
    label plus one, and merged into the map with an element-wise maximum.
    One is subtracted at the end, so cells that stayed at zero become -1.
    The map gets a leading axis and is stored in ``results['gt_sem_map']``.

    Returns:
        dict: The same ``results`` object.
    """
    boxes = results['gt_bboxes']
    instance_masks = results['gt_masks'].masks
    labels = results['gt_labels']
    pad_shape = results['pad_shape']

    map_size = (int(pad_shape[0] / 8), int(pad_shape[1] / 8))
    sem_map = np.zeros(map_size, dtype=np.int64)
    # The number of boxes determines how many instances are merged.
    for idx in range(boxes.shape[0]):
        small_mask = mmcv.imrescale(
            instance_masks[idx], 1. / 8, interpolation='nearest')
        # Labels are shifted by one so they do not collide with the zero
        # background (assumes 0/1 masks - TODO confirm).
        sem_map = np.maximum(sem_map, small_mask * (labels[idx] + 1))

    results['gt_sem_map'] = (sem_map - 1)[None, ...]
    return results
def _resize_img(self, results):
    """Resize ``results['img']`` to ``results['scale']`` in place.

    The stored ``scale_factor`` is always ``[w, h, w, h]``; in the
    ``keep_ratio`` case it is computed from the input and output shapes.
    """
    original = results['img']
    if self.keep_ratio:
        resized, _ = mmcv.imrescale(
            original, results['scale'], return_scale=True)
        # w_scale and h_scale differ slightly, so both are measured from
        # the shapes rather than taken from mmcv.imrescale.
        resized_h, resized_w = resized.shape[:2]
        original_h, original_w = original.shape[:2]
        w_scale = resized_w / original_w
        h_scale = resized_h / original_h
    else:
        resized, w_scale, h_scale = mmcv.imresize(
            original, results['scale'], return_scale=True)

    results['img'] = resized
    results['img_shape'] = resized.shape
    results['pad_shape'] = resized.shape  # in case there is no padding
    results['scale_factor'] = np.array(
        [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    results['keep_ratio'] = self.keep_ratio
def __call__(self, img_group):
    """Randomly rescale a group of images and take one shared random crop.

    A target edge length is drawn from ``self.scale``; all images are
    rescaled by the factor that brings both leading axes of the first image
    to at least that length. A window of ``self.size`` is then cut at one
    random offset from every image.

    Returns:
        tuple: ``(crops, box)`` where ``box`` is the float32 array
        ``[off0, off1, off0 + size[0] - 1, off1 + size[1] - 1]`` with
        offsets given in axis-0, axis-1 order.
    """
    target_edge = float(random.randint(*self.scale))
    len0, len1, _ = img_group[0].shape
    factor = max(target_edge / len0, target_edge / len1)
    rescaled = [mmcv.imrescale(frame, factor) for frame in img_group]

    len0, len1, _ = rescaled[0].shape
    crop0, crop1 = self.size[0], self.size[1]
    off0 = random.randint(0, len0 - crop0)
    off1 = random.randint(0, len1 - crop1)

    box = np.array(
        [off0, off1, off0 + crop0 - 1, off1 + crop1 - 1], dtype=np.float32)
    crops = [
        frame[off0:off0 + crop0, off1:off1 + crop1] for frame in rescaled
    ]
    return (crops, box)
def process_mask(self, gt_masks, mask_feat_size):
    """Zero-pad per-image masks to a common size, rescale and stack them.

    Args:
        gt_masks: Iterable of per-image mask arrays with three axes.
        mask_feat_size: Two values that are multiplied by
            ``self.mask_downsample`` to obtain the padded size.

    Returns:
        torch.Tensor: All rescaled masks stacked along a new first axis.
    """
    feat_h, feat_w = tuple(mask_feat_size)
    canvas_hw = (feat_h * self.mask_downsample,
                 feat_w * self.mask_downsample)

    tensors = []
    for masks in gt_masks:
        # Place the masks in the top-left corner of a zero canvas.
        canvas = np.zeros((masks.shape[0], ) + canvas_hw, dtype=masks.dtype)
        canvas[..., :masks.shape[1], :masks.shape[2]] = masks
        # Shrink every plane by the output stride.
        tensors.extend(
            torch.Tensor(mmcv.imrescale(plane, 1. / self.mask_out_stride))
            for plane in canvas)
    return torch.stack(tensors, 0)
def _resize_img(self, results):
    """Resize images with ``results['scale']``.

    Stores the resized image plus ``img_shape``, ``pad_shape``,
    ``scale_factor`` (``[w, h, w, h]``) and ``keep_aspect`` in ``results``.
    """
    original = results["img"]
    if not self.keep_aspect:
        resized, w_scale, h_scale = mmcv.imresize(
            original, results["scale"], return_scale=True
        )
    else:
        resized, _ = mmcv.imrescale(
            original, results["scale"], return_scale=True
        )
        # Per-axis factors are measured from the shapes before and after.
        out_h, out_w = resized.shape[:2]
        in_h, in_w = original.shape[:2]
        w_scale = out_w / in_w
        h_scale = out_h / in_h

    results["img"] = resized
    factors = np.array([w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    results["img_shape"] = resized.shape
    results["pad_shape"] = resized.shape
    results["scale_factor"] = factors
    results["keep_aspect"] = self.keep_aspect
def single_call(self, results, img_ref):
    """Apply to ``img_ref`` the transforms recorded in ``results``.

    In order: resize (by ``keep_ratio`` / ``scale``), flip, normalize (when
    ``img_norm_cfg`` is truthy), crop (when ``crop_coords`` is present) and
    pad to ``pad_shape`` when the shape still differs.

    Returns:
        The transformed reference image.
    """
    resize = mmcv.imrescale if results['keep_ratio'] else mmcv.imresize
    out = resize(img_ref, results['scale'], return_scale=False)

    if results['flip']:
        out = mmcv.imflip(out)

    norm_cfg = results['img_norm_cfg']
    if norm_cfg:
        out = mmcv.imnormalize(
            out, norm_cfg['mean'], norm_cfg['std'], norm_cfg['to_rgb'])

    if 'crop_coords' in results:
        coords = results['crop_coords']
        # First pair slices axis 0, second pair slices axis 1.
        out = out[coords[0]:coords[1], coords[2]:coords[3], :]

    target_shape = results['pad_shape']
    if out.shape != target_shape:
        out = mmcv.impad(out, target_shape[:2])
    return out
def mask_target_single(self, pos_proposals, pos_assigned_gt_inds, gt_masks,
                       stride):
    """Cut per-proposal mask targets from downscaled ground-truth masks.

    Each proposal box is divided by ``stride`` and truncated to integers.
    The assigned ground-truth mask is rescaled by ``1 / stride`` (nearest)
    and the box region is sliced out of it.

    Returns:
        A list of float tensors on the device of ``pos_proposals``, each
        with a leading axis of length 1; or an empty ``(0, 0, 0)`` tensor
        when there are no proposals.
    """
    num_pos = pos_proposals.size(0)
    if num_pos <= 0:
        return pos_proposals.new_zeros((0, 0, 0))

    gt_inds = pos_assigned_gt_inds.cpu().numpy()
    boxes = (pos_proposals.detach() / stride).long().cpu().numpy()
    device = pos_proposals.device

    targets = []
    for idx in range(num_pos):
        small_mask = mmcv.imrescale(
            gt_masks[gt_inds[idx]], scale=1. / stride,
            interpolation='nearest')
        x1, y1, x2, y2 = boxes[idx]
        # Keep the crop at least one pixel wide and high.
        width = np.maximum(x2 - x1 + 1, 1)
        height = np.maximum(y2 - y1 + 1, 1)
        patch = small_mask[y1:y1 + height, x1:x1 + width][None, ...]
        targets.append(torch.from_numpy(patch).float().to(device))
    return targets
def get_img(img_meta):
    """Reload and preprocess the image described by ``img_meta[0]``.

    The image is read from ``img_meta[0]['filename']``, rescaled by the
    recorded ``scale_factor``, normalized with the recorded
    ``img_norm_cfg``, padded to a multiple of 32 and returned as a tensor
    with a leading axis of length 1.

    Args:
        img_meta: Sequence whose first element is a dict with the keys
            ``filename``, ``scale_factor`` and ``img_norm_cfg``.

    Returns:
        torch.Tensor: The image laid out as ``(1, C, H, W)``.

    Raises:
        OSError: If the image file cannot be read.
    """
    size_divisor = 32
    meta = img_meta[0]

    filename = meta['filename']
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside mmcv.
        raise OSError('failed to read image: {}'.format(filename))

    # Resize (the scale factor returned by mmcv is not needed).
    img, _ = mmcv.imrescale(img, meta['scale_factor'], True)

    # Normalize
    norm_cfg = meta['img_norm_cfg']
    img = mmcv.imnormalize(
        img, norm_cfg['mean'], norm_cfg['std'], norm_cfg['to_rgb'])

    # Pad
    img = mmcv.impad_to_multiple(img, size_divisor, 0)

    # H x W x C -> C x H x W, then add a leading dim
    img = torch.from_numpy(np.transpose(img, (2, 0, 1)))
    return img.expand(1, -1, -1, -1)
def rescale(self, scale, interpolation='nearest'):
    """Rescale masks as large as possible while keeping the aspect ratio.

    For details refer to `mmcv.imrescale`.

    Args:
        scale (tuple[int]): the maximum size (h, w) of rescaled mask
        interpolation (str): same as :func:`mmcv.imrescale`

    Returns:
        BitmapMasks: the rescaled masks
    """
    if len(self.masks) != 0:
        stacked = np.stack([
            mmcv.imrescale(single, scale, interpolation=interpolation)
            for single in self.masks
        ])
    else:
        # No masks: only compute the size they would have had.
        target_w, target_h = mmcv.rescale_size(
            (self.width, self.height), scale)
        stacked = np.empty((0, target_h, target_w), dtype=np.uint8)

    out_h, out_w = stacked.shape[1:]
    return BitmapMasks(stacked, out_h, out_w)
def _resize_img(self, results):
    """Resize images with ``results['scale']``.

    With ``keep_ratio`` and a configured ``min_size``, ``results['scale']``
    is first replaced so the short side is at least ``min_size``. The
    per-axis scale factors are measured from the input and output shapes.
    """
    image = results['img']
    if self.keep_ratio:
        if self.min_size is not None:
            # TODO: 'min_size' is an int for now, i.e. the minimum image
            # shape is (min_size, min_size, 3). A tuple 'min_size' with
            # unequal width and height is not supported yet.
            requested_short = min(results['scale'])
            if requested_short < self.min_size:
                short_side = self.min_size
            else:
                short_side = requested_short

            in_h, in_w = image.shape[:2]
            if in_h > in_w:
                scaled_h, scaled_w = short_side * in_h / in_w, short_side
            else:
                scaled_h, scaled_w = short_side, short_side * in_w / in_h
            results['scale'] = (scaled_h, scaled_w)

        resized, _ = mmcv.imrescale(
            image, results['scale'], return_scale=True)
        # w_scale and h_scale differ slightly, so they are derived from the
        # shapes instead of the single factor from mmcv.imrescale.
        out_h, out_w = resized.shape[:2]
        in_h, in_w = image.shape[:2]
        w_scale = out_w / in_w
        h_scale = out_h / in_h
    else:
        resized, w_scale, h_scale = mmcv.imresize(
            image, results['scale'], return_scale=True)

    factors = np.array([w_scale, h_scale, w_scale, h_scale],
                       dtype=np.float32)
    results['img'] = resized
    results['img_shape'] = resized.shape
    results['pad_shape'] = resized.shape  # in case there is no padding
    results['scale_factor'] = factors
    results['keep_ratio'] = self.keep_ratio
def scale_genrate_warpper(label_path, img_name, img_path, outdir, phase,
                          SCALE):
    """Write rescaled images or image chips and collect their annotations.

    For every scale in ``SCALE`` the image is rescaled and the label lines
    are converted with ``get_scale_info``. Scales without labels are
    skipped. If the longer side exceeds 512 the image is split into chips
    by ``_pygenerate`` and each chip is written separately; otherwise the
    whole rescaled image is written. Files go to ``{outdir}/{phase}/``.

    Args:
        label_path: Path of the label file; its first two lines are skipped.
        img_name: Image file name; the part before the first ``.`` is used
            as the stem of output names.
        img_path: Path of the image to read.
        outdir: Output root directory.
        phase: Sub-directory of ``outdir`` to write into.
        SCALE: Iterable of scales passed to ``mmcv.imrescale``.

    Returns:
        list[dict]: One ``{"image": [filename, h, w], "anns": ...}`` entry
        per written file.
    """
    annotation = []
    original_img = cv2.imread(img_path)
    scales = copy.deepcopy(SCALE)
    # Read the labels inside a context manager so the file handle is
    # closed; it was previously left open.
    with open(label_path) as label_file:
        fp = label_file.readlines()[2:]

    stem = img_name.split(".")[0]
    out_prefix = "{}/{}/".format(outdir, phase)

    for scale in scales:
        img, scale_factor = mmcv.imrescale(
            original_img, scale, return_scale=True)
        H, W, _ = img.shape
        labels, masks, boxes = get_scale_info(fp, scale_factor)
        if len(labels) <= 0:
            continue

        if max(H, W) > 512:
            fchips, masks, boxes, labels = _pygenerate(
                boxes, masks, labels, W, H, 512, 416)
            # Chips are numbered from 1 in their file names.
            for count, (chip, mask, box, label) in enumerate(
                    zip(fchips, masks, boxes, labels), start=1):
                xmin, ymin, xmax, ymax = chip
                filename = stem + "_{}_{}_{}_{}".format(
                    str(scale), str(xmin), str(ymin),
                    str(0)) + "part" + str(count) + ".jpg"
                img2 = img[int(ymin):int(ymax), int(xmin):int(xmax), :].copy()
                h, w, _ = img2.shape
                cv2.imwrite(out_prefix + filename, img2)
                image = [filename, h, w]
                anns = generate_ann(box, mask, label, xmin, ymin)
                annotation.append({"image": image, "anns": anns})
        else:
            filename = stem + "_{}_{}_{}_{}".format(
                str(scale), str(0), str(0), str(0)) + "part" + str(0) + ".jpg"
            image = [filename, H, W]
            cv2.imwrite(out_prefix + filename, img)
            anns = generate_ann(boxes, masks, labels, 0, 0)
            annotation.append({"image": image, "anns": anns})
    return annotation
def _resize_masks(self, results):
    """Resize every mask field named in ``results["mask_fields"]`` in place.

    Fields whose value is ``None`` are skipped. With ``self.keep_ratio`` the
    masks are rescaled by ``results["scale_factor"]``; otherwise they are
    resized to the first two entries of ``results["img_shape"]``. Nearest
    interpolation is used in both cases. A field without any mask becomes
    an empty ``uint8`` array of shape ``(0, H, W)``.
    """
    for key in results.get("mask_fields", []):
        if results[key] is None:
            continue
        if self.keep_ratio:
            masks = [
                mmcv.imrescale(
                    mask, results["scale_factor"], interpolation="nearest")
                for mask in results[key]
            ]
        else:
            # The size is passed as (img_shape[1], img_shape[0]).
            mask_size = (results["img_shape"][1], results["img_shape"][0])
            masks = [
                mmcv.imresize(mask, mask_size, interpolation="nearest")
                for mask in results[key]
            ]
        if masks:
            results[key] = np.stack(masks)
        else:
            # Use only the first two entries of img_shape: with the full
            # shape the placeholder gained a trailing channel axis that the
            # stacked (N, H, W) result never has.
            empty_shape = (0, ) + tuple(results["img_shape"][:2])
            results[key] = np.empty(empty_shape, dtype=np.uint8)
def _resize_masks(self, results):
    """Resize the masks of ``mask_fields`` and ``ref_mask_fields`` in place.

    ``ref_mask_fields`` is processed first, and only when that key exists
    in ``results``. Fields that are ``None`` are skipped. Each processed
    field is replaced by a plain list of resized masks (not stacked).
    """
    field_groups = ['mask_fields']
    if 'ref_mask_fields' in results:
        field_groups.insert(0, 'ref_mask_fields')

    for group in field_groups:
        for key in results.get(group, []):
            current = results[key]
            if current is None:
                continue
            if not self.keep_ratio:
                # The size is passed as (img_shape[1], img_shape[0]).
                target = (results['img_shape'][1], results['img_shape'][0])
                resized = [
                    mmcv.imresize(item, target, interpolation='nearest')
                    for item in current
                ]
            else:
                resized = [
                    mmcv.imrescale(
                        item, results['scale_factor'],
                        interpolation='nearest')
                    for item in current
                ]
            results[key] = resized
def __call__(self, img, scale, flip=False, crop_info=None, keep_ratio=True,
             pad_val=0):
    """Resize (nearest), optionally flip, then crop or pad an array.

    Args:
        img: Input array.
        scale: Target scale forwarded to ``mmcv.imrescale`` /
            ``mmcv.imresize``.
        flip (bool): Flip with ``mmcv.imflip`` when true.
        crop_info: Optional ``(cx1, cy1, cx2, cy2)`` window. When given,
            the array is cropped and never padded.
        keep_ratio (bool): Selects ``mmcv.imrescale`` (true) or
            ``mmcv.imresize`` (false).
        pad_val: Fill value used when padding to ``self.size_divisor``.

    Returns:
        The transformed array.
    """
    resize = mmcv.imrescale if keep_ratio else mmcv.imresize
    out = resize(img, scale, interpolation='nearest')
    if flip:
        out = mmcv.imflip(out)

    if crop_info is not None:
        # Cropping replaces padding.
        left, top, right, bottom = crop_info
        return out[top:bottom, left:right]
    if self.size_divisor is not None:
        out = mmcv.impad_to_multiple(
            out, self.size_divisor, pad_val=pad_val)
    return out
def __call__(self, results):
    """Randomly rescale ``results['img_group']`` and crop it to input size.

    A target edge length is drawn from ``self.scale`` and all images are
    rescaled by the factor that brings both leading axes of the first image
    to at least that length. One random window of ``self.input_size`` is
    then cut from every image.

    Returns:
        dict: ``results`` with ``img_group`` replaced by the crops,
        ``crop_bbox`` set to the float32 array
        ``[off0, off1, off0 + size[0] - 1, off1 + size[1] - 1]`` and
        ``img_shape`` set to the shape of the first crop.
    """
    frames = results['img_group']
    target_edge = float(random.randint(*self.scale))
    len0, len1, _ = frames[0].shape
    factor = max(target_edge / len0, target_edge / len1)
    frames = [mmcv.imrescale(frame, factor) for frame in frames]

    len0, len1, _ = frames[0].shape
    crop0, crop1 = self.input_size[0], self.input_size[1]
    off0 = random.randint(0, len0 - crop0)
    off1 = random.randint(0, len1 - crop1)
    end0, end1 = off0 + crop0, off1 + crop1

    crop_box = np.array([off0, off1, end0 - 1, end1 - 1], dtype=np.float32)
    results['img_group'] = [frame[off0:end0, off1:end1] for frame in frames]
    results['crop_bbox'] = crop_box
    results['img_shape'] = results['img_group'][0].shape
    return results
def processing_one_image(file_path):
    """Load one image from disk and preprocess it into a tensor plus meta.

    The image is read, rescaled to fit ``(1333, 800)``, mean-subtracted
    (std of 1, no channel swap), padded to a multiple of 32 and converted
    to a tensor with the last axis moved to the front.

    Args:
        file_path: Path of the image file.

    Returns:
        tuple: ``(img, img_meta)`` where ``img_meta`` holds ``filename``,
        ``ori_filename``, ``flip``, ``ori_shape``, ``scale_factor``,
        ``img_shape``, ``img_norm_cfg`` and ``pad_shape``.
    """
    img_meta = {
        'filename': file_path,
        'ori_filename': file_path,
        'flip': False,
    }

    # 1. Read image
    raw_bytes = mmcv.FileClient(backend='disk').get(file_path)
    orig_img = mmcv.imfrombytes(raw_bytes, flag='color')  # BGR order
    img_meta['ori_shape'] = orig_img.shape

    # 2. Resize
    img, _ = mmcv.imrescale(orig_img, (1333, 800), return_scale=True)
    # w_scale and h_scale differ slightly, so they are measured from the
    # shapes instead of using the single factor from mmcv.imrescale.
    resized_h, resized_w = img.shape[:2]
    orig_h, orig_w = orig_img.shape[:2]
    w_scale = resized_w / orig_w
    h_scale = resized_h / orig_h
    img_meta['scale_factor'] = np.array(
        [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    img_meta['img_shape'] = img.shape

    # 3. Normalize
    mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
    std = np.array([1.0, 1.0, 1.0], dtype=np.float32)
    to_rgb = False
    img = mmcv.imnormalize(img, mean, std, to_rgb)
    img_meta['img_norm_cfg'] = dict(mean=mean, std=std, to_rgb=to_rgb)

    # 4. Pad
    img = mmcv.impad_to_multiple(img, divisor=32, pad_val=0)
    img_meta["pad_shape"] = img.shape

    # 5. ToTensor
    tensor = torch.from_numpy(img.transpose(2, 0, 1))
    return tensor, img_meta
def _resize_img(self, results):
    """Resize ``results['img']``; the KITTI path is currently hard-wired.

    KITTI path: the image is scaled uniformly so its height matches
    ``results['scale'][1]``, then cropped or zero-padded on the right so
    its width equals ``results['scale'][0]``. Here ``scale_factor`` is a
    single scalar. The generic mmcv path below it is kept but is not
    reachable while the local flag is ``True``.
    """
    kitti_mode = True
    if not kitti_mode:
        if self.keep_ratio:
            img, scale_factor = mmcv.imrescale(
                results['img'], results['scale'], return_scale=True)
        else:
            img, w_scale, h_scale = mmcv.imresize(
                results['img'], results['scale'], return_scale=True)
            scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
                                    dtype=np.float32)
    else:
        source = results['img']
        target = results['scale']
        # One factor for both axes, chosen from the height.
        scale_factor = target[1] / source.shape[0]
        new_h = np.round(source.shape[0] * scale_factor).astype(int)
        new_w = np.round(source.shape[1] * scale_factor).astype(int)
        # resize
        img = cv2.resize(source, (new_w, new_h))
        if len(target) > 1:
            wanted_w = target[0]
            if img.shape[1] > wanted_w:
                # crop in
                img = img[:, 0:wanted_w, :]
            elif img.shape[1] < wanted_w:
                # pad out
                missing = wanted_w - img.shape[1]
                img = np.pad(img, [(0, 0), (0, missing), (0, 0)], 'constant')

    results['img'] = img
    results['img_shape'] = img.shape
    results['pad_shape'] = img.shape  # in case there is no padding
    results['scale_factor'] = scale_factor
    results['keep_ratio'] = self.keep_ratio