def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]

    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_ignore:
        gt_bboxes_ignore = ann['bboxes_ignore']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None

    # aug 1: apply extra augmentation
    if self.extra_aug is not None:
        img, gt_bboxes, gt_labels = \
            self.extra_aug(img, gt_bboxes, gt_labels)

    # aug 2: apply ordinary augmentations: flipping
    flip = bool(np.random.rand() < self.flip_ratio)

    # aug 3: apply ordinary augmentations: scaling
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = \
        self.img_transform(
            img=img,
            scale=img_scale,
            flip=flip,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio
        )
    img = img.copy()

    gt_bboxes = self.bbox_transform(
        bboxes=gt_bboxes,
        img_shape=img_shape,
        scale_factor=scale_factor,
        flip=flip
    )
    if self.with_ignore:
        gt_bboxes_ignore = self.bbox_transform(
            bboxes=gt_bboxes_ignore,
            img_shape=img_shape,
            scale_factor=scale_factor,
            flip=flip
        )

    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip
    )

    data = dict(
        img=DataContainer(to_tensor(img), stack=True),
        img_meta=DataContainer(data=img_meta, cpu_only=True),
        gt_bboxes=DataContainer(data=to_tensor(gt_bboxes))
    )
    if self.with_ignore:
        data['gt_bboxes_ignore'] = DataContainer(
            data=to_tensor(gt_bboxes_ignore))
    if self.with_label:
        data['gt_labels'] = DataContainer(to_tensor(gt_labels))

    return data
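# Hedged usage sketch (not from the original file): in mmdetection-style
# datasets, prepare_train_img is typically driven by __getitem__, which
# re-samples another index whenever None is returned (an image with no
# valid gt bboxes). The re-sampling strategy below is an assumption.
def __getitem__(self, idx):
    if self.test_mode:
        return self.prepare_test_img(idx)
    while True:
        data = self.prepare_train_img(idx)
        if data is None:
            # assumed fallback: draw a different random index and retry
            idx = np.random.randint(len(self.img_infos))
            continue
        return data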
def inference_single(model, img, img_transform, scale, flip,
                     resize_keep_ratio, rescale, device):
    img = mmcv.imread(img)
    ori_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img=img,
        scale=scale,
        flip=flip,
        keep_ratio=resize_keep_ratio,
    )
    img = to_tensor(img).to(device).unsqueeze(0)
    img_meta = [
        dict(ori_shape=ori_shape,
             img_shape=img_shape,
             pad_shape=pad_shape,
             scale_factor=scale_factor,
             flip=flip)
    ]
    with torch.no_grad():
        result = model.forward_test(img=img, img_meta=img_meta,
                                    rescale=rescale)
    return result
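# Hedged usage sketch: one way inference_single might be called, assuming
# the model is already built and its checkpoint loaded. The normalization
# constants (common ImageNet values) and the (1333, 800) test scale are
# placeholders, not values taken from this repo.
def demo_inference_single(model, img_path, device='cpu'):
    img_transform = ImageTransform(
        mean=(123.675, 116.28, 103.53),  # assumed ImageNet mean (RGB)
        std=(58.395, 57.12, 57.375),     # assumed ImageNet std (RGB)
        to_rgb=True,
    )
    return inference_single(model=model,
                            img=img_path,
                            img_transform=img_transform,
                            scale=(1333, 800),
                            flip=False,
                            resize_keep_ratio=True,
                            rescale=True,
                            device=device)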
def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]

    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    # build the multi-label target vector, one entry per class
    ann = self.get_ann_info(idx)
    gt_labels = np.zeros([len(self.CLASSES)], dtype=np.float32)
    for index, item in enumerate(self.CLASSES):
        gt_labels[index] = int(ann[item] == 1)

    # aug 1: apply extra augmentation (no bboxes for classification)
    if self.extra_aug is not None:
        img, _, gt_labels = self.extra_aug(img, None, gt_labels)

    # aug 2: apply ordinary augmentations: flipping
    flip = bool(np.random.rand() < self.flip_ratio)

    # aug 3: apply ordinary augmentations: scaling
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = \
        self.img_transform(
            img=img,
            scale=img_scale,
            flip=flip,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio
        )
    img = img.copy()

    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip
    )

    data = dict(
        img=DataContainer(to_tensor(img), stack=True),
        img_meta=DataContainer(data=img_meta, cpu_only=True),
    )
    if self.with_label:
        data['gt_labels'] = DataContainer(to_tensor(gt_labels), stack=False)

    return data
def _prepare_data(img, img_transform, img_scale, img_resize_keep_ratio,
                  img_flip, device):
    ori_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img=img,
        scale=img_scale,
        flip=img_flip,
        keep_ratio=img_resize_keep_ratio,
    )
    img = to_tensor(img).to(device).unsqueeze(0)
    img_meta = [
        dict(ori_shape=ori_shape,
             img_shape=img_shape,
             pad_shape=pad_shape,
             scale_factor=scale_factor,
             flip=img_flip)
    ]
    return dict(img=[img], img_meta=[img_meta])
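# Hedged usage sketch: _prepare_data packages the image and meta as
# single-element lists, matching mmdetection's test-time convention of one
# entry per augmentation. The model call below (return_loss=False,
# rescale=True) follows the mmdetection convention and is an assumption;
# this repo's detectors may instead take flags such as is_test=True.
def demo_prepare_and_forward(model, img_path, img_transform, device='cpu'):
    img = mmcv.imread(img_path)
    data = _prepare_data(img=img,
                         img_transform=img_transform,
                         img_scale=(1333, 800),  # placeholder test scale
                         img_resize_keep_ratio=True,
                         img_flip=False,
                         device=device)
    with torch.no_grad():
        return model(return_loss=False, rescale=True, **data)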
def prepare_test_img(self, idx):
    """Prepare an image for testing."""
    img_info = self.img_infos[idx]

    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    img, img_shape, pad_shape, scale_factor = \
        self.img_transform(
            img=img,
            scale=self.img_scales[0],
            flip=False,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio
        )
    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=False
    )
    data = dict(img=to_tensor(img),
                img_meta=DataContainer(img_meta, cpu_only=True))
    return data
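# Hedged usage sketch: because prepare_test_img wraps img_meta in a
# DataContainer, test batches are usually assembled with
# mmcv.parallel.collate, which knows how to batch DataContainers.
# samples_per_gpu=1 and batch_size=1 here are assumptions.
from functools import partial
from torch.utils.data import DataLoader
from mmcv.parallel import collate

def build_test_loader(dataset):
    return DataLoader(dataset,
                      batch_size=1,
                      shuffle=False,
                      collate_fn=partial(collate, samples_per_gpu=1))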
def inference_func_CPU(self, img):
    # load classes
    if 'CLASSES' in self.checkpoint['meta']:
        self.model.CLASSES = self.checkpoint['meta']['CLASSES']
    else:
        raise RuntimeError('CLASSES not found in checkpoint meta.')

    self.model.to(self.device)
    self.model.eval()

    # preprocess img
    img_transform = ImageTransform(
        mean=self.img_transform_cfg['mean'],
        std=self.img_transform_cfg['std'],
        to_rgb=self.img_transform_cfg['to_rgb'],
        size_divisor=self.img_transform_cfg['size_divisor'],
    )

    # inference
    ori_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img=img,
        scale=self.img_transform_cfg['scale'],
        flip=self.img_transform_cfg['flip'],
        keep_ratio=self.img_transform_cfg['resize_keep_ratio'],
    )
    img = to_tensor(img).to(self.device).unsqueeze(0)
    img_meta = [
        dict(ori_shape=ori_shape,
             img_shape=img_shape,
             pad_shape=pad_shape,
             scale_factor=scale_factor,
             flip=self.img_transform_cfg['flip'])
    ]
    with torch.no_grad():
        result = self.model.forward_test(img=img, img_meta=img_meta,
                                         rescale=True)

    # release intermediate tensors before returning; note that deleting
    # entries from locals() has no effect in CPython, so drop the
    # references explicitly instead
    del img, img_meta
    gc.collect()

    return result
def main():
    # define the model and restore checkpoint
    model = VGGClassifier(
        with_bn=False,
        num_classes=len(CLASSES),
        num_stages=5,
        dilations=(1, 1, 1, 1, 1),
        out_indices=(30, ),
        frozen_stages=-1,
        bn_eval=True,
        bn_frozen=False,
        ceil_mode=True,
        with_last_pool=True,
        dimension_before_fc=(10, 15),
        dropout_rate=0.5,
        pos_loss_weights=None,
    )
    checkpoint = load_checkpoint(model=model,
                                 filename=checkpoint_file,
                                 map_location='cpu',
                                 strict=False,
                                 logger=None)

    # define classes
    model.CLASSES = checkpoint['meta']['CLASSES']

    # put to device and freeze
    model.to(device)
    model.eval()

    # named modules, for choosing the Grad-CAM target layer:
    # backbone.features.0 ... backbone.features.30, classifier.0 ... classifier.6
    # for name, module in model.named_modules():
    #     print(name)

    img_transform = ImageTransform(
        mean=img_transform_cfg['mean'],
        std=img_transform_cfg['std'],
        to_rgb=img_transform_cfg['to_rgb'],
    )

    # inference
    # read image
    raw_img = mmcv.imread(img_path)
    ori_shape = raw_img.shape
    print(ori_shape)

    # transform image
    img, img_shape, pad_shape, scale_factor = img_transform(
        img=raw_img,
        scale=img_scale,
        flip=img_transform_cfg['flip'],
        pad_val=img_transform_cfg['pad_values'],
        keep_ratio=img_transform_cfg['resize_keep_ratio'],
    )
    img = to_tensor(img).to(device).unsqueeze(0)
    img_meta = [
        dict(ori_shape=ori_shape,
             img_shape=img_shape,
             pad_shape=pad_shape,
             scale_factor=scale_factor,
             flip=img_transform_cfg['flip'])
    ]

    # with torch.no_grad():
    #     result = model.forward_test(img=img, img_meta=img_meta, rescale=False)

    # Grad-CAM for the target class at the last conv layer
    target_layers = ["backbone.features.30"]
    target_class = 0
    gcam = GradCAM(model=model)
    probs = gcam.forward(img)
    ids_ = torch.LongTensor([[target_class]]).to(device)
    gcam.backward(ids=ids_)
    print(probs)

    for target_layer in target_layers:
        print("Generating Grad-CAM @{}".format(target_layer))
        # Grad-CAM
        # regions: [1, 1, H, W]
        # raw_img: [H, W, 3]
        regions = gcam.generate(target_layer=target_layer)
        regions = regions[0, 0].cpu().numpy()
        ori_regions = image_transfer_back(img=regions,
                                          scale=scale_factor,
                                          cur_shape=regions.shape,
                                          ori_shape=raw_img.shape[0:2])
        print(ori_regions.shape)
def main():
    # load image
    img = mmcv.imread(IMG_PATH)
    img_height = img.shape[0]
    img_width = img.shape[1]

    # image pre-processing
    img_transform = ImageTransform(
        mean=IMG_TRANSFORM_CONFIG['mean'],
        std=IMG_TRANSFORM_CONFIG['std'],
        to_rgb=IMG_TRANSFORM_CONFIG['to_rgb'],
    )
    img, img_shape, pad_shape, scale_factor = \
        img_transform(
            img=img,
            scale=IMG_SCALE,
            flip=False,
            pad_val=IMG_TRANSFORM_CONFIG['pad_values'],
            keep_ratio=IMG_TRANSFORM_CONFIG['resize_keep_ratio']
        )
    img_meta = dict(
        ori_shape=(img_height, img_width, 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=False
    )
    data = dict(img=DataContainer(to_tensor(img), stack=True),
                img_meta=DataContainer(img_meta, cpu_only=True))

    # define the model
    model = SSDDetector(
        # basic
        input_size=IMG_SCALE,
        num_classes=NUM_CLASS,
        in_channels=(512, 1024, 512, 256, 256),
        use_dropout=False,
        dropout_rate=None,
        # anchor generate
        anchor_ratios=([1 / 2.0, 1.0, 2.0],
                       [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
                       [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
                       [1 / 3.0, 1 / 2.0, 1.0, 2.0, 3.0],
                       [1 / 2.0, 1.0, 2.0]),
        anchor_strides=((16, 16), (16, 16), (30, 30), (60, 60), (100, 100)),
        basesizes=((12, 12), (16, 16), (24, 24), (30, 30), (36, 36)),
        allowed_border=-1,
        # regression
        target_means=(.0, .0, .0, .0),
        target_stds=(0.1, 0.1, 0.2, 0.2),
        # box assign
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        gt_max_assign_all=False,
        # sampling
        sampling=False,
        # balancing the loss
        neg_pos_ratio=3,
        # loss
        smoothl1_beta=1.,
        # inference nms
        nms_pre=-1,
        score_thr=0.02,
        min_size=100.0,
        max_scale_ratio=10.0,
        nms_cfg=['nms', 0.45, None],
        max_per_img=200,
        # device
        device='cpu',
    )

    # load checkpoint
    _ = load_checkpoint(model=model,
                        filename=CHECKPOINT_FILE,
                        map_location='cpu',
                        strict=True,
                        logger=None)

    model.eval()

    # inference the data
    with torch.no_grad():
        result = model(is_test=True,
                       rescale=True,
                       img=data['img'].data.unsqueeze(0),
                       img_meta=(data['img_meta'].data, ))

    show_one_image(result[0], IMG_PATH, SAVE_PATH)
def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]

    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))

    ann = self.get_ann_info(idx)
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']

    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None

    # aug 1: apply extra augmentation
    if self.extra_aug is not None:
        img, gt_bboxes, gt_labels = \
            self.extra_aug(img, gt_bboxes, gt_labels)

    # visualize augmented data (debugging helper)
    # import string
    # import random
    # import os
    # temp_data = np.copy(img)
    # letters = string.ascii_lowercase
    # name = ''.join(random.choice(letters) for i in range(10))
    # dir_path = os.path.dirname(os.getcwd()) + '/MobileDentist'
    # work_dir = dir_path + '/work_dirs/dental_1009_w_pretrained_wt_fix_w_phonaugment/'
    # mmcv.imwrite(temp_data, work_dir + '{}.jpg'.format(name))

    # aug 2: apply ordinary augmentations: flipping
    flip = bool(np.random.rand() < self.flip_ratio)

    # aug 3: apply ordinary augmentations: scaling
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = \
        self.img_transform(
            img=img,
            scale=img_scale,
            flip=flip,
            pad_val=self.pad_values,
            keep_ratio=self.resize_keep_ratio
        )
    img = img.copy()

    gt_bboxes = self.bbox_transform(
        bboxes=gt_bboxes,
        img_shape=img_shape,
        scale_factor=scale_factor,
        flip=flip
    )

    img_meta = dict(
        ori_shape=(img_info['height'], img_info['width'], 3),
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip
    )

    data = dict(
        img=DataContainer(to_tensor(img), stack=True),
        img_meta=DataContainer(data=img_meta, cpu_only=True),
        gt_bboxes=DataContainer(data=to_tensor(gt_bboxes)),
        # np.long was removed in NumPy 1.24; use the explicit int64 dtype
        gt_labels=DataContainer(to_tensor(gt_labels.astype(np.int64)))
    )
    return data
def inference_func_CPU(self, raw_img, start_r=-1, end_r=-1):
    # define classes
    self.model.CLASSES = self.checkpoint['meta']['CLASSES']

    # put to device and freeze
    self.model.to(self.device)
    self.model.eval()

    # inference
    # transform image
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img=raw_img,
        scale=self.img_scale,
        flip=self.img_transform_cfg['flip'],
        pad_val=self.img_transform_cfg['pad_values'],
        keep_ratio=self.img_transform_cfg['resize_keep_ratio'],
    )
    img = to_tensor(img).to(self.device).unsqueeze(0)

    target_layers = ["backbone.features.30"]
    gcam = GradCAM(model=self.model)
    probs = gcam.forward(img)
    final_probs = probs.detach().cpu().numpy()

    # heatmap for class 0
    target_class = 0
    ids_ = torch.LongTensor([[target_class]]).to(self.device)
    gcam.backward(ids=ids_)
    # Grad-CAM
    # regions: [H, W]
    # raw_img: [H, W, 3]
    # ori_regions_heatmap: [H, W, 3]
    for target_layer in target_layers:
        regions_0 = gcam.generate(target_layer=target_layer)
        regions_0 = regions_0[0, 0].cpu().numpy()
        ori_regions_0 = image_transfer_back(img=regions_0,
                                            scale=scale_factor,
                                            cur_shape=regions_0.shape,
                                            ori_shape=raw_img.shape[0:2])
        ori_regions_heatmap_0, alpha0 = to_heatmap(ori_regions_0,
                                                   start_r, end_r, 'red')

    # heatmap for class 1
    target_class = 1
    ids_ = torch.LongTensor([[target_class]]).to(self.device)
    gcam.backward(ids=ids_)
    for target_layer in target_layers:
        regions_1 = gcam.generate(target_layer=target_layer)
        regions_1 = regions_1[0, 0].cpu().numpy()
        ori_regions_1 = image_transfer_back(img=regions_1,
                                            scale=scale_factor,
                                            cur_shape=regions_1.shape,
                                            ori_shape=raw_img.shape[0:2])
        ori_regions_heatmap_1, alpha1 = to_heatmap(ori_regions_1,
                                                   start_r, end_r, 'blue')

    # release intermediate tensors before returning; deleting entries from
    # locals() has no effect in CPython, so drop the references explicitly
    del img, ids_, probs
    gc.collect()

    return (final_probs, ori_regions_0, ori_regions_1,
            ori_regions_heatmap_0, ori_regions_heatmap_1, alpha0, alpha1)
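# Hedged usage sketch: consuming the tuple returned by inference_func_CPU.
# The printout assumes probs has shape [1, num_classes]; the heatmap and
# alpha formats come from this repo's to_heatmap and are passed through
# without re-interpretation.
def demo_gradcam_inference(engine, img_path):
    raw_img = mmcv.imread(img_path)
    (probs, regions_0, regions_1,
     heatmap_0, heatmap_1, alpha0, alpha1) = engine.inference_func_CPU(raw_img)
    for class_idx, p in enumerate(probs[0]):
        print('class {}: prob {:.3f}'.format(class_idx, p))
    return heatmap_0, heatmap_1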