def parepare_img(self, img):
    """Read *img*, preprocess it, and cache tensor + meta on ``self``.

    Does nothing unless ``self.new_image`` is truthy. On success sets
    ``self.img`` (a 1xCxHxW tensor on the model's device) and
    ``self.img_meta`` (a one-element list of meta dicts).

    NOTE(review): the method name looks like a typo for ``prepare_img``;
    kept unchanged so existing callers are not broken.
    """
    if not self.new_image:
        return
    cfg = self.model.cfg
    device = next(self.model.parameters()).device
    transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    raw = mmcv.imread(img)
    original_shape = raw.shape
    resized, resized_shape, padded_shape, scale = transform(
        raw,
        scale=cfg.data.test.img_scale,
        keep_ratio=cfg.data.test.get('resize_keep_ratio', True))
    self.img = to_tensor(resized).to(device).unsqueeze(0)
    self.img_meta = [
        dict(
            ori_shape=original_shape,
            img_shape=resized_shape,
            pad_shape=padded_shape,
            scale_factor=scale,
            flip=False)
    ]
def __init__(self, img_df, root_dir, type=None, ann_file=None, img_prefix=None, img_scale=(1333, 800), size_divisor=32, flip_ratio=0, with_mask=False, with_crowd=False, with_label=True, img_norm_cfg=None, resize_keep_ratio=True):
    """Dataset wrapper driven by a dataframe of image records.

    NOTE(review): ``img_norm_cfg`` defaults to None but is unpacked with
    ``**`` below, so the default raises TypeError — confirm callers always
    pass it. ``type`` shadows the builtin; kept for interface stability.
    """
    self.img_df = img_df
    self.root_dir = root_dir
    self.size_divisor = size_divisor
    self.img_norm_cfg = img_norm_cfg
    self.resize_keep_ratio = resize_keep_ratio
    self.flip_ratio = flip_ratio
    # Normalise to a list of (long_edge, short_edge) tuples.
    self.img_scales = [img_scale] if not isinstance(img_scale, list) else img_scale
    assert mmcv.is_list_of(self.img_scales, tuple)
    self.img_transform = ImageTransform(
        size_divisor=self.size_divisor, **self.img_norm_cfg)
def __init__(self, model_path, config_file, categories, iou_thr=0.5, score_thr=0, device=None):
    """Construct the mmdet-backed detector and its image transform.

    ``_build_detector()`` is expected to populate ``self.cfg`` (used just
    below) from ``config_file``.
    """
    super(MmdetDetector, self).__init__(
        categories=categories, iou_thr=iou_thr, score_thr=score_thr, device=device)
    self.model_path = model_path
    self.config_file = config_file
    self._build_detector()
    test_cfg = self.cfg.data.test
    self.img_transform = ImageTransform(
        size_divisor=test_cfg.size_divisor, **self.cfg.img_norm_cfg)
def inference_detector(model, imgs, cfg, device='cuda:0'):
    """Run the detector on one image or a list of images.

    Returns the result of ``_inference_single`` for a single image, or the
    generator produced by ``_inference_generator`` for a list.
    """
    transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    model = model.to(device)
    model.eval()
    if isinstance(imgs, list):
        return _inference_generator(model, imgs, transform, cfg, device)
    return _inference_single(model, imgs, transform, cfg, device)
def inference_detector(model, imgs, cfg, device='cuda:0'):
    """Inference entry point: single image or list of images.

    (Original comments translated; they were attached to the wrong
    branches — corrected here.)
    """
    # ImageTransform implements __call__, so the instance is callable.
    image_transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    model = model.to(device)
    model.eval()
    if not isinstance(imgs, list):
        # single image -> run directly
        return _inference_single(model, imgs, image_transform, cfg, device)
    # a list means multiple images -> lazy generator over per-image results
    return _inference_generator(model, imgs, image_transform, cfg, device)
def main(args):
    """Generate Grad-CAM and Grad-CAM++ visualisations for iSAID patches.

    Builds a detector from ``args.config_file``/``args.checkpoint_file``,
    then for every other square image in the hard-coded val folder, runs
    both CAM variants against layer ``backbone.layer4.2.conv3`` and saves
    overlay/heatmap/mask images to the output folders.
    """
    #####
    #TODO:build model & load weight
    from mmdet.datasets.transforms import ImageTransform
    from tqdm import tqdm
    config_file = args.config_file
    checkpoint_file = args.checkpoint_file
    cfg = Config.fromfile(config_file)
    device = 'cuda:0'
    model = init_detector(config_file, checkpoint_file, device=device)
    print(model)
    ######
    # Grad-CAM
    # layer_name = get_last_conv_name(model)
    layer_name = 'backbone.layer4.2.conv3'
    folder = '/EHDD1/ADD/data/iSAID_Devkit/preprocess/dataset/iSAID_patches/val/images/'
    dst_folder = '/EHDD1/ADD/data/iSAID_Devkit/preprocess/dataset/iSAID_patches/val/cam'
    os.makedirs(dst_folder, exist_ok=True)
    os.makedirs(dst_folder+'++', exist_ok=True)
    imlist_total = os.listdir(folder)
    imlist = list(filter(ispure, imlist_total))
    #####
    # NOTE(review): [1::2] processes only every second image — presumably a
    # deliberate subsample; confirm this is intended.
    for image in tqdm(imlist[1::2]):
        #TODO : prepare input
        grad_cam = GradCAM(model, layer_name)
        grad_cam_plus_plus = GradCamPlusPlus(model, layer_name)
        img = mmcv.imread(os.path.join(folder, image))
        # Only square patches are handled; others are reported and skipped.
        if img.shape[0] != img.shape[1]:
            print(image)
            continue
        img_transform = ImageTransform(size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
        data = _prepare_data(img, img_transform, model.cfg, device)
        #######
        image_dict = {}
        mask = grad_cam(data)  # cam mask
        grad_cam.remove_handlers()
        image_dict['overlay'], image_dict['heatmap'], image_dict['mask'] = gen_cam(img, mask)
        save_image(image_dict, image.split('.')[0], output_dir=dst_folder)
        #
        # Grad-CAM++
        # grad_cam_plus_plus = GradCamPlusPlus(model, layer_name)
        image_dict = {}
        mask_plus_plus = grad_cam_plus_plus(data)  # cam mask
        image_dict['overlay'], image_dict['heatmap'], image_dict['mask'] = gen_cam(img, mask_plus_plus)
        grad_cam_plus_plus.remove_handlers()
        save_image(image_dict, image.split('.')[0], output_dir=dst_folder+'++')
        # Free GPU memory between images to avoid accumulation.
        torch.cuda.empty_cache()
def inference_detector(model, imgs):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray]): Either image files or
            loaded images.

    Returns:
        The result of ``_inference_single`` for the given input.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # infer device from the weights
    transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    return _inference_single(model, imgs, transform, device)
def save_layer_outputs(model, hooks, graph, layer_name, input_folder, input_name, out_folder, input_size):
    """Run one forward pass and dump per-channel feature maps of a layer.

    The forward pass fires the registered forward hooks, which populate
    ``hook.output``; the hook matching ``layer_name`` (resolved to a
    ``layer_id`` via ``graph``) then has each channel of its output saved
    as an image with ``save_layer_img``.

    Args:
        model: detector callable as ``model(return_loss=False, ...)``.
        hooks: hook objects carrying ``layer_id`` and captured ``output``.
        graph: serialized model graph with ``config.layers`` entries.
        layer_name (str): name of the layer whose outputs to save.
        input_folder/input_name: location of the input image.
        out_folder: where the channel images are written.
        input_size: unused here; kept for interface compatibility.

    Returns:
        list[str]: filenames of the saved channel images (empty if the
        layer was not found).
    """
    # NOTE(review): a dead cv2/torch preprocessing chain (img_cv -> img ->
    # img_tensor, never consumed) was removed; the model is fed via
    # _prepare_data below.
    img_path = join(abspath(input_folder), input_name)
    img_fna = mmcv.imread(img_path)
    config_file = './fna_retinanet_fpn_retrain.py'
    cfg = mmcv.Config.fromfile(config_file)
    img_transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    data = _prepare_data(img_fna, img_transform, cfg, device='cuda:0')
    # The return value is not needed; this call triggers the hooks.
    model(return_loss=False, rescale=True, **data)
    # Resolve the requested layer name to its internal layer_id.
    layer_id = None
    for layer in graph["config"]["layers"]:
        if layer["name"] == layer_name:
            config = layer["config"]
            if config != "None" and "layer_id" in config:
                layer_id = config["layer_id"]
            break
    results = []
    if layer_id is not None:  # was `!= None`; use identity test for None
        for hook in hooks:
            if hook.layer_id == layer_id:
                # Was `min([channel, channel])` with the loop variable
                # shadowing the count — equivalent to iterating all channels.
                num_channels = np.shape(hook.output)[1]
                for channel in range(num_channels):
                    filename = save_layer_img(hook.output[0, channel, :, :],
                                              layer_name, channel,
                                              out_folder, input_name)
                    results.append(filename)
                break
    return results
def main():
    """Live demo: build a detector and run it on camera frames until ESC.

    NOTE(review): ``cam`` and ``dataset`` are not defined in this function —
    presumably module-level globals (a cv2.VideoCapture and a dataset class
    exposing CLASSES); confirm they exist at call time.
    """
    args = parse_args()
    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True
    if args.gpus == 1:
        model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint)
        model = MMDataParallel(model, device_ids=[0])
        img_transform = ImageTransform(size_divisor=cfg.data.test.size_divisor,
                                       **cfg.img_norm_cfg)
        classes = get_classes('coco')  # NOTE(review): computed but unused below
        while True:
            start_time = time.time()  # NOTE(review): unused; FPS display perhaps intended
            ret_val, img = cam.read()
            data = inference._prepare_data(img, img_transform, cfg, 'cuda:0')
            with torch.no_grad():
                result = model(return_loss=False, rescale=True, **data)
            model.module.show_result(data, result, cfg.img_norm_cfg,
                                     dataset=dataset.CLASSES)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    cv2.destroyAllWindows()
def inference_tracker(model, imgs, img_refer, bbox):
    """For Video Object Segmentation, inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        imgs (str/ndarray or list[str/ndarray]): Either image files or
            loaded images.
        img_refer (str/ndarray): The object in first frame.
        bbox (list): [x1, x2, y1, y2]

    Returns:
        If imgs is a str, a generator will be returned, otherwise return
        the detection results directly.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    if isinstance(imgs, list):
        return _inference_vos_generator(model, imgs, img_refer, bbox,
                                        transform, device)
    return _inference_vos_single(model, imgs, img_refer, bbox,
                                 transform, device)
def __init__(self, root, ann_file, img_prefix, img_norm_cfg, img_scale=(1242, 375), size_divisor=32, proposal_file=None, flip_ratio=0.5, with_point=False, with_mask=False, with_label=True, class_names=None, augmentor=None, generator=None, anchor_generator=None, anchor_area_threshold=1, target_encoder=None, out_size_factor=2, test_mode=False):
    """KITTI-style multi-modal dataset init (image_2/3, velodyne, calib).

    Fix: ``class_names`` previously used a mutable default list
    (``['Car', 'Van']``) shared across instances; it now defaults to None
    and is expanded per-instance, which is backward compatible.

    NOTE(review): ``img_prefix`` and ``proposal_file`` parameters are
    accepted but not used (``img_prefix`` is overwritten from ``root``);
    kept for interface compatibility.
    """
    self.root = root
    # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
    self.img_scales = img_scale if isinstance(img_scale, list) else [img_scale]
    assert mmcv.is_list_of(self.img_scales, tuple)
    # normalization configs
    self.img_norm_cfg = img_norm_cfg
    # flip ratio
    self.flip_ratio = flip_ratio
    # size_divisor (used for FPN)
    self.size_divisor = size_divisor
    # mutable-default fix: expand the default per instance
    self.class_names = ['Car', 'Van'] if class_names is None else class_names
    self.test_mode = test_mode
    self.with_label = with_label
    self.with_mask = with_mask
    self.with_point = with_point
    # KITTI folder layout rooted at `root`
    self.img_prefix = osp.join(root, 'image_2')
    self.right_prefix = osp.join(root, 'image_3')
    self.lidar_prefix = osp.join(root, 'velodyne_reduced')
    self.calib_prefix = osp.join(root, 'calib')
    self.label_prefix = osp.join(root, 'label_2')
    # ann_file is a plain text file of integer sample ids, one per line
    with open(ann_file, 'r') as f:
        self.sample_ids = list(map(int, f.read().splitlines()))
    if not self.test_mode:
        self._set_group_flag()
    # transforms
    self.img_transform = ImageTransform(size_divisor=self.size_divisor,
                                        **self.img_norm_cfg)
    # voxel
    self.augmentor = augmentor
    self.generator = generator
    self.target_encoder = target_encoder
    self.out_size_factor = out_size_factor
    self.anchor_area_threshold = anchor_area_threshold
    # anchor: precompute the flat anchor grid and its nearest-BEV boxes
    if anchor_generator is not None:
        feature_map_size = self.generator.grid_size[:2] // self.out_size_factor
        feature_map_size = [*feature_map_size, 1][::-1]
        anchors = anchor_generator(feature_map_size)
        self.anchors = anchors.reshape([-1, 7])
        self.anchors_bv = rbbox2d_to_near_bbox(
            self.anchors[:, [0, 1, 3, 4, 6]])
    else:
        self.anchors = None
def main():
    """Batch-render model predictions for every image in a folder.

    Resumes a detector through a Runner, then for each input image:
    transform + pad to a fixed 512x832 canvas, run the model, and (when
    predictions exist) render and save a visualisation next to the output
    folder.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.ckpt:
        cfg.resume_from = args.ckpt
    cfg.test_cfg.rcnn.score_thr = 0.5
    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )
    model = MMDataParallel(model, device_ids=[0]).cuda()
    # build runner (identity batch processor: weights only, no training step)
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()
    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    # NOTE(review): img_norm_cfg here is a module-level global, not cfg.img_norm_cfg
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale
    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            ori_shape = img.shape
            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)
            # Force padding for the issue of multi-GPU training
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img
            assert img.shape[1] == 512 and img.shape[
                2] == 832, "Image shape incorrect"
            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }], cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)
            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render, bbox_results,
                                       FOCAL_LENGTH)
                # ::-1 flips RGB -> BGR for cv2.imwrite
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
def __init__(self, img_scale, img_norm_cfg, size_divisor=None, proposal_file=None, num_max_proposals=1000, flip_ratio=0, with_mask=True, with_crowd=True, with_label=True, with_track=False, extra_aug=None, aug_ref_bbox_param=None, resize_keep_ratio=True, test_mode=False): self.frame_id_counter = 0 # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...] self.img_scales = img_scale if isinstance(img_scale, list) else [img_scale] assert mmcv.is_list_of(self.img_scales, tuple) # normalization configs self.img_norm_cfg = img_norm_cfg # max proposals per image self.num_max_proposals = num_max_proposals # flip ratio self.flip_ratio = flip_ratio assert flip_ratio >= 0 and flip_ratio <= 1 # padding border to ensure the image size can be divided by # size_divisor (used for FPN) self.size_divisor = size_divisor # with mask or not (reserved field, takes no effect) self.with_mask = with_mask # some datasets provide bbox annotations as ignore/crowd/difficult, # if `with_crowd` is True, then these info is returned. self.with_crowd = with_crowd # with label is False for RPN self.with_label = with_label self.with_track = with_track # params for augmenting bbox in the reference frame self.aug_ref_bbox_param = aug_ref_bbox_param # in test mode or not self.test_mode = test_mode # set group flag for the sampler if not self.test_mode: self._set_group_flag() # transforms self.img_transform = ImageTransform(size_divisor=self.size_divisor, **self.img_norm_cfg) self.bbox_transform = BboxTransform() self.mask_transform = MaskTransform() self.numpy2tensor = Numpy2Tensor() # if use extra augmentation if extra_aug is not None: self.extra_aug = ExtraAugmentation(**extra_aug) else: self.extra_aug = None # image rescale if keep ratio self.resize_keep_ratio = resize_keep_ratio
def get_app(model, hooks, classes, top, input_size, html_base_dir, temp_folder='./tmp', input_folder='./', mean=None, std=None):
    '''
    The base of the Flask application to be run
    :param model: the model to show
    :param classes: list of names of output classes to show in the GUI.
        if None passed - ImageNet classes will be used
    :param top: number of top predictions to show in the GUI
    :param html_base_dir: the directory for the HTML (usually inside the
        packages, quiverboard/dist must be a subdirectory)
    :param temp_folder: where the temporary image data should be saved
    :param input_folder: the image directory for the raw data
    :param mean: list of float mean values
    :param std: list of float std values
    :return: the configured Flask app

    NOTE(review): `classes`, `top`, `mean` and `std` are accepted but not
    used anywhere in this function — confirm whether they are still needed.
    '''
    # single_input_shape, input_channels = get_input_config(model)
    app = Flask(__name__)
    app.threaded = True
    CORS(app)
    '''
    prepare model
    '''
    # NOTE(review): `x` is built but never fed to the model (the direct
    # model(x) path below is commented out).
    x = torch.zeros(input_size, dtype=torch.float, requires_grad=False).cuda()
    model.cuda()
    model.eval()
    #out = model(x)
    #graph = make_dot(out, params=dict(model.named_parameters()))
    config_file = './fna_retinanet_fpn_retrain.py'
    cfg = mmcv.Config.fromfile(config_file)
    img_transform = ImageTransform(size_divisor=cfg.data.test.size_divisor,
                                   **cfg.img_norm_cfg)
    # One warm-up inference on a sample image to build the autograd graph.
    img = mmcv.imread('./data/Cat/0.jpg')
    data = _prepare_data(img.astype(np.float32), img_transform, cfg, device='cuda:0')
    out = model(return_loss=False, rescale=True, **data)
    #print(out[0][0])
    graph = make_dot(out[0][0], params=dict(model.named_parameters()))
    '''
    Static Routes
    '''
    @app.route('/')
    def home():
        return send_from_directory(join(html_base_dir, 'quiverboard/dist'),
                                   'index.html')
    @app.route('/<path>')
    def get_board_files(path):
        return send_from_directory(join(html_base_dir, 'quiverboard/dist'), path)
    @app.route('/temp-file/<path>')
    def get_temp_file(path):
        return send_from_directory(abspath(temp_folder), path)
    @app.route('/input-file/<path>')
    def get_input_file(path):
        return send_from_directory(abspath(input_folder), path)
    '''
    Computations
    '''
    @app.route('/model')
    def get_config():
        # print (jsonify(json.loads(model.to_json())))
        # print("test-------------")
        # model_file = "/home/user/ANS/QuiverTest/model.json"
        # model_file = "/home/user/ANS/pytorch_model_vis/model_1.json"
        # with open(model_file, "r") as f:
        #     return jsonify(json.loads(f.read()))
        return jsonify(graph)
    @app.route('/inputs')
    def get_inputs():
        return jsonify(list_img_files(input_folder))
    @app.route('/layer/<layer_name>/<input_path>')
    def get_layer_outputs(layer_name, input_path):
        print(layer_name, input_path)
        results = save_layer_outputs(model, hooks, graph, layer_name,
                                     input_folder, input_path, temp_folder,
                                     input_size=tuple(input_size[2:]))
        print("------------------ssss---------------------")
        return jsonify(results)
    @app.route('/predict/<input_path>')
    def get_prediction(input_path):
        # print ("prediction", input_path)
        # NOTE(review): hard-coded placeholder predictions — real inference
        # is presumably still to be wired in here.
        results = [[("sa", "bot_34", 0.2)], [("sa", "bot_35", 0.6)]]
        return safe_jsonify(results)
    return app
def summary(model, cfg):
    """Build a layer-by-layer text summary (shapes, params, FLOPs) of a
    detector by hooking every leaf module and running one fake inference.

    Returns the formatted report string. The model's ``test_cfg`` is
    temporarily swapped for a zero-threshold copy and restored afterwards.
    """
    def register_hook(name):
        # Returns a forward hook closed over the module's qualified name.
        def hook(module, input, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)
            # m_key = "%s-%i" % (class_name, module_idx + 1)
            m_key = name
            new_key = False
            # First call for this module records scalars; repeat calls
            # (modules invoked more than once) switch the entries to lists.
            if name not in summary.keys():
                summary[m_key] = OrderedDict()
                new_key = True
                summary[m_key]["input_shape"] = tuple(input[0].size())
                summary[m_key]["output_shape"] = tuple(output.size())
            else:
                if not isinstance(summary[m_key]["input_shape"], list):
                    summary[m_key]["input_shape"] = [summary[m_key]["input_shape"]]
                if not isinstance(summary[m_key]["output_shape"], list):
                    summary[m_key]["output_shape"] = [summary[m_key]["output_shape"]]
                summary[m_key]["input_shape"].append(tuple(input[0].size()))
                summary[m_key]["output_shape"].append(tuple(output.size()))
            params = 0
            flops = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            input_shape = tuple(input[0].size()[1:])
            output_shape = tuple(output.size()[1:])
            batch_size_ = output.size(0)
            # Per-op FLOP estimates keyed on the module class name.
            if class_name == "ReLU":
                flops = torch.prod(torch.LongTensor(input_shape))
            elif class_name == 'Linear':
                flops = params
            elif class_name == "BatchNorm2d" or class_name == "SyncBatchNorm":
                flops = torch.prod(torch.LongTensor(input_shape)) * 2
                if getattr(module, "affine"):
                    flops *= 2
            elif class_name == "Conv2d" or class_name == "ConvTranspose2d" or class_name == "DeformConv":
                flops = params * torch.prod(torch.LongTensor(output_shape[1:]))
            elif class_name == "MaxPool2d":
                if isinstance(module.kernel_size, tuple):
                    kernel_ops = torch.prod(torch.LongTensor([*module.kernel_size]))
                else:
                    kernel_ops = torch.prod(torch.LongTensor([module.kernel_size ** 2]))
                flops = kernel_ops * torch.prod(torch.LongTensor(output_shape))
            elif class_name == "AdaptiveMaxPool2d":
                kernel_ops = torch.prod(torch.LongTensor(input_shape[1:])//torch.LongTensor(output_shape[1:]))
                flops = kernel_ops * torch.prod(torch.LongTensor(output_shape))
            elif class_name == "AvgPool2d":
                if isinstance(module.kernel_size, tuple):
                    kernel_ops = torch.prod(torch.LongTensor([*module.kernel_size])) + 1
                else:
                    kernel_ops = torch.prod(torch.LongTensor([module.kernel_size ** 2])) + 1
                flops = kernel_ops * torch.prod(torch.LongTensor(output_shape))
            elif class_name == "AdaptiveAvgPool2d":
                kernel_ops = torch.prod(torch.LongTensor(input_shape[1:])//torch.LongTensor(output_shape[1:])) + 1
                flops = kernel_ops * torch.prod(torch.LongTensor(output_shape))
            elif class_name == "Softmax":
                flops = torch.prod(torch.LongTensor(output_shape)) * 3
            elif class_name == "NonLocal2d":
                flops = 2 * (torch.prod(torch.LongTensor(output_shape[1:])) ** 2) * module.planes
                if module.downsample:
                    flops /= 4
            elif class_name == "ContextBlock2d":
                if module.pool == "att":
                    flops = torch.prod(torch.LongTensor(output_shape))
            summary[m_key]["nb_params"] = params
            if new_key:
                summary[m_key]["nb_flops"] = flops * batch_size_
            else:
                if not isinstance(summary[m_key]["nb_flops"], list):
                    summary[m_key]["nb_flops"] = [summary[m_key]["nb_flops"]]
                summary[m_key]["nb_flops"].append(flops * batch_size_)
        return hook
    input_size = [cfg.data.test.img_scale]
    batch_size = 1
    img_transform = ImageTransform(
        size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
    # Random image at the test scale drives the fake forward pass.
    img = np.random.rand(*reversed(cfg.data.test.img_scale))
    data = _prepare_data(img, img_transform, cfg, 'cuda')
    # Zero the score threshold so every head produces output and gets hooked.
    fake_test_cfg = cfg.test_cfg.copy()
    if hasattr(fake_test_cfg, 'rcnn'):
        fake_test_cfg.rcnn.score_thr = 0
    else:
        fake_test_cfg.score_thr = 0
    # import pdb
    # pdb.set_trace()
    # model = build_detector(
    #     cfg.model, train_cfg=None, test_cfg=fake_test_cfg)
    model.test_cfg = fake_test_cfg
    model = model.to('cuda')
    model.eval()
    # create properties
    summary = OrderedDict()
    hooks = []
    # register hook
    # model.apply(register_hook)
    for m_name, m in model.named_modules():
        # hooks.append(m.register_forward_hook(register_hook(m_name, m)))
        # Hook only leaf-ish modules: skip containers, the root, and
        # anything that is neither a torch.nn type nor a known custom op.
        if (
            not isinstance(m, nn.Sequential)
            and not isinstance(m, nn.ModuleList)
            and not (m == model)
            and (hasattr(nn, m.__class__.__name__) or is_custom_operator(m))
        ):
            hooks.append(m.register_forward_hook(register_hook(m_name)))
    # make a forward pass
    with torch.no_grad():
        model(return_loss=False, rescale=True, **data)
    # restore the real test cfg
    model.test_cfg = cfg.test_cfg
    # remove these hooks
    for h in hooks:
        h.remove()
    line_new_format = "{:<30} {:>25} {:>15} {:>15}"
    line_new = line_new_format.format("Layer (type)", "Output Shape",
                                      "Param #", "FLOPS")
    line_length = len(line_new)
    s = "\n"
    s += "-" * line_length + "\n"
    s += line_new + "\n"
    s += "=" * line_length + "\n"
    total_params = 0
    total_flops = 0
    total_output = 0
    trainable_params = 0
    for layer in summary:
        # input_shape, output_shape, trainable, nb_params
        # List-valued entries mean the module ran more than once; params
        # are counted once but FLOPs accumulate per invocation.
        if isinstance(summary[layer]["output_shape"], list):
            line_new = line_new_format.format(
                layer,
                str(summary[layer]["output_shape"][0]),
                "{0:,}".format(summary[layer]["nb_params"]),
                "{0:,}".format(summary[layer]["nb_flops"][0]),
            )
            total_params += summary[layer]["nb_params"]
            total_flops += summary[layer]["nb_flops"][0]
            total_output += np.prod(summary[layer]["output_shape"][0])
            for i in range(1, len(summary[layer]["output_shape"])):
                line_new += "\n"
                line_new += line_new_format.format(
                    "",
                    str(summary[layer]["output_shape"][i]),
                    "",
                    "{0:,}".format(summary[layer]["nb_flops"][i]),
                )
                total_flops += summary[layer]["nb_flops"][i]
        else:
            line_new = line_new_format.format(
                layer,
                str(summary[layer]["output_shape"]),
                "{0:,}".format(summary[layer]["nb_params"]),
                "{0:,}".format(summary[layer]["nb_flops"]),
            )
            total_params += summary[layer]["nb_params"]
            total_flops += summary[layer]["nb_flops"]
            total_output += np.prod(summary[layer]["output_shape"])
        if "trainable" in summary[layer]:
            if summary[layer]["trainable"] == True:
                trainable_params += summary[layer]["nb_params"]
        s += line_new + "\n"
    # assume 4 bytes/number (float on cuda).
    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
    total_flops_size = abs(total_flops.numpy() / (1024 ** 3.))
    total_size = total_params_size + total_output_size + total_input_size
    s += "=" * line_length + "\n"
    s += "Total params: {0:,}".format(total_params) + "\n"
    s += "Trainable params: {0:,}".format(trainable_params) + "\n"
    s += "Non-trainable params: {0:,}".format(total_params - trainable_params) + "\n"
    s += "-" * line_length + "\n"
    s += "Input size (MB): %0.2f" % total_input_size + "\n"
    s += "Forward/backward pass size (MB): %0.2f" % total_output_size + "\n"
    s += "Params size (MB): %0.2f" % total_params_size + "\n"
    s += "Flops size (G): %0.2f" % total_flops_size + "\n"
    s += "Estimated Total Size (MB): %0.2f" % total_size + "\n"
    s += "-" * line_length + "\n"
    s += str(model) + "\n"
    # return summary
    return s
def __init__(
        self,
        ann_file,
        img_prefix,
        img_scale,
        img_norm_cfg,
        size_divisor=None,
        proposal_file=None,
        num_max_proposals=1000,
        flip_ratio=0,
        with_mask=True,
        with_crowd=True,
        with_label=True,
        extra_aug=None,  # here to add random crop for art.
        resize_keep_ratio=True,
        test_mode=False):
    """Custom detection dataset init: annotations, proposals, transforms.

    Loads annotations via the subclass-provided ``load_annotations``,
    optionally loads proposals, filters unannotated images during
    training, and builds the image/bbox/mask transforms.
    """
    # prefix of images path
    self.img_prefix = img_prefix
    # load annotations (and proposals);
    # load_annotations is expected to be implemented by the subclass.
    self.img_infos = self.load_annotations(ann_file)
    if proposal_file is not None:
        self.proposals = self.load_proposals(proposal_file)
    else:
        self.proposals = None
    # filter images with no annotation during training
    if not test_mode:
        valid_inds = self._filter_imgs()
        self.img_infos = [self.img_infos[i] for i in valid_inds]
        if self.proposals is not None:
            self.proposals = [self.proposals[i] for i in valid_inds]
    # (long_edge, short_edge) or [(long1, short1), (long2, short2), ...]
    self.img_scales = img_scale if isinstance(img_scale, list) else [img_scale]
    assert mmcv.is_list_of(self.img_scales, tuple)
    # normalization configs
    self.img_norm_cfg = img_norm_cfg
    # max proposals per image
    self.num_max_proposals = num_max_proposals
    # flip ratio
    self.flip_ratio = flip_ratio
    assert flip_ratio >= 0 and flip_ratio <= 1
    # padding border to ensure the image size can be divided by
    # size_divisor (used for FPN)
    self.size_divisor = size_divisor
    # with mask or not (reserved field, takes no effect)
    self.with_mask = with_mask
    # some datasets provide bbox annotations as ignore/crowd/difficult,
    # if `with_crowd` is True, then these info is returned.
    self.with_crowd = with_crowd
    # with label is False for RPN
    self.with_label = with_label
    # in test mode or not
    self.test_mode = test_mode
    # set group flag for the sampler
    if not self.test_mode:
        self._set_group_flag()
    # transforms
    self.img_transform = ImageTransform(size_divisor=self.size_divisor,
                                        **self.img_norm_cfg)
    self.bbox_transform = BboxTransform()
    self.mask_transform = MaskTransform()
    self.numpy2tensor = Numpy2Tensor()
    # if use extra augmentation
    if extra_aug is not None:
        self.extra_aug = ExtraAugmentationIC(**extra_aug)
    else:
        self.extra_aug = None
    # image rescale if keep ratio
    self.resize_keep_ratio = resize_keep_ratio
    # random scale mode, if img scales > 2 choose value, else range
    self.resize_mode = 'value' if len(self.img_scales) > 2 else 'range'