def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    """Evaluate a checkpoint on a dataset and print top-1 / top-5 accuracy."""
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers)

    # Build the model: a list config becomes a Sequential pipeline.
    if isinstance(model_cfg, list):
        model = torch.nn.Sequential(*[call_obj(**c) for c in model_cfg])
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    all_outputs = []
    all_labels = []
    prog_bar = ProgressBar(len(dataset))
    for batch, batch_labels in data_loader:
        with torch.no_grad():
            batch_out = model(batch).data.cpu().numpy()
        all_outputs.append(batch_out)
        all_labels.append(batch_labels)
        for _ in range(len(batch)):
            prog_bar.update()
    all_outputs = np.concatenate(all_outputs)
    all_labels = np.concatenate(all_labels)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(all_outputs, all_labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(all_outputs, all_labels, 5)))
def _non_dist_test(model, dataset, cfg, validate=False):
    """Single-process test: embed the whole dataset, then score compatibility
    AUC and fill-in-the-blank accuracy.

    Args:
        model: retriever network exposing a ``metric_branch`` attribute.
        dataset: test dataset providing ``test_compatibility``/``test_fitb``.
        cfg: config with ``data`` settings and ``gpus.test`` device ids.
        validate (bool): unused, kept for interface compatibility.
    """
    data_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        len(cfg.gpus.test),
        dist=False,
        shuffle=False)
    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeddings = []
    for batch_idx, testdata in enumerate(data_loader):
        embed = model(testdata['img'], return_loss=False)
        embeddings.append(embed)
    embeddings = torch.cat(embeddings)

    # BUG FIX: after wrapping in MMDataParallel the underlying network lives
    # at ``model.module``; ``model.metric_branch`` raises AttributeError
    # (compare the sibling test that uses ``model.module.triplet_net...``).
    metric = model.module.metric_branch
    # compatibility auc
    auc = dataset.test_compatibility(embeddings, metric)
    # fill-in-blank accuracy
    acc = dataset.test_fitb(embeddings, metric)
    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(
        round(auc, 2), round(acc * 100, 1)))
def main():
    """Benchmark single-image detector inference and report FPS."""
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 images and take the average
    # (comment previously said 200, contradicting the loop and log below)
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')

        if (i + 1) == 2000:
            # BUG FIX: ``elapsed`` was added to ``pure_inf_time`` a second
            # time here (it is already accumulated above), skewing the
            # reported overall FPS low.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
def _non_dist_test(model, dataset, cfg, validate=False):
    """Single-process test: embed the dataset on CPU-backed numpy arrays, then
    report compatibility AUC and fill-in-the-blank accuracy.

    Args:
        model: triplet network with a ``triplet_net.metric_branch`` submodule.
        dataset: test dataset providing ``test_compatibility``/``test_fitb``.
        cfg: config with ``data`` settings and ``gpus.test`` device ids.
        validate (bool): unused, kept for interface compatibility.
    """
    data_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        len(cfg.gpus.test),
        dist=False,
        shuffle=False)
    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeddings = []
    for batch_idx, testdata in enumerate(data_loader):
        embed = model(testdata['img'], return_loss=False)
        embeddings.append(embed.data.cpu().numpy())
    # save as numpy array, and then transfer to tensor
    # this is to avoid out-of-memory
    embeddings = np.asarray(embeddings)
    embeddings = torch.from_numpy(embeddings)

    # BUG FIX: the network is wrapped in MMDataParallel, so its submodules
    # must be reached through ``model.module`` (direct attribute access on
    # the wrapper raises AttributeError).
    metric = model.module.triplet_net.metric_branch
    # compatibility auc
    auc = dataset.test_compatibility(embeddings, metric)
    # fill-in-blank accuracy
    acc = dataset.test_fitb(embeddings, metric)
    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(
        round(auc, 2), round(acc * 100, 1)))
def _non_dist_test(model, dataset, cfg, validate=False):
    """Single-process attribute-prediction test; prints running and final
    accuracy via ``AttrCalculator``."""
    data_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        len(cfg.gpus.test),
        dist=False,
        shuffle=False)
    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    attr_calculator = AttrCalculator(cfg)
    for idx, batch in enumerate(data_loader):
        preds = model(
            batch['img'],
            batch['attr'],
            landmark=batch['landmark'],
            return_loss=False)
        attr_calculator.collect_result(preds, batch['attr'])
        if idx % cfg.print_interval == 0:
            attr_calculator.show_result(idx)
    attr_calculator.show_result()
def main():
    """Benchmark single-video recognizer inference and report FPS."""
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.backbone.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(
        cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 video and take the average
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(
                    f'Done video [{i + 1:<3}/ 2000], fps: {fps:.1f} video / s')

        if (i + 1) == 2000:
            # BUG FIX: the original broke at 200, contradicting the
            # "2000 video" comment and the "/ 2000" progress log above, and
            # it double-added ``elapsed`` to ``pure_inf_time`` here.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} video / s')
            break
def _non_dist_test(model, dataset, cfg, validate=False):
    """Single-process landmark-detection test; prints per-batch and overall
    normalized error plus detected-landmark percentage."""
    data_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        len(cfg.gpus.test),
        dist=False,
        shuffle=False)
    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    evaluator = LandmarkDetectorEvaluator(cfg.img_size, cfg.landmark_num)
    errors = []
    percents = []
    for idx, batch in enumerate(data_loader):
        pred_vis, pred_lm = model(batch['img'], return_loss=False)
        err, percent = evaluator.evaluate_landmark_detection(
            pred_vis, pred_lm, batch['vis'], batch['landmark_for_regression'])
        if idx % 20 == 0:
            print('Batch idx {:d}, normalized error = {:.4f}, '
                  'det. percent = {:.2f}'.format(idx, err, percent))
        errors.append(err)
        percents.append(percent)

    print('Fashion Landmark Detection Normalized Error: {:.4f}, '
          'Detected Percent: {:.2f}'.format(
              sum(errors) / len(errors),
              sum(percents) / len(percents)))
def modelInit(configPath, checkPointPath):
    """Build a detector plus its test dataset/dataloader from a config file.

    Args:
        configPath (str): path to the mmdet config file.
        checkPointPath (str): path to the trained checkpoint.

    Returns:
        tuple: (eval-mode model wrapped in MMDataParallel, dataset,
        data_loader) for single-GPU testing.
    """
    # Load the config.
    cfg = loadCfg(configPath)
    # Single-GPU (non-distributed) testing only.
    distributed = False

    # BUG FIX: these config edits previously happened AFTER the dataset and
    # dataloader were built, so ``test_mode`` (and the cudnn flag) had no
    # effect on the dataset that was actually used.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # Build the test dataset and its loader.
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, checkPointPath, map_location='cpu')
    # Prefer class names stored in the checkpoint; fall back to the dataset's.
    if 'CLASSES' in checkpoint['meta']:
        model.CLASSES = checkpoint['meta']['CLASSES']
    else:
        model.CLASSES = dataset.CLASSES
    model = MMDataParallel(model, device_ids=[0])
    model.eval()
    return model, dataset, data_loader
def main():
    """Run inference over the test set and dump COCO-style detection results
    to the JSON file given by ``args.jsonfile``.

    Uses fp16 inference; augmented testing does not support batch_size > 1,
    so one image is processed per step.
    """
    args = parse_args()
    config_file = './cascade_rcnn_r50_rfp_sac_iou_ls_alldata-v3_e15.py'
    checkpoint_file = 'epoch_15.pth'
    device = 'cuda:0'
    cfg = Config.fromfile(config_file)

    # build model
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    checkpoint = load_checkpoint(model, checkpoint_file, map_location=device)

    # Map file_name -> image id from the test annotation file.
    test_json_raw = json.load(open(cfg.data.test.ann_file))
    imgid2name = {}
    for imageinfo in test_json_raw['images']:
        imgid = imageinfo['id']
        imgid2name[imageinfo['file_name']] = imgid

    wrap_fp16_model(model)  # fp16 for faster inference
    # model = fuse_conv_bn(model)  # errors out when enabled

    # build the dataloader; aug_test does not support batch_size > 1
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    json_results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
        batch_size = len(result)
        result = result[0]  # only one image per batch
        img_metas = data['img_metas'][0].data[0]
        image_id = imgid2name[img_metas[0]['ori_filename']]
        # BUG FIX: the original reused ``i`` and ``data`` for the inner loop,
        # shadowing the outer loop's index and batch.
        for cls_idx, bboxes in enumerate(result):
            for bbox in bboxes:
                x1, y1, x2, y2, score = bbox.tolist()
                if score >= 0.001:
                    det = dict()
                    det['image_id'] = image_id
                    det['bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    det['score'] = float(score)
                    det['category_id'] = cls_idx + 1
                    json_results.append(det)
        for _ in range(batch_size):
            prog_bar.update()
    mmcv.dump(json_results, args.jsonfile)
def test_cluster_det(model, cfg, logger):
    """Single-GPU test for a cluster-detection model: logs the running loss
    and optionally dumps per-proposal output probabilities to an .npz file."""
    if cfg.load_from:
        load_checkpoint(model, cfg.load_from)

    # Mirror the model kwargs onto the test-data config before building it.
    for key, value in cfg.model['kwargs'].items():
        setattr(cfg.test_data, key, value)
    dataset = build_dataset(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []

    # Only the single-GPU path is implemented.
    if cfg.gpus != 1:
        raise NotImplementedError

    data_loader = build_dataloader(
        dataset,
        processor,
        cfg.batch_size_per_gpu,
        cfg.workers_per_gpu,
        train=False)
    model = MMDataParallel(model, device_ids=range(cfg.gpus))
    if cfg.cuda:
        model.cuda()

    model.eval()
    for step, batch in enumerate(data_loader):
        with torch.no_grad():
            output, loss = model(batch, return_loss=True)
            losses.append(loss.item())
            if step % cfg.log_config.interval == 0:
                if dataset.ignore_meta:
                    logger.info('[Test] Iter {}/{}'.format(
                        step, len(data_loader)))
                else:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        step, len(data_loader), loss))
            if cfg.save_output:
                prob = output.view(-1).data.cpu().numpy()
                output_probs.append(prob)

    if not dataset.ignore_meta:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    if cfg.save_output:
        fn = os.path.basename(cfg.load_from)
        opath = os.path.join(cfg.work_dir, fn[:fn.rfind('.pth')] + '.npz')
        meta = {
            'tot_inst_num': dataset.inst_num,
            'proposal_folders': cfg.test_data.proposal_folders,
        }
        print('dump output to {}'.format(opath))
        output_probs = np.concatenate(output_probs).ravel()
        np.savez_compressed(opath, data=output_probs, meta=meta)
def test(model, dataset, cfg, logger):
    """Evaluate a GCN model and collect candidate edges with their scores.

    Returns:
        tuple: (edges ndarray of [center, neighbor] node-id pairs,
        scores ndarray of the positive-class probability per edge,
        total number of samples in the dataset).
    """
    if cfg.load_from:
        print('load from {}'.format(cfg.load_from))
        load_checkpoint(model, cfg.load_from, strict=True, logger=logger)

    losses = []
    edges = []
    scores = []

    # Only the single-GPU path is implemented.
    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)
        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()

        model.eval()
        for i, (data, cid, node_list) in enumerate(data_loader):
            with torch.no_grad():
                # data unpacks as (feat?, adj?, h1id, gtmat); only the last
                # two are used here — h1id: 1-hop neighbor indices, gtmat:
                # ground-truth link matrix. TODO confirm the first two fields.
                _, _, h1id, gtmat = data
                pred, loss = model(data, return_loss=True)
                losses += [loss.item()]
                # Convert logits to class probabilities for edge scoring.
                pred = F.softmax(pred, dim=1)
                if i % cfg.log_config.interval == 0:
                    if dataset.ignore_label:
                        logger.info('[Test] Iter {}/{}'.format(
                            i, len(data_loader)))
                    else:
                        acc, p, r = online_evaluate(gtmat, pred)
                        logger.info(
                            '[Test] Iter {}/{}: Loss {:.4f}, '
                            'Accuracy {:.4f}, Precision {:.4f}, Recall {:.4f}'.
                            format(i, len(data_loader), loss, acc, p, r))

                # Translate batch-local neighbor indices back to global node
                # ids and record one scored edge per (center, neighbor) pair.
                node_list = node_list.numpy()
                bs = len(cid)
                h1id_num = len(h1id[0])
                for b in range(bs):
                    cidb = cid[b].int().item()
                    nlst = node_list[b]
                    center_idx = nlst[cidb]
                    for j, n in enumerate(h1id[b]):
                        edges.append([center_idx, nlst[n.item()]])
                        # pred rows are flattened over (batch, neighbor);
                        # column 1 is the positive-link probability.
                        scores.append(pred[b * h1id_num + j, 1].item())
    else:
        raise NotImplementedError

    if not dataset.ignore_label:
        avg_loss = sum(losses) / len(losses)
        logger.info('[Test] Overall Loss {:.4f}'.format(avg_loss))

    return np.array(edges), np.array(scores), len(dataset)
def init_model():
    """Load the HTC detector with its trained weights onto GPU 0.

    Returns:
        tuple: (eval-mode model wrapped in MMDataParallel, parsed config).
    """
    config = './configs/htc/htc_hrnetv2p_w48_20e_kaggle_pku_no_semantic_translation_wudi_car_insurance.py'
    checkpoint_path = '/data/Kaggle/checkpoints/all_cwxe99_3070100flip05resumme93Dec29-16-28-48/epoch_100.pth'

    cfg = mmcv.Config.fromfile(config)
    detector = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    ckpt = load_checkpoint(detector, checkpoint_path, map_location='cpu')
    # Class names recorded at training time travel with the checkpoint.
    detector.CLASSES = ckpt['meta']['CLASSES']

    detector = MMDataParallel(detector, device_ids=[0])
    detector.eval()
    return detector, cfg
def _non_dist_test(model, query_set, gallery_set, cfg, validate=False):
    """Single-process retrieval test: embed query and gallery sets, then run
    the evaluator on the two embedding matrices."""
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    query_np = np.array(_process_embeds(query_set, model, cfg))
    gallery_np = np.array(_process_embeds(gallery_set, model, cfg))

    evaluator = Evaluator(
        cfg.data.query.id_file,
        cfg.data.gallery.id_file,
        extract_feature=cfg.extract_feature)
    evaluator.evaluate(query_np, gallery_np)
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=2):
    """Evaluate a checkpoint, timing inference, then print average/total
    inference time and top-1 / top-5 accuracy."""
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers)

    # Build the model (list config -> Sequential pipeline) and load weights.
    if isinstance(model_cfg, list):
        model = torch.nn.Sequential(*[call_obj(**c) for c in model_cfg])
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    outputs = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    total_time = 0
    for batch, batch_label in data_loader:
        with torch.no_grad():
            start = time.time()
            out = model(batch).data.cpu().numpy()
            # Make sure all queued GPU work is finished before stopping
            # the clock.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_time += time.time() - start
            outputs.append(out)
            labels.append(batch_label)
            for _ in range(len(batch)):
                prog_bar.update()
    outputs = np.concatenate(outputs)
    labels = np.concatenate(labels)

    print("Average infer time: ", total_time / len(data_loader))
    print("Total infer time: ", total_time)
    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(outputs, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(outputs, labels, 5)))
def extract_features(image_set, cfg, save_feature_dir):
    """Embed every image in ``image_set`` and save the feature matrix to a
    MATLAB ``.mat`` file under ``save_feature_dir``."""
    model = build_retriever(cfg.model)
    print('model built')
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeds = _process_embeds(image_set, model, cfg)

    if not os.path.exists(save_feature_dir):
        os.makedirs(save_feature_dir)
    save_path = os.path.join(save_feature_dir, 'extracted_features.mat')
    sio.savemat(save_path, {'embeds': embeds})
    print('extracted features saved to : %s' % save_path)
def _non_dist_test(model, dataset, cfg, validate=False):
    """Single-process test reporting compatibility AUC and fill-in-the-blank
    accuracy for a triplet network."""
    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    embeds = _process_embeds(dataset, model, cfg)
    # The wrapped network's submodules live under ``.module``.
    metric = model.module.triplet_net.metric_branch

    # compatibility AUC
    auc = dataset.test_compatibility(embeds, metric)
    # fill-in-the-blank accuracy
    acc = dataset.test_fitb(embeds, metric)
    print('Compat AUC: {:.2f} FITB: {:.1f}\n'.format(
        round(auc, 2), round(acc * 100, 1)))
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4,
         conf_save_path='/home/computer/WBH/GCN/INTERGCN/conf.npy'):
    """Evaluate a checkpoint, accumulate a confusion matrix, and print
    top-1 / top-5 accuracy.

    Args:
        model_cfg: model config (or list of configs -> Sequential).
        dataset_cfg: dataset config passed to ``call_obj``.
        checkpoint (str): checkpoint path.
        batch_size (int): test batch size.
        gpus (int): number of GPUs.
        workers (int): dataloader workers.
        conf_save_path (str): where to save the confusion matrix as ``.npy``.
            GENERALIZED: previously a hard-coded machine-specific path; the
            default preserves the original behavior.
    """
    # Confusion-matrix accumulator (num_class x num_class).
    conf_matrix = torch.zeros(model_cfg.num_class, model_cfg.num_class)
    # ``seed`` is a module-level name in this file.
    set_determined_seed(seed)
    torch.multiprocessing.set_sharing_strategy('file_system')
    dataset = call_obj(**dataset_cfg)
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers)

    # put model on gpus
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=get_gpus(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    prog_bar = ProgressBar(len(dataset))
    for data, label in data_loader:
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        results.append(output)
        labels.append(label)
        for i in range(len(data)):
            prog_bar.update()
    results = np.concatenate(results)
    labels = np.concatenate(labels)

    # Accumulate predicted-vs-true classes and persist the confusion matrix.
    conf_matrix = confusion_matrix(
        torch.max(torch.from_numpy(results), 1)[1], labels, conf_matrix)
    np.save(conf_save_path, conf_matrix)

    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
def detect(inputs, results, model_cfg, dataset_cfg, checkpoint, video_dir,
           batch_size=64, gpus=1, workers=4):
    """Pull pose data from the ``inputs`` queue for every video file in
    ``video_dir``, classify it, and print top-1 / top-5 accuracy.

    NOTE: the ``results`` argument is immediately re-bound to a fresh list,
    so the value passed in is never used.
    """
    print('detect start')
    # Build the model (list config -> Sequential pipeline) and load weights.
    if isinstance(model_cfg, list):
        model = torch.nn.Sequential(*[call_obj(**c) for c in model_cfg])
    else:
        model = call_obj(**model_cfg)
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    results = []
    labels = []
    video_file_list = os.listdir(video_dir)
    prog_bar = ProgressBar(len(video_file_list))
    for _ in video_file_list:
        raw = inputs.get()
        parsed = data_parse(
            raw, dataset_cfg.pipeline, dataset_cfg.data_source.num_track)
        sample, label = parsed
        with torch.no_grad():
            tensor = torch.from_numpy(sample)
            # Add a leading dimension to act as batch_size.
            tensor = tensor.unsqueeze(0)
            tensor = tensor.float().to("cuda:0").detach()
            output = model(tensor).data.cpu().numpy()
            results.append(output)
            labels.append(torch.tensor([label]))
            for _ in range(len(tensor)):
                prog_bar.update()
    print('--------', results, labels, '--------------')
    results = np.concatenate(results)
    labels = np.concatenate(labels)
    print('Top 1: {:.2f}%'.format(100 * topk_accuracy(results, labels, 1)))
    print('Top 5: {:.2f}%'.format(100 * topk_accuracy(results, labels, 5)))
def main():
    """Export a single-stage detector to ONNX using a dummy input."""
    args = parse_args()
    detector = init_detector(args.config, args.checkpoint)
    cfg = detector.cfg
    # This exporter only supports single-stage detectors.
    assert getattr(detectors, cfg.model['type']) is \
        detectors.SingleStageDetector
    detector = MMDataParallel(detector, device_ids=[0])

    # Dummy batch + metadata matching the configured input size.
    dummy = torch.FloatTensor(1, 3, cfg.input_size, cfg.input_size).cuda()
    shape = (cfg.input_size, cfg.input_size, 3)
    scale = np.array([1, 1, 1, 1], dtype=np.float32)
    data = dict(
        img=dummy,
        img_meta=[{
            'img_shape': shape,
            'scale_factor': scale
        }])

    detector.eval()
    detector.module.onnx_export(export_name=args.output, **data)
    print("export end")
def train_flownet(model, dataset, cfg, distributed=False, validate=False, logger=None):
    """Fine-tune ONLY the flow head of ``model``; all other parameters are
    frozen before training starts.

    Args:
        model: network exposing a ``flow_head`` submodule.
        dataset: training dataset.
        cfg: config with data, optimizer, hook, and workflow settings.
        distributed (bool): unused here — only the single-process path exists.
        validate (bool): unused, kept for interface compatibility.
        logger: optional logger; built from ``cfg.log_level`` when None.
    """
    if logger is None:
        logger = get_root_logger(cfg.log_level)
    # start training
    # prepare data loaders
    data_loaders = [
        build_dataloader(
            dataset,
            cfg.data.imgs_per_gpu,
            cfg.data.workers_per_gpu,
            cfg.gpus,
            dist=False)
    ]
    # put model on gpus
    model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
    # build runner
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config)
    # if cfg.resume_from:
    #     runner.resume(cfg.resume_from)
    # elif cfg.load_from:
    #     runner.load_checkpoint(cfg.load_from)

    # Freeze the entire network and switch it to eval mode...
    model.eval()
    for param in model.parameters():
        param.requires_grad = False
    # model.load_flow()
    # ...then re-enable training (and gradients) for the flow head only.
    model.module.flow_head.train()
    for param in model.module.flow_head.parameters():
        param.requires_grad = True
    # training
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
def _non_dist_test_cate_attr(model, dataset, cfg, validate=False):
    """Single-process test of joint category + attribute prediction, with
    running and final accuracy reports."""
    data_loader = build_dataloader(
        dataset,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        len(cfg.gpus.test),
        dist=False,
        shuffle=False)
    print('dataloader built')

    model = MMDataParallel(model, device_ids=cfg.gpus.test).cuda()
    model.eval()

    attr_calculator = AttrCalculator(
        cfg,
        topns=[3, 5],
        show_attr_name=True,
        attr_name_file=cfg.data.test['attr_cloth_file'])
    cate_calculator = CateCalculator(cfg, topns=[1, 3, 5])

    for idx, batch in enumerate(data_loader):
        attr_pred, cate_pred = model(
            batch['img'],
            batch['attr'],
            landmark=batch['landmark'],
            return_loss=False)
        attr_calculator.collect_result(attr_pred, batch['attr'])
        cate_calculator.collect_result(cate_pred, batch['cate'])
        if idx % cfg.print_interval == 0:
            attr_calculator.show_result(idx)
            cate_calculator.show_result(idx)

    attr_calculator.show_result()
    attr_calculator.show_per_attr_result()
    cate_calculator.show_result()
def main():
    """Export an SSD-style single-stage detector to ONNX by monkey-patching
    export-friendly forward methods onto the loaded model."""
    args = parse_args()
    model = init_detector(args.config, args.checkpoint)
    cfg = model.cfg
    # Only single-stage detectors are supported by this export path.
    assert getattr(detectors, cfg.model['type']) is detectors.SingleStageDetector
    model = MMDataParallel(model, device_ids=[0])

    # Dummy input batch + metadata matching the configured input size.
    batch = torch.FloatTensor(1, 3, cfg.input_size, cfg.input_size).cuda()
    input_shape = (cfg.input_size, cfg.input_size, 3)
    scale = np.array([1, 1, 1, 1], dtype=np.float32)
    data = dict(
        img=batch,
        img_meta=[{
            'img_shape': input_shape,
            'scale_factor': scale
        }])

    model.eval()
    # Bind the export-specific functions as methods of the unwrapped model
    # (``__get__`` turns a plain function into a bound method). The detector
    # and its bbox head each get ONNX-compatible replacements for their
    # normal forward/inference entry points.
    model.module.onnx_export = onnx_export.__get__(model.module)
    model.module.forward = forward.__get__(model.module)
    model.module.forward_export = forward_export_detector.__get__(model.module)
    model.module.bbox_head.export_forward = export_forward_ssd_head.__get__(
        model.module.bbox_head)
    model.module.bbox_head._prepare_cls_scores_bbox_preds = \
        prepare_cls_scores_bbox_preds_ssd_head.__get__(model.module.bbox_head)
    model.module.bbox_head.get_bboxes = get_bboxes_ssd_head.__get__(
        model.module.bbox_head)

    model.module.onnx_export(export_name=args.output, **data)
def main(): args = parse_args() # === config === cfg = Config.fromfile(args.config) if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True cfg.model.pretrained = None cfg.data.test.test_mode = True dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True)) loader = build_dataloader( dataset, cfg.data.tasks_per_gpu // cfg.data.tasks_per_gpu, max(1, cfg.data.workers_per_gpu // cfg.data.tasks_per_gpu), args.gpus, dist=False, customized_sampler=False, shuffle=False) model = build_model(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) model = MMDataParallel(model, device_ids=range(args.gpus)).cuda() load_checkpoint(model, args.checkpoint) model.eval() results = [] prog_bar = mmcv.ProgressBar(len(loader)) for data in loader: with torch.no_grad(): result = model(return_loss=False, **data) results.append(result) prog_bar.update() topk = (1, 3, 5, 10) if not args.topk else args.topk evaluate(results, eval=args.eval, topk=topk)
def setup(self, config_file, checkpoint_file, fuse_conv):
    """Build an eval-ready detector plus its test dataloader.

    Args:
        config_file (str): path to the mmdet config file.
        checkpoint_file (str): path to the trained checkpoint.
        fuse_conv (bool): whether to fuse conv+bn layers; also recorded on
            ``self._fuse_conv``.

    Returns:
        tuple: (MMDataParallel-wrapped eval model, data_loader, dataset).
    """
    cfg = Config.fromfile(config_file)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model, load weights, optionally fuse conv+bn
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, checkpoint_file, map_location='cpu')
    if fuse_conv:
        model = fuse_module(model)
    self._fuse_conv = fuse_conv

    model = MMDataParallel(model, device_ids=[0])
    model.eval()
    return model, data_loader, dataset
def main():
    """Run ``eval_forward`` over up to ~100 test samples, save per-sample
    parsed outputs under ``args.out_dir`` and merge them into GIFs."""
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # import modules from plugin/xx, registry will be updated
    # NOTE(review): ``&`` does not short-circuit — if cfg has no ``plugin``
    # attribute this raises instead of skipping; ``and`` was likely intended.
    if hasattr(cfg, 'plugin') & cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            # Convert the plugin directory path into a dotted module path.
            plugin_dir = cfg.plugin_dir
            _module_dir = os.path.dirname(plugin_dir)
            _module_dir = _module_dir.split('/')
            _module_path = _module_dir[0]
            for m in _module_dir[1:]:
                _module_path = _module_path + '.' + m
            print(_module_path)
            plg_lib = importlib.import_module(_module_path)
        else:
            # import dir is the dirpath for the config file
            _module_dir = os.path.dirname(args.config)
            _module_dir = _module_dir.split('/')
            _module_path = _module_dir[0]
            for m in _module_dir[1:]:
                _module_path = _module_path + '.' + m
            print(_module_path)
            plg_lib = importlib.import_module(_module_path)

    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None

    # in case the test dataset is concatenated
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        # Use the largest per-dataset batch size across the concatenation.
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    distributed = False

    # set random seeds
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)

    # build the dataloader
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    #from IPython import embed
    #embed()
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    output_list = []
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            # Unwrap the DataContainer batch, then move tensors to GPU.
            data = scatter(data, [-1])[0]
            for k, v in data.items():
                if isinstance(v, torch.Tensor):
                    data[k] = v.cuda()
            outputs = model.module.eval_forward(data)
            output_list.append(outputs)
        # Only visualize the first ~101 samples.
        if i >= 100:
            break

    # Parse each sample's output into its own subdirectory.
    merged_output_list = []
    for i, output in enumerate(output_list):
        save_dir = os.path.join(args.out_dir, 'sample-{}'.format(i))
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        outputs = parse_output(output, save_dir)
        merged_output_list.append(outputs)

    save_dir = os.path.join(args.out_dir, 'gifs')
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)
    merge_output(merged_output_list, save_dir)
def test_cluster_mall(model1, cfg, logger):
    """Evaluate an IoP (impurity-of-proposal) estimator: logs the loss, plots
    a ROC curve and IoP box plots, and dumps estimated/GT IoP values as JSON.

    NOTE(review): the ``model1`` parameter is never used — the model is
    loaded fresh from ``cfg.load_from1`` below. Also, ``torch.load`` on an
    untrusted checkpoint unpickles arbitrary code; only load trusted files.
    """
    model = torch.load(cfg.load_from1)
    # Mirror kwargs from both model configs onto the test-data config.
    for k, v in cfg.model1['kwargs'].items():
        setattr(cfg.test_data, k, v)
    for k, v in cfg.model2['kwargs'].items():
        setattr(cfg.test_data, k, v)
    setattr(cfg.test_data, 'phase', 'test')
    dataset = build_dataset_mall(cfg.test_data)
    processor = build_processor(cfg.stage)

    losses = []
    output_probs = []
    IoP_GT = []
    IoP_binary_GT = []
    num_impure_pro = 0  # count of proposals whose GT label is 0 (impure)

    # Only the single-GPU path is implemented.
    if cfg.gpus == 1:
        data_loader = build_dataloader(dataset,
                                       processor,
                                       cfg.batch_size_per_gpu,
                                       cfg.workers_per_gpu,
                                       train=False)
        model = MMDataParallel(model, device_ids=range(cfg.gpus))
        if cfg.cuda:
            model.cuda()
        output_IoP_loss = []
        model.eval()
        for i, data in enumerate(data_loader):
            with torch.no_grad():
                output, loss = model(data, return_loss=True)
                losses += [loss.item()]
                # data[-1] holds the ground-truth binary IoP labels.
                num_impure_pro += (data[-1] == 0).nonzero().shape[0]
                if i % cfg.log_config.interval == 0:
                    logger.info('[Test] Iter {}/{}: Loss {:.4f}'.format(
                        i, len(data_loader), loss))
                if cfg.save_output:
                    # Keep the positive-class probability per proposal.
                    output = output[:, 1]
                    output = output.view(-1)
                    output_probs.append(output.tolist())
                    IoP_GT.append(data[-1].tolist())
    else:
        raise NotImplementedError

    # Flatten the per-batch lists into flat lists / arrays / tensors.
    output_probs1 = [iop for item in output_probs for iop in item]
    output_probs = np.array([iop for item in output_probs for iop in item])
    IoP_GT0 = [iop for item in IoP_GT for iop in item]
    IoP_GT = np.array([iop for item in IoP_GT for iop in item])
    output_probs = torch.from_numpy(output_probs)
    IoP_GT1 = torch.from_numpy(IoP_GT)
    #HistgramStd.eval_batch_new(output_probs, IoP_GT1, 'BCE')
    output_probs2 = np.array(output_probs1)

    # plot roc curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        IoP_GT, output_probs2)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('ROC')
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.4f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('TPR')
    plt.xlabel('FPR')
    plt.draw()
    plt.savefig(cfg.work_dir + '/ROC.jpg')
    plt.close()

    # plot IoP distribution curve
    # Split predicted probabilities by their ground-truth class.
    pos01 = np.where((IoP_GT1 == 0))
    iop_01 = output_probs2[pos01]
    pos02 = np.where((IoP_GT1 == 1))
    iop_02 = output_probs2[pos02]
    if cfg.save_output:
        plt.figure(1)
        plt.subplot(1, 1, 1)
        plt.boxplot([iop_01.tolist(), iop_02.tolist()], notch=True)
        x_tricks = np.array([1, 2])
        plt.xticks(x_tricks)
        plt.grid(axis='y')
        plt.draw()
        plt.savefig(cfg.work_dir + '/Estimated_IoP.jpg')
        plt.close()

        # Map each dataset node (by file name) to its estimated IoP.
        estimated_iop_dict = {}
        for i, node in enumerate(dataset.lst):
            node_name = node.split('/')[-1]
            estimated_iop = output_probs1[i]
            estimated_iop_dict[node_name] = estimated_iop
        with open(cfg.work_dir + '/Estimated_IoP_eval_dict.json', 'w') as f:
            json.dump(estimated_iop_dict, f)
        with open(cfg.work_dir + '/Estimated_IoP_eval.json', 'w') as f:
            json.dump(output_probs1, f)
        with open(cfg.work_dir + '/GT_IoP_eval.json', 'w') as f:
            json.dump(IoP_GT0, f)
def main():
    """Benchmark single-image detector inference and report FPS."""
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)
    # import modules from string list.
    if cfg.get('custom_imports', None):
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
    dataset = build_dataset(cfg.data.test)
    # NOTE(review): the pipeline is adapted for samples_per_gpu > 1 above,
    # yet the loader is hard-coded to samples_per_gpu=1 — confirm intent.
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 image and take the average
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')

        if (i + 1) == 2000:
            # BUG FIX: ``elapsed`` was added to ``pure_inf_time`` a second
            # time here (already accumulated above), skewing overall FPS low.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
def main():
    """Run human-mesh inference on every image in a folder and save renders.

    Loads a detector from the config, resumes weights through an mmcv
    Runner, then for each image in ``args.image_folder`` pads it to a fixed
    canvas, runs the model, and writes a rendered visualization into
    ``args.output_folder``.

    NOTE(review): relies on module-level names defined elsewhere in this
    file/project (parse_args, Config, build_detector, Runner, Renderer,
    ImageTransform, img_norm_cfg, DC, to_tensor, denormalize, prepare_dump).
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if args.ckpt:
        cfg.resume_from = args.ckpt
    # keep only fairly confident RCNN detections for visualization
    cfg.test_cfg.rcnn.score_thr = 0.5
    FOCAL_LENGTH = cfg.get('FOCAL_LENGTH', 1000)
    model = build_detector(cfg.model,
                           train_cfg=cfg.train_cfg,
                           test_cfg=cfg.test_cfg)

    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmdet_version=__version__,
                                          config=cfg.text,
                                          CLASSES=('Human', ))
    # add an attribute for visualization convenience
    model.CLASSES = ('Human', )
    model = MMDataParallel(model, device_ids=[0]).cuda()

    # build runner — used here only to resume checkpoint weights, not to
    # train, hence the identity lambda as batch processor
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(model, lambda x: x, optimizer, cfg.work_dir, cfg.log_level)
    runner.resume(cfg.resume_from)
    model = runner.model
    model.eval()

    # necessary for headless rendering
    os.environ['PYOPENGL_PLATFORM'] = 'egl'
    render = Renderer(focal_length=FOCAL_LENGTH)
    img_transform = ImageTransform(size_divisor=32, **img_norm_cfg)
    img_scale = cfg.common_val_cfg.img_scale

    with torch.no_grad():
        folder_name = args.image_folder
        output_folder = args.output_folder
        os.makedirs(output_folder, exist_ok=True)
        images = os.listdir(folder_name)
        for image in images:
            file_name = osp.join(folder_name, image)
            img = cv2.imread(file_name)
            # original (H, W, C) shape, recorded in the meta dict below
            ori_shape = img.shape
            img, img_shape, pad_shape, scale_factor = img_transform(
                img, img_scale)
            # Force padding for the issue of multi-GPU training
            # NOTE(review): assumes img is channel-first (C, H, W) after
            # img_transform, padded onto a fixed img_scale canvas — TODO
            # confirm against ImageTransform
            padded_img = np.zeros((img.shape[0], img_scale[1], img_scale[0]),
                                  dtype=img.dtype)
            padded_img[:, :img.shape[-2], :img.shape[-1]] = img
            img = padded_img
            assert img.shape[1] == 512 and img.shape[
                2] == 832, "Image shape incorrect"
            # wrap the single image as a one-element batch with its meta info
            data_batch = dict(
                img=DC([to_tensor(img[None, ...])], stack=True),
                img_meta=DC([{
                    'img_shape': img_shape,
                    'scale_factor': scale_factor,
                    'flip': False,
                    'ori_shape': ori_shape
                }],
                            cpu_only=True),
            )
            bbox_results, pred_results = model(**data_batch, return_loss=False)
            if pred_results is not None:
                pred_results['bboxes'] = bbox_results[0]
                img = denormalize(img)
                img_viz = prepare_dump(pred_results, img, render, bbox_results,
                                       FOCAL_LENGTH)
                # img_viz is RGB; flip to BGR for cv2.imwrite
                cv2.imwrite(
                    f'{file_name.replace(folder_name, output_folder)}.output.jpg',
                    img_viz[:, :, ::-1])
def main():
    """Run detector inference over a folder (or list) of images and save
    each image's result in TAO pickle format under ``args.out_dir``.

    Input images come either from ``args.img_dir`` (recursive glob) or from
    the file list ``args.img_list``; exactly one must be non-empty.
    """
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpuid
    img_dir = args.img_dir
    out_dir = args.out_dir
    batch_size = args.batch_size
    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    if args.img_dir != '':
        file_list = common.load_filepaths(args.img_dir,
                                          suffix=('.jpg', '.png', '.jpeg'),
                                          recursive=True)
    elif args.img_list != '':
        file_list = parse_testfile(args.img_list)
    else:
        # BUGFIX: `raise "..."` raises a TypeError in Python 3 (exceptions
        # must derive from BaseException); raise a real exception instead.
        raise ValueError("Both img_dir and img_list is empty.")

    dataset = FilesDataset(file_list, cfg.test_pipeline)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=batch_size,
                                   workers_per_gpu=batch_size,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
    model = reweight_cls(model, args.tau).cuda()
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    count = 0
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            # results: one entry per image in the batch (bbox/segm results)
            results = model(return_loss=False, rescale=True, **data)
        # BUGFIX: the old code indexed file_list with the *batch* index `i`
        # and saved only results[0], which mis-maps files whenever
        # batch_size > 1. Walk every result in the batch with a running file
        # counter instead (identical behavior when batch_size == 1).
        for result in results:
            file_path = file_list[count]
            save_name = file_path.replace(
                '/home/songbai.xb/workspace/projects/TAO/data/TAO/frames/val/',
                '')
            save_name = save_name.replace('.jpg', '.pkl')
            save_path = os.path.join(out_dir, save_name)
            common.makedirs(os.path.dirname(save_path))
            save_in_tao_format(result, save_path)
            count += 1
def test(model_cfg, dataset_cfg, checkpoint, batch_size=64, gpus=1, workers=4):
    """Real-time ST-GCN action recognition on a video using OpenPose.

    Loads the recognition model from ``model_cfg``/``checkpoint``, estimates
    per-frame poses with the OpenPose Python API, tracks them into skeleton
    sequences, and renders the predicted action label frame by frame.

    NOTE(review): ``dataset_cfg``, ``batch_size`` and ``workers`` are kept
    for signature compatibility with the other test() entry points but are
    unused by this demo.
    """
    # build the recognition model and load weights
    model = call_obj(**model_cfg)
    edge = model.graph.edge
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()

    # import the OpenPose python API built under ./openpose
    sys.path.append('{}/{}/build/python'.format(os.getcwd(), "openpose"))
    try:
        from openpose import pyopenpose as op
    except ImportError:
        # BUGFIX: was a bare `except:` which also swallowed unrelated errors
        print('Can not find Openpose Python API.')
        return

    # BUGFIX: op.WrapperPython() was instantiated twice; the first instance
    # was dead code and has been removed.
    params = dict(model_folder='openpose/models', model_pose='COCO')
    params["hand"] = True
    opWrapper = op.WrapperPython()
    opWrapper.configure(params)
    opWrapper.start()

    video_capture = cv2.VideoCapture("fall01.mp4")
    pose_tracker = naive_pose_tracker()

    # start recognition
    start_time = time.time()
    frame_index = 0

    # class-index -> label-name lookup for the Kinetics skeleton labels
    gt_labels = []
    with open(
            'mmskeleton/deprecated/st_gcn/resource/kinetics_skeleton/label_name.txt',
            'r') as f:
        for line in f:
            gt_labels.append(line.strip('\n'))

    while (True):
        tic = time.time()

        # get image
        ret, orig_image = video_capture.read()
        if orig_image is None:
            break
        source_H, source_W, _ = orig_image.shape
        H, W, _ = orig_image.shape

        # pose estimation
        datum = op.Datum()
        datum.cvInputData = orig_image
        opWrapper.emplaceAndPop([datum])
        multi_pose = datum.poseKeypoints  # (num_person, num_joint, 3)
        # ROBUSTNESS: poseKeypoints may be None/scalar when no person is
        # detected; guard before touching .shape.
        if multi_pose is None or len(multi_pose.shape) != 3:
            continue

        # normalization: pixel coords -> [-0.5, 0.5]; zero out joints whose
        # confidence score (channel 2) is 0
        multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
        multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
        multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
        multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
        multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

        # pose tracking
        frame_index += 1
        pose_tracker.update(multi_pose, frame_index)
        data_numpy = pose_tracker.get_skeleton_sequence()
        data = torch.from_numpy(data_numpy)
        data = data.unsqueeze(0)
        data = data.float().to("cuda:0").detach()

        # debug dump of the first channel of the model input
        with open("de.txt", 'w+') as f:
            for i in data[0][0]:
                f.write(str(i) + '\n\n')

        # action recognition: pick the highest-scoring class
        with torch.no_grad():
            output = model(data).data.cpu().numpy()
        voting_label = int(output.argmax(axis=1))
        print('voting_label_index:{}'.format(voting_label))
        print(len(gt_labels))
        print(gt_labels[voting_label])
        print(output[0][voting_label])

        # visualization
        app_fps = 1 / (time.time() - tic)
        image = render(edge, data_numpy, gt_labels[voting_label],
                       [[gt_labels[voting_label]]], None, orig_image, app_fps)
        cv2.imshow("ST-GCN", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break