def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.backbone.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with 2000 videos and take the average
    for i, data in enumerate(data_loader):

        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done video [{i + 1:<3}/ 2000], fps: {fps:.1f} video / s')

        if (i + 1) == 2000:
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} video / s')
            break
def inference_pytorch(args, cfg, distributed, data_loader):
    """Get predictions by pytorch models."""
    # remove redundant pretrain steps for testing
    turn_off_pretrained(cfg.model)

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))

    if len(cfg.module_hooks) > 0:
        register_module_hooks(model, cfg.module_hooks)

    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)
    return outputs
def create_model(config):
    if isinstance(config, str):
        config = Config.fromfile(config)
        config = config.model
    elif isinstance(config, dict):
        # a model config dict can be used directly
        pass
    else:
        raise TypeError(f'config must be a file path or a dict, '
                        f'but got {type(config)}')
    model = build_model(config)
    return model
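
# A minimal usage sketch for create_model. The config path below is a
# hypothetical example; any mmaction2-style recognizer config would work
# the same way.
def _demo_create_model():
    model = create_model(
        'configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    model.eval()
    return model
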
def __init__(self, net_configs, is_head_included=True):
    super(BaseClassificationNet, self).__init__()
    self.net_configs = net_configs

    # 1. build the model based on the configurations
    self._net = build_model(net_configs)

    self.is_head_included = is_head_included

    # the extracted features must be forwarded through the average pool
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
def build_model_by_config(config, ckpt_path=None):
    if isinstance(config, str):
        cfg = Config.fromfile(config)
        cfg.model.backbone.pretrained = None
    elif isinstance(config, dict):
        cfg = config
    else:
        raise TypeError(f'config must be a file path or a dict, '
                        f'but got {type(config)}')

    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    if ckpt_path is not None:
        load_checkpoint(model, ckpt_path, map_location='cpu')
    model.eval()
    return model
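
# Usage sketch for build_model_by_config. Both paths below are hypothetical
# placeholders for a real config/checkpoint pair.
def _demo_build_model_by_config():
    model = build_model_by_config(
        'configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py',
        ckpt_path='checkpoints/i3d_r50_kinetics400.pth')
    return model
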
def setup(self):
    print("loading dataset ...")
    self.system_dict["local"]["datasets"] = [
        build_dataset(self.system_dict["local"]["cfg"].data.train)
    ]

    print("loading model ...")
    self.system_dict["local"]["model"] = build_model(
        self.system_dict["local"]["cfg"].model,
        train_cfg=self.system_dict["local"]["cfg"].train_cfg,
        test_cfg=self.system_dict["local"]["cfg"].test_cfg)

    print("creating workspace directory ...")
    mmcv.mkdir_or_exist(osp.abspath(self.system_dict["local"]["cfg"].work_dir))

    print("Done")
def skeleton_based_action_recognition(args, pose_results, num_frame, h, w):
    fake_anno = dict(
        frame_dir='',
        label=-1,
        img_shape=(h, w),
        original_shape=(h, w),
        start_index=0,
        modality='Pose',
        total_frames=num_frame)
    num_person = max([len(x) for x in pose_results])

    num_keypoint = 17
    keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                        dtype=np.float16)
    keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                              dtype=np.float16)
    for i, poses in enumerate(pose_results):
        for j, pose in enumerate(poses):
            pose = pose['keypoints']
            keypoint[j, i] = pose[:, :2]
            keypoint_score[j, i] = pose[:, 2]
    fake_anno['keypoint'] = keypoint
    fake_anno['keypoint_score'] = keypoint_score

    label_map = [x.strip() for x in open(args.label_map).readlines()]
    num_class = len(label_map)

    skeleton_config = mmcv.Config.fromfile(args.skeleton_config)
    skeleton_config.model.cls_head.num_classes = num_class  # for K400 dataset

    skeleton_pipeline = Compose(skeleton_config.test_pipeline)
    skeleton_imgs = skeleton_pipeline(fake_anno)['imgs'][None]
    skeleton_imgs = skeleton_imgs.to(args.device)

    # Build skeleton-based recognition model
    skeleton_model = build_model(skeleton_config.model)
    load_checkpoint(skeleton_model, args.skeleton_checkpoint,
                    map_location='cpu')
    skeleton_model.to(args.device)
    skeleton_model.eval()

    with torch.no_grad():
        output = skeleton_model(return_loss=False, imgs=skeleton_imgs)

    action_idx = np.argmax(output)
    # skeleton-based action result for the whole video
    skeleton_action_result = label_map[action_idx]
    return skeleton_action_result
def inference_mmaction2(inputs, config, checkpoint):
    import torch
    from mmaction.models import build_model
    from mmcv import Config
    from mmcv.runner import load_checkpoint

    cfg = Config.fromfile(config)
    cfg.model.backbone.pretrained = None
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(model, checkpoint, map_location='cpu')
    model.eval()

    inputs = torch.tensor(inputs)
    with torch.no_grad():
        return model(return_loss=False, imgs=inputs)
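
# Usage sketch for inference_mmaction2. The paths are hypothetical, and the
# dummy input shape (batch x clips x C x T x H x W) is only an assumption for
# a 3D recognizer; the real shape depends on the config's test pipeline.
def _demo_inference_mmaction2():
    import numpy as np
    dummy = np.random.randn(1, 1, 3, 32, 224, 224).astype(np.float32)
    scores = inference_mmaction2(
        dummy,
        'configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py',
        'checkpoints/i3d_r50_kinetics400.pth')
    return scores
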
def test_rgb_build():
    """Test the code for building the rgb model."""
    # define the test data
    rgb_input_shape = (1, 3, 3, 8, 32, 32)
    rgb_demo_inputs = test_models.base.generate_recognizer_demo_inputs(
        rgb_input_shape, model_type='3D')

    rgb_model = build_model(Config.multimodal_nets_configs["rgb_model"])

    rgb_imgs = rgb_demo_inputs['imgs']
    # flow_imgs = flow_demo_inputs['imgs']
    # audio_feas = flow_demo_inputs['imgs']
    gt_labels = rgb_demo_inputs['gt_labels']

    print("rgb_imgs: ", rgb_imgs.shape)
    # print("flow_imgs: ", flow_imgs.shape)
    # print("audio_feas: ", audio_feas.shape)
    print("gt_labels: ", gt_labels.shape)

    losses = rgb_model(rgb_imgs, gt_labels)
    print("the rgb losses: ", losses)
def main():
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video,
                                                    args.short_side)
    num_frame = len(frame_paths)
    h, w, _ = original_frames[0].shape

    # Get clip_len, frame_interval and calculate center index of each clip
    config = mmcv.Config.fromfile(args.config)
    config.merge_from_dict(args.cfg_options)

    test_pipeline = Compose(config.data.test.pipeline)

    # Load label_map
    label_map = [x.strip() for x in open(args.label_map).readlines()]

    # Get Human detection results
    det_results = detection_inference(args, frame_paths)
    torch.cuda.empty_cache()

    pose_results = pose_inference(args, frame_paths, det_results)
    torch.cuda.empty_cache()

    fake_anno = dict(
        frame_dir='',
        label=-1,
        img_shape=(h, w),
        original_shape=(h, w),
        start_index=0,
        modality='Pose',
        total_frames=num_frame)
    num_person = max([len(x) for x in pose_results])
    # Current PoseC3D models are trained on COCO-keypoints (17 keypoints)
    num_keypoint = 17
    keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                        dtype=np.float16)
    keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                              dtype=np.float16)
    for i, poses in enumerate(pose_results):
        for j, pose in enumerate(poses):
            pose = pose['keypoints']
            keypoint[j, i] = pose[:, :2]
            keypoint_score[j, i] = pose[:, 2]
    fake_anno['keypoint'] = keypoint
    fake_anno['keypoint_score'] = keypoint_score

    imgs = test_pipeline(fake_anno)['imgs'][None]
    imgs = imgs.to(args.device)

    model = build_model(config.model)
    load_checkpoint(model, args.checkpoint, map_location=args.device)
    model.to(args.device)
    model.eval()

    with torch.no_grad():
        output = model(return_loss=False, imgs=imgs)

    action_idx = np.argmax(output)
    action_label = label_map[action_idx]

    pose_model = init_pose_model(args.pose_config, args.pose_checkpoint,
                                 args.device)
    vis_frames = [
        vis_pose_result(pose_model, frame_paths[i], pose_results[i])
        for i in range(num_frame)
    ]
    for frame in vis_frames:
        cv2.putText(frame, action_label, (10, 30), FONTFACE, FONTSCALE,
                    FONTCOLOR, THICKNESS, LINETYPE)
    cv2.imwrite('frame.jpg', vis_frames[0])
    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames], fps=24)
    vid.write_videofile(args.out_filename, remove_temp=True)

    tmp_frame_dir = osp.dirname(frame_paths[0])
    shutil.rmtree(tmp_frame_dir)
    return args


if __name__ == '__main__':
    args = parse_args()

    assert args.opset_version == 11, 'MMAction2 only supports opset 11 now'

    cfg = mmcv.Config.fromfile(args.config)
    # import modules from string list.

    if not args.is_localizer:
        cfg.model.backbone.pretrained = None

    # build the model
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
    model = _convert_batchnorm(model)

    # onnx.export does not support kwargs
    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    elif hasattr(model, '_forward') and args.is_localizer:
        model.forward = model._forward
    else:
        raise NotImplementedError(
            'Please implement the forward method for exporting.')

    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')

    # convert model to onnx file
    pytorch2onnx(
        nargs='+',
        default=None,
        help='Input dimension, mandatory for localizers')
    args = parser.parse_args()

    args.input_size = tuple(args.input_size) if args.input_size else None
    return args


if __name__ == '__main__':
    args = parse_args()

    config_path = args.config
    checkpoint_path = args.checkpoint

    model_cfg, test_pipeline, test_cfg = _get_cfg(config_path)

    model = build_model(model_cfg, train_cfg=None, test_cfg=test_cfg).cuda()

    if not args.is_localizer:
        try:
            dummy_input = torch.randn(args.input_size).cuda()
        except TypeError:
            for trans in test_pipeline:
                if trans['type'] == 'SampleFrames':
                    t = trans['clip_len']
                    n = trans['num_clips']
                elif trans['type'] == 'Resize':
                    if isinstance(trans['scale'], int):
                        s = trans['scale']
                    elif isinstance(trans['scale'], tuple):
                        s = max(trans['scale'])
            # #crop x (#batch * #clip) x #channel x clip_len x height x width
            dummy_input = torch.randn(1, 1 * n, 3, t, s, s).cuda()
def get_model_cls_3d(name, num_classes=1, num_classes_aux=0, pretrained=True):
    """
    Loads a pretrained model.
    Supports resnet18, resnet34, resnet50, i3d, slowfast & slowonly.
    Strides are modified to be adapted to the small input size.

    Args:
        name (str): Model name.
        num_classes (int, optional): Number of classes. Defaults to 1.
        num_classes_aux (int, optional): Number of aux classes. Defaults to 0.
        pretrained (bool, optional): Whether to load pretrained weights. Defaults to True.

    Raises:
        NotImplementedError: Specified model name is not supported.

    Returns:
        torch model -- Pretrained model.
    """
    # Load pretrained model
    if "resnet" in name:
        n_classes = 1139 if "KMS" in CP_PATHS[name] else 1039
        depth = int(name[-2:])
        model = generate_model(depth, n_classes=n_classes)
        if pretrained:
            load_model_weights_3d(model, CP_PATHS[name])
    elif name == "i3d":
        model = InceptionI3d(num_classes=400, in_channels=3)
        if pretrained:
            load_model_weights(model, CP_PATHS[name])
    elif name in ["slowfast", "slowonly"]:
        model = build_model(CONFIGS[name])
        if pretrained:
            print(f'\n -> Loading weights from "{CP_PATHS[name]}"\n')
            load_checkpoint(model, CP_PATHS[name])
    else:
        raise NotImplementedError

    model.name = name
    model.num_classes = num_classes
    model.num_classes_aux = num_classes_aux

    if "resnet" in name:
        # Strides
        model.conv1.stride = (1, 1, 1)
        model.layer2[0].conv1.stride = (1, 2, 2)
        model.layer2[0].downsample[0].stride = (1, 2, 2)
        model.layer3[0].conv1.stride = (1, 2, 2)
        model.layer3[0].downsample[0].stride = (1, 2, 2)
        model.layer4[0].conv1.stride = (1, 2, 2)
        model.layer4[0].downsample[0].stride = (1, 2, 2)
        model.maxpool.stride = (1, 2, 2)

        model.nb_ft = model.fc.in_features
        model.fc = nn.Linear(model.nb_ft, num_classes)
        model.forward = lambda x: forward_with_aux_resnet_3d(model, x)
        if num_classes_aux:
            model.fc_aux = nn.Linear(model.nb_ft, num_classes_aux)
    elif name == "i3d":
        model.Conv3d_1a_7x7.conv3d.stride = (1, 1, 1)
        # model.MaxPool3d_2a_3x3.stride = (1, 1, 1)
        model.MaxPool3d_4a_3x3.stride = (1, 2, 2)
        model.MaxPool3d_5a_2x2.stride = (1, 2, 2)

        model.nb_ft = model.logits.conv3d.in_channels
        model.logits = nn.Linear(model.nb_ft, num_classes)
        if num_classes_aux:
            model.fc_aux = nn.Linear(model.nb_ft, num_classes_aux)
    elif name == "slowfast":
        model.backbone.slow_path.conv1.stride = (1, 1, 1)
        model.backbone.fast_path.conv1.stride = (1, 1, 1)
        model.backbone.slow_path.maxpool.stride = (1, 1, 1)
        model.backbone.fast_path.maxpool.stride = (1, 1, 1)
        model.backbone.slow_path.pool2.stride = (1, 1, 1)
        model.backbone.fast_path.pool2.stride = (1, 1, 1)

        model.dropout = nn.Dropout(0.5)
        model.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        model.nb_ft = model.cls_head.fc_cls.in_features
        model.fc = nn.Linear(model.nb_ft, num_classes)
        model.forward = lambda x: forward_slowfast(model, x)
        if num_classes_aux:
            model.fc_aux = nn.Linear(model.nb_ft, num_classes_aux)
    else:  # slowonly
        model.backbone.conv1.stride = (1, 1, 1)
        model.backbone.pool2.stride = (1, 1, 1)
        model.backbone.maxpool.stride = (1, 1, 1)

        model.dropout = nn.Dropout(0.5)
        model.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        model.nb_ft = model.cls_head.fc_cls.in_features
        model.fc = nn.Linear(model.nb_ft, num_classes)
        model.forward = lambda x: forward_slowonly(model, x)
        if num_classes_aux:
            model.fc_aux = nn.Linear(model.nb_ft, num_classes_aux)

    return model
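
# Usage sketch for get_model_cls_3d, assuming the CONFIGS and CP_PATHS
# globals referenced above are defined for the chosen name; pretrained=False
# skips the checkpoint load but the model config is still required.
def _demo_get_model_cls_3d():
    model = get_model_cls_3d("slowonly", num_classes=1, pretrained=False)
    return model
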
def main():
    # parse arguments
    args = parse_args()

    # load config
    cfg = Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()

    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        logger.info('NNCF config: {}'.format(cfg.nncf_config))
        meta.update(get_nncf_metadata())

    # set random seeds
    cfg.seed = args.seed
    meta['seed'] = args.seed
    if cfg.get('seed'):
        logger.info(f'Set random seed to {cfg.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(cfg.seed, deterministic=args.deterministic)

    # build datasets
    datasets = [build_dataset(cfg.data, 'train', dict(logger=logger))]
    logger.info(f'Train datasets:\n{str(datasets[0])}')

    if len(cfg.workflow) == 2:
        if not args.no_validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')
        datasets.append(
            build_dataset(copy.deepcopy(cfg.data), 'val', dict(logger=logger)))
        logger.info(f'Val datasets:\n{str(datasets[-1])}')

    # filter dataset labels
    if cfg.get('classes'):
        datasets = [dataset.filter(cfg.classes) for dataset in datasets]

    # build model
    model = build_model(
        cfg.model,
        train_cfg=cfg.train_cfg,
        test_cfg=cfg.test_cfg,
        class_sizes=datasets[0].class_sizes,
        class_maps=datasets[0].class_maps)

    # define ignore layers
    ignore_prefixes = []
    if hasattr(cfg, 'reset_layer_prefixes') and \
            isinstance(cfg.reset_layer_prefixes, (list, tuple)):
        ignore_prefixes += cfg.reset_layer_prefixes
    ignore_suffixes = ['num_batches_tracked']
    if hasattr(cfg, 'reset_layer_suffixes') and \
            isinstance(cfg.reset_layer_suffixes, (list, tuple)):
        ignore_suffixes += cfg.reset_layer_suffixes

    # train model
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta,
        ignore_prefixes=tuple(ignore_prefixes),
        ignore_suffixes=tuple(ignore_suffixes))
def main():
    args = parse_args()

    args.is_rgb = args.modality == 'RGB'
    args.clip_len = 1 if args.is_rgb else 5
    args.input_format = 'NCHW' if args.is_rgb else 'NCHW_Flow'
    rgb_norm_cfg = dict(
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False)
    flow_norm_cfg = dict(mean=[128, 128], std=[128, 128])
    args.img_norm_cfg = rgb_norm_cfg if args.is_rgb else flow_norm_cfg
    args.f_tmpl = 'img_{:05d}.jpg' if args.is_rgb else 'flow_{}_{:05d}.jpg'
    args.in_channels = args.clip_len * (3 if args.is_rgb else 2)
    # max batch_size for one forward
    args.batch_size = 200

    # define the data pipeline for Untrimmed Videos
    data_pipeline = [
        dict(type='UntrimmedSampleFrames',
             clip_len=args.clip_len,
             frame_interval=args.frame_interval,
             start_index=0),
        dict(type='FrameSelector'),
        dict(type='Resize', scale=(-1, 256)),
        dict(type='CenterCrop', crop_size=256),
        dict(type='Normalize', **args.img_norm_cfg),
        dict(type='FormatShape', input_format=args.input_format),
        dict(type='Collect', keys=['imgs'], meta_keys=[]),
        dict(type='ToTensor', keys=['imgs'])
    ]
    data_pipeline = Compose(data_pipeline)

    # define TSN R50 model, the model is used as the feature extractor
    model_cfg = dict(
        type='Recognizer2D',
        backbone=dict(
            type='ResNet',
            depth=50,
            in_channels=args.in_channels,
            norm_eval=False),
        cls_head=dict(
            type='TSNHead',
            num_classes=400,
            in_channels=2048,
            spatial_type='avg',
            consensus=dict(type='AvgConsensus', dim=1)),
        test_cfg=dict(average_clips=None))
    model = build_model(model_cfg)
    # load pretrained weight into the feature extractor
    state_dict = torch.load(args.ckpt)['state_dict']
    model.load_state_dict(state_dict)
    model = model.cuda()
    model.eval()

    data = open(args.data_list).readlines()
    data = [x.strip() for x in data]
    data = data[args.part::args.total]

    # enumerate Untrimmed videos, extract feature from each of them
    prog_bar = mmcv.ProgressBar(len(data))
    if not osp.exists(args.output_prefix):
        os.makedirs(args.output_prefix)

    for item in data:
        frame_dir, length, label = item.split()
        output_file = osp.basename(frame_dir) + '.pkl'
        frame_dir = osp.join(args.data_prefix, frame_dir)
        output_file = osp.join(args.output_prefix, output_file)
        assert output_file.endswith('.pkl')
        length = int(length)

        # prepare a pseudo sample
        tmpl = dict(
            frame_dir=frame_dir,
            total_frames=length,
            filename_tmpl=args.f_tmpl,
            start_index=0,
            modality=args.modality)
        sample = data_pipeline(tmpl)
        imgs = sample['imgs']
        shape = imgs.shape

        # the original shape should be N_seg * C * H * W; resize it to
        # N_seg * 1 * C * H * W so that the network returns the feature of
        # each frame (no score averaging among segments)
        imgs = imgs.reshape((shape[0], 1) + shape[1:])
        imgs = imgs.cuda()

        def forward_data(model, data):
            # chop large data into pieces and extract feature from them
            results = []
            start_idx = 0
            num_clip = data.shape[0]
            while start_idx < num_clip:
                with torch.no_grad():
                    part = data[start_idx:start_idx + args.batch_size]
                    feat = model.forward(part, return_loss=False)
                    results.append(feat)
                start_idx += args.batch_size
            return np.concatenate(results)

        feat = forward_data(model, imgs)
        with open(output_file, 'wb') as fout:
            pickle.dump(feat, fout)
        prog_bar.update()
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_model(cfg.model,
                        train_cfg=cfg.train_cfg,
                        test_cfg=cfg.test_cfg)

    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(mmaction_version=__version__,
                                          config=cfg.text)

    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=args.validate,
                timestamp=timestamp,
                meta=meta)
def main():
    parser = ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--checkpoint', '-w', type=str, required=True)
    parser.add_argument('--dataset_name', '-n', type=str, required=True)
    parser.add_argument('--data_dir', '-d', type=str, required=True)
    parser.add_argument('--predictions', '-p', type=str, required=True)
    parser.add_argument('--movements', '-m', type=str, required=True)
    parser.add_argument('--keypoints', '-k', type=str, required=True)
    parser.add_argument('--out_annotation', '-o', type=str, required=True)
    args = parser.parse_args()

    assert exists(args.config)
    assert exists(args.checkpoint)
    assert exists(args.data_dir)
    assert exists(args.predictions)
    assert exists(args.movements)
    assert exists(args.keypoints)
    assert args.dataset_name is not None and args.dataset_name != ''
    assert args.out_annotation is not None and args.out_annotation != ''

    cfg = Config.fromfile(args.config)
    cfg = update_config(cfg, args, trg_name=args.dataset_name)
    cfg = propagate_root_dir(cfg, args.data_dir)

    dataset = build_dataset(cfg.data, 'train', dict(test_mode=True))
    data_pipeline = Compose(dataset.pipeline.transforms[1:])
    print('train dataset:\n' + str(dataset))

    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, args.checkpoint, strict=False)
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    annotation_path = join(args.data_dir, cfg.data.train.sources[0],
                           cfg.data.train.ann_file)
    records = load_annotation(annotation_path)
    predictions = load_distributed_data(args.predictions,
                                        parse_predictions_file, 'txt')
    movements = load_distributed_data(args.movements,
                                      parse_movements_file, 'txt')
    hand_kpts = load_distributed_data(args.keypoints,
                                      parse_kpts_file, 'json')
    print('Loaded records: {}'.format(len(records)))

    invalid_stat = dict()
    all_candidates = []

    ignore_candidates = get_ignore_candidates(records, IGNORE_LABELS)
    all_candidates += ignore_candidates

    static_candidates, static_invalids = get_regular_candidates(
        records, predictions, movements, hand_kpts,
        cfg.data.output.length, False,
        STATIC_LABELS, NEGATIVE_LABEL, NO_MOTION_LABEL,
        min_score=0.9, min_length=4, max_distance=1)
    all_candidates += static_candidates
    invalid_stat = update_stat(invalid_stat, static_invalids)
    print('Static candidates: {}'.format(len(static_candidates)))
    if len(invalid_stat) > 0:
        print('Ignored records after static analysis:')
        for ignore_label, ignore_values in invalid_stat.items():
            print('   - {}: {}'.format(ignore_label.replace('_', ' '),
                                       len(ignore_values)))

    dynamic_candidates, dynamic_invalids = get_regular_candidates(
        records, predictions, movements, hand_kpts,
        cfg.data.output.length, True,
        DYNAMIC_LABELS, NEGATIVE_LABEL, NO_MOTION_LABEL,
        min_score=0.9, min_length=4, max_distance=1)
    all_candidates += dynamic_candidates
    invalid_stat = update_stat(invalid_stat, dynamic_invalids)
    print('Dynamic candidates: {}'.format(len(dynamic_candidates)))
    if len(invalid_stat) > 0:
        print('Ignored records after dynamic analysis:')
        for ignore_label, ignore_values in invalid_stat.items():
            print('   - {}: {}'.format(ignore_label.replace('_', ' '),
                                       len(ignore_values)))

    fixed_records, fix_stat = find_best_match(all_candidates, model, dataset,
                                              NEGATIVE_LABEL)
    invalid_stat = update_stat(invalid_stat, fix_stat)
    print('Final records: {}'.format(len(fixed_records)))
    if len(invalid_stat) > 0:
        print('Final ignored records:')
        for ignore_label, ignore_values in invalid_stat.items():
            print('   - {}: {}'.format(ignore_label.replace('_', ' '),
                                       len(ignore_values)))
            for ignored_record in ignore_values:
                print('      - {}'.format(ignored_record.path))

    dump_records(fixed_records, args.out_annotation)
    print('Fixed annotation has been stored at: {}'.format(
        args.out_annotation))
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # CLI > config file > default (base filename)
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # The flag is used to determine whether it is omnisource training
    cfg.setdefault('omnisource', False)

    # The flag is used to register module's hooks
    cfg.setdefault('module_hooks', [])

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config: {cfg.text}')

    # set random seeds
    if args.seed is not None:
        logger.info(f'Set random seed to {args.seed}, '
                    f'deterministic: {args.deterministic}')
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    meta['config_name'] = osp.basename(args.config)
    meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

    model = build_model(cfg.model,
                        train_cfg=cfg.get('train_cfg'),
                        test_cfg=cfg.get('test_cfg'))

    register_module_hooks(model.backbone, cfg.module_hooks)

    if cfg.omnisource:
        # If omnisource flag is set, cfg.data.train should be a list
        assert type(cfg.data.train) is list
        datasets = [build_dataset(dataset) for dataset in cfg.data.train]
    else:
        datasets = [build_dataset(cfg.data.train)]

    if len(cfg.workflow) == 2:
        # For simplicity, omnisource is not compatible with val workflow;
        # we recommend you to use `--validate`
        assert not cfg.omnisource
        if args.validate:
            warnings.warn('val workflow is duplicated with `--validate`, '
                          'it is recommended to use `--validate`. see '
                          'https://github.com/open-mmlab/mmaction2/pull/123')
        val_dataset = copy.deepcopy(cfg.data.val)
        datasets.append(build_dataset(val_dataset))
    if cfg.checkpoint_config is not None:
        # save mmaction version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmaction_version=__version__ + get_git_hash(digits=7),
            config=cfg.text)

    train_model(model,
                datasets,
                cfg,
                distributed=distributed,
                validate=args.validate,
                timestamp=timestamp,
                meta=meta)
def main():
    parser = ArgumentParser()
    parser.add_argument('--config', type=str, required=True,
                        help='Test config file path')
    parser.add_argument('--checkpoint', type=str, required=True,
                        help='Checkpoint file')
    parser.add_argument('--data_dir', type=str, required=True,
                        help='The dir with dataset')
    parser.add_argument('--out_dir', type=str, required=True,
                        help='Output directory')
    parser.add_argument('--dataset', type=str, required=True,
                        help='Dataset name')
    parser.add_argument('--gpus', default=1, type=int,
                        help='GPU number used for annotating')
    parser.add_argument('--proc_per_gpu', default=2, type=int,
                        help='Number of processes per GPU')
    parser.add_argument('--mode', choices=['train', 'val', 'test'],
                        default='train')
    args = parser.parse_args()

    assert exists(args.config)
    assert exists(args.checkpoint)
    assert exists(args.data_dir)

    cfg = Config.fromfile(args.config)
    cfg = update_config(cfg, args, trg_name=args.dataset)
    cfg = propagate_root_dir(cfg, args.data_dir)

    dataset = build_dataset(cfg.data, args.mode, dict(test_mode=True))
    data_pipeline = Compose(dataset.pipeline.transforms[1:])
    print('{} dataset:\n'.format(args.mode) + str(dataset))

    tasks = prepare_tasks(dataset, cfg.input_clip_length)
    print('Prepared tasks: {}'.format(sum([len(v) for v in tasks.values()])))

    if not exists(args.out_dir):
        makedirs(args.out_dir)

    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, args.checkpoint, strict=False)

    batch_size = 4 * cfg.data.videos_per_gpu
    if args.gpus == 1:
        model = MMDataParallel(model, device_ids=[0])
        model.eval()

        process_tasks(tasks, dataset, model, args.out_dir, batch_size,
                      cfg.input_clip_length, data_pipeline)
    else:
        raise NotImplementedError
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # set cudnn benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    if cfg.test_cfg is None:
        cfg.test_cfg = dict(average_clips=args.average_clips)
    else:
        cfg.test_cfg.average_clips = args.average_clips

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    data_loader = build_dataloader(dataset,
                                   videos_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # map labels from the txt annotation into a csv file
    df = pd.read_csv('/data2/phap/datasets/dataset3_test.txt', header=None)
    df.columns = ['full_name']
    df['file_name'] = df['full_name'].apply(lambda x: x.rsplit(' ')[0])
    df['true_label'] = df['full_name'].apply(lambda x: x.rsplit(' ')[-1])

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    # take the argmax of the softmax output to get the predicted class
    pred_arr = []
    for i in outputs:
        pred = np.argmax(i)
        pred_arr.append(pred)

    # import output into csv
    df['pred_label_orig'] = outputs
    df['pred_label'] = pred_arr

    # save csv file
    df.to_csv('dataset3_test_pred_w_rwf_model.csv')
    print('\nSuccess, csv file saved')

    rank, _ = get_dist_info()
    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)
        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # get rank
    rank, _ = get_dist_info()

    if cfg.get('seed'):
        print(f'Set random seed to {cfg.seed}')
        set_random_seed(cfg.seed)

    # build the dataset
    dataset = build_dataset(cfg.data, 'test', dict(test_mode=True))
    if cfg.get('classes'):
        dataset = dataset.filter(cfg.classes)
    if rank == 0:
        print(f'Test datasets:\n{str(dataset)}')

    # build the dataloader
    data_loader = build_dataloader(dataset,
                                   videos_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)

    # build the model and load checkpoint
    model = build_model(cfg.model,
                        train_cfg=None,
                        test_cfg=cfg.test_cfg,
                        class_sizes=dataset.class_sizes,
                        class_maps=dataset.class_maps)

    # nncf model wrapper
    if is_checkpoint_nncf(args.checkpoint) and not cfg.get('nncf_config'):
        # reading NNCF config from checkpoint
        nncf_part = get_nncf_config_from_meta(args.checkpoint)
        for k, v in nncf_part.items():
            cfg[k] = v

    if cfg.get('nncf_config'):
        check_nncf_is_enabled()
        if not is_checkpoint_nncf(args.checkpoint):
            raise RuntimeError(
                'Trying to make testing with NNCF compression a model '
                'snapshot that was NOT trained with NNCF')
        cfg.load_from = args.checkpoint
        cfg.resume_from = None
        if torch.cuda.is_available():
            model = model.cuda()
        _, model = wrap_nncf_model(model, cfg, None, get_fake_input)
    else:
        fp16_cfg = cfg.get('fp16', None)
        if fp16_cfg is not None:
            wrap_fp16_model(model)
        # load model weights
        load_checkpoint(model, args.checkpoint, map_location='cpu',
                        force_matching=True)
        if args.fuse_conv_bn:
            model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)

        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)

            print('\nFinal metrics:')
            for name, val in eval_res.items():
                if 'invalid_info' in name:
                    continue

                if isinstance(val, float):
                    print(f'{name}: {val:.04f}')
                elif isinstance(val, str):
                    print(f'{name}:\n{val}')
                else:
                    print(f'{name}: {val}')

            invalid_info = {
                name: val
                for name, val in eval_res.items() if 'invalid_info' in name
            }
            if len(invalid_info) > 0:
                assert args.out_invalid is not None and args.out_invalid != ''
                if os.path.exists(args.out_invalid):
                    shutil.rmtree(args.out_invalid)
                if not os.path.exists(args.out_invalid):
                    os.makedirs(args.out_invalid)

                for name, invalid_record in invalid_info.items():
                    out_invalid_dir = os.path.join(args.out_invalid, name)

                    item_gen = zip(invalid_record['ids'],
                                   invalid_record['conf'],
                                   invalid_record['pred'])
                    for invalid_idx, pred_conf, pred_label in item_gen:
                        record_info = dataset.get_info(invalid_idx)
                        gt_label = record_info['label']

                        if 'filename' in record_info:
                            src_data_path = record_info['filename']

                            in_record_name, record_extension = \
                                os.path.basename(src_data_path).split('.')
                            out_record_name = (
                                f'{in_record_name}_gt{gt_label}'
                                f'_pred{pred_label}_conf{pred_conf:.3f}')
                            trg_data_path = os.path.join(
                                out_invalid_dir,
                                f'{out_record_name}.{record_extension}')

                            shutil.copyfile(src_data_path, trg_data_path)
                        else:
                            src_data_path = record_info['frame_dir']

                            in_record_name = os.path.basename(src_data_path)
                            out_record_name = (
                                f'{in_record_name}_gt{gt_label}'
                                f'_pred{pred_label}_conf{pred_conf:.3f}')
                            trg_data_path = os.path.join(out_invalid_dir,
                                                         out_record_name)
                            os.makedirs(trg_data_path)

                            start_frame_id = (record_info['clip_start'] +
                                              dataset.start_index)
                            end_frame_id = (record_info['clip_end'] +
                                            dataset.start_index)
                            for frame_id in range(start_frame_id,
                                                  end_frame_id):
                                img_name = f'{frame_id:05}.jpg'
                                shutil.copyfile(
                                    os.path.join(src_data_path, img_name),
                                    os.path.join(trg_data_path, img_name))
    return args


if __name__ == '__main__':
    args = parse_args()

    assert args.opset_version == 11, 'MMAction2 only supports opset 11 now'

    cfg = mmcv.Config.fromfile(args.config)
    # import modules from string list.

    if not args.is_localizer:
        cfg.model.backbone.pretrained = None

    # build the model
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    model = _convert_batchnorm(model)

    # onnx.export does not support kwargs
    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    elif hasattr(model, '_forward') and args.is_localizer:
        model.forward = model._forward
    else:
        raise NotImplementedError(
            'Please implement the forward method for exporting.')

    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')

    # convert model to onnx file
    pytorch2onnx(
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # set cudnn benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    if cfg.test_cfg is None:
        cfg.test_cfg = dict(average_clips=args.average_clips)
    else:
        # You can set average_clips during testing, it will override the
        # original setting
        if args.average_clips is not None:
            cfg.test_cfg.average_clips = args.average_clips

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    dataloader_setting = dict(
        videos_per_gpu=cfg.data.get('videos_per_gpu', 2),
        workers_per_gpu=cfg.data.get('workers_per_gpu', 0),
        dist=distributed,
        shuffle=False)
    dataloader_setting = dict(dataloader_setting,
                              **cfg.data.get('test_dataloader', {}))
    data_loader = build_dataloader(dataset, **dataloader_setting)

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)
        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')
def skeleton_based_stdet(args, label_map, human_detections, pose_results,
                         num_frame, clip_len, frame_interval, h, w):
    window_size = clip_len * frame_interval
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    timestamps = np.arange(window_size // 2, num_frame + 1 - window_size // 2,
                           args.predict_stepsize)

    skeleton_config = mmcv.Config.fromfile(args.skeleton_config)
    num_class = max(label_map.keys()) + 1  # for AVA dataset (81)
    skeleton_config.model.cls_head.num_classes = num_class
    skeleton_pipeline = Compose(skeleton_config.test_pipeline)
    skeleton_stdet_model = build_model(skeleton_config.model)
    load_checkpoint(skeleton_stdet_model, args.skeleton_stdet_checkpoint,
                    map_location='cpu')
    skeleton_stdet_model.to(args.device)
    skeleton_stdet_model.eval()

    skeleton_predictions = []

    print('Performing SpatioTemporal Action Detection for each clip')
    prog_bar = mmcv.ProgressBar(len(timestamps))
    for timestamp in timestamps:
        proposal = human_detections[timestamp - 1]
        if proposal.shape[0] == 0:  # no people detected
            skeleton_predictions.append(None)
            continue

        start_frame = timestamp - (clip_len // 2 - 1) * frame_interval
        frame_inds = start_frame + np.arange(0, window_size, frame_interval)
        frame_inds = list(frame_inds - 1)
        num_frame = len(frame_inds)  # 30

        pose_result = [pose_results[ind] for ind in frame_inds]

        skeleton_prediction = []
        for i in range(proposal.shape[0]):  # num_person
            skeleton_prediction.append([])

            fake_anno = dict(
                frame_dir='',
                label=-1,
                img_shape=(h, w),
                original_shape=(h, w),
                start_index=0,
                modality='Pose',
                total_frames=num_frame)
            num_person = 1

            num_keypoint = 17
            keypoint = np.zeros(
                (num_person, num_frame, num_keypoint, 2))  # M T V 2
            keypoint_score = np.zeros(
                (num_person, num_frame, num_keypoint))  # M T V

            # pose matching
            person_bbox = proposal[i][:4]
            area = expand_bbox(person_bbox, h, w)

            for j, poses in enumerate(pose_result):  # num_frame
                max_iou = float('-inf')
                index = -1
                if len(poses) == 0:
                    continue
                for k, per_pose in enumerate(poses):
                    iou = cal_iou(per_pose['bbox'][:4], area)
                    if max_iou < iou:
                        index = k
                        max_iou = iou
                keypoint[0, j] = poses[index]['keypoints'][:, :2]
                keypoint_score[0, j] = poses[index]['keypoints'][:, 2]

            fake_anno['keypoint'] = keypoint
            fake_anno['keypoint_score'] = keypoint_score

            skeleton_imgs = skeleton_pipeline(fake_anno)['imgs'][None]
            skeleton_imgs = skeleton_imgs.to(args.device)

            with torch.no_grad():
                output = skeleton_stdet_model(return_loss=False,
                                              imgs=skeleton_imgs)
                output = output[0]
                for k in range(len(output)):  # 81
                    if k not in label_map:
                        continue
                    if output[k] > args.action_score_thr:
                        skeleton_prediction[i].append(
                            (label_map[k], output[k]))

        skeleton_predictions.append(skeleton_prediction)
        prog_bar.update()

    return timestamps, skeleton_predictions
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    cfg.merge_from_dict(args.cfg_options)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    if args.out:
        # Overwrite output_config from args.out
        output_config = Config._merge_a_into_b(dict(out=args.out),
                                               output_config)

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    if args.eval:
        # Overwrite eval_config from args.eval
        eval_config = Config._merge_a_into_b(dict(metrics=args.eval),
                                             eval_config)
    if args.eval_options:
        # Add options from args.eval_options
        eval_config = Config._merge_a_into_b(args.eval_options, eval_config)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    dataset_type = cfg.data.test.type
    if output_config.get('out', None):
        if 'output_format' in output_config:
            # ugly workaround to make recognition and localization the same
            warnings.warn(
                'Skip checking `output_format` in localization task.')
        else:
            out = output_config['out']
            # make sure the dirname of the output path exists
            mmcv.mkdir_or_exist(osp.dirname(out))
            _, suffix = osp.splitext(out)
            if dataset_type == 'AVADataset':
                assert suffix[1:] == 'csv', ('For AVADataset, the format of '
                                             'the output file should be csv')
            else:
                assert suffix[1:] in file_handlers, (
                    'The format of the output '
                    'file should be json, pickle or yaml')

    # set cudnn benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    if args.average_clips is not None:
        # You can set average_clips during testing, it will override the
        # original setting
        if cfg.model.get('test_cfg') is None and cfg.get('test_cfg') is None:
            cfg.model.setdefault('test_cfg',
                                 dict(average_clips=args.average_clips))
        else:
            if cfg.model.get('test_cfg') is not None:
                cfg.model.test_cfg.average_clips = args.average_clips
            else:
                cfg.test_cfg.average_clips = args.average_clips

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # The flag is used to register module's hooks
    cfg.setdefault('module_hooks', [])

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    dataloader_setting = dict(
        videos_per_gpu=cfg.data.get('videos_per_gpu', 1),
        workers_per_gpu=cfg.data.get('workers_per_gpu', 1),
        dist=distributed,
        shuffle=False)
    dataloader_setting = dict(dataloader_setting,
                              **cfg.data.get('test_dataloader', {}))
    data_loader = build_dataloader(dataset, **dataloader_setting)

    # remove redundant pretrain steps for testing
    turn_off_pretrained(cfg.model)

    # build the model and load checkpoint
    model = build_model(cfg.model, train_cfg=None,
                        test_cfg=cfg.get('test_cfg'))

    if len(cfg.module_hooks) > 0:
        register_module_hooks(model, cfg.module_hooks)

    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    rank, _ = get_dist_info()
    if rank == 0:
        if output_config.get('out', None):
            out = output_config['out']
            print(f'\nwriting results to {out}')
            if out.endswith('json'):
                result_dict = {}
                for result in outputs:
                    video_name = result['video_name']
                    result_dict[video_name] = result['proposal_list']
                output_dict = {
                    'version': 'VERSION 1.3',
                    'results': result_dict,
                    'external_data': {}
                }
                mmcv.dump(output_dict, out)
            else:
                dataset.dump_results(outputs, **output_config)
        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    if args.update_config is not None:
        cfg.merge_from_dict(args.update_config)
    cfg = update_config(cfg, args)
    cfg = propagate_root_dir(cfg, args.data_dir)

    # Load output_config from cfg
    output_config = cfg.get('output_config', {})
    # Overwrite output_config from args.out
    output_config = merge_configs(output_config, dict(out=args.out))

    # Load eval_config from cfg
    eval_config = cfg.get('eval_config', {})
    # Overwrite eval_config from args.eval
    eval_config = merge_configs(eval_config, dict(metrics=args.eval))
    # Add options from args.option
    eval_config = merge_configs(eval_config, args.options)

    assert output_config or eval_config, \
        ('Please specify at least one operation (save or eval the '
         'results) with the argument "--out" or "--eval"')

    # init distributed env first, since logger depends on the dist info.
    distributed = args.launcher != 'none'
    if distributed:
        init_dist(args.launcher, **cfg.dist_params)

    # get rank
    rank, _ = get_dist_info()

    if cfg.get('seed'):
        print(f'Set random seed to {cfg.seed}')
        set_random_seed(cfg.seed)

    # build the dataset
    dataset = build_dataset(cfg.data, 'test', dict(test_mode=True))
    if cfg.get('classes'):
        dataset = dataset.filter(cfg.classes)
    if rank == 0:
        print(f'Test datasets:\n{str(dataset)}')

    # build the dataloader
    data_loader = build_dataloader(
        dataset,
        videos_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load checkpoint
    model = build_model(
        cfg.model,
        train_cfg=None,
        test_cfg=cfg.test_cfg,
        class_sizes=dataset.class_sizes,
        class_maps=dataset.class_maps)

    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)

    # load model weights
    load_checkpoint(model, args.checkpoint, map_location='cpu',
                    force_matching=True)

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, data_loader)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model, data_loader, args.tmpdir,
                                 args.gpu_collect)

    if rank == 0:
        if output_config:
            out = output_config['out']
            print(f'\nwriting results to {out}')
            dataset.dump_results(outputs, **output_config)

        if eval_config:
            eval_res = dataset.evaluate(outputs, **eval_config)

            print('\nFinal metrics:')
            for name, val in eval_res.items():
                print(f'{name}: {val:.04f}')