def Test(args):
    """Evaluate a trained video model on the test set and log its accuracy.

    Builds a test-mode network (data-parallel over the selected GPUs, or a
    single CPU replica), loads weights from ``args.load_model_path``, runs
    ``num_iter`` batches and logs clip-level accuracy, video-level top-1 and
    top-``args.top_k`` accuracy, followed by the FLOPs/params reported by
    ``model_helper.GetFlopsAndParams``.

    Args:
        args: parsed command-line namespace. Fields read here include
            ``gpus``, ``num_gpus``, ``batch_size``, ``clip_per_video``,
            ``num_iter``, ``db_type``, ``load_model_path``, ``top_k``,
            ``aggregation``, ``display_iter`` and the model/input options
            forwarded to ``build_model`` / ``AddVideoInput``.

    Returns:
        None. All results are written to the log.
    """
    # An explicit ``--gpus`` list takes precedence over ``--num_gpus``.
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        # Materialise the ids so they log as a list and can be indexed.
        gpus = list(range(args.num_gpus))
        num_gpus = args.num_gpus

    if num_gpus > 0:
        total_batch_size = args.batch_size * num_gpus
        log.info("Running on GPUs: {}".format(gpus))
    else:
        total_batch_size = args.batch_size
        log.info("Running on CPU")
    log.info("total_batch_size: {}".format(total_batch_size))

    # Model building functions
    def create_model_ops(model, loss_scale):
        return model_builder.build_model(
            model=model,
            model_name=args.model_name,
            model_depth=args.model_depth,
            num_labels=args.num_labels,
            num_channels=args.num_channels,
            crop_size=args.crop_size,
            clip_length=(
                args.clip_length_of if args.input_type == 1
                else args.clip_length_rgb
            ),
            loss_scale=loss_scale,
            is_test=1,
            pred_layer_name=args.pred_layer_name,
        )

    test_model = cnn.CNNModelHelper(
        order="NCHW",
        name="video_model_test",
        use_cudnn=(args.use_cudnn == 1),
        cudnn_exhaustive_search=True,
    )

    test_reader, number_of_examples = model_builder.create_data_reader(
        test_model,
        name="test_reader",
        input_data=args.test_data,
    )

    # A non-positive --num_iter means "as many full batches as the data has".
    if args.num_iter <= 0:
        num_iter = int(number_of_examples / total_batch_size)
    else:
        num_iter = args.num_iter

    def test_input_fn(model):
        # The input ops are always attached to ``test_model``; every caller
        # in this function passes that same object as ``model``.
        model_helper.AddVideoInput(
            test_model,
            test_reader,
            batch_size=args.batch_size,
            clip_per_video=args.clip_per_video,
            decode_type=1,
            length_rgb=args.clip_length_rgb,
            sampling_rate_rgb=args.sampling_rate_rgb,
            scale_h=args.scale_h,
            scale_w=args.scale_w,
            crop_size=args.crop_size,
            num_decode_threads=4,
            num_of_class=args.num_labels,
            random_mirror=False,
            random_crop=False,
            input_type=args.input_type,
            length_of=args.clip_length_of,
            sampling_rate_of=args.sampling_rate_of,
            frame_gap_of=args.frame_gap_of,
            do_flow_aggregation=args.do_flow_aggregation,
            flow_data_type=args.flow_data_type,
            get_rgb=(args.input_type == 0),
            get_optical_flow=(args.input_type == 1),
            get_video_id=args.get_video_id,
            use_local_file=args.use_local_file,
        )

    if num_gpus > 0:
        data_parallel_model.Parallelize_GPU(
            test_model,
            input_builder_fun=test_input_fn,
            forward_pass_builder_fun=create_model_ops,
            param_update_builder_fun=None,
            devices=gpus
        )
    else:
        test_model._device_type = caffe2_pb2.CPU
        test_model._devices = [0]
        device_opt = core.DeviceOption(test_model._device_type, 0)
        with core.DeviceScope(device_opt):
            # Because our loaded models are named with "gpu_x", keep the
            # naming for now.
            # TODO: Save model using `data_parallel_model.ExtractPredictorNet`
            # to extract the model for "gpu_0". It also renames
            # the input and output blobs by stripping the "gpu_x/" prefix
            with core.NameScope("{}_{}".format("gpu", 0)):
                test_input_fn(test_model)
                create_model_ops(test_model, 1.0)

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    if args.db_type == 'minidb':
        if num_gpus > 0:
            model_helper.LoadModel(args.load_model_path, args.db_type)
            data_parallel_model.FinalizeAfterCheckpoint(test_model)
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
                model_helper.LoadModel(args.load_model_path, args.db_type)
    elif args.db_type == 'pickle':
        if num_gpus > 0:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=True,
                root_gpu_id=gpus[0]
            )
            data_parallel_model.FinalizeAfterCheckpoint(test_model)
        else:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=False
            )
    else:
        # NOTE(review): evaluation continues with whatever weights
        # param_init_net produced; confirm this best-effort is intended.
        log.warning("Unsupported db_type: {}".format(args.db_type))

    # metric counters for classification
    clip_acc = 0
    video_top1 = 0
    video_topk = 0
    video_count = 0
    clip_count = 0

    # Blobs live under "gpu_<device id>/". Use the ids the model was actually
    # built with (which may come from --gpus) rather than range(args.num_gpus);
    # the CPU replica was created under the "gpu_0" name scope above.
    device_ids = gpus if num_gpus > 0 else [0]

    for i in range(num_iter):
        workspace.RunNet(test_model.net.Proto().name)
        for device_id in device_ids:
            scope = "gpu_{}".format(device_id)
            # get labels
            label = workspace.FetchBlob(scope + '/label')
            # get predictions
            predicts = workspace.FetchBlob(scope + '/softmax')
            assert predicts.shape[0] == args.batch_size * args.clip_per_video

            for j in range(args.batch_size):
                first_clip = j * args.clip_per_video
                # get label for one video (taken from its first clip)
                sample_label = label[first_clip]

                # get clip accuracy
                for k in range(args.clip_per_video):
                    c1, _ = metric.accuracy_metric(
                        predicts[first_clip + k, :],
                        label[first_clip + k])
                    clip_acc = clip_acc + c1

                # get all clip predictions for one video
                all_clips = predicts[
                    first_clip:first_clip + args.clip_per_video, :]

                # aggregate predictions into one
                video_pred = PredictionAggregation(all_clips, args.aggregation)
                c1, ck = metric.accuracy_metric(
                    video_pred, sample_label, args.top_k)
                video_top1 = video_top1 + c1
                video_topk = video_topk + ck

            video_count = video_count + args.batch_size
            clip_count = clip_count + label.shape[0]

        if i > 0 and i % args.display_iter == 0:
            # Label the running top-k figure with the configured k.
            log.info('Iter {}/{}: clip: {}, top1: {}, top {}: {}'.format(
                i,
                num_iter,
                clip_acc / clip_count,
                video_top1 / video_count,
                args.top_k,
                video_topk / video_count))

    log.info("Test accuracy: clip: {}, top 1: {}, top{}: {}".format(
        clip_acc / clip_count,
        video_top1 / video_count,
        args.top_k,
        video_topk / video_count
    ))

    if num_gpus > 0:
        flops, params = model_helper.GetFlopsAndParams(test_model, gpus[0])
    else:
        flops, params = model_helper.GetFlopsAndParams(test_model)
    log.info('FLOPs: {}, params: {}'.format(flops, params))
def Test(args):
    """Evaluate a video model one video at a time, in fixed-size slices.

    A separate data-loader net decodes all crops of a single video per device
    (``clip_per_video * crop_per_clip`` of them). Those crops are then fed to
    the model net ``args.crop_per_inference`` at a time, the per-slice
    predictions are concatenated and aggregated into one video-level
    prediction. Depending on ``args.multi_label`` either mAUC/mAP metrics or
    clip/top-1/top-k accuracies are logged, followed by the values returned
    by ``model_helper.GetFlopsAndParams``.

    Args:
        args: parsed command-line namespace. ``args.batch_size`` must be 1.
            Other fields read here include ``gpus``, ``num_gpus``,
            ``clip_per_video``, ``crop_per_clip``, ``crop_per_inference``,
            ``multi_label``, ``num_labels``, ``top_k``, ``aggregation``,
            ``display_iter``, ``db_type``, ``load_model_path`` and the
            model/input options forwarded to ``build_model`` /
            ``AddVideoInput``.

    Returns:
        None. All results are written to the log.

    Raises:
        AssertionError: if ``args.batch_size != 1``, if the crops of a video
            cannot be split evenly into slices of ``args.crop_per_inference``,
            or if a fetched prediction batch has an unexpected size.
    """
    assert args.batch_size == 1  # large testing assume batch size one

    # An explicit ``--gpus`` list takes precedence over ``--num_gpus``.
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        # Materialise the ids so they log as a list and can be indexed.
        gpus = list(range(args.num_gpus))
        num_gpus = args.num_gpus

    if num_gpus > 0:
        total_batch_size = args.batch_size * num_gpus
        log.info("Running on GPUs: {}".format(gpus))
    else:
        total_batch_size = args.batch_size
        log.info("Running on CPU")
    log.info("total_batch_size: {}".format(total_batch_size))

    video_input_args = dict(
        batch_size=args.batch_size,
        clip_per_video=args.clip_per_video,
        decode_type=1,
        length_rgb=args.clip_length_rgb,
        sampling_rate_rgb=args.sampling_rate_rgb,
        scale_h=args.scale_h,
        scale_w=args.scale_w,
        crop_size=args.crop_size,
        video_res_type=args.video_res_type,
        short_edge=min(args.scale_h, args.scale_w),
        num_decode_threads=args.num_decode_threads,
        do_multi_label=args.multi_label,
        num_of_class=args.num_labels,
        random_mirror=False,
        random_crop=False,
        input_type=args.input_type,
        length_of=args.clip_length_of,
        sampling_rate_of=args.sampling_rate_of,
        frame_gap_of=args.frame_gap_of,
        do_flow_aggregation=args.do_flow_aggregation,
        flow_data_type=args.flow_data_type,
        get_rgb=(args.input_type == 0 or args.input_type >= 3),
        get_optical_flow=(args.input_type == 1 or args.input_type >= 4),
        use_local_file=args.use_local_file,
        crop_per_clip=args.crop_per_clip,
    )

    reader_args = dict(
        name="test_reader",
        input_data=args.test_data,
    )

    # Model building functions
    def create_model_ops(model, loss_scale):
        return model_builder.build_model(
            model=model,
            model_name=args.model_name,
            model_depth=args.model_depth,
            num_labels=args.num_labels,
            batch_size=args.batch_size * args.clip_per_video,
            num_channels=args.num_channels,
            crop_size=args.crop_size,
            clip_length=(args.clip_length_of if args.input_type == 1
                         else args.clip_length_rgb),
            loss_scale=loss_scale,
            is_test=1,
            pred_layer_name=args.pred_layer_name,
            multi_label=args.multi_label,
            channel_multiplier=args.channel_multiplier,
            bottleneck_multiplier=args.bottleneck_multiplier,
            use_dropout=args.use_dropout,
            conv1_temporal_stride=args.conv1_temporal_stride,
            conv1_temporal_kernel=args.conv1_temporal_kernel,
            use_convolutional_pred=args.use_convolutional_pred,
            use_pool1=args.use_pool1,
        )

    def empty_function(model, loss_scale=1):
        # Placeholder builder: the loader net has no forward pass and the
        # model net has no input ops of its own.
        return

    test_data_loader = cnn.CNNModelHelper(
        order="NCHW",
        name="data_loader",
    )
    test_model = cnn.CNNModelHelper(
        order="NCHW",
        name="video_model",
        use_cudnn=(args.use_cudnn == 1),
        cudnn_exhaustive_search=True,
    )

    test_reader, number_of_examples = reader_utils.create_data_reader(
        test_data_loader,
        **reader_args)

    # A non-positive --num_iter means "cover the whole test set". Divide as
    # float so the ceil also rounds up where "/" is integer division.
    if args.num_iter <= 0:
        num_iter = int(
            math.ceil(number_of_examples / float(total_batch_size)))
    else:
        num_iter = args.num_iter

    def test_input_fn(model):
        # Input ops always go on the loader net, regardless of ``model``.
        model_helper.AddVideoInput(
            test_data_loader,
            test_reader,
            **video_input_args)

    if num_gpus > 0:
        data_parallel_model.Parallelize_GPU(
            test_data_loader,
            input_builder_fun=test_input_fn,
            forward_pass_builder_fun=empty_function,
            param_update_builder_fun=None,
            devices=gpus,
            optimize_gradient_memory=True,
        )
        data_parallel_model.Parallelize_GPU(
            test_model,
            input_builder_fun=empty_function,
            forward_pass_builder_fun=create_model_ops,
            param_update_builder_fun=None,
            devices=gpus,
            optimize_gradient_memory=True,
        )
    else:
        test_model._device_type = caffe2_pb2.CPU
        test_model._devices = [0]
        device_opt = core.DeviceOption(test_model._device_type, 0)
        with core.DeviceScope(device_opt):
            # Because our loaded models are named with "gpu_x",
            # keep the naming for now.
            # TODO: Save model using `data_parallel_model.ExtractPredictorNet`
            # to extract the model for "gpu_0". It also renames
            # the input and output blobs by stripping the "gpu_x/" prefix
            with core.NameScope("{}_{}".format("gpu", 0)):
                test_input_fn(test_data_loader)
                create_model_ops(test_model, 1.0)

    workspace.RunNetOnce(test_data_loader.param_init_net)
    workspace.CreateNet(test_data_loader.net)
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    if args.db_type == 'minidb':
        if num_gpus > 0:
            model_helper.LoadModel(args.load_model_path, args.db_type)
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
                model_helper.LoadModel(args.load_model_path, args.db_type)
    elif args.db_type == 'pickle':
        if num_gpus > 0:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=True,
                root_gpu_id=gpus[0])
        else:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=False)
    else:
        log.warning("Unsupported db_type: {}".format(args.db_type))

    # NOTE(review): called on the CPU path as well; confirm that
    # FinalizeAfterCheckpoint supports a model that was not parallelised.
    data_parallel_model.FinalizeAfterCheckpoint(test_model)

    # metric counters for multilabel
    # np.float64 is what the removed ``np.float`` alias resolved to.
    all_prob_for_map = np.empty(shape=[0, args.num_labels], dtype=np.float64)
    all_label_for_map = np.empty(shape=[0, args.num_labels], dtype=np.int32)

    # metric counters for closed-world classification
    clip_acc = 0
    video_top1 = 0
    video_topk = 0
    video_count = 0
    clip_count = 0

    # Blobs live under "gpu_<device id>/". The CPU replica was created under
    # the "gpu_0" name scope above, and ``gpus`` is empty in that case.
    device_ids = gpus if num_gpus > 0 else [0]
    num_devices = len(device_ids)

    # actual_batch_size
    inference_batch_size = args.crop_per_inference
    num_crop_per_bag = args.clip_per_video * args.crop_per_clip

    # make sure you do your math correctly: every video must split into
    # whole slices, otherwise trailing crops would be silently dropped
    assert (inference_batch_size > 0
            and num_crop_per_bag % inference_batch_size == 0), (
        "clip_per_video * crop_per_clip ({}) must be a positive multiple of "
        "crop_per_inference ({})".format(
            num_crop_per_bag, inference_batch_size))
    num_slice = num_crop_per_bag // inference_batch_size

    for i in range(num_iter):
        # load one batch of data assume 1 video
        # which is (#clips x #crops) x 3 x crop_size x crop_size
        workspace.RunNet(test_data_loader.net.Proto().name)

        # get all data into a list, each per device (gpu)
        video_data = []
        label_data = []
        # per-device list of per-slice prediction arrays
        predict_chunks = []
        for device_id in device_ids:
            scope = "gpu_{}".format(device_id)
            video_data.append(workspace.FetchBlob(scope + '/data'))
            label_data.append(workspace.FetchBlob(scope + '/label'))
            predict_chunks.append([])

        for slice_idx in range(num_slice):
            begin = slice_idx * inference_batch_size
            end = begin + inference_batch_size

            for g, device_id in enumerate(device_ids):
                scope = "gpu_{}".format(device_id)
                data = video_data[g][begin:end, :, :, :, :]
                if args.multi_label:
                    label = label_data[g][begin:end, :]
                else:
                    label = label_data[g][begin:end]
                workspace.FeedBlob(scope + '/data', data)
                workspace.FeedBlob(scope + '/label', label)

            # do one iteration of inference over one slice across devices
            workspace.RunNet(test_model.net.Proto().name)

            for g, device_id in enumerate(device_ids):
                scope = "gpu_{}".format(device_id)
                # get predictions
                if args.multi_label:
                    predicts = workspace.FetchBlob(scope + '/prob')
                else:
                    predicts = workspace.FetchBlob(scope + '/softmax')
                assert predicts.shape[0] == inference_batch_size
                # accumulate predictions; joined once after all slices
                predict_chunks[g].append(predicts)

        for g in range(num_devices):
            predicts = np.concatenate(predict_chunks[g], axis=0)
            if args.multi_label:
                sample_label = label_data[g][0, :]
            else:
                sample_label = label_data[g][0]

            # get clip accuracy. Only defined (and only reported) for
            # single-label evaluation: with a label vector the scalar
            # comparison below would be ambiguous.
            if not args.multi_label:
                for k in range(num_crop_per_bag):
                    sorted_preds = np.argsort(predicts[k, :])[::-1]
                    if sorted_preds[0] == sample_label:
                        clip_acc = clip_acc + 1

            # since batch_size == 1
            all_clips = predicts

            # aggregate predictions into one
            video_pred = PredictionAggregation(all_clips, args.aggregation)
            if args.multi_label:
                video_pred = np.expand_dims(video_pred, axis=0)
                sample_label = np.expand_dims(sample_label, axis=0)
                all_prob_for_map = np.concatenate(
                    (all_prob_for_map, video_pred), axis=0)
                all_label_for_map = np.concatenate(
                    (all_label_for_map, sample_label), axis=0)
            else:
                sorted_video_pred = np.argsort(video_pred)[::-1]
                if sorted_video_pred[0] == sample_label:
                    video_top1 = video_top1 + 1
                if sample_label in sorted_video_pred[0:args.top_k]:
                    video_topk = video_topk + 1

        video_count = video_count + num_devices
        clip_count = clip_count + num_devices * num_crop_per_bag

        if i > 0 and i % args.display_iter == 0:
            if args.multi_label:
                # mAP
                auc, ap, wap, aps = metric.mean_ap_metric(
                    all_prob_for_map, all_label_for_map)
                log.info(
                    'Iter {}/{}: mAUC: {}, mAP: {}, mWAP: {}, mAP_all: {}'.
                    format(i, num_iter, auc, ap, wap, np.mean(aps)))
            else:
                # accuracy; label the top-k figure with the configured k
                log.info('Iter {}/{}: clip: {}, top1: {}, top {}: {}'.format(
                    i,
                    num_iter,
                    clip_acc / clip_count,
                    video_top1 / video_count,
                    args.top_k,
                    video_topk / video_count))

    if args.multi_label:
        # mAP
        auc, ap, wap, aps = metric.mean_ap_metric(
            all_prob_for_map, all_label_for_map)
        log.info("Test mAUC: {}, mAP: {}, mWAP: {}, mAP_all: {}".format(
            auc, ap, wap, np.mean(aps)))
        if args.print_per_class_metrics:
            log.info("Test mAP per class: {}".format(aps))
    else:
        # accuracy
        log.info("Test accuracy: clip: {}, top 1: {}, top{}: {}".format(
            clip_acc / clip_count,
            video_top1 / video_count,
            args.top_k,
            video_topk / video_count))

    if num_gpus > 0:
        flops, params, inters = model_helper.GetFlopsAndParams(
            test_model, gpus[0])
    else:
        flops, params, inters = model_helper.GetFlopsAndParams(test_model)
    log.info('FLOPs: {}, params: {}, inters: {}'.format(flops, params, inters))