Example #1
def Test(args):
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        gpus = range(args.num_gpus)
        num_gpus = args.num_gpus

    if num_gpus > 0:
        total_batch_size = args.batch_size * num_gpus
        log.info("Running on GPUs: {}".format(gpus))
        log.info("total_batch_size: {}".format(total_batch_size))
    else:
        total_batch_size = args.batch_size
        log.info("Running on CPU")
        log.info("total_batch_size: {}".format(total_batch_size))

    # Model building functions
    def create_model_ops(model, loss_scale):
        return model_builder.build_model(
            model=model,
            model_name=args.model_name,
            model_depth=args.model_depth,
            num_labels=args.num_labels,
            num_channels=args.num_channels,
            crop_size=args.crop_size,
            clip_length=(
                args.clip_length_of if args.input_type == 1
                else args.clip_length_rgb
            ),
            loss_scale=loss_scale,
            is_test=1,
            pred_layer_name=args.pred_layer_name,
        )

    test_model = cnn.CNNModelHelper(
        order="NCHW",
        name="video_model_test",
        use_cudnn=(args.use_cudnn == 1),
        cudnn_exhaustive_search=True,
    )

    test_reader, number_of_examples = model_builder.create_data_reader(
        test_model,
        name="test_reader",
        input_data=args.test_data,
    )

    if args.num_iter <= 0:
        num_iter = int(number_of_examples / total_batch_size)
    else:
        num_iter = args.num_iter

    def test_input_fn(model):
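        # note: the `model` argument supplied by Parallelize_GPU is unused
        # here; the input ops are added to the captured test_model directly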
        model_helper.AddVideoInput(
            test_model,
            test_reader,
            batch_size=args.batch_size,
            clip_per_video=args.clip_per_video,
            decode_type=1,
            length_rgb=args.clip_length_rgb,
            sampling_rate_rgb=args.sampling_rate_rgb,
            scale_h=args.scale_h,
            scale_w=args.scale_w,
            crop_size=args.crop_size,
            num_decode_threads=4,
            num_of_class=args.num_labels,
            random_mirror=False,
            random_crop=False,
            input_type=args.input_type,
            length_of=args.clip_length_of,
            sampling_rate_of=args.sampling_rate_of,
            frame_gap_of=args.frame_gap_of,
            do_flow_aggregation=args.do_flow_aggregation,
            flow_data_type=args.flow_data_type,
            get_rgb=(args.input_type == 0),
            get_optical_flow=(args.input_type == 1),
            get_video_id=args.get_video_id,
            use_local_file=args.use_local_file,
        )

    if num_gpus > 0:
        data_parallel_model.Parallelize_GPU(
            test_model,
            input_builder_fun=test_input_fn,
            forward_pass_builder_fun=create_model_ops,
            param_update_builder_fun=None,
            devices=gpus
        )
    else:
        test_model._device_type = caffe2_pb2.CPU
        test_model._devices = [0]
        device_opt = core.DeviceOption(test_model._device_type, 0)
        with core.DeviceScope(device_opt):
            # Because our loaded models are named with "gpu_x", keep the naming for now.
            # TODO: Save model using `data_parallel_model.ExtractPredictorNet`
            # to extract the model for "gpu_0". It also renames
            # the input and output blobs by stripping the "gpu_x/" prefix
            with core.NameScope("{}_{}".format("gpu", 0)):
                test_input_fn(test_model)
                create_model_ops(test_model, 1.0)

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    if args.db_type == 'minidb':
        if num_gpus > 0:
            model_helper.LoadModel(args.load_model_path, args.db_type)
            data_parallel_model.FinalizeAfterCheckpoint(test_model)
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
                model_helper.LoadModel(args.load_model_path, args.db_type)
    elif args.db_type == 'pickle':
        if num_gpus > 0:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=True,
                root_gpu_id=gpus[0]
            )
            data_parallel_model.FinalizeAfterCheckpoint(test_model)
        else:
            model_loader.LoadModelFromPickleFile(
                test_model,
                args.load_model_path,
                use_gpu=False
            )
    else:
        log.warning("Unsupported db_type: {}".format(args.db_type))


    # metric counters for classification
    clip_acc = 0
    video_top1 = 0
    video_topk = 0
    video_count = 0
    clip_count = 0

    for i in range(num_iter):
        workspace.RunNet(test_model.net.Proto().name)
        num_devices = 1  # default for cpu
        if num_gpus > 0:
            num_devices = num_gpus

        for g in range(num_devices):
            # use the actual GPU id in blob names; the CPU path builds
            # under the "gpu_0" name scope (see above)
            device_id = gpus[g] if num_gpus > 0 else 0
            # get labels
            label = workspace.FetchBlob("gpu_{}".format(device_id) + '/label')
            # get predictions
            predicts = workspace.FetchBlob(
                "gpu_{}".format(device_id) + '/softmax')
            assert predicts.shape[0] == args.batch_size * args.clip_per_video

            for j in range(args.batch_size):
                # get label for one video
                sample_label = label[j * args.clip_per_video]
                # get clip accuracy
                for k in range(args.clip_per_video):
                    c1, _ = metric.accuracy_metric(
                        predicts[j * args.clip_per_video + k, :],
                        label[j * args.clip_per_video + k])
                    clip_acc = clip_acc + c1
                # get all clip predictions for one video
                all_clips = predicts[
                    j * args.clip_per_video:(j + 1) * args.clip_per_video, :]
                # aggregate predictions into one
                video_pred = PredictionAggregation(all_clips, args.aggregation)
                c1, ck = metric.accuracy_metric(
                    video_pred, sample_label, args.top_k)
                video_top1 = video_top1 + c1
                video_topk = video_topk + ck

            video_count = video_count + args.batch_size
            clip_count = clip_count + label.shape[0]

        if i > 0 and i % args.display_iter == 0:
            log.info('Iter {}/{}: clip: {}, top1: {}, top{}: {}'.format(
                i,
                num_iter,
                clip_acc / clip_count,
                video_top1 / video_count,
                args.top_k,
                video_topk / video_count))

    log.info("Test accuracy: clip: {}, top 1: {}, top{}: {}".format(
        clip_acc / clip_count,
        video_top1 / video_count,
        args.top_k,
        video_topk / video_count
    ))

    if num_gpus > 0:
        flops, params = model_helper.GetFlopsAndParams(test_model, gpus[0])
    else:
        flops, params = model_helper.GetFlopsAndParams(test_model)
    log.info('FLOPs: {}, params: {}'.format(flops, params))
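Note: both examples call PredictionAggregation(all_clips, args.aggregation) to collapse the per-clip scores into a single video-level prediction, but the helper itself is not shown. Below is a minimal sketch of one plausible implementation; the numeric aggregation codes (0 = mean, 1 = max) are an assumption for illustration, not the repo's confirmed mapping.

import numpy as np

def PredictionAggregation(clip_preds, method):
    # clip_preds: (num_clips, num_labels) array of per-clip scores
    # method codes below are hypothetical: 0 = mean, 1 = max
    if method == 0:
        return np.mean(clip_preds, axis=0)
    elif method == 1:
        return np.max(clip_preds, axis=0)
    raise ValueError('unknown aggregation method: {}'.format(method))

Mean aggregation is the usual default for multi-clip video testing; max aggregation favors the single most confident clip.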
Example #2
def Test(args):
    assert args.batch_size == 1  # large-input testing assumes a batch size of one
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        gpus = range(args.num_gpus)
        num_gpus = args.num_gpus

    if num_gpus > 0:
        total_batch_size = args.batch_size * num_gpus
        log.info("Running on GPUs: {}".format(gpus))
        log.info("total_batch_size: {}".format(total_batch_size))
    else:
        total_batch_size = args.batch_size
        log.info("Running on CPU")
        log.info("total_batch_size: {}".format(total_batch_size))

    video_input_args = dict(
        batch_size=args.batch_size,
        clip_per_video=args.clip_per_video,
        decode_type=1,
        length_rgb=args.clip_length_rgb,
        sampling_rate_rgb=args.sampling_rate_rgb,
        scale_h=args.scale_h,
        scale_w=args.scale_w,
        crop_size=args.crop_size,
        video_res_type=args.video_res_type,
        short_edge=min(args.scale_h, args.scale_w),
        num_decode_threads=args.num_decode_threads,
        do_multi_label=args.multi_label,
        num_of_class=args.num_labels,
        random_mirror=False,
        random_crop=False,
        input_type=args.input_type,
        length_of=args.clip_length_of,
        sampling_rate_of=args.sampling_rate_of,
        frame_gap_of=args.frame_gap_of,
        do_flow_aggregation=args.do_flow_aggregation,
        flow_data_type=args.flow_data_type,
        get_rgb=(args.input_type == 0 or args.input_type >= 3),
        get_optical_flow=(args.input_type == 1 or args.input_type >= 4),
        use_local_file=args.use_local_file,
        crop_per_clip=args.crop_per_clip,
    )

    reader_args = dict(
        name="test_reader",
        input_data=args.test_data,
    )

    # Model building functions
    def create_model_ops(model, loss_scale):
        return model_builder.build_model(
            model=model,
            model_name=args.model_name,
            model_depth=args.model_depth,
            num_labels=args.num_labels,
            batch_size=args.batch_size * args.clip_per_video,
            num_channels=args.num_channels,
            crop_size=args.crop_size,
            clip_length=(args.clip_length_of
                         if args.input_type == 1 else args.clip_length_rgb),
            loss_scale=loss_scale,
            is_test=1,
            pred_layer_name=args.pred_layer_name,
            multi_label=args.multi_label,
            channel_multiplier=args.channel_multiplier,
            bottleneck_multiplier=args.bottleneck_multiplier,
            use_dropout=args.use_dropout,
            conv1_temporal_stride=args.conv1_temporal_stride,
            conv1_temporal_kernel=args.conv1_temporal_kernel,
            use_convolutional_pred=args.use_convolutional_pred,
            use_pool1=args.use_pool1,
        )

    def empty_function(model, loss_scale=1):
        # no-op builder: used when a Parallelize_GPU stage needs no ops
        return

    test_data_loader = cnn.CNNModelHelper(
        order="NCHW",
        name="data_loader",
    )
    test_model = cnn.CNNModelHelper(
        order="NCHW",
        name="video_model",
        use_cudnn=(args.use_cudnn == 1),
        cudnn_exhaustive_search=True,
    )

    test_reader, number_of_examples = reader_utils.create_data_reader(
        test_data_loader, **reader_args)

    if args.num_iter <= 0:
        num_iter = int(math.ceil(number_of_examples / total_batch_size))
    else:
        num_iter = args.num_iter

    def test_input_fn(model):
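        # note: `model` is unused; the input ops are added to the separate
        # test_data_loader net, while test_model holds the model ops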
        model_helper.AddVideoInput(test_data_loader, test_reader,
                                   **video_input_args)

    if num_gpus > 0:
        data_parallel_model.Parallelize_GPU(
            test_data_loader,
            input_builder_fun=test_input_fn,
            forward_pass_builder_fun=empty_function,
            param_update_builder_fun=None,
            devices=gpus,
            optimize_gradient_memory=True,
        )
        data_parallel_model.Parallelize_GPU(
            test_model,
            input_builder_fun=empty_function,
            forward_pass_builder_fun=create_model_ops,
            param_update_builder_fun=None,
            devices=gpus,
            optimize_gradient_memory=True,
        )
    else:
        test_model._device_type = caffe2_pb2.CPU
        test_model._devices = [0]
        device_opt = core.DeviceOption(test_model._device_type, 0)
        with core.DeviceScope(device_opt):
            # Because our loaded models are named with "gpu_x",
            # keep the naming for now.
            # TODO: Save model using `data_parallel_model.ExtractPredictorNet`
            # to extract the model for "gpu_0". It also renames
            # the input and output blobs by stripping the "gpu_x/" prefix
            with core.NameScope("{}_{}".format("gpu", 0)):
                test_input_fn(test_data_loader)
                create_model_ops(test_model, 1.0)

    workspace.RunNetOnce(test_data_loader.param_init_net)
    workspace.CreateNet(test_data_loader.net)
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    if args.db_type == 'minidb':
        if num_gpus > 0:
            model_helper.LoadModel(args.load_model_path, args.db_type)
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
                model_helper.LoadModel(args.load_model_path, args.db_type)
    elif args.db_type == 'pickle':
        if num_gpus > 0:
            model_loader.LoadModelFromPickleFile(test_model,
                                                 args.load_model_path,
                                                 use_gpu=True,
                                                 root_gpu_id=gpus[0])
        else:
            model_loader.LoadModelFromPickleFile(test_model,
                                                 args.load_model_path,
                                                 use_gpu=False)
    else:
        log.warning("Unsupported db_type: {}".format(args.db_type))

    if num_gpus > 0:
        data_parallel_model.FinalizeAfterCheckpoint(test_model)

    # metric counters for multi-label classification
    # (np.float64 instead of the np.float alias removed in NumPy 1.24)
    all_prob_for_map = np.empty(shape=[0, args.num_labels], dtype=np.float64)
    all_label_for_map = np.empty(shape=[0, args.num_labels], dtype=np.int32)

    # metric counters for closed-world classification
    clip_acc = 0
    video_top1 = 0
    video_topk = 0
    video_count = 0
    clip_count = 0

    num_devices = 1  # default for cpu
    if num_gpus > 0:
        num_devices = num_gpus
    # device ids used in blob name scopes ("gpu_x/..."); the CPU path also
    # builds under the "gpu_0" name scope (see above)
    device_ids = list(gpus) if num_gpus > 0 else [0]
    # actual batch size fed to the net on each inference run
    inference_batch_size = args.crop_per_inference
    num_crop_per_bag = args.clip_per_video * args.crop_per_clip
    # the crops of one video must split evenly into inference batches
    assert num_crop_per_bag % inference_batch_size == 0
    num_slice = int(num_crop_per_bag / inference_batch_size)
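    # worked example with hypothetical values: clip_per_video=10 and
    # crop_per_clip=3 give num_crop_per_bag = 30; with crop_per_inference=10,
    # the 30 crops run as num_slice = 3 inference batches of 10 crops each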

    for i in range(num_iter):
        # load one batch of data; assume one video per device, shaped
        # (#clips x #crops) x 3 x crop_size x crop_size
        workspace.RunNet(test_data_loader.net.Proto().name)

        # get all data into a list, each per device (gpu)
        video_data = []
        label_data = []
        all_predicts = []
        for g in range(num_devices):
            data = workspace.FetchBlob(
                "gpu_{}".format(device_ids[g]) + '/data')
            video_data.append(data)
            label = workspace.FetchBlob(
                "gpu_{}".format(device_ids[g]) + '/label')
            label_data.append(label)
            all_predicts.append([])

        for sl in range(num_slice):  # 'slice' would shadow the builtin
            start = sl * inference_batch_size
            end = start + inference_batch_size
            for g in range(num_devices):
                data = video_data[g][start:end, :, :, :, :]
                if args.multi_label:
                    label = label_data[g][start:end, :]
                else:
                    label = label_data[g][start:end]
                workspace.FeedBlob("gpu_{}".format(device_ids[g]) + '/data',
                                   data)
                workspace.FeedBlob("gpu_{}".format(device_ids[g]) + '/label',
                                   label)

            # do one iteration of inference over one slice across devices
            workspace.RunNet(test_model.net.Proto().name)

            for g in range(num_devices):
                # get predictions
                if args.multi_label:
                    predicts = workspace.FetchBlob(
                        "gpu_{}".format(device_ids[g]) + '/prob')
                else:
                    predicts = workspace.FetchBlob(
                        "gpu_{}".format(device_ids[g]) + '/softmax')

                assert predicts.shape[0] == inference_batch_size

                # accumulate predictions
                if len(all_predicts[g]) == 0:
                    all_predicts[g] = predicts
                else:
                    all_predicts[g] = np.concatenate(
                        (all_predicts[g], predicts), axis=0)

        for g in range(num_devices):
            # get clip accuracy
            predicts = all_predicts[g]
            if args.multi_label:
                sample_label = label_data[g][0, :]
            else:
                sample_label = label_data[g][0]
            for k in range(num_crop_per_bag):
                sorted_preds = np.argsort(predicts[k, :])[::-1]
                if sorted_preds[0] == sample_label:
                    clip_acc = clip_acc + 1

            # since batch_size == 1
            all_clips = predicts
            # aggregate predictions into one
            video_pred = PredictionAggregation(all_clips, args.aggregation)
            if args.multi_label:
                video_pred = np.expand_dims(video_pred, axis=0)
                sample_label = np.expand_dims(sample_label, axis=0)
                all_prob_for_map = np.concatenate(
                    (all_prob_for_map, video_pred), axis=0)
                all_label_for_map = np.concatenate(
                    (all_label_for_map, sample_label), axis=0)
            else:
                sorted_video_pred = np.argsort(video_pred)[::-1]
                if sorted_video_pred[0] == sample_label:
                    video_top1 = video_top1 + 1
                if sample_label in sorted_video_pred[0:args.top_k]:
                    video_topk = video_topk + 1

        video_count = video_count + num_devices
        clip_count = clip_count + num_devices * num_crop_per_bag

        if i > 0 and i % args.display_iter == 0:
            if args.multi_label:
                # mAP
                auc, ap, wap, aps = metric.mean_ap_metric(
                    all_prob_for_map, all_label_for_map)
                log.info(
                    'Iter {}/{}: mAUC: {}, mAP: {}, mWAP: {}, mAP_all: {}'
                    .format(i, num_iter, auc, ap, wap, np.mean(aps)))
            else:
                # accuracy
                log.info('Iter {}/{}: clip: {}, top1: {}, top{}: {}'.format(
                    i, num_iter, clip_acc / clip_count,
                    video_top1 / video_count, args.top_k,
                    video_topk / video_count))

    if args.multi_label:
        # mAP
        auc, ap, wap, aps = metric.mean_ap_metric(all_prob_for_map,
                                                  all_label_for_map)
        log.info("Test mAUC: {}, mAP: {}, mWAP: {}, mAP_all: {}".format(
            auc, ap, wap, np.mean(aps)))
        if args.print_per_class_metrics:
            log.info("Test mAP per class: {}".format(aps))
    else:
        # accuracy
        log.info("Test accuracy: clip: {}, top 1: {}, top{}: {}".format(
            clip_acc / clip_count, video_top1 / video_count, args.top_k,
            video_topk / video_count))

    if num_gpus > 0:
        flops, params, inters = model_helper.GetFlopsAndParams(
            test_model, gpus[0])
    else:
        flops, params, inters = model_helper.GetFlopsAndParams(test_model)
    log.info('FLOPs: {}, params: {}, inters: {}'.format(flops, params, inters))
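Note: metric.accuracy_metric is used above with a score vector, a ground-truth label, and an optional top_k, and it returns a (top-1, top-k) pair of 0/1 correctness indicators. A minimal sketch consistent with that call pattern follows; the default top_k value and the exact return contract are assumptions.

import numpy as np

def accuracy_metric(pred, label, top_k=5):
    # pred: (num_labels,) score vector; label: ground-truth class index
    ranked = np.argsort(pred)[::-1]  # class ids sorted by descending score
    correct_top1 = 1 if ranked[0] == label else 0
    correct_topk = 1 if label in ranked[:top_k] else 0
    return correct_top1, correct_topk

This matches both call sites: Example #1 calls it with two arguments (relying on the top_k default) for clip accuracy, and with an explicit args.top_k for video accuracy, summing the returned indicators into clip_acc, video_top1, and video_topk.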