Example #1
    def create_bn_aux_model(self, node_id):
        """
        bn_aux_model:
        1. It is like "train", as it uses training data.
        2. It is like "train", as only the "train" mode of bn returns sm/siv
        (sm/siv: the mean and inverse *std* of the current batch).
        3. It is like "val/test", as it does not backprop and does not update.
        4. Note: "rm/riv" is fully irrelevant in bn_aux_model.
        """
        self._model = model_builder_video.ModelBuilder(
            name='{}_bn_aux'.format(cfg.MODEL.MODEL_NAME),
            train=True,
            use_cudnn=True,
            cudnn_exhaustive_search=True,
            ws_nbytes_limit=(cfg.CUDNN_WORKSPACE_LIMIT * 1024 * 1024),
            split=cfg.TRAIN.DATA_TYPE,
            use_mem_cache=False,  # We don't cache here.
            force_fw_only=True,
        )
        self._model.build_model(node_id=node_id)

        workspace.CreateNet(self._model.net)
        # self._model.start_data_loader()

        misc.save_net_proto(self._model.net)

        self._find_bn_layers()
        self._clean_and_reset_buffer()
        return
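# The sm/siv statistics described in the docstring can be reproduced with
# plain NumPy. A minimal sketch (the helper name and epsilon are
# illustrative, not from this codebase) of what BN's train mode computes
# per channel on an NCHW batch:
import numpy as np

def batch_mean_and_inv_std(x, eps=1e-5):
    # Per-channel batch mean (sm) and inverse std (siv) over N, H, W.
    sm = x.mean(axis=(0, 2, 3))
    siv = 1.0 / np.sqrt(x.var(axis=(0, 2, 3)) + eps)
    return sm, siv

x = np.random.rand(8, 3, 16, 16).astype(np.float32)
sm, siv = batch_mean_and_inv_std(x)  # both have shape (3,)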
Example #2
def init_net():
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)

    cfg.TEST.DATA_TYPE = 'test'
    if cfg.TEST.TEST_FULLY_CONV:
        cfg.TRAIN.CROP_SIZE = cfg.TRAIN.JITTER_SCALES[0]
        cfg.TEST.USE_MULTI_CROP = 1
    elif cfg.TEST.TEST_FULLY_CONV_FLIP:
        cfg.TRAIN.CROP_SIZE = cfg.TRAIN.JITTER_SCALES[0]
        cfg.TEST.USE_MULTI_CROP = 2
    else:
        cfg.TRAIN.CROP_SIZE = 224

    workspace.ResetWorkspace()

    test_model = model_builder_video.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME),
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
    )
    test_model.build_model()

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    net = test_model.net
    checkpoints.load_model_from_params_file_for_test(test_model,
                                                     cfg.TEST.PARAMS_FILE)

    # Revise the input blob: replace `reader_val`/`reader_test` with a new
    # blob that enables frame-sequence input.
    clip_blob = core.BlobReference('gpu_0/data')
    # Inserting an op at the head of the network would require rebuilding
    # it; registering an external input blob is enough.
    net.AddExternalInput(clip_blob)

    # Delete the original video input op. The blob 'gpu_0/data' was fed by
    # that op before and is now fed by hand.
    ops = net.Proto().op
    # assert 'reader' in ops[0].name
    assert ops[0].type == 'CustomizedVideoInput'
    del ops[0]
    workspace.CreateBlob('gpu_0/data')

    workspace.CreateNet(net)
    return net
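# Usage sketch for the rebuilt net: with the reader op removed, 'gpu_0/data'
# must be fed by hand before each run. The N, C, T, H, W input shape and the
# fetched output blob name are assumptions based on the code above.
import numpy as np
from caffe2.python import workspace

net = init_net()
clip = np.random.rand(1, 3, 32, 224, 224).astype(np.float32)
workspace.FeedBlob('gpu_0/data', clip)
workspace.RunNet(net.Proto().name)
probs = workspace.FetchBlob('gpu_0/' + cfg.TEST.OUTPUT_NAME)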
Example #3
def create_wrapper(is_train, lfb=None):
    """
    a simpler wrapper that creates the elements for train/test models
    """
    if is_train:
        suffix = '_train'
        split = cfg.TRAIN.DATA_TYPE
    else:
        suffix = '_test'
        split = cfg.TEST.DATA_TYPE

    model = model_builder_video.ModelBuilder(
        train=is_train,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=(cfg.CUDNN_WORKSPACE_LIMIT * 1024 * 1024),
        split=split,
    )
    model.build_model(suffix=suffix, lfb=lfb)

    if cfg.PROF_DAG:
        model.net.Proto().type = 'prof_dag'
    else:
        model.net.Proto().type = 'dag'

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    model.start_data_loader()

    timer = Timer()
    meter = metrics.MetricsCalculator(
        model=model,
        split=split,
        video_idx_to_name=model.input_db._video_idx_to_name,
        total_num_boxes=(model.input_db._num_boxes_used
                         if cfg.DATASET in ['ava', 'avabox'] else None))

    misc.save_net_proto(model.net)
    misc.save_net_proto(model.param_init_net)

    return model, timer, meter
Example #4
def create_wrapper(is_train):
    """
    a simpler wrapper that creates the elements for train/test models
    """
    if is_train:
        suffix = '_train'
        split = cfg.TRAIN.DATA_TYPE
        use_mem_cache = cfg.TRAIN.MEM_CACHE
    else:  # is test
        suffix = '_test'
        split = cfg.TEST.DATA_TYPE
        use_mem_cache = True  # we always cache for test

    model = model_builder_video.ModelBuilder(
        name=cfg.MODEL.MODEL_NAME + suffix,
        train=is_train,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=(cfg.CUDNN_WORKSPACE_LIMIT * 1024 * 1024),
        split=split,
        use_mem_cache=use_mem_cache,
    )
    model.build_model()

    if cfg.PROF_DAG:
        model.net.Proto().type = 'prof_dag'
    else:
        model.net.Proto().type = 'dag'

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    # model.start_data_loader()

    timer = Timer()
    meter = metrics.MetricsCalculator(model=model, split=split)

    misc.save_net_proto(model.net)
    misc.save_net_proto(model.param_init_net)

    return model, timer, meter
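# Both wrappers return the same (model, timer, meter) triple, so a test loop
# can be written once against either. A minimal sketch; the meter call
# mirrors test_one_crop below, and misc.get_total_test_iters is the same
# helper used in the other examples:
model, timer, meter = create_wrapper(is_train=False)
total_iters = misc.get_total_test_iters(model)
for cur_iter in range(total_iters):
    timer.tic()
    workspace.RunNet(model.net.Proto().name)
    timer.toc()
    meter.calculate_and_log_all_metrics_test(
        cur_iter, timer, total_iters, '')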
Example #5
import numpy as np

# Generate a random input. Normalize for fun.
np.random.seed(123)
data = np.random.rand(4, 3, 32, 224, 224).astype(np.float32) * 255
data = (data - 114.75) / 57.375  # mean = 0.45*255, std = 0.225*255

#-----------------------------------------------------------------------------------------------#

from caffe2.python import workspace
from models import model_builder_video, resnet_video_org

workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
workspace.ResetWorkspace()

c2_net = model_builder_video.ModelBuilder(
        name='test', train=False,
        use_cudnn=False, cudnn_exhaustive_search=False,
        split='val')

c2_net.net.Proto().type = 'dag'

workspace.CreateBlob('data')
workspace.CreateBlob('labels')

c2_net, out_blob = resnet_video_org.create_model(
    model=c2_net, data='data', labels='labels', split='val',
    use_nl=(args.model == 'r50_nl'))

workspace.RunNetOnce(c2_net.param_init_net)
workspace.CreateNet(c2_net.net)

# load pretrained weights
if args.model=='r50':
    wt_file = 'pretrained/i3d_baseline_32x2_IN_pretrain_400k.pkl'
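# The snippet ends before the weights are actually loaded. A hedged sketch
# of the usual pattern for pushing a pickled blob dict into the workspace;
# the {'blobs': {name: array}} layout is an assumption (Detectron-style .pkl):
import pickle

with open(wt_file, 'rb') as f:
    checkpoint = pickle.load(f)
for name, value in checkpoint['blobs'].items():
    if workspace.HasBlob(name):
        workspace.FeedBlob(name, value)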
Example #6
def get_lfb(params_file, is_train):
    """
    Wrapper function for getting an LFB, which is either inferred given a
    baseline model, or loaded from a file.
    """

    if cfg.LFB.LOAD_LFB:
        return load_lfb(is_train)

    assert params_file, 'LFB.MODEL_PARAMS_FILE is not specified.'
    logger.info('Inferring LFB from %s' % params_file)

    cfg.GET_TRAIN_LFB = is_train

    timer = Timer()

    test_model = model_builder_video.ModelBuilder(
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
    )

    suffix = 'infer_{}'.format('train' if is_train else 'test')
    test_model.build_model(
        lfb_infer_only=True,
        suffix=suffix,
        shift=1,
    )

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    total_test_net_iters = misc.get_total_test_iters(test_model)

    test_model.start_data_loader()

    checkpoints.load_model_from_params_file_for_test(test_model, params_file)

    all_features = []
    all_metadata = []

    for test_iter in range(total_test_net_iters):

        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')
        if test_iter % 10 == 0:
            logger.info("Iter {}/{} Time: {}".format(test_iter,
                                                     total_test_net_iters,
                                                     timer.diff))

        if cfg.DATASET in ['ava', 'avabox']:
            all_features.append(get_features('box_pooled'))
            all_metadata.append(get_features('metadata{}'.format(suffix)))
        elif cfg.DATASET in ['charades', 'epic']:
            all_features.append(get_features('pool5'))

    lfb = construct_lfb(all_features, all_metadata, test_model.input_db,
                        is_train)

    logger.info("Shutting down data loader...")
    test_model.shutdown_data_loader()

    workspace.ResetWorkspace()
    logger.info("Done ResetWorkspace...")

    cfg.GET_TRAIN_LFB = False

    if cfg.LFB.WRITE_LFB:
        write_lfb(lfb, is_train)

    return lfb
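# construct_lfb is not shown in this example. A hypothetical sketch of the
# kind of grouping it performs on the AVA branch, assuming each metadata row
# is (video_id, sec, ...) aligned with its feature rows:
from collections import defaultdict

def construct_lfb_sketch(all_features, all_metadata):
    lfb = defaultdict(lambda: defaultdict(list))
    for feats, meta in zip(all_features, all_metadata):
        for row, feat in zip(meta, feats):
            video_id, sec = int(row[0]), int(row[1])
            lfb[video_id][sec].append(feat)
    return lfb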
Example #7
def test_net_one_section():
    """
    To save test-time memory, we perform multi-clip test in multiple "sections":
    e.g., 10-clip test can be done in 2 sections of 5-clip test
    """
    timer = Timer()
    results = []
    seen_inds = defaultdict(int)

    logger.warning('Testing started...')  # for monitoring cluster jobs
    test_model = model_builder_video.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME),
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
    )

    test_model.build_model()

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    misc.save_net_proto(test_model.net)
    misc.save_net_proto(test_model.param_init_net)

    total_test_net_iters = int(
        math.ceil(
            float(cfg.TEST.DATASET_SIZE * cfg.TEST.NUM_TEST_CLIPS) /
            cfg.TEST.BATCH_SIZE))

    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file_for_test(
            test_model, cfg.TEST.PARAMS_FILE)
    else:
        raise Exception('No params files specified for testing model.')

    for test_iter in range(total_test_net_iters):
        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')

        test_debug = False
        if test_debug:
            save_path = 'temp_save/'
            data_blob = workspace.FetchBlob('gpu_0/data')
            label_blob = workspace.FetchBlob('gpu_0/labels')
            print(label_blob)
            data_blob = data_blob * cfg.MODEL.STD + cfg.MODEL.MEAN
            for i in range(data_blob.shape[0]):
                for j in range(4):
                    temp_img = data_blob[i, :, j, :, :]
                    temp_img = temp_img.transpose([1, 2, 0])
                    temp_img = temp_img.astype(np.uint8)
                    fname = save_path + 'ori_' + str(test_iter) \
                        + '_' + str(i) + '_' + str(j) + '.jpg'
                    cv2.imwrite(fname, temp_img)
        """
        When testing, we assume all samples in the same gpu are of the same id
        """
        video_ids_list = []  # for logging
        for gpu_id in range(cfg.NUM_GPUS):
            prefix = 'gpu_{}/'.format(gpu_id)

            softmax_gpu = workspace.FetchBlob(prefix + cfg.TEST.OUTPUT_NAME)
            softmax_gpu = softmax_gpu.reshape((softmax_gpu.shape[0], -1))
            video_id_gpu = workspace.FetchBlob(prefix + 'labels')

            for i in range(len(video_id_gpu)):
                seen_inds[video_id_gpu[i]] += 1

            video_ids_list.append(video_id_gpu[0])
            # print(video_id_gpu)

            # collect results
            for i in range(softmax_gpu.shape[0]):
                probs = softmax_gpu[i].tolist()
                vid = video_id_gpu[i]
                if seen_inds[vid] > cfg.TEST.NUM_TEST_CLIPS:
                    logger.warning(
                        'Video id {} has been seen. Skip.'.format(vid))
                    continue

                save_pairs = [vid, probs]
                results.append(save_pairs)

        # ---- log
        eta = timer.average_time * (total_test_net_iters - test_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta)))
        logger.info(('{}/{} iter ({}/{} videos):' +
                     ' Time: {:.3f} (ETA: {}). ID: {}').format(
                         test_iter,
                         total_test_net_iters,
                         len(seen_inds),
                         cfg.TEST.DATASET_SIZE,
                         timer.diff,
                         eta,
                         video_ids_list,
                     ))

    return results
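# The returned results hold one (video_id, probs) pair per clip. A common
# downstream step (not part of this function) averages the multi-clip
# scores into one prediction per video:
import numpy as np
from collections import defaultdict

def average_clips(results):
    per_video = defaultdict(list)
    for vid, probs in results:
        per_video[vid].append(probs)
    return {vid: np.mean(p, axis=0) for vid, p in per_video.items()}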
Example #8
def load_feature_map(params_file, is_train):
    assert params_file, 'FEATURE_MAP_LOADER.MODEL_PARAMS_FILE is not specified.'
    assert cfg.FEATURE_MAP_LOADER.OUT_DIR, 'FEATURE_MAP_LOADER.OUT_DIR is not specified.'
    logger.info('Inferring feature map from %s' % params_file)

    cfg.FEATURE_MAP_LOADER.ENALBE = True

    cfg.GET_TRAIN_LFB = is_train

    timer = Timer()

    test_model = model_builder_video.ModelBuilder(
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
    )

    suffix = 'infer_{}'.format('train' if is_train else 'test')

    if cfg.LFB.ENABLED:
        lfb_path = os.path.join(cfg.LFB.LOAD_LFB_PATH,
                                'train_lfb.pkl' if is_train else 'val_lfb.pkl')
        logger.info('Loading LFB from %s' % lfb_path)
        with open(lfb_path, 'rb') as f:
            lfb = pickle.load(f)

        test_model.build_model(
            lfb=lfb,
            suffix=suffix,
            shift=1,
        )

    else:
        test_model.build_model(
            lfb=None,
            suffix=suffix,
            shift=1,
        )

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    total_test_net_iters = misc.get_total_test_iters(test_model)

    test_model.start_data_loader()

    checkpoints.load_model_from_params_file_for_test(test_model, params_file)

    all_features = {}
    for feat_name in cfg.FEATURE_MAP_LOADER.NAME_LIST:
        all_features[feat_name] = []

    all_metadata = []

    all_labels = []
    all_proposals = []
    all_original_boxes = []

    if cfg.FEATURE_MAP_LOADER.TEST_ITERS > 0:
        total_test_net_iters = cfg.FEATURE_MAP_LOADER.TEST_ITERS

    for test_iter in range(total_test_net_iters):

        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')
        if test_iter % 10 == 0:
            logger.info("Iter {}/{} Time: {}".format(test_iter,
                                                     total_test_net_iters,
                                                     timer.diff))

        if cfg.DATASET == "ava":
            for feat_name in cfg.FEATURE_MAP_LOADER.NAME_LIST:
                all_features[feat_name].append(get_features(feat_name))

            all_metadata.append(get_features('metadata{}'.format(suffix)))

            all_labels.append(get_features('labels{}'.format(suffix)))
            all_proposals.append(get_features('proposals{}'.format(suffix)))
            all_original_boxes.append(
                get_features('original_boxes{}'.format(suffix)))

#         elif cfg.DATASET in ['charades', 'epic']:
#             all_features.append(get_features('pool5'))
        else:
            raise Exception("Dataset {} not recognized.".format(cfg.DATASET))

    lfb = construct_lfb(all_features, all_metadata, all_labels, all_proposals,
                        all_original_boxes, test_model.input_db, is_train)

    write_lfb(lfb, is_train)

    logger.info("Shutting down data loader...")
    test_model.shutdown_data_loader()

    workspace.ResetWorkspace()
    logger.info("Done ResetWorkspace...")

    cfg.GET_TRAIN_LFB = False
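# write_lfb is referenced but not defined in this example. A hedged sketch
# of a pickle-based writer; the file names mirror the train_lfb.pkl /
# val_lfb.pkl convention used when loading, and OUT_DIR comes from the
# assert at the top of load_feature_map:
import pickle

def write_lfb_sketch(lfb, is_train):
    fname = 'train_lfb.pkl' if is_train else 'val_lfb.pkl'
    out_path = os.path.join(cfg.FEATURE_MAP_LOADER.OUT_DIR, fname)
    with open(out_path, 'wb') as f:
        pickle.dump(lfb, f, pickle.HIGHEST_PROTOCOL)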
Example #9
def test_one_crop(lfb=None, suffix='', shift=None):
    """Test one crop."""
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)

    cfg.AVA.FULL_EVAL = True

    if lfb is None and cfg.LFB.ENABLED:
        print_cfg()
        lfb = get_lfb(cfg.LFB.MODEL_PARAMS_FILE, is_train=False)

    print_cfg()

    workspace.ResetWorkspace()
    logger.info("Done ResetWorkspace...")

    timer = Timer()

    logger.warning('Testing started...')  # for monitoring cluster jobs

    if shift is None:
        shift = cfg.TEST.CROP_SHIFT
    test_model = model_builder_video.ModelBuilder(
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
    )

    test_model.build_model(lfb=lfb, suffix=suffix, shift=shift)

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    misc.save_net_proto(test_model.net)
    misc.save_net_proto(test_model.param_init_net)

    total_test_net_iters = misc.get_total_test_iters(test_model)

    test_model.start_data_loader()
    test_meter = metrics.MetricsCalculator(
        model=test_model,
        split=cfg.TEST.DATA_TYPE,
        video_idx_to_name=test_model.input_db._video_idx_to_name,
        total_num_boxes=(test_model.input_db._num_boxes_used
                         if cfg.DATASET in ['ava', 'avabox'] else None))

    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file_for_test(
            test_model, cfg.TEST.PARAMS_FILE)
    else:
        raise Exception('No params files specified for testing model.')

    begin_time = time.time()

    for test_iter in range(total_test_net_iters):
        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')
            misc.show_flops_params(test_model)

        test_meter.calculate_and_log_all_metrics_test(test_iter, timer,
                                                      total_test_net_iters,
                                                      suffix)

    logger.info('Total test time: {}'.format(time.time() - begin_time))

    test_meter.finalize_metrics(name=get_test_name(shift))
    test_meter.log_final_metrics(test_iter, total_test_net_iters)
    test_model.shutdown_data_loader()
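# test_one_crop covers a single crop. A minimal driver sketch for testing
# several crops; the shift values and suffixes here are illustrative:
lfb = None
if cfg.LFB.ENABLED:
    lfb = get_lfb(cfg.LFB.MODEL_PARAMS_FILE, is_train=False)
for shift in range(3):
    test_one_crop(lfb=lfb, suffix='_shift{}'.format(shift), shift=shift)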
Example #10
def test_net_one_section(full_label_fname=None, store_vis=False):
    """
    To save test-time memory, we perform multi-clip test in multiple
    "sections":
    e.g., 10-clip test can be done in 2 sections of 5-clip test
    Args:
        full_label_id: If set uses this LMDB file, and assumes the full labels
            are being provided
        store_vis: Store visualization of what the model learned, CAM
            style stuff
    """
    timer = Timer()
    results = []
    seen_inds = defaultdict(int)

    logger.warning('Testing started...')  # for monitoring cluster jobs
    test_model = model_builder_video.ModelBuilder(
        name='{}_test'.format(cfg.MODEL.MODEL_NAME),
        train=False,
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        split=cfg.TEST.DATA_TYPE,
        split_dir_name=(full_label_fname if full_label_fname is not None else
                        cfg.TEST.DATA_TYPE))

    test_model.build_model()

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    misc.save_net_proto(test_model.net)
    misc.save_net_proto(test_model.param_init_net)

    total_test_net_iters = int(
        math.ceil(
            float(cfg.TEST.DATASET_SIZE * cfg.TEST.NUM_TEST_CLIPS) /
            cfg.TEST.BATCH_SIZE))

    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file_for_test(
            test_model, cfg.TEST.PARAMS_FILE)
    else:
        cfg.TEST.PARAMS_FILE = checkpoints.get_checkpoint_resume_file()
        checkpoints.load_model_from_params_file_for_test(
            test_model, cfg.TEST.PARAMS_FILE)
        logger.info('No params file specified for testing but found the '
                    'last trained one: {}'.format(cfg.TEST.PARAMS_FILE))
        # raise Exception('No params files specified for testing model.')

    for test_iter in range(total_test_net_iters):
        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')

        test_debug = False
        if test_debug:
            save_path = 'temp_save/'
            data_blob = workspace.FetchBlob('gpu_0/data')
            label_blob = workspace.FetchBlob('gpu_0/labels')
            print(label_blob)
            data_blob = data_blob * cfg.MODEL.STD + cfg.MODEL.MEAN
            for i in range(data_blob.shape[0]):
                for j in range(4):
                    temp_img = data_blob[i, :, j, :, :]
                    temp_img = temp_img.transpose([1, 2, 0])
                    temp_img = temp_img.astype(np.uint8)
                    fname = save_path + 'ori_' + str(test_iter) \
                        + '_' + str(i) + '_' + str(j) + '.jpg'
                    cv2.imwrite(fname, temp_img)
        """
        When testing, we assume all samples in the same gpu are of the same id.
        ^ This comment is from the original code. Anyway not sure why it should
        be the case.. we are extracting out the labels for each element of the
        batch anyway... Where is this assumption being used?
        ^ Checked with Xiaolong, ignore this.
        """
        video_ids_list = []  # for logging
        for gpu_id in range(cfg.NUM_GPUS):
            prefix = 'gpu_{}/'.format(gpu_id)

            # Note that this is called softmax_gpu, but could also be
            # sigmoid.
            softmax_gpu = workspace.FetchBlob(prefix + 'activation')
            softmax_gpu = softmax_gpu.reshape((softmax_gpu.shape[0], -1))
            # Mean the fc7 over time and space, to get a compact feature
            # This has already been passed through AvgPool op, but might not
            # have averaged all the way
            fc7 = np.mean(workspace.FetchBlob(prefix + 'fc7'),
                          axis=(-1, -2, -3))
            # IMP! The label blob at test time contains the "index" to the
            # video, and not the video class. This is how the lmdb gen scripts
            # are set up. @xiaolonw needs it to get predictions for each video
            # and then re-reads the label file to get the actual class labels
            # to compute the test accuracy.
            video_id_gpu = workspace.FetchBlob(prefix + 'labels')
            temporal_crop_id = [None] * len(video_id_gpu)
            spatial_crop_id = [None] * len(video_id_gpu)
            if full_label_fname is not None:
                video_id_gpu, temporal_crop_id, spatial_crop_id = (
                    label_id_to_parts(video_id_gpu))

            for i in range(len(video_id_gpu)):
                seen_inds[video_id_gpu[i]] += 1

            video_ids_list.append(video_id_gpu[0])
            # print(video_id_gpu)

            if store_vis:
                save_dir = osp.join(cfg.CHECKPOINT.DIR,
                                    'vis_{}'.format(full_label_fname))
                data_blob = workspace.FetchBlob(prefix + 'data')
                label_blob = workspace.FetchBlob(prefix + 'labels')
                fc7_full = workspace.FetchBlob(prefix + 'fc7_beforeAvg')
                data_blob = data_blob * cfg.MODEL.STD + cfg.MODEL.MEAN
                for i in range(data_blob.shape[0]):
                    if temporal_crop_id[i] != 0 or spatial_crop_id[i] != 1:
                        # Only visualizing the first center clip
                        continue
                    gen_store_vis(frames=data_blob[i],
                                  fc7_feats=fc7_full[i],
                                  outfpath=osp.join(save_dir,
                                                    str(video_id_gpu[i])))

            # collect results
            for i in range(softmax_gpu.shape[0]):
                probs = softmax_gpu[i].tolist()
                vid = video_id_gpu[i]
                if seen_inds[vid] > cfg.TEST.NUM_TEST_CLIPS:
                    logger.warning(
                        'Video id {} has been seen. Skip.'.format(vid))
                    continue

                save_pairs = [
                    vid, probs, temporal_crop_id[i], spatial_crop_id[i], fc7[i]
                ]
                results.append(save_pairs)

        # ---- log
        eta = timer.average_time * (total_test_net_iters - test_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta)))
        logger.info(('{}/{} iter ({}/{} videos):' +
                     ' Time: {:.3f} (ETA: {}). ID: {}').format(
                         test_iter,
                         total_test_net_iters,
                         len(seen_inds),
                         cfg.TEST.DATASET_SIZE,
                         timer.diff,
                         eta,
                         video_ids_list,
                     ))

    return results
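# label_id_to_parts is not shown. Per the comments above, the test-time
# label packs a video index with temporal/spatial crop ids; a hypothetical
# decoder, assuming ids are packed as vid * (T * S) + t * S + s:
import numpy as np

N_TEMPORAL, N_SPATIAL = 10, 3  # assumed crop grid

def label_id_to_parts_sketch(label_ids):
    label_ids = np.asarray(label_ids)
    vid = label_ids // (N_TEMPORAL * N_SPATIAL)
    rem = label_ids % (N_TEMPORAL * N_SPATIAL)
    return vid, rem // N_SPATIAL, rem % N_SPATIAL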