Пример #1
0
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
        spatial_transform.append(ToArray())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data = get_inference_data(opt.video_path, opt.annotation_path,
                                        opt.dataset, opt.input_type,
                                        opt.file_type, opt.inference_subset,
                                        spatial_transform, temporal_transform)

    inference_loader = paddle.batch(inference_data.reader,
                                    batch_size=opt.inference_batch_size)

    return inference_loader, inference_data.class_names
Пример #2
0
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.extend(
        [ToTensor(), ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.file_type,
        opt.inference_subset, spatial_transform, temporal_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
Пример #3
0
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.inference_label_path, opt.video_id_path,
                                   'test', opt.inference_frame_dir, opt.image_size, window_size=opt.window_size)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        worker_init_fn=worker_init_fn)
        # collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
Пример #4
0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToArray(),
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data = get_validation_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    val_loader = paddle.batch(val_data.reader, batch_size=opt.batch_size)

    val_logger = Logger(opt.result_path / 'val.log', ['epoch', 'loss', 'acc'])

    return val_loader, val_logger
Пример #5
0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    if opt.train_crop == 'other':
        spatial_transform = [
            Resize((opt.scale_h, opt.scale_w)),
            RandomCrop(opt.sample_size),
            ToTensor()
        ]
    else:
        spatial_transform = [
            Resize(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor()
        ]

    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))

    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))

    temporal_transform = TemporalCompose(temporal_transform)

    val_data, collate_fn = get_validation_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
        opt.file_type, spatial_transform, temporal_transform)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None

    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn,
                                             collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc', 'acc_num'])
    else:
        val_logger = None
    return val_loader, val_logger
Пример #6
0
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inf_data_checkpoint_path = opt.result_path / Path('inf_data_' +
                                                      opt.dataset + '.data')
    inf_collate_checkpoint_path = opt.result_path / Path('inf_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(inf_data_checkpoint_path) and os.path.exists(
            inf_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(inf_data_checkpoint_path, 'rb') as filehandle:
            inference_data = pickle.load(filehandle)
        with open(inf_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        inference_data, collate_fn = get_inference_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, opt.inference_subset, spatial_transform,
            temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(inf_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(inference_data, filehandle)
            with open(inf_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
Пример #7
0
def compute_saliency_maps(model, opt):
    # Generate tiny data loader
    # Loop through it to generate saliency maps
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    tiny_video_path = Path('/home/ruta/teeny_data/nturgb/jpg')
    tiny_annotation_path = Path('/home/ruta/teeny_data/ntu_01.json')
    tiny_data, collate_fn = get_inference_data(
        tiny_video_path, tiny_annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    tiny_loader = torch.utils.data.DataLoader(
        tiny_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=None,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    saliency_maps = []
    for i, (inputs, targets) in enumerate(tiny_loader):
        sal_map = get_saliency_map(inputs, targets, model, opt)
        # Plot the saliency map using matplotlib and save to a file
        plot_saliency(sal_map, i, inputs, targets)
        saliency_maps.append(sal_map)

    return saliency_maps
Пример #8
0
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.video_path,
                                                    opt.input_type,
                                                    opt.file_type,
                                                    spatial_transform,
                                                    temporal_transform)

    #     inference_data, collate_fn = get_inference_data(
    #         opt.video_path, opt.input_type, opt.file_type,
    #         spatial_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    df = pd.read_csv('kinetics_700_labels.csv')
    class_names = {}
    for i in range(df.shape[0]):
        row = df.iloc[i]
        class_names[row[0]] = row[1]

    return inference_loader, class_names
Пример #9
0
def retrieve_spatial_temporal_transforms(opt):
    opt.mean, opt.std = get_mean_std(opt.value_scale, dataset=opt.mean_dataset)
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [Resize(opt.sample_size),
                         CenterCrop(opt.sample_size),
                         ToTensor(),
                         ScaleValue(opt.value_scale),
                         normalize]
    spatial_transform = Compose(spatial_transform)
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalNonOverlappingWindow(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)
    return spatial_transform, temporal_transform
Пример #10
0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data, collate_fn = get_validation_data(opt.label_path, opt.video_id_path,
                                   'val', opt.frame_dir, opt.image_size, window_size=opt.window_size)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=False,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn)
                                             # collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc', 'precision', 'recall', 'f1', 'tiou'])
    else:
        val_logger = None

    return val_loader, val_logger
Пример #11
0
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
inference_stride = 16

#normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)
spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
if input_type == 'flow':
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.append(ToTensor())
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)

# 加载模型
#print('load model begin!')
model = generate_model_resnet(1)  # 生成resnet模型
#model = torch.load('./save_200.pth')
checkpoint = torch.load('./save_200.pth', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
#print(model)
model.eval()  # 固定batchnorm,dropout等,一定要有
model = model.to(device)
#print('load model done!')

count = 0
Пример #13
0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn,
                                             collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
Пример #14
0
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    spatial_transform.append(ToArray())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)

    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)

    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
    def score(self):

        normalize = get_normalize_method(self.opt.mean, self.opt.std, self.opt.no_mean_norm,
                                         self.opt.no_std_norm)
        spatial_transform = [
            Resize(self.opt.sample_size),
            CenterCrop(self.opt.sample_size),
            ToTensor()
        ]

        spatial_transform.extend([ScaleValue(self.opt.value_scale), normalize])
        spatial_transform = Compose(spatial_transform)

        temporal_transform = []
        if self.opt.sample_t_stride > 1:
            temporal_transform.append(TemporalSubsampling(self.opt.sample_t_stride))
        temporal_transform.append(
            TemporalEvenCrop(self.opt.sample_duration, self.opt.n_val_samples))
        temporal_transform = TemporalCompose(temporal_transform)


        frame_count = get_n_frames(self.opt.video_jpgs_dir_path)

        frame_indices = list(range(0, frame_count))

        frame_indices = temporal_transform(frame_indices)

        spatial_transform.randomize_parameters()

        image_name_formatter = lambda x: f'image_{x:05d}.jpg'

        loader = VideoLoader(image_name_formatter)

        print('frame_indices', frame_indices)

        #clips = []
        video_outputs = []
        model = generate_model(self.opt)


        model = load_pretrained_model(model, self.opt.pretrain_path, self.opt.model,
                                      self.opt.n_finetune_classes)

        i =0
        for frame_indice in frame_indices:
            print("%d indice: %s" % (i, str(frame_indice)))
            i+=1

            clip = loader(self.opt.video_jpgs_dir_path, frame_indice)



            clip = [spatial_transform(img) for img in clip]
            clip = torch.stack(clip, 0).permute(1, 0, 2, 3)





            #parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)


            #print('clips:', clips)


            #for clip in clips:
            with torch.no_grad():

                print(clip.shape)
                output = model(torch.unsqueeze(clip, 0))
                output = F.softmax(output, dim=1).cpu()

                #print(output)
                video_outputs.append(output[0])

            del clip

        video_outputs = torch.stack(video_outputs)
        average_scores = torch.mean(video_outputs, dim=0)

        #inference_loader, inference_class_names = main.get_inference_utils(self.opt)
        with self.opt.annotation_path.open('r') as f:
            data = json.load(f)

        class_to_idx = get_class_labels(data)
        idx_to_class = {}
        for name, label in class_to_idx.items():
            idx_to_class[label] = name
        print(idx_to_class)

        inference_result = inference.get_video_results(
            average_scores, idx_to_class, self.opt.output_topk)

        print(inference_result)