def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    # NOTE: this variant does not pass the transforms built above to
    # get_inference_data.
    inference_data, collate_fn = get_inference_data(
        opt.inference_label_path,
        opt.video_id_path,
        'test',
        opt.inference_frame_dir,
        opt.image_size,
        window_size=opt.window_size)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        worker_init_fn=worker_init_fn)
    # collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    if opt.train_crop == 'other':
        spatial_transform = [
            Resize((opt.scale_h, opt.scale_w)),
            RandomCrop(opt.sample_size),
            ToTensor()
        ]
    else:
        spatial_transform = [
            Resize(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor()
        ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data, collate_fn = get_validation_data(opt.video_path,
                                               opt.annotation_path,
                                               opt.dataset, opt.input_type,
                                               opt.file_type,
                                               spatial_transform,
                                               temporal_transform)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc', 'acc_num'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inf_data_checkpoint_path = opt.result_path / Path('inf_data_' +
                                                      opt.dataset + '.data')
    inf_collate_checkpoint_path = opt.result_path / Path('inf_coll_' +
                                                         opt.dataset + '.data')
    # Reuse the pickled dataset and collate_fn if both cache files exist;
    # otherwise build them and (optionally) cache them to disk.
    if os.path.exists(inf_data_checkpoint_path) and os.path.exists(
            inf_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(inf_data_checkpoint_path, 'rb') as filehandle:
            inference_data = pickle.load(filehandle)
        with open(inf_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        inference_data, collate_fn = get_inference_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, opt.inference_subset, spatial_transform,
            temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(inf_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(inference_data, filehandle)
            with open(inf_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
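# The load-or-build-then-cache logic above is a candidate for a small shared
# helper; a minimal sketch (the helper name and this factoring are
# illustrative, not part of the original code):
import os
import pickle


def load_or_build(path, build_fn, enabled=True):
    """Unpickle `path` if caching is enabled and the file exists;
    otherwise call `build_fn()` and cache the result."""
    if enabled and os.path.exists(path):
        with open(path, 'rb') as filehandle:
            return pickle.load(filehandle)
    obj = build_fn()
    if enabled:
        with open(path, 'wb') as filehandle:
            pickle.dump(obj, filehandle)
    return obj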
def compute_saliency_maps(model, opt):
    # Generate tiny data loader
    # Loop through it to generate saliency maps
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    tiny_video_path = Path('/home/ruta/teeny_data/nturgb/jpg')
    tiny_annotation_path = Path('/home/ruta/teeny_data/ntu_01.json')
    tiny_data, collate_fn = get_inference_data(
        tiny_video_path, tiny_annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    tiny_loader = torch.utils.data.DataLoader(
        tiny_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=None,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    saliency_maps = []
    for i, (inputs, targets) in enumerate(tiny_loader):
        sal_map = get_saliency_map(inputs, targets, model, opt)
        # Plot the saliency map using matplotlib and save to a file
        plot_saliency(sal_map, i, inputs, targets)
        saliency_maps.append(sal_map)

    return saliency_maps
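# `get_saliency_map` is not defined in this snippet. A minimal
# vanilla-gradient sketch, assuming `inputs` is a float tensor of shape
# (N, C, T, H, W) and the model returns class scores of shape
# (N, num_classes); this is an illustration, not the original helper.
import torch


def get_saliency_map(inputs, targets, model, opt):
    model.eval()
    inputs = inputs.clone().requires_grad_(True)
    scores = model(inputs)
    # Backpropagate the score of each clip's target class.
    scores.gather(1, targets.view(-1, 1)).sum().backward()
    # Saliency: largest absolute gradient across input channels.
    return inputs.grad.abs().max(dim=1)[0]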
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.video_path,
                                                    opt.input_type,
                                                    opt.file_type,
                                                    spatial_transform,
                                                    temporal_transform)
    # inference_data, collate_fn = get_inference_data(
    #     opt.video_path, opt.input_type, opt.file_type,
    #     spatial_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    # Build an id -> class-name mapping from the label CSV
    # (first column: class id, second column: class name).
    df = pd.read_csv('kinetics_700_labels.csv')
    class_names = {}
    for i in range(df.shape[0]):
        row = df.iloc[i]
        class_names[row[0]] = row[1]

    return inference_loader, class_names
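# Assuming kinetics_700_labels.csv has two columns (id, name), e.g.
#
#   id,name
#   0,abseiling
#   1,air drumming
#
# the row loop above is equivalent to this one-liner (a sketch, reusing the
# same pandas DataFrame):
import pandas as pd

df = pd.read_csv('kinetics_700_labels.csv')
class_names = dict(zip(df.iloc[:, 0], df.iloc[:, 1]))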
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    # NOTE: this variant does not pass the transforms built above to
    # get_validation_data.
    val_data, collate_fn = get_validation_data(opt.label_path,
                                               opt.video_id_path,
                                               'val',
                                               opt.frame_dir,
                                               opt.image_size,
                                               window_size=opt.window_size)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn)
    # collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(
            opt.result_path / 'val.log',
            ['epoch', 'loss', 'acc', 'precision', 'recall', 'f1', 'tiou'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)
    # e.g. video_path='./jpg_mix', annotation_path='./data_dj/ucf101-01.json',
    #      dataset='ucf-101', input_type='rgb', file_type='jpg',
    #      inference_subset='val'

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']

    # Spatial augmentation: crop strategy, flip, color jitter, normalization.
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    # Temporal sampling: optional stride, then a random or center crop of
    # sample_duration frames.
    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=opt.batch_size,
        shuffle=(train_sampler is None),
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=train_sampler,
        worker_init_fn=worker_init_fn)

    # Only the master node writes logs.
    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    # Nesterov momentum requires zero dampening.
    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
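# A minimal sketch of how the returned utilities are typically consumed by a
# training script. `model`, `criterion`, and `train_epoch` are assumed to be
# defined elsewhere; their names (and opt.begin_epoch / opt.n_epochs) are
# illustrative, not part of the original code.
(train_loader, train_sampler, train_logger, train_batch_logger, optimizer,
 scheduler) = get_train_utils(opt, model.parameters())

for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
    if opt.distributed:
        # Reshuffle each worker's shard every epoch.
        train_sampler.set_epoch(epoch)
    train_epoch(epoch, train_loader, model, criterion, optimizer, opt.device,
                train_logger, train_batch_logger)
    if opt.lr_scheduler == 'multistep':
        scheduler.step()  # 'plateau' instead steps on validation loss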
no_mean_norm = False
no_std_norm = False
sample_size = 112
value_scale = 1
input_type = 'rgb'
sample_t_stride = 1
sample_duration = 16
inference_stride = 16

# `mean`, `std`, and `inference_crop` are expected to be defined earlier in
# the script.
#normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)

spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
spatial_transform.append(ToTensor())
if input_type == 'flow':
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)

# Load the model
#print('load model begin!')
model = generate_model_resnet(1)  # build the ResNet model
#model = torch.load('./save_200.pth')
checkpoint = torch.load('./save_200.pth', map_location='cpu')
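# The snippet ends right after reading the checkpoint file. A typical
# continuation, assuming the weights are stored under a 'state_dict' key
# (the key name is an assumption about this checkpoint's format):
model.load_state_dict(checkpoint['state_dict'])
model.eval()  # inference mode: disable dropout, use running BN statistics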
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']

    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    spatial_transform.append(ToArray())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)

    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)
    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
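# A minimal sketch of driving the Paddle variant above in dygraph mode.
# Assumes a dygraph `model` and that each sample yielded by the batched
# reader is an (ndarray_clip, int_label) pair; all names here are
# illustrative, not part of the original code.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    (train_loader, train_logger, train_batch_logger, optimizer,
     scheduler) = get_train_utils(opt, model.parameters())
    for epoch in range(opt.n_epochs):
        for batch_id, batch in enumerate(train_loader()):
            clips = fluid.dygraph.to_variable(
                np.stack([sample[0] for sample in batch]).astype('float32'))
            labels = fluid.dygraph.to_variable(
                np.array([sample[1] for sample in batch],
                         dtype='int64').reshape(-1, 1))
            logits = model(clips)
            loss = fluid.layers.mean(
                fluid.layers.softmax_with_cross_entropy(logits, labels))
            loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
        # For the 'plateau' scheduler, step it on validation loss here.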