def set_model(args, n_data):
    # set the model
    if args.model == 'c3d':
        model = C3D(with_classifier=False)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)

    # set the NCE memory bank: the intra-negative variant or the original NCEAverage
    if args.intra_neg:
        contrast = NCEAverage(args.feat_dim, n_data, args.nce_k, args.nce_t,
                              args.nce_m, args.softmax)
    else:
        contrast = NCEAverage_ori(args.feat_dim, n_data, args.nce_k, args.nce_t,
                                  args.nce_m, args.softmax)

    criterion_1 = NCESoftmaxLoss() if args.softmax else NCECriterion(n_data)
    criterion_2 = NCESoftmaxLoss() if args.softmax else NCECriterion(n_data)

    # GPU mode
    model = model.cuda()
    contrast = contrast.cuda()
    criterion_1 = criterion_1.cuda()
    criterion_2 = criterion_2.cuda()
    cudnn.benchmark = True

    return model, contrast, criterion_1, criterion_2
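# A minimal sketch of how the four objects returned by set_model could be
# wired together in one training step, following the usual CMC-style NCE
# recipe. The two-view forward, the contrast(feat_1, feat_2, index) call
# signature, and the optimizer argument are assumptions for illustration,
# not code taken from this repo.
def train_step(model, contrast, criterion_1, criterion_2, optimizer,
               view_1, view_2, index):
    # Embed both views of the same clips with the shared backbone.
    feat_1 = model(view_1.cuda())
    feat_2 = model(view_2.cuda())
    # The memory bank scores each embedding against its positive and the
    # sampled negatives, producing one output per view.
    out_1, out_2 = contrast(feat_1, feat_2, index.cuda())
    # One NCE loss per view; the total loss is their sum.
    loss = criterion_1(out_1) + criterion_2(out_2)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()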
torch.manual_seed(args.seed)
if args.gpu:
    torch.cuda.manual_seed_all(args.seed)

########### model ##############
if args.dataset == 'ucf101':
    class_num = 101
elif args.dataset == 'hmdb51':
    class_num = 51
elif args.dataset == 'K400':
    class_num = 400

if args.model == 'c3d':
    model = C3D(with_classifier=True, num_classes=class_num).cuda()
elif args.model == 'r3d':
    model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=True, num_classes=class_num).cuda()
elif args.model == 'r21d':
    model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=True, num_classes=class_num).cuda()
elif args.model == 's3d':
    model = S3D(num_classes=class_num, space_to_depth=False, with_classifier=True).cuda()

if args.ckpt:
    # Strip the 'module.base_network.' prefix left by DataParallel wrapping
    # during pretraining before loading the backbone weights.
    pretrained_weights = torch.load(args.ckpt)['model']
    model.load_state_dict(
        {k.replace('module.base_network.', ''): v for k, v in pretrained_weights.items()},
        strict=False)

if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

if args.desp:
    exp_name = 'K400_TCG_split1_finetuned_loss_{}_cl{}_{}_{}'.format(
        args.model, args.cl, args.desp, time.strftime('%m%d%H%M'))
else:
#os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# Force PyTorch to create its context on the specified devices
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5,6,7"
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

if args.seed:
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

########### model ##############
if args.model == 'c3d':
    base = C3D(with_classifier=False).to(device)
elif args.model == 'r3d':
    base = R3DNet(layer_sizes=(3, 4, 6, 3), with_classifier=False).to(device)
elif args.model == 'r21d':
    base = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False).to(device)
elif args.model == 'i3d':
    base = InceptionI3d(final_endpoint='Logits', with_classifier=False).to(device)
elif args.model == 'r3d50':
    base = resnet50(sample_size=112, sample_duration=16, with_classifier=False, return_conv=False).to(device)
elif args.model == 's3d':
    base = S3D(gating=True, with_classifier=False, return_conv=False).to(device)
def extract_feature(args):
    """Extract and save features for the train and test splits, several clips per video."""
    torch.backends.cudnn.benchmark = True
    # Force PyTorch to create its context on the specified device
    #os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'c3d':
        model = C3D(with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False, return_conv=True).to(device)
    elif args.model == 's3d':
        model = S3D(space_to_depth=False, with_classifier=False, return_conv=True).to(device)

    if args.ckpt:
        # Strip the 'module.base_network.' prefix left by DataParallel during pretraining.
        pretrained_weights = torch.load(args.ckpt)['model']
        model.load_state_dict(
            {k.replace('module.base_network.', ''): v for k, v in pretrained_weights.items()},
            strict=False)

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
    model.eval()
    torch.set_grad_enabled(False)

    ### Extract for train split ###
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, True, train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, True, train_transforms)
    elif args.dataset == 'K400':
        train_dataset = K400ClipRetrievalDataset('data/K400', 16, 10, True, train_transforms)
    train_dataloader = DataLoader(train_dataset, batch_size=args.bs, shuffle=False,
                                  num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, idxs = data
        # Flatten the 10 clips per video into the batch dimension.
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, False, test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, False, test_transforms)
    elif args.dataset == 'K400':
        test_dataset = K400ClipRetrievalDataset('data/K400', 16, 10, False, test_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=args.bs, shuffle=False,
                                 num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
def extract_feature(args):
    """Extract and save features for the train and test splits, several clips per video."""
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'c3d':
        model = C3D(with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r21d':
        model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False, return_conv=True).to(device)
    #model = nn.DataParallel(model)

    if args.ckpt:
        pretrained_weights = torch.load(args.ckpt)['model']
        model.load_state_dict(pretrained_weights, strict=True)
    model.eval()
    torch.set_grad_enabled(False)

    ### Extract for train split ###
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, True, train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, True, train_transforms)
    train_dataloader = DataLoader(train_dataset, batch_size=args.bs, shuffle=False,
                                  num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, u_clips, v_clips, idxs = data
        # Pick the input modality: horizontal flow (u), vertical flow (v),
        # or the RGB clips (also used to compute frame residuals).
        if args.modality == 'u':
            input_clips = u_clips
        elif args.modality == 'v':
            input_clips = v_clips
        else:  # rgb and res
            input_clips = sampled_clips
        clips = input_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        if args.merge:
            # Concatenate RGB features with the chosen modality's features.
            rgb_clips = sampled_clips.reshape((-1, 3, 16, 112, 112)).to(device)
            outputs_rgb = model(rgb_clips)
            outputs = torch.cat((outputs_rgb, outputs), 1)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor()
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data/ucf101', 16, 10, False, test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data/hmdb51', 16, 10, False, test_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=args.bs, shuffle=False,
                                 num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, u_clips, v_clips, idxs = data
        if args.modality == 'u':
            input_clips = u_clips
        elif args.modality == 'v':
            input_clips = v_clips
        else:  # rgb and res
            input_clips = sampled_clips
        clips = input_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        if args.merge:
            rgb_clips = sampled_clips.reshape((-1, 3, 16, 112, 112)).to(device)
            outputs_rgb = model(rgb_clips)
            outputs = torch.cat((outputs_rgb, outputs), 1)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
    print('Saved features to', args.feature_dir)
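# The residual ('res') modality above relies on a diff helper that is not
# shown in this snippet. Below is a minimal sketch of such a helper, assuming
# the residual is the difference between temporally adjacent frames and the
# clip layout is (N, C, T, H, W); the repo's actual definition may shift or
# pad differently.
def diff(clips, shift=1):
    """Frame residuals for a clip tensor of shape (N, C, T, H, W)."""
    # Subtract each frame from the one `shift` steps later; the output has
    # T - shift frames, so implementations that need a fixed length T
    # typically pad or repeat a frame afterwards.
    return clips[:, :, shift:, :, :] - clips[:, :, :-shift, :, :]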
# Force PyTorch to create its context on the specified device
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

if args.seed:
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu:
        torch.cuda.manual_seed_all(args.seed)

########### model ##############
if args.model == 'c3d':
    base = C3D(with_classifier=False)
elif args.model == 'r3d':
    base = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
elif args.model == 'r21d':
    base = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False)
vcopn = VCOPN(base_network=base, feature_size=512, tuple_len=args.tl).to(device)

if args.mode == 'train':
    ########### Train #############
    if args.ckpt:  # resume training
        vcopn.load_state_dict(torch.load(args.ckpt))
        log_dir = os.path.dirname(args.ckpt)
    else:
        if args.desp:
            exp_name = '{}_cl{}_it{}_tl{}_{}_{}'.format(
                args.model, args.cl, args.it, args.tl, args.desp,
                time.strftime('%m%d%H%M'))
        else:
torch.cuda.manual_seed_all(args.seed)

########### model ##############
if args.dataset == 'ucf101':
    class_num = 101
elif args.dataset == 'hmdb51':
    class_num = 51
elif args.dataset == 'activity':
    class_num = 200

if args.model == 'c3d':
    model = C3D(with_classifier=True, num_classes=class_num).cuda()
elif args.model == 'r3d':
    # Fine-tuning needs the classification head, so keep the classifier enabled.
    model = R3DNet(layer_sizes=(3, 4, 6, 3), with_classifier=True, num_classes=class_num).cuda()
elif args.model == 'r21d':
    model = R2Plus1DNet(layer_sizes=(1, 1, 1, 1), with_classifier=True, num_classes=class_num).cuda()
elif args.model == 's3d':
    model = S3D(num_classes=class_num, space_to_depth=False, with_classifier=True).cuda()
elif args.model == 'r3d_50':
    model = generate_model(model_depth=50, with_classifier=False, return_conv=False).to(device)

if torch.cuda.device_count() > 1:
def extract_feature(args):
    """Extract and save features for the train and test splits, several clips per video."""
    torch.backends.cudnn.benchmark = True
    # Force PyTorch to create its context on the specified device
    #os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

    ########### model ##############
    if args.model == 'r3d':
        model = R3DNet(layer_sizes=(1, 1, 1, 1), with_classifier=False, return_conv=True).to(device)
    elif args.model == 'r18':
        model = R18(with_classifier=False).to(device)

    if args.ckpt:
        if args.model == 'r3d':
            pretrained_weights = load_pretrained_weights(args.ckpt)
            # Set strict=True to verify that every weight loads successfully.
            model.load_state_dict(pretrained_weights, strict=False)
        else:  # for r18
            pretrained_weights = torch.load(args.ckpt)
            model.load_state_dict(pretrained_weights, strict=False)
    model.eval()
    torch.set_grad_enabled(False)

    ### Extract for train split ###
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor(),
        normalize
    ])
    if args.dataset == 'ucf101':
        train_dataset = UCF101ClipRetrievalDataset('data', 16, 10, True, train_transforms)
    elif args.dataset == 'hmdb51':
        train_dataset = HMDB51ClipRetrievalDataset('data', 16, 10, True, train_transforms)
    train_dataloader = DataLoader(train_dataset, batch_size=args.bs, shuffle=False,
                                  num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(train_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'train_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'train_class.npy'), classes)

    ### Extract for test split ###
    test_transforms = transforms.Compose([
        transforms.Resize((128, 171)),
        transforms.CenterCrop(112),
        transforms.ToTensor(),
        normalize
    ])
    if args.dataset == 'ucf101':
        test_dataset = UCF101ClipRetrievalDataset('data', 16, 10, False, test_transforms)
    elif args.dataset == 'hmdb51':
        test_dataset = HMDB51ClipRetrievalDataset('data', 16, 10, False, test_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=args.bs, shuffle=False,
                                 num_workers=args.workers, pin_memory=True, drop_last=True)

    features = []
    classes = []
    for data in tqdm(test_dataloader):
        sampled_clips, idxs = data
        clips = sampled_clips.reshape((-1, 3, 16, 112, 112))
        inputs = clips.to(device)
        # forward
        if args.modality == 'res':
            outputs = model(diff(inputs))
        else:
            outputs = model(inputs)
        features.append(outputs.cpu().numpy().tolist())
        classes.append(idxs.cpu().numpy().tolist())
    features = np.array(features).reshape(-1, 10, outputs.shape[1])
    classes = np.array(classes).reshape(-1, 10)
    np.save(os.path.join(args.feature_dir, 'test_feature.npy'), features)
    np.save(os.path.join(args.feature_dir, 'test_class.npy'), classes)
    print('Saved features to', args.feature_dir)
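# The saved .npy files (features of shape (n_videos, 10, dim) plus one label
# row per video) are typically consumed by a nearest-neighbour retrieval
# evaluation. A minimal sketch under those assumptions: the function name,
# the averaging of the 10 clip features, and the top-k protocol are
# illustrative, not this repo's exact evaluation script.
import os
import numpy as np

def topk_retrieval(feature_dir, ks=(1, 5, 10, 20, 50)):
    # Average the 10 clip features into one feature vector per video.
    X_train = np.load(os.path.join(feature_dir, 'train_feature.npy')).mean(axis=1)
    y_train = np.load(os.path.join(feature_dir, 'train_class.npy'))[:, 0]
    X_test = np.load(os.path.join(feature_dir, 'test_feature.npy')).mean(axis=1)
    y_test = np.load(os.path.join(feature_dir, 'test_class.npy'))[:, 0]

    # Cosine similarity between every test video and every train video.
    X_train /= np.linalg.norm(X_train, axis=1, keepdims=True)
    X_test /= np.linalg.norm(X_test, axis=1, keepdims=True)
    order = np.argsort(-(X_test @ X_train.T), axis=1)  # most similar first

    for k in ks:
        # A query counts as a hit if any of its top-k neighbours shares its label.
        hits = (y_train[order[:, :k]] == y_test[:, None]).any(axis=1)
        print('Top-{} retrieval accuracy: {:.2%}'.format(k, hits.mean()))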