def __init__(self, opt, train=True):
    """Set up data root, video transforms and the clip list for one split.

    Args:
        opt: options namespace; reads ``opt.dataset``, ``opt.input_norm``
            and ``opt.irregular``.
        train: if True use the 'train' subdirectory (with augmentation),
            otherwise 'test'.

    Raises:
        ValueError: if ``opt.dataset`` has no configured data root.
    """
    # Base class presumably sets self.train / self.window_size /
    # self.sample_size — TODO confirm against VideoDataset's parent.
    super(VideoDataset, self).__init__(opt, train=train)

    # Dataroot & Transform
    if opt.dataset == 'mgif':
        data_root = './dataset/moving-gif'
        vtrans = [vtransforms.Scale(size=128)]
    elif opt.dataset == 'kth':
        data_root = './dataset/kth_action/'
        vtrans = [
            vtransforms.CenterCrop(size=120),
            vtransforms.Scale(size=128)
        ]
    elif opt.dataset == 'penn':
        data_root = './dataset/penn_action/'
        vtrans = [vtransforms.Scale(size=128)]
    else:
        # BUG FIX: previously fell through leaving `data_root`/`vtrans`
        # undefined, raising a confusing NameError below. Fail fast instead.
        raise ValueError('No data root configured for dataset: %s' %
                         opt.dataset)

    # Geometric augmentation only on the training split.
    if self.train:
        vtrans += [vtransforms.RandomHorizontalFlip()]
        vtrans += [vtransforms.RandomRotation()]

    vtrans += [vtransforms.ToTensor(scale=True)]
    if opt.input_norm:
        vtrans += [vtransforms.Normalize(0.5, 0.5)]
    self.vtrans = T.Compose(vtrans)

    if self.train:
        self.image_path = os.path.join(data_root, 'train')
    else:
        self.image_path = os.path.join(data_root, 'test')

    # Minimum clip length required for a sequence to be usable.
    threshold = self.window_size if opt.irregular else self.sample_size
    if opt.dataset in ['kth', 'sintel', 'ucf101', 'penn']:
        self.image_list = os.listdir(self.image_path)
    elif opt.dataset in ['mgif', 'stickman']:
        self.image_list = remove_files_under_sample_size(
            image_path=self.image_path, threshold=threshold)
    self.image_list = sorted(self.image_list)
def main():
    """Build, train and evaluate a two-stream (rgb/flow) action model.

    Configuration comes from the module-level ``parser``; updates the
    ``best_prec1`` global and writes periodic checkpoints under
    ``args.resume``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # create model
    print("Building model ... ")
    model = build_model()
    print("Model %s is loaded. " % (args.modality + "_" + args.arch))

    if not os.path.exists(args.resume):
        os.makedirs(args.resume)
    print("Saving everything to directory %s." % (args.resume))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    cudnn.benchmark = True

    # Data transforming: ImageNet statistics replicated over clip length.
    clip_mean = [0.485, 0.456, 0.406] * args.new_length
    clip_std = [0.229, 0.224, 0.225] * args.new_length
    normalize = video_transforms.Normalize(mean=clip_mean, std=clip_std)

    if args.modality == "rgb":
        scale_ratios = [1.0, 0.875, 0.75, 0.66]
    elif args.modality == "flow":
        scale_ratios = [1.0, 0.875, 0.75]
    else:
        # BUG FIX: the original fell through with `scale_ratios` undefined,
        # causing a NameError a few lines later. Stop explicitly.
        print("No such modality. Only rgb and flow supported.")
        return

    train_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.MultiScaleCrop((224, 224), scale_ratios),
        video_transforms.RandomHorizontalFlip(),
        video_transforms.ToTensor(),
        normalize,
    ])
    val_transform = video_transforms.Compose([
        video_transforms.Scale((256)),
        video_transforms.CenterCrop((224)),
        video_transforms.ToTensor(),
        normalize,
    ])

    # data loading
    train_setting_file = "train_%s_split%d.txt" % (args.modality, args.split)
    train_split_file = os.path.join(args.settings, args.dataset,
                                    train_setting_file)
    val_setting_file = "val_%s_split%d.txt" % (args.modality, args.split)
    val_split_file = os.path.join(args.settings, args.dataset,
                                  val_setting_file)
    if not os.path.exists(train_split_file) or not os.path.exists(
            val_split_file):
        # BUG FIX: the message string was broken across a raw newline (a
        # syntax error); also return early — dataset construction below
        # would fail anyway without the split files.
        print("No split file exists in %s directory. "
              "Preprocess the dataset first" % (args.settings))
        return

    train_dataset = datasets.__dict__[args.dataset](
        args.data,
        train_split_file,
        "train",
        args.new_length,
        video_transform=train_transform)
    val_dataset = datasets.__dict__[args.dataset](
        args.data,
        val_split_file,
        "val",
        args.new_length,
        video_transform=val_transform)
    print('{} samples found, {} train samples and {} test samples.'.format(
        len(val_dataset) + len(train_dataset), len(train_dataset),
        len(val_dataset)))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if (epoch + 1) % args.save_freq == 0:
            checkpoint_name = "%03d_%s" % (epoch + 1, "checkpoint.pth.tar")
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, checkpoint_name, args.resume)
def main():
    """Run the flow-based trajectory predictor over the validation set and
    dump the stacked predictions to ``val_prediction.npy``."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the 9-frame flow model; weights loaded non-strictly
    # (second positional argument of load_state_dict is `strict`).
    model = nn.DataParallel(DynamicTrajectoryPredictor(9).to(device).float())
    model.load_state_dict(torch.load('./model.weights'), False)

    load_path = './data_inference/'
    img_root = '../../../flow_result/'

    # Inference settings (training-style constants kept for parity with
    # the training scripts).
    epochs = 15
    batch_size = 1
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100

    # Validation-time transform: resize only, no augmentation.
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])

    valset = LocationDatasetBDD(filename='myvideo_val_yolo_0.pkl',
                                root_dir=load_path,
                                transform=transform_val,
                                img_root=img_root,
                                NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)

    model.eval()
    collected = []
    start_time = time.time()
    with torch.no_grad():
        for batch_idx, data in enumerate(val_loader):
            # Progress report every 100 batches.
            if batch_idx % 100 == 0:
                end_time = time.time()
                print(' Batch ', batch_idx, ' of ', len(val_loader),
                      ' Cost time: ', end_time - start_time)
                start_time = end_time

            flow = data['flow_stack'].to(device).float()
            collected.append(model(flow).detach().cpu().numpy())

            # NOTE: only the first batch is processed — kept as in the
            # original (debug behaviour).
            if batch_idx == 0:
                break

    ans = np.array(collected).reshape(-1, 120)
    print(ans)
    print(ans.shape)
    np.save('./data_inference/val_prediction.npy', ans)
def main(args):
    """5-fold cross-validated DTP training on JAAD.

    For each fold: trains, keeps the model with the best validation
    MSE@15, evaluates it on the test split, then saves the weights,
    predictions, targets, and a cumulative CSV of per-fold results.

    Args:
        args: namespace providing ``model_load_path`` and
            ``model_save_path``.
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")
    # Training settings
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' model_load_path:',
          model_load_path, ' NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()
    for fold in [1, 2, 3, 4, 5]:
        # Pretrained models fine-tune with a lower LR for fewer epochs.
        if pretrained:
            learning_rate = 1e-6
            epochs = 30
        else:
            learning_rate = 1e-5
            epochs = 40
        print('Training on fold ' + str(fold))

        try:
            testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                          root_dir=load_path,
                                          transform=transform_val,
                                          img_root=img_root,
                                          NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            test_loader = torch.utils.data.DataLoader(
                testset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers)
            trainset = LocationDatasetJAAD(filename='jaad_cv_train_' +
                                           str(fold) + '.pkl',
                                           root_dir=load_path,
                                           transform=transform_train,
                                           img_root=img_root,
                                           NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            train_loader = torch.utils.data.DataLoader(
                trainset,
                batch_size=batch_size,
                shuffle=True,
                num_workers=num_workers)
            valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(fold) +
                                         '.pkl',
                                         root_dir=load_path,
                                         transform=transform_val,
                                         img_root=img_root,
                                         NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            val_loader = torch.utils.data.DataLoader(
                valset,
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed.
            sys.exit(
                'ERROR: Could not load pkl data file. Check the jaad .pkl files are in the correct path.'
            )

        model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
        model = model.float()
        model = nn.DataParallel(model)
        if model_load_path is not None:
            print('loading model from', model_load_path)
            model.load_state_dict(torch.load(model_load_path))

        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
        loss_function = torch.nn.MSELoss()
        best_FDE = np.inf
        best_MSE = np.inf
        best_model = copy.deepcopy(model)

        # Begin training
        for epoch in range(1, epochs + 1):
            # Set learning rate to 1e-6 after 30 epochs
            if epoch > 30:
                # BUG FIX: was `weight_weight_decay=decay`, a TypeError that
                # also referenced the undefined name `decay`.
                optimizer = optim.Adam(model.parameters(),
                                       lr=1e-6,
                                       weight_decay=weight_decay)
            train(model, device, train_loader, optimizer, epoch, loss_function)
            MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
                model, device, val_loader, loss_function)
            # Model selection on validation MSE at the 15-frame horizon.
            if MSE_15 < best_MSE:
                best_MSE = MSE_15
                best_model = copy.deepcopy(model)
                best_FDE = FDE_15
            print(epoch)
            print('Best MSE:', round(best_MSE, 0))

        test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
            best_model, device, test_loader, loss_function)
        print('Test mse @ 15:', round(test_mse_15, 0))

        # Save the model
        torch.save(
            best_model.state_dict(),
            model_save_path + 'rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_fold_' + str(fold) + '_pretrained-' + str(pretrained) +
            '_disp.weights')

        # Save the predictions and the targets
        np.save(
            './predictions_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_outputs)
        np.save(
            './targets_rn18_flow_css_' + str(NUM_FLOW_FRAMES) +
            'stack_jaad_fold_' + str(fold) + 'pretrained-' + str(pretrained) +
            '_disp.npy', all_targets)

        # Save the results (CSV rewritten after every fold)
        result = {
            'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
            'fold': fold,
            'val_mse': best_MSE,
            'val_fde': best_FDE,
            'test_mse_5': test_mse_5,
            'test_fde_5': test_fde_5,
            'test_mse_10': test_mse_10,
            'test_fde_10': test_fde_10,
            'test_mse_15': test_mse_15,
            'test_fde_15': test_fde_15,
            'pretrained': pretrained
        }
        results = results.append(result, ignore_index=True)
        results.to_csv('./results_rn18_jaad.csv', index=False)
def main(args):
    """Build the fold-1 JAAD datasets/loaders and print the flow-stack size
    of the first test batch (a smoke check of the data pipeline).

    Args:
        args: namespace providing ``model_load_path`` and
            ``model_save_path``.
    """
    ############################################################################
    # Path to optical flow images
    img_root = './data/human-annotated/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")
    # Training settings
    epochs = 30
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    pretrained = False
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    model_load_path = args.model_load_path
    model_save_path = args.model_save_path

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' model_load_path:',
          model_load_path, ' NUM_FLOW_FRAMES:', NUM_FLOW_FRAMES)

    results = pd.DataFrame()

    testset = LocationDatasetJAAD(filename='jaad_cv_test.pkl',
                                  root_dir=load_path,
                                  transform=transform_val,
                                  img_root=img_root,
                                  NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    trainset = LocationDatasetJAAD(filename='jaad_cv_train_' + str(1) +
                                   '.pkl',
                                   root_dir=load_path,
                                   transform=transform_train,
                                   img_root=img_root,
                                   NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    valset = LocationDatasetJAAD(filename='jaad_cv_val_' + str(1) + '.pkl',
                                 root_dir=load_path,
                                 transform=transform_val,
                                 img_root=img_root,
                                 NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers)

    # BUG FIX: a DataLoader is not subscriptable (`test_loader['flow_stack']`
    # raised TypeError). Pull the first batch and inspect its flow stack.
    first_batch = next(iter(test_loader))
    print('test_loader flow_stack size = ', first_batch['flow_stack'].size())
def main():
    ''' Load the model '''
    # Extract DTP features for every fold of every split and save them as
    # .npy files for the downstream STED model.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = DynamicTrajectoryPredictor(9).to(device)
    model = model.float()
    model = nn.DataParallel(model)
    # summary(model, input_size=(18, 224, 224))
    # Second positional argument is `strict=False`: tolerate key mismatches.
    model.load_state_dict(
        torch.load(
            './data/yolomyvideo_rn50_flow_css_9stack_training_proportion_100_shuffled_disp.weights'
        ), False)
    model.eval()
    load_path = './data/'
    img_root = '../../flow_result/'
    # Training settings
    epochs = 15
    batch_size = 1
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100
    # Transformers
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    for fold_type in ['train', 'val', 'test']:
        for fold_num in range(1, 4):
            # NOTE(review): `result` is not defined in this function — it
            # appears to be a module-level list populated as a side effect of
            # the forward pass (e.g. by a feature hook on the model), since
            # `output = model(flow)` below is otherwise unused. Confirm it
            # exists at module scope before running.
            result.clear()
            valset = LocationDatasetBDD(filename=fold_type + str(fold_num) +
                                        '_myvideo_location_features_yolo.pkl',
                                        root_dir=load_path,
                                        transform=transform_val,
                                        img_root=img_root,
                                        NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
            val_loader = torch.utils.data.DataLoader(valset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
            # Inference only: freeze all parameters.
            for param in model.parameters():
                param.requires_grad = False
            start_time = time.time()
            for batch_idx, data in enumerate(val_loader):
                # Progress report every 100 batches.
                if batch_idx % 100 == 0:
                    end_time = time.time()
                    print(fold_type + ':', fold_num, ' Batch ', batch_idx,
                          ' of ', len(val_loader), ' Cost time: ',
                          end_time - start_time)
                    start_time = end_time
                    # break
                # if batch_idx == 20:
                #     break
                flow = data['flow_stack'].to(device)
                flow = flow.float()
                # Forward pass; features are presumably accumulated into
                # `result` via a hook — TODO confirm.
                output = model(flow)
                # print('Processing: ', batch_idx)
            # One 2048-dim feature vector per sample for this fold.
            ans = np.array(result).reshape(-1, 2048)
            print(ans.shape)
            with open('record_extract.txt', 'w') as f:
                f.write(fold_type + ' ' + str(fold_num) + ' ' + str(ans.shape))
            np.save(
                './data/sted_feature/fold_' + str(fold_num) + '_' +
                fold_type + '_dtp_features.npy', ans)
def main(args):
    """Train DTP on BDD-10k detections and save the best model + results.

    Trains for a fixed number of epochs, checkpointing whenever the
    validation MSE@15 improves, then evaluates the best model on the test
    split and saves weights, predictions, targets, and a results CSV.

    Args:
        args: namespace providing ``detector`` ('yolo' or anything else,
            which selects faster-rcnn data).
    """
    ############################################################################
    # Path to optical flow images
    if args.detector == 'yolo':
        img_root = './data/yolov3/'
    else:
        img_root = './data/faster-rcnn/'
    # Path to training and testing files
    load_path = './data/'
    # CPU or GPU?
    device = torch.device("cuda")
    # Model saving and loading
    model_save_path = './data/'
    model_load_path = './data/'
    # Training settings
    epochs = 15
    batch_size = 64
    learning_rate = 1e-5
    num_workers = 8
    weight_decay = 1e-2
    NUM_FLOW_FRAMES = 9
    training_proportion = 100  # How much of the dataset to use? 100 = 100percent

    # Transformers for training and validation
    transform_train = video_transforms.Compose([
        video_transforms.MultiScaleCrop((224, 224), [1.0]),
        video_transforms.ToTensor(),
    ])
    transform_val = video_transforms.Compose([
        video_transforms.Scale((224)),
        video_transforms.ToTensor(),
    ])
    ############################################################################

    print('################### Training settings ###################')
    print('epochs:', epochs, ' batch_size:', batch_size, ' learning_rate:',
          learning_rate, ' num_workers:', num_workers, ' NUM_FLOW_FRAMES:',
          NUM_FLOW_FRAMES)

    results = pd.DataFrame()
    print('Training model')
    print(args.detector + '_bdd10k_val.pkl')

    try:
        testset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                     '.pkl',
                                     root_dir=load_path,
                                     transform=transform_val,
                                     img_root=img_root,
                                     NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        test_loader = torch.utils.data.DataLoader(testset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=num_workers)
        trainset = LocationDatasetBDD(filename='bdd10k_train_' +
                                      args.detector + '.pkl',
                                      root_dir=load_path,
                                      transform=transform_train,
                                      img_root=img_root,
                                      NUM_FLOW_FRAMES=NUM_FLOW_FRAMES,
                                      proportion=training_proportion)
        train_loader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=num_workers)
        valset = LocationDatasetBDD(filename='bdd10k_val_' + args.detector +
                                    '.pkl',
                                    root_dir=load_path,
                                    transform=transform_val,
                                    img_root=img_root,
                                    NUM_FLOW_FRAMES=NUM_FLOW_FRAMES)
        val_loader = torch.utils.data.DataLoader(valset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=num_workers)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # not swallowed.
        sys.exit(
            'ERROR: Could not load pkl data file. Check the bdd .pkl files are in the correct path.'
        )

    model = DynamicTrajectoryPredictor(NUM_FLOW_FRAMES).to(device)
    model = model.float()
    model = nn.DataParallel(model)

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)
    loss_function = torch.nn.MSELoss()
    best_FDE = np.inf
    best_MSE = np.inf
    best_model = copy.deepcopy(model)

    # Begin training
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch, loss_function)
        MSE_5, FDE_5, MSE_10, FDE_10, MSE_15, FDE_15, _, _ = test(
            model, device, val_loader, loss_function)
        # Model selection on validation MSE at the 15-frame horizon;
        # checkpoint immediately on improvement.
        if MSE_15 < best_MSE:
            best_MSE = MSE_15
            best_model = copy.deepcopy(model)
            best_FDE = FDE_15
            torch.save(
                best_model.state_dict(),
                model_save_path + args.detector + '_rn18_bdd10k_flow_css_' +
                str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
                str(training_proportion) + '_shuffled_disp.weights')
        print(epoch)
        print('Best MSE:', round(best_MSE, 0))

    test_mse_5, test_fde_5, test_mse_10, test_fde_10, test_mse_15, test_fde_15, all_outputs, all_targets = test(
        best_model, device, test_loader, loss_function)
    print('Test mse @ 15:', round(test_mse_15, 0))

    # Save the model
    torch.save(
        best_model.state_dict(),
        model_save_path + args.detector + 'bdd10k_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.weights')

    # Save the predictions and the targets
    np.save(
        './' + args.detector + '_predictions_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k_training_proportion_' +
        str(training_proportion) + '_shuffled_disp.npy', all_outputs)
    np.save(
        './' + args.detector + '_targets_rn18_flow_css_' +
        str(NUM_FLOW_FRAMES) + 'stack_bdd10k__shuffled_disp.npy', all_targets)

    # Save the results
    result = {
        'NUM_FLOW_FRAMES': NUM_FLOW_FRAMES,
        'training_proportion': training_proportion,
        'val_mse': best_MSE,
        'val_fde': best_FDE,
        'test_mse_5': test_mse_5,
        'test_fde_5': test_fde_5,
        'test_mse_10': test_mse_10,
        'test_fde_10': test_fde_10,
        'test_mse_15': test_mse_15,
        'test_fde_15': test_fde_15
    }
    results = results.append(result, ignore_index=True)
    results.to_csv('./' + args.detector + '_results_rn18_bdd10k.csv',
                   index=False)