def main(base_name, SEQ_SIZE=16, BATCH_SIZE=256, HIDDEN_SIZE=1000, N_EPOCHS=30, \
         N_LAYERS=1, threshold=0.5, seq_threshold=0.5, use_gpu=False):
    """
    Function to read the extracted HOG features and train an RNN on them,
    evaluate on the validation videos, write the predicted localizations to
    JSON, and save the trained model and predictions to disk.

    Parameters:
    ------
    base_name: path to the wts, losses, predictions and log files
    SEQ_SIZE: No. of frames sent to the RNN at a time
    BATCH_SIZE: depends on GPU memory
    HIDDEN_SIZE: size of the hidden layer in the RNN
    N_EPOCHS: training iterations (no. of times the training set is seen)
    N_LAYERS: no. of hidden layers in the RNN
    threshold and seq_threshold: threshold values used during prediction
    use_gpu: True if training is to be done on GPU, False for CPU
    """
    if not os.path.exists(base_name):
        os.makedirs(base_name)
    seed = 1234
    utils.seed_everything(seed)
    print(60*"#")

    # Form dataloaders
    train_lst_main_ext = get_main_dataset_files(MAIN_DATASET)  # with extensions
    train_lst_main = [t.rsplit('.', 1)[0] for t in train_lst_main_ext]  # remove the extension
    val_lst_main_ext = get_main_dataset_files(VAL_DATASET)
    val_lst_main = [t.rsplit('.', 1)[0] for t in val_lst_main_ext]

    # Divide the sample files into training, validation and test sets
    train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)
    print("SEQ_SIZE : {}".format(SEQ_SIZE))

    # form the names of the label files, which should be at the destination
    train_lab = [f+".json" for f in train_lst]
    val_lab = [f+".json" for f in val_lst]
    test_lab = [f+".json" for f in test_lst]
    train_lab_main = [f+".json" for f in train_lst_main]
    val_lab_main = [f+".json" for f in val_lst_main]

    # get complete path lists of label files
    tr_labs = [os.path.join(LABELS, f) for f in train_lab]
    # note: the highlights validation labels/sizes are taken from the test split
    val_labs = [os.path.join(LABELS, f) for f in test_lab]
    tr_labs_main = [os.path.join(MAIN_LABELS, f) for f in train_lab_main]
    val_labs_main = [os.path.join(VAL_LABELS, f) for f in val_lab_main]
    #####################################################################

    sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in train_lst]
    val_sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in test_lst]
    sizes_main = [utils.getNFrames(os.path.join(MAIN_DATASET, f)) for f in train_lst_main_ext]
    val_sizes_main = [utils.getNFrames(os.path.join(VAL_DATASET, f)) for f in val_lst_main_ext]

    ###########################################################################
    # Merge the training highlights and main dataset variables
    train_lab.extend(train_lab_main)
    tr_labs.extend(tr_labs_main)
    sizes.extend(sizes_main)

    print("No. of training videos : {}".format(len(tr_labs)))  # merged count
    print("Sizes : {}".format(sizes))
    hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set=True)
    print(hlDataset.__len__())

    #####################################################################
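    # Added sanity check (not in the original code): VideoDataset is assumed to
    # consume parallel lists, so each label file must pair with a frame count.
    assert len(tr_labs) == len(sizes), "label/size lists must align per video"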
    # Create a DataLoader object and sample batches of examples.
    # These batch samples are used to extract the features from the videos in parallel
    train_loader = DataLoader(dataset=hlDataset, batch_size=BATCH_SIZE, shuffle=True)
    datasets_loader = {'train': train_loader}   # can have a test loader also

    # read features into a dictionary {vidname: np array, ...}
    print("Loading features from disk...")
    features = utils.readAllPartitionFeatures(HOGFeatsPath, train_lst)
    #HOGfeatures = utils.readAllHOGfeatures(HOGfeaturesPath, train_lst)
    mainFeatures = utils.readAllPartitionFeatures(HOGMainFeatsPath, train_lst_main)
    features.update(mainFeatures)   # merge the dicts
    print(len(train_loader.dataset))

    ########
    # HOG feature output size
    INP_VEC_SIZE = features[list(features.keys())[0]].shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    # Creating the RNN and training
    classifier = RNNClassifier(INP_VEC_SIZE, HIDDEN_SIZE, 1, N_LAYERS, \
                               bidirectional=False, use_gpu=use_gpu)
#    classifier = LSTMModel(INP_VEC_SIZE, HIDDEN_SIZE, 1, N_LAYERS, \
#                           use_gpu=use_gpu)
    if use_gpu:
#        if torch.cuda.device_count() > 1:
#            print("Let's use", torch.cuda.device_count(), "GPUs!")
#            # Run on multiple GPUs in parallel using DataParallel
#            classifier = nn.DataParallel(classifier)
        classifier.cuda(0)

    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.BCELoss()
    # lr decays by 10x every 10 epochs : 1e-3 -> 1e-4 -> 1e-5
    step_lr_scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    start = time.time()
    print("Training for %d epochs..." % N_EPOCHS)
    # Train the model on the features for N_EPOCHS
    train(features, classifier, datasets_loader, optimizer, step_lr_scheduler, \
          criterion, SEQ_SIZE, N_EPOCHS, use_gpu, base_name)
    mod_file = os.path.join(base_name, \
                "GRU_HOG_ep"+str(N_EPOCHS)+"_seq"+str(SEQ_SIZE)+"_Adam.pt")
    classifier.load_state_dict(torch.load(mod_file))
    end = time.time()
    print("Time for training : {}".format(end-start))

    #####################################################################
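    # Note (added assumption): predict() below is expected to run forward-only;
    # if it does not itself disable autograd, wrapping the call in a
    # torch.no_grad() context would reduce memory use during inference.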
    # Test a video or calculate the accuracy using the learned model
    print("Prediction video meta info.")
    print("Sizes : {}".format(val_sizes_main))  # sizes of the videos being predicted
    hlvalDataset = VideoDataset(val_labs_main, val_sizes_main, seq_size=SEQ_SIZE, \
                                is_train_set=False)
    print(hlvalDataset.__len__())

    # Create a DataLoader object and sample batches of examples.
    # These batch samples are used to extract the features from the videos in parallel
    val_loader = DataLoader(dataset=hlvalDataset, batch_size=BATCH_SIZE, shuffle=False)
    #print(len(val_loader.dataset))

    classifier.eval()
    val_keys, predictions = predict(HOGValFeatsPath, val_lst_main, classifier, val_loader, \
                                    use_gpu)

    with open(os.path.join(base_name, "predictions_seq"+str(SEQ_SIZE)+".pkl"), "wb") as fp:
        pickle.dump(predictions, fp)
    with open(os.path.join(base_name, "val_keys_seq"+str(SEQ_SIZE)+".pkl"), "wb") as fp:
        pickle.dump(val_keys, fp)

    #####################################################################
    # [4949, 4369, 4455, 4317, 4452]
    #predictions = [p.cpu() for p in predictions]  # convert to CPU tensor values
    localization_dict = getScoredLocalizations(val_keys, predictions, BATCH_SIZE, \
                                               threshold, seq_threshold)
    #print(localization_dict)

    # Apply filtering
    i = 60  # optimum
    filtered_shots = utils.filter_action_segments(localization_dict, epsilon=i)
    #i = 7
    #filtered_shots = filter_non_action_segments(filtered_shots, epsilon=i)
    # note: the filename hardcodes th0_5 irrespective of the threshold argument
    filt_shots_filename = os.path.join(base_name, "predicted_localizations_HLMainTest_th0_5_filt"\
                                       +str(i)+"_ep"+str(N_EPOCHS)+"_seq"+str(SEQ_SIZE)+".json")
    with open(filt_shots_filename, 'w') as fp:
        json.dump(filtered_shots, fp)
    print("Prediction file {} !!".format(filt_shots_filename))
    tiou = calculate_tIoU(VAL_LABELS, filtered_shots)

    #####################################################################
    # count the no. of parameters in the model
    print("#Parameters : {} ".format(utils.count_parameters(classifier)))
    print("TIoU : {}".format(tiou))
    print(60*'#')
    return tiou
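
if __name__ == '__main__':
    # Minimal usage sketch (added): assumes the module-level path constants
    # (MAIN_DATASET, VAL_DATASET, LABELS, etc.) are configured above. The
    # argument values here are illustrative, not reported settings.
    tiou = main("logs/GRU_HOG", SEQ_SIZE=16, BATCH_SIZE=256, HIDDEN_SIZE=1000,
                N_EPOCHS=30, N_LAYERS=1, threshold=0.5, seq_threshold=0.5,
                use_gpu=torch.cuda.is_available())
    print("Final TIoU : {}".format(tiou))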
# Form dataloaders
# Divide the sample files into training, validation and test sets
train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)
print("No. of Training / Val / Test videos : {} / {} / {}".format(len(train_lst), \
      len(val_lst), len(test_lst)))
print(60 * "-")

# form the names of the label files, which should be at the destination
val_lab = [f + ".json" for f in val_lst]
test_lab = [f + ".json" for f in test_lst]
val_labs = [os.path.join(LABELS, f) for f in val_lab]
val_sizes = [utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in val_lst]
print("Test #VideoFrames : {}".format(val_sizes))

#####################################################################
framesPath = os.path.join(os.getcwd(), data_dir)
# read into a dictionary {vidname: np array, ...}
print("Loading validation/test features from disk...")
valFrames = utils.readAllNumpyFrames(framesPath, val_lst)

#####################################################################
# Load the model
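# Hypothetical sketch of the load step above (mirrors the checkpoint pattern
# used in main(); the filename and the variables INP_VEC_SIZE, HIDDEN_SIZE,
# N_LAYERS, use_gpu and base_name are assumed to be defined in this scope).
model = RNNClassifier(INP_VEC_SIZE, HIDDEN_SIZE, 1, N_LAYERS,
                      bidirectional=False, use_gpu=use_gpu)
if use_gpu:
    model.cuda(0)
mod_file = os.path.join(base_name,
                        "GRU_HOG_ep"+str(N_EPOCHS)+"_seq"+str(SEQ_SIZE)+"_Adam.pt")
model.load_state_dict(torch.load(mod_file))
model.eval()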
# Divide the sample files into training, validation and test sets
train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)
print("No. of Training / Val / Test videos : {} / {} / {}".format(len(train_lst), \
      len(val_lst), len(test_lst)))
print(60*"-")

# form the names of the label files, which should be at the destination
train_lab = [f+".json" for f in train_lst]
val_lab = [f+".json" for f in val_lst]
test_lab = [f+".json" for f in test_lst]

# get complete path lists of label files
tr_labs = [os.path.join(LABELS, f) for f in train_lab]
val_labs = [os.path.join(LABELS, f) for f in val_lab]

sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in train_lst]
val_sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in val_lst]
print("Train #VideoFrames : {}".format(sizes))
print("Test #VideoFrames : {}".format(val_sizes))

# create VideoDataset objects, forming the sequences (using the meta info)
hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set=True)
hlvalDataset = VideoDataset(val_labs, val_sizes, seq_size=SEQ_SIZE, is_train_set=False)

# total number of training examples (clips)
print("No. of Train examples : {} ".format(hlDataset.__len__()))
print("No. of Test examples : {} ".format(hlvalDataset.__len__()))

# Create a DataLoader object and sample batches of examples. (get meta-info)
# These batch samples are used to extract the features from the videos in parallel
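# Continuation sketch (added): forming the loaders from the datasets above,
# mirroring the DataLoader pattern used in main() earlier in this file.
train_loader = DataLoader(dataset=hlDataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=hlvalDataset, batch_size=BATCH_SIZE, shuffle=False)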