Example #1
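
# Assumed module-level imports and project context (not shown in this example);
# the project-local names are inferred from how they are used below:
import os
import time
import json
import pickle

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

# Project-local helpers and path constants assumed from the surrounding repo:
# utils, VideoDataset, RNNClassifier, LSTMModel, train, predict,
# get_main_dataset_files, getScoredLocalizations, calculate_tIoU,
# DATASET, LABELS, MAIN_DATASET, MAIN_LABELS, VAL_DATASET, VAL_LABELS,
# HOGFeatsPath, HOGMainFeatsPath, HOGValFeatsPath
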
def main(base_name, SEQ_SIZE=16, BATCH_SIZE=256, HIDDEN_SIZE=1000, N_EPOCHS=30, \
         N_LAYERS=1, threshold=0.5, seq_threshold=0.5, use_gpu=False):
    """
    Function to read c3d FC7 features and train an RNN on them, evaluate on the 
    validation videos, write the predictions in a JSON, save the trained model and
    losses to pickle. 
    
    Parameters:
    ------
    
    base_name: path to the wts, losses, predictions and log files
    SEQ_SIZE: No. of frames sent to the RNN at a time
    BATCH_SIZE: Depends on GPU memory
    HIDDEN_SIZE: Size of hidden layer in the RNN
    N_EPOCHS: Training iterations (no. of times the training set is seen)
    N_LAYERS: No. of hidden layers in the RNN
    threshold and seq_threshold: threshold values during prediction
    use_gpu: True if training to be done on GPU, False for CPU
    
    """

    if not os.path.exists(base_name):
        os.makedirs(base_name)
    
    seed = 1234
    utils.seed_everything(seed)
    
    print(60*"#")
    
    # Form dataloaders 
    train_lst_main_ext = get_main_dataset_files(MAIN_DATASET)   # with extensions
    train_lst_main = [t.rsplit('.', 1)[0] for t in train_lst_main_ext]   # remove the extension
    val_lst_main_ext = get_main_dataset_files(VAL_DATASET)
    val_lst_main = [t.rsplit('.', 1)[0] for t in val_lst_main_ext]
    
    # Divide the sample files into training, validation and test sets
    train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)
    print("SEQ_SIZE : {}".format(SEQ_SIZE))
    
    # form the names of the label files; these should exist at the label directories
    train_lab = [f+".json" for f in train_lst]
    val_lab = [f+".json" for f in val_lst]
    test_lab = [f+".json" for f in test_lst]
    train_lab_main = [f+".json" for f in train_lst_main]
    val_lab_main = [f+".json" for f in val_lst_main]
    
    # get complete path lists of label files
    # (note: the val_* variables below are built from the test split)
    tr_labs = [os.path.join(LABELS, f) for f in train_lab]
    val_labs = [os.path.join(LABELS, f) for f in test_lab]
    tr_labs_main = [os.path.join(MAIN_LABELS, f) for f in train_lab_main]
    val_labs_main = [os.path.join(VAL_LABELS, f) for f in val_lab_main]
    #####################################################################
    
    sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in train_lst]
    val_sizes = [utils.getNFrames(os.path.join(DATASET, f+".avi")) for f in test_lst]
    sizes_main = [utils.getNFrames(os.path.join(MAIN_DATASET, f)) for f in train_lst_main_ext]
    val_sizes_main = [utils.getNFrames(os.path.join(VAL_DATASET, f)) for f in val_lst_main_ext]
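    # the per-video frame counts above are passed to VideoDataset so that it
    # can enumerate SEQ_SIZE-frame clips per video (assumed from its usage)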
    
    ###########################################################################
    # Merge the training highlights and main dataset variables
    train_lab.extend(train_lab_main)
    tr_labs.extend(tr_labs_main)
    sizes.extend(sizes_main)
    
    print("No. of training videos : {}".format(len(train_lst)))
    
    print("Size : {}".format(sizes))
    hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set=True)
    print(len(hlDataset))
    
    #####################################################################
    
    # Create a DataLoader object and sample batches of examples.
    # These batch samples are used to extract the features from the videos in parallel.
    train_loader = DataLoader(dataset=hlDataset, batch_size=BATCH_SIZE, shuffle=True)
    datasets_loader = {'train': train_loader}       # can have a test loader also
    
    # read into dictionary {vidname: np array, ...}
    print("Loading features from disk...")
    features = utils.readAllPartitionFeatures(HOGFeatsPath, train_lst)
    #HOGfeatures = utils.readAllHOGfeatures(HOGfeaturesPath, train_lst)
    mainFeatures = utils.readAllPartitionFeatures(HOGMainFeatsPath, train_lst_main)
    features.update(mainFeatures)     # Merge dicts
    print(len(train_loader.dataset))
    
    ########
    
    # HOG feature vector size
    INP_VEC_SIZE = features[list(features.keys())[0]].shape[-1] 
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)
    
    # Creating the RNN and training
    classifier = RNNClassifier(INP_VEC_SIZE, HIDDEN_SIZE, 1, N_LAYERS, \
                               bidirectional=False, use_gpu=use_gpu)
#    classifier = LSTMModel(INP_VEC_SIZE, HIDDEN_SIZE, 1, N_LAYERS, \
#                           use_gpu=use_gpu)
    if use_gpu:
#        if torch.cuda.device_count() > 1:
#            print("Let's use", torch.cuda.device_count(), "GPUs!")
#            # Parallely run on multiple GPUs using DataParallel
#            classifier = nn.DataParallel(classifier)
        classifier.cuda(0)

    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    #criterion = nn.CrossEntropyLoss()
    
    criterion = nn.BCELoss()
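    # nn.BCELoss expects probabilities in [0, 1], so RNNClassifier is assumed
    # to apply a sigmoid to its final output (use nn.BCEWithLogitsLoss if the
    # model returns raw logits)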

    step_lr_scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
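    # with lr=1e-3, step_size=10 and gamma=0.1, the learning rate decays as
    # 1e-3 -> 1e-4 -> 1e-5 over the default 30 epochs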
    start = time.time()
    
    print("Training for %d epochs..." % N_EPOCHS)
    # Training the model on the features for N_EPOCHS 
    train(features, classifier, datasets_loader, optimizer, step_lr_scheduler, \
          criterion, SEQ_SIZE, N_EPOCHS, use_gpu, base_name)
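    # train() is assumed to checkpoint the learned weights to the file named
    # below, which are reloaded here before evaluation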
    mod_file = os.path.join(base_name, \
                "GRU_HOG_ep"+str(N_EPOCHS)+"_seq"+str(SEQ_SIZE)+"_Adam.pt")
    classifier.load_state_dict(torch.load(mod_file))    
    end = time.time()
    print("Time for training : {}".format(end-start))
    #####################################################################
    
    # Predict on the main-dataset validation videos using the learned model
    print("Prediction video meta info.")
    print("Size : {}".format(val_sizes_main))
    hlvalDataset = VideoDataset(val_labs_main, val_sizes_main, seq_size=SEQ_SIZE, \
                                is_train_set=False)
    print(len(hlvalDataset))
    
    # Create a DataLoader object and sample batches of examples.
    # These batch samples are used to extract the features from the videos in parallel.
    val_loader = DataLoader(dataset=hlvalDataset, batch_size=BATCH_SIZE, shuffle=False)
    #print(len(val_loader.dataset))

    classifier.eval()
    val_keys, predictions = predict(HOGValFeatsPath, val_lst_main, classifier, val_loader, \
                                    use_gpu)
    
    with open(os.path.join(base_name, "predictions_seq"+str(SEQ_SIZE)+".pkl"), "wb") as fp:
        pickle.dump(predictions, fp)
    
    with open(os.path.join(base_name, "val_keys_seq"+str(SEQ_SIZE)+".pkl"), "wb") as fp:
        pickle.dump(val_keys, fp)

    #####################################################################

    # [4949, 4369, 4455, 4317, 4452]
    #predictions = [p.cpu() for p in predictions]  # convert to CPU tensor values
    localization_dict = getScoredLocalizations(val_keys, predictions, BATCH_SIZE, \
                                         threshold, seq_threshold)
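    # interpretation assumed from the parameter names: `threshold` binarizes
    # the per-frame scores and `seq_threshold` sets the fraction of positive
    # frames needed for a sequence to count as an action segment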

#    print(localization_dict)

    # Apply filtering    
    i = 60  # optimum
    filtered_shots = utils.filter_action_segments(localization_dict, epsilon=i)
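    # epsilon is assumed to be in frames: predicted segments separated by
    # fewer than `i` frames get merged into a single localization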
    #i = 7  # 
    #filtered_shots = filter_non_action_segments(filtered_shots, epsilon=i)
    filt_shots_filename = os.path.join(base_name, "predicted_localizations_HLMainTest_th0_5_filt"\
            +str(i)+"_ep"+str(N_EPOCHS)+"_seq"+str(SEQ_SIZE)+".json")
    with open(filt_shots_filename, 'w') as fp:
        json.dump(filtered_shots, fp)
    print("Prediction file {} !!".format(filt_shots_filename))
    
    tiou = calculate_tIoU(VAL_LABELS, filtered_shots)
    #####################################################################
    # count no. of parameters in the model
    print("#Parameters : {} ".format(utils.count_parameters(classifier)))
    print("TIoU : {}".format(tiou))
    print(60*'#')
    return tiou
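
# A minimal sketch of how main() might be invoked from a driver script; the
# output directory is hypothetical and the dataset/feature path constants are
# wired at module level in the original repo:
if __name__ == '__main__':
    tiou = main("logs/GRU_HOG_seq16", SEQ_SIZE=16, BATCH_SIZE=256,
                HIDDEN_SIZE=1000, N_EPOCHS=30, N_LAYERS=1,
                threshold=0.5, seq_threshold=0.5,
                use_gpu=torch.cuda.is_available())
    print("Final TIoU : {}".format(tiou))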