def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16, STEP=16, nstrokes=-1, N_EPOCHS=25): ''' Extract sequence features from AutoEncoder. Parameters: ----------- DATASET : str path to the video dataset LABELS : str path containing stroke labels CLASS_IDS : str path to txt file defining classes, similar to THUMOS BATCH_SIZE : int size for batch of clips SEQ_SIZE : int no. of frames in a clip (min. 16 for 3D CNN extraction) STEP : int stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ... partition : str 'all' / 'train' / 'test' / 'val' : Videos to be considered nstrokes : int partial extraction of features (do not execute for entire dataset) Returns: -------- trajectories, stroke_names ''' ########################################################################### attn_utils.seed_everything(1234) if not os.path.isdir(log_path): os.makedirs(log_path) # Read the strokes # Divide the highlight dataset files into training, validation and test sets train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET) print("No. of training videos : {}".format(len(train_lst))) # extract_of_features(feat_path, DATASET, LABELS, train_lst, val_lst) features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames) # get matrix of features from dictionary (N, vec_size) vecs = [] for key in sorted(list(features.keys())): vecs.append(features[key]) vecs = np.vstack(vecs) vecs[np.isnan(vecs)] = 0 vecs[np.isinf(vecs)] = 0 #fc7 layer output size (4096) INP_VEC_SIZE = vecs.shape[-1] print("INP_VEC_SIZE = ", INP_VEC_SIZE) km_filepath = os.path.join(log_path, km_filename) # # Uncomment only while training. if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"): km_model = make_codebook(vecs, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model, open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb")) else: # Load from disk, for validation and test sets. 
km_model = pickle.load( open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb')) print("Create numpy one hot representation for train features...") onehot_feats = create_bovw_SA(features, stroke_names_id, km_model) ft_path = os.path.join(log_path, "C" + str(cluster_size) + "_train.pkl") with open(ft_path, "wb") as fp: pickle.dump(onehot_feats, fp) ########################################################################### features_val, stroke_names_id_val = attn_utils.read_feats( feat_path, feat_val, snames_val) print("Create numpy one hot representation for val features...") onehot_feats_val = create_bovw_SA(features_val, stroke_names_id_val, km_model) ft_path_val = os.path.join(log_path, "C" + str(cluster_size) + "_val.pkl") with open(ft_path_val, "wb") as fp: pickle.dump(onehot_feats_val, fp) ########################################################################### features_test, stroke_names_id_test = attn_utils.read_feats( feat_path, feat_test, snames_test) print("Create numpy one hot representation for test features...") onehot_feats_test = create_bovw_SA(features_test, stroke_names_id_test, km_model) ft_path_test = os.path.join(log_path, "C" + str(cluster_size) + "_test.pkl") with open(ft_path_test, "wb") as fp: pickle.dump(onehot_feats_test, fp) ########################################################################### # Create a Dataset train_dataset = StrokeFeaturePairsDataset(ft_path, train_lst, DATASET, LABELS, CLASS_IDS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2, step_between_clips=STEP, train=True) val_dataset = StrokeFeaturePairsDataset(ft_path_val, val_lst, DATASET, LABELS, CLASS_IDS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2, step_between_clips=STEP, train=False) # get labels labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE) # # created weighted Sampler for class imbalance # samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys, labs_values, # train_lst) # sampler = WeightedRandomSampler(samples_weight, len(samples_weight)) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True) # sampler=sampler, worker_init_fn=np.random.seed(12)) val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False) data_loaders = {"train": train_loader, "test": val_loader} num_classes = len(list(set(labs_values))) ########################################################################### # load model and set loss function ntokens = cluster_size # the size of vocabulary emsize = 200 # embedding dimension nhid = 200 # the dimension of the feedforward network model in nn.TransformerEncoder nlayers = 2 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder nhead = 2 # the number of heads in the multiheadattention models dropout = 0.2 # the dropout value model = tt.TransformerModelSA(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device) # Setup the loss fxn # criterion = nn.CrossEntropyLoss() criterion = nn.MSELoss() model = model.to(device) # print("Params to learn:") params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad == True: params_to_update.append(param) print("\t", name) # # Observe that all parameters are being optimized ## optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # # # Decay LR by a factor of 0.1 every 7 epochs # scheduler = StepLR(optimizer, step_size=10, gamma=0.1) lr = 5.0 # learning rate optimizer = 
torch.optim.SGD(model.parameters(), lr=lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2.0, gamma=0.95) ########################################################################### # Training the model # start = time.time() # # model = train_model(features, stroke_names_id, model, data_loaders, criterion, # optimizer, scheduler, labs_keys, labs_values, # num_epochs=N_EPOCHS) # # end = time.time() # ## # save the best performing model # save_model_checkpoint(log_path, model, N_EPOCHS, # "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD") # Load model checkpoints model = load_weights( log_path, model, N_EPOCHS, "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD") # print("Total Execution time for {} epoch : {}".format(N_EPOCHS, (end-start))) ########################################################################### ########################################################################### # Extract attention model features if not os.path.isfile(os.path.join(log_path, "trans_feats.pkl")): if not os.path.exists(log_path): os.makedirs(log_path) # # Extract Grid OF / HOOF features {mth = 2, and vary nbins} print("Training extraction ... ") feats_dict, stroke_names = extract_trans_feats(model, DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, SEQ_SIZE - 1, partition='train', nstrokes=nstrokes, base_name=log_path) with open(os.path.join(log_path, "trans_feats.pkl"), "wb") as fp: pickle.dump(feats_dict, fp) with open(os.path.join(log_path, "trans_snames.pkl"), "wb") as fp: pickle.dump(stroke_names, fp) if not os.path.isfile(os.path.join(log_path, "trans_feats_val.pkl")): print("Validation extraction ....") feats_dict_val, stroke_names_val = extract_trans_feats( model, DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, SEQ_SIZE - 1, partition='val', nstrokes=nstrokes, base_name=log_path) with open(os.path.join(log_path, "trans_feats_val.pkl"), "wb") as fp: pickle.dump(feats_dict_val, fp) with open(os.path.join(log_path, "trans_snames_val.pkl"), "wb") as fp: pickle.dump(stroke_names_val, fp) if not os.path.isfile(os.path.join(log_path, "trans_feats_test.pkl")): print("Testing extraction ....") feats_dict_val, stroke_names_val = extract_trans_feats( model, DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, SEQ_SIZE - 1, partition='test', nstrokes=nstrokes, base_name=log_path) with open(os.path.join(log_path, "trans_feats_test.pkl"), "wb") as fp: pickle.dump(feats_dict_val, fp) with open(os.path.join(log_path, "trans_snames_test.pkl"), "wb") as fp: pickle.dump(stroke_names_val, fp) # call count_paramters(model) for displaying total no. of parameters print("#Parameters : {} ".format(autoenc_utils.count_parameters(model))) return 0
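###############################################################################
# Illustrative sketch (assumption, not part of the pipeline above): how a
# fitted KMeans codebook can map per-frame feature vectors to the one-hot
# "visual word" sequences that create_bovw_SA() is assumed to produce.
# The helper name and exact return layout are hypothetical.
import numpy as np


def onehot_word_sequence_sketch(features, stroke_names, km_model):
    """features: {stroke_name: (T, D) array}; km_model: fitted sklearn KMeans.
    Returns {stroke_name: (T, n_clusters) one-hot array}."""
    n_clusters = km_model.cluster_centers_.shape[0]
    onehot = {}
    for name in stroke_names:
        words = km_model.predict(features[name])   # (T,) nearest-centroid ids
        onehot[name] = np.eye(n_clusters)[words]   # one-hot encode each frame
    return onehot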
def main(base_name, nbins=10, ft_type='2dcnn', cluster_size=10): """ Function to extract orientation features and find the directions of strokes, using LDA model/clustering and evaluate on three cluster analysis on highlights. The videos can be visualized by writing trimmed class videos into their respective classes. Parameters: ------ base_name: path to the wts, losses, predictions and log files use_gpu: True if training to be done on GPU, False for CPU """ seed = 1234 print(60 * "#") ##################################################################### # Form dataloaders # train_lst_main_ext = get_main_dataset_files(MAIN_DATASET) #with extensions # train_lst_main = [t.rsplit('.', 1)[0] for t in train_lst_main_ext] # remove the extension # val_lst_main_ext = get_main_dataset_files(VAL_DATASET) # val_lst_main = [t.rsplit('.', 1)[0] for t in val_lst_main_ext] # Divide the samples files into training set, validation and test sets train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET) #print("c3dWinSize : {}".format(c3dWinSize)) # form the names of the list of label files, should be at destination train_lab = [f + ".json" for f in train_lst] val_lab = [f + ".json" for f in val_lst] test_lab = [f + ".json" for f in test_lst] # train_lab_main = [f+".json" for f in train_lst_main] # val_lab_main = [f+".json" for f in val_lst_main] # get complete path lists of label files tr_labs = [os.path.join(LABELS, f) for f in train_lab] val_labs = [os.path.join(LABELS, f) for f in val_lab] # tr_labs_main = [os.path.join(MAIN_LABELS, f) for f in train_lab_main] # val_labs_main = [os.path.join(VAL_LABELS, f) for f in val_lab_main] ##################################################################### sizes = [ utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in train_lst ] val_sizes = [ utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in val_lst ] # sizes_main = [utils.getNFrames(os.path.join(MAIN_DATASET, f)) for f in train_lst_main_ext] # val_sizes_main = [utils.getNFrames(os.path.join(VAL_DATASET, f)) for f in val_lst_main_ext] ########################################################################### # Merge the training highlights and main dataset variables # train_lab.extend(train_lab_main) # tr_labs.extend(tr_labs_main) # sizes.extend(sizes_main) print("No. 
of training videos : {}".format(len(train_lst))) print("Size : {}".format(sizes)) # hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set = True) # print(hlDataset.__len__()) ##################################################################### # Feature Extraction : (GRID OF / HOOF / 2D CNN / 3DCNN / IDT) print("Feature Type : {} :: nClusters : {} ".format(ft_type, cluster_size)) ##################################################################### # read into dictionary {vidname: np array, ...} BATCH_SIZE, SEQ_SIZE, STEP = 16, 16, 1 print("Loading features from disk...") if not os.path.exists(base_name): os.makedirs(base_name) features, strokes_name_id = extract_feats(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, STEP, extractor=ft_type, model_path=model_path, nclasses=NUM_TOPICS, part='train') with open(os.path.join(base_name, ft_type + "_feats_train.pkl"), "wb") as fp: pickle.dump(features, fp) with open(os.path.join(base_name, ft_type + "_snames_train.pkl"), "wb") as fp: pickle.dump(strokes_name_id, fp) with open(os.path.join(base_name, ft_type + "_feats_train.pkl"), "rb") as fp: features = pickle.load(fp) with open(os.path.join(base_name, ft_type + "_snames_train.pkl"), "rb") as fp: strokes_name_id = pickle.load(fp) ##################################################################### # get matrix of features from dictionary (N, vec_size) vecs = [] for key in sorted(list(features.keys())): vecs.append(features[key]) vecs = np.vstack(vecs) vecs[np.isnan(vecs)] = 0 vecs[np.isinf(vecs)] = 0 #fc7 layer output size (4096) INP_VEC_SIZE = vecs.shape[-1] print("INP_VEC_SIZE = ", INP_VEC_SIZE) km_filepath = os.path.join(base_name, km_filename) # # Uncomment only while training. if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"): km_model = make_codebook(vecs, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model, open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb")) else: # Load from disk, for validation and test sets. km_model = pickle.load( open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb')) ########################################################################### # Form the training dataset for supervised classification # Assign the words (flow frames) to their closest cluster centres and count the # frequency for each document(video). Create IDF bow dataframe by weighting # df_train is (nVids, 50) for magnitude, with index as videonames # print("Create a dataframe for C3D FC7 features...") # df_train_c3d, words_train = create_bovw_c3d_traindf(features, \ # strokes_name_id, km_model, c3dWinSize) print("Create a dataframe for HOOF features...") df_train, words_train = create_bovw_df(features, strokes_name_id, km_model,\ base_name, "train") # read the stroke annotation labels from text file. 
vids_list = list(df_train.index) labs_keys, labs_values = get_cluster_labels(ANNOTATION_FILE) if min(labs_values) == 1: labs_values = [l - 1 for l in labs_values] labs_keys = [k.replace('.avi', '') for k in labs_keys] train_labels = np.array( [labs_values[labs_keys.index(v)] for v in vids_list]) ########################################################################### # apply_clustering(df_train, DATASET, LABELS, ANNOTATION_FILE, base_name) ########################################################################### # print("Training stroke labels : ") # print(train_labels) # print(train_labels.shape) # concat dataframe to contain features and corresponding labels #df_train = pd.concat([df_train_mag, labs_df], axis=1) ########################################################################### # Train SVM clf = LinearSVC(verbose=False, random_state=124, max_iter=3000) clf.fit(df_train, train_labels) print("Training Complete.") ########################################################################### # # Train a classifier on the features. # print("LDA execution !!! ") # #Run LDA # # # Get list of lists. Each sublist contains video cluster strIDs (words). # # Eg. [["39","29","39","39","0", ...], ...] # doc_clean = [doc.split() for doc in words_train] # #print(doc_clean) # diction=corpora.Dictionary(doc_clean) # Form a dictionary # print("printing dictionary after corp {} ".format(diction)) # doc_term_matrix = [diction.doc2bow(doc) for doc in doc_clean] # #dictionary = corpora.Dictionary(diction) # # # Inference using the data. # ldamodel_obj = gensim.models.ldamodel.LdaModel(doc_term_matrix, \ # num_topics = NUM_TOPICS, id2word=diction, passes=10, \ # random_state=seed) ## ldamodel_obj = gensim.models.ldaseqmodel.LdaSeqModel(doc_term_matrix, \ ## num_topics=3, time_slice=[351]) ## ldamodel_obj = gensim.models.LsiModel(doc_term_matrix, num_topics=3, \ ## id2word = diction) # # print("training complete saving to disk ") # #save model to disk # joblib.dump(ldamodel_obj, os.path.join(base_name, mnb_modelname+".pkl")) # # # Load trained model from disk # ldamodel_obj = joblib.load(os.path.join(base_name, mnb_modelname+".pkl")) # # # Print all the topics # for i,topic in enumerate(ldamodel_obj.print_topics(num_topics=3, num_words=10)): # #print("topic is {}".format(topic)) # word = topic[1].split("+") # print("{} : {} ".format(topic[0], word)) # # # actions are rows and discovered topics are columns # topic_action_map = np.zeros((real_topic, NUM_TOPICS)) # # predicted_labels = [] # #vids_list = list(df_train_mag.index) # for j,vname in enumerate(vids_list): # label_vid = train_labels[j] # # sort the tuples with descending topic probabilities # for index, score in sorted(ldamodel_obj[doc_term_matrix[j]], key=lambda tup: -1*tup[1]): ## for index in [ldamodel_obj[doc_term_matrix[j]].argmax(axis=0)]: # # print("Score is : {} of Topic: {}".format(score,index)) # #if score>0.5: # # topic_action_map[label_vid][index]+=1 ## score = ldamodel_obj[doc_term_matrix[j]][index] # topic_action_map[label_vid][index]+=score # predicted_labels.append(index) # break # print("Training Time : topic action mapping is : ") # print("topic0 topic1 topic2") # #coloumn are topics and rows are labels # print(topic_action_map) # acc_values_tr, perm_tuples_tr, gt_list, pred_list = calculate_accuracy(train_labels,\ # predicted_labels) # acc_perc = [sum(k)/len(predicted_labels) for k in acc_values_tr] # # best_indx = acc_perc.index(max(acc_perc)) # print("Max Acc. 
: ", max(acc_perc)) # print("Acc values : ", acc_perc) # print("Acc values : ", acc_values_tr) # print("perm_tuples : ", perm_tuples_tr) #model_ang = joblib.load(os.path.join(destpath, mnb_modelname+"_ang.pkl")) ################################################################################## # Evaluation on validation set print("Validation phase ....") if not os.path.isfile(os.path.join(base_name, ft_type + "_feats_val.pkl")): # features_val, strokes_name_id_val = select_trimmed_feats(c3dFC7FeatsPath, \ # LABELS, val_lst, c3dWinSize) features_val, strokes_name_id_val = extract_feats( DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, STEP, extractor=ft_type, model_path=model_path, nclasses=NUM_TOPICS, part='val') with open(os.path.join(base_name, ft_type + "_feats_val.pkl"), "wb") as fp: pickle.dump(features_val, fp) with open(os.path.join(base_name, ft_type + "_snames_val.pkl"), "wb") as fp: pickle.dump(strokes_name_id_val, fp) else: with open(os.path.join(base_name, ft_type + "_feats_val.pkl"), "rb") as fp: features_val = pickle.load(fp) with open(os.path.join(base_name, ft_type + "_snames_val.pkl"), "rb") as fp: strokes_name_id_val = pickle.load(fp) print("Create dataframe BOVW validation set...") df_val_hoof, words_val = create_bovw_df(features_val, strokes_name_id_val, \ km_model, base_name, "val") vids_list_val = list(df_val_hoof.index) val_labels = np.array( [labs_values[labs_keys.index(v)] for v in vids_list_val]) # topic_action_map_val = np.zeros((real_topic, NUM_TOPICS)) # doc_clean_val = [doc.split() for doc in words_val] # # Creating Dictionary for val set words # diction_val=corpora.Dictionary(doc_clean_val) # # doc_term_matrix_val = [diction_val.doc2bow(doc) for doc in doc_clean_val] # predicted_label_val = [] # for j,vname in enumerate(vids_list_val): # label_vid = val_labels[j] # for index, score in sorted(ldamodel_obj[doc_term_matrix_val[j]], key=lambda tup: -1*tup[1]): ## for index in [ldamodel_obj[doc_term_matrix[j]].argmax(axis=0)]: ## score = ldamodel_obj[doc_term_matrix[j]][index] # # print("Score is : {} of Topic: {}".format(score,index)) # #if score>0.5: # # topic_action_map_val[label_vid][index]+=1 # topic_action_map_val[label_vid][index]+=score # predicted_label_val.append(index) # break # # print(topic_action_map_val) # labs_df = pd.DataFrame(labels, index=vids_list, columns=['label']) # print("Evaluating on the validation set...") # evaluate(model_mag, df_test_mag, labs_df) # Find maximum permutation accuracy using predicted_label_val and label_val # acc_values, perm_tuples, gt_list, pred_list = calculate_accuracy(val_labels, \ # predicted_label_val) # acc_perc = [sum(k)/len(predicted_label_val) for k in acc_values] # # best_indx = acc_perc.index(max(acc_perc)) # print("Max Acc. : ", max(acc_perc)) # print("Acc values : ", acc_perc) # print("Acc values : ", acc_values) # print("perm_tuples : ", perm_tuples) ########################################################################### # Evaluate the BOW classifier (SVM) confusion_mat = np.zeros((NUM_TOPICS, NUM_TOPICS)) pred = clf.predict(df_val_hoof) correct = 0 for i, true_val in enumerate(val_labels): if pred[i] == true_val: correct += 1 confusion_mat[pred[i], true_val] += 1 print('#' * 30) print("BOW Classification Results:") print("%d/%d Correct" % (correct, len(pred))) print("Accuracy = {} ".format(float(correct) / len(pred))) print("Confusion matrix") print(confusion_mat) return (float(correct) / len(pred))
def main(base_name, traj_len=None, cluster_size=10): """ Function to read IDT features, form BOW based model after clustering and evaluate on 3/5 cluster analysis of highlights dataset. The videos can be visualized by writing trimmed class videos into their respective classes. Parameters: ------ base_name: path to the wts, losses, predictions and log files """ seed = 1234 np.random.seed(seed) print(60 * "#") ##################################################################### # Divide the sample files into training set, validation and test sets train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET) # form the names of the list of label files, should be at destination train_lab = [f + ".json" for f in train_lst] val_lab = [f + ".json" for f in val_lst] test_lab = [f + ".json" for f in test_lst] # get complete path lists of label files tr_labs = [os.path.join(LABELS, f) for f in train_lab] val_labs = [os.path.join(LABELS, f) for f in val_lab] ##################################################################### sizes = [ utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in train_lst ] val_sizes = [ utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in val_lst ] ########################################################################### print("No. of training videos : {}".format(len(train_lst))) print("Size : {}".format(sizes)) # hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set = True) ##################################################################### # Feature Extraction : (IDT) # Get feats for only the training videos. Get ordered histograms of freq print("Trajectory Length : {}, nClusters : {} ".format( traj_len, cluster_size)) ##################################################################### # read into dictionary {vidname: np array, ...} print("Loading features from disk...") # get Nx4096 numpy matrix with columns as features and rows as window placement features if not os.path.exists(base_name): os.makedirs(base_name) # # Read IDT features {with trajectory length = traj_len} features, strokes_name_id = read_partition_feats( DATASET, LABELS, IDT_FEATS + "_TrajLen" + str(traj_len), train_lst, traj_len) with open( os.path.join(base_name, "idt_feats_traj" + str(traj_len) + ".pkl"), "wb") as fp: pickle.dump(features, fp) with open( os.path.join(base_name, "idt_snames_traj" + str(traj_len) + ".pkl"), "wb") as fp: pickle.dump(strokes_name_id, fp) with open( os.path.join(base_name, "idt_feats_traj" + str(traj_len) + ".pkl"), "rb") as fp: features = pickle.load(fp) with open( os.path.join(base_name, "idt_snames_traj" + str(traj_len) + ".pkl"), "rb") as fp: strokes_name_id = pickle.load(fp) ##################################################################### # get small sample of the IDT features and form a matrix (N, vec_size) vecs = [] for key in sorted(list(features.keys())): vecs.append(features[key]) vecs = np.vstack(vecs) vecs[np.isnan(vecs)] = 0 vecs[np.isinf(vecs)] = 0 # sample points for clustering if vecs.shape[0] > MAX_SAMPLES: vecs = vecs[ np.random.choice(vecs.shape[0], MAX_SAMPLES, replace=False), :] #fc7 layer output size (4096) INP_VEC_SIZE = vecs.shape[-1] print("INP_VEC_SIZE = ", INP_VEC_SIZE) km_filepath = os.path.join(base_name, km_filename) # # Uncomment only while training. 
if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"): km_model = make_codebook(vecs, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model, open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb")) else: # Load from disk, for validation and test sets. km_model = pickle.load( open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb')) ########################################################################### # Form the training dataset for supervised classification # Assign the words (flow frames) to their closest cluster centres and count the # frequency for each document(video). Create IDF bow dataframe by weighting # df_train is (nVids, 50) for magnitude, with index as videonames print("Create a dataframe for HOOF features...") df_train, words_train = create_bovw_df(features, strokes_name_id, km_model,\ base_name, "train") # read the stroke annotation labels from text file. vids_list = list(df_train.index) labs_keys, labs_values = get_cluster_labels(ANNOTATION_FILE) if min(labs_values) == 1: labs_values = [l - 1 for l in labs_values] labs_keys = [k.replace('.avi', '') for k in labs_keys] train_labels = np.array( [labs_values[labs_keys.index(v)] for v in vids_list]) ########################################################################### # apply_clustering(df_train, DATASET, LABELS, ANNOTATION_FILE, base_name) ########################################################################### ########################################################################### # Train SVM clf = LinearSVC(verbose=False, random_state=124, max_iter=3000) clf.fit(df_train, train_labels) print("Training Complete.") ########################################################################### # # Train a classifier on the features. ################################################################################## # Evaluation on validation set print("Validation phase ....") if not os.path.isfile( os.path.join(base_name, "idt_feats_val_traj" + str(traj_len) + ".pkl")): features_val, strokes_name_id_val = read_partition_feats(DATASET, LABELS, \ IDT_FEATS+"_TrajLen"+str(traj_len), val_lst, traj_len) with open( os.path.join(base_name, "idt_feats_val_traj" + str(traj_len) + ".pkl"), "wb") as fp: pickle.dump(features_val, fp) with open( os.path.join(base_name, "idt_snames_val_traj" + str(traj_len) + ".pkl"), "wb") as fp: pickle.dump(strokes_name_id_val, fp) else: with open( os.path.join(base_name, "idt_feats_val_traj" + str(traj_len) + ".pkl"), "rb") as fp: features_val = pickle.load(fp) with open( os.path.join(base_name, "idt_snames_val_traj" + str(traj_len) + ".pkl"), "rb") as fp: strokes_name_id_val = pickle.load(fp) print("Create dataframe BOVW validation set...") df_val_hoof, words_val = create_bovw_df(features_val, strokes_name_id_val, \ km_model, base_name, "val") vids_list_val = list(df_val_hoof.index) val_labels = np.array( [labs_values[labs_keys.index(v)] for v in vids_list_val]) # labs_df = pd.DataFrame(labels, index=vids_list, columns=['label']) print("Evaluating on the validation set...") # evaluate(model_mag, df_test_mag, labs_df) # Find maximum permutation accuracy using predicted_label_val and label_val # acc_values, perm_tuples, gt_list, pred_list = calculate_accuracy(val_labels, \ # predicted_label_val) # acc_perc = [sum(k)/len(predicted_label_val) for k in acc_values] # # best_indx = acc_perc.index(max(acc_perc)) # print("Max Acc. 
: ", max(acc_perc)) # print("Acc values : ", acc_perc) # print("Acc values : ", acc_values) # print("perm_tuples : ", perm_tuples) ########################################################################### # Evaluate the BOW classifier (SVM) confusion_mat = np.zeros((NUM_TOPICS, NUM_TOPICS)) pred = clf.predict(df_val_hoof) correct = 0 for i, true_val in enumerate(val_labels): if pred[i] == true_val: correct += 1 confusion_mat[pred[i], true_val] += 1 print('#' * 30) print("BOW Classification Results:") print("%d/%d Correct" % (correct, len(pred))) print("Accuracy = {} ".format(float(correct) / len(pred))) print("Confusion matrix") print(confusion_mat) return (float(correct) / len(pred))
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16, STEP=16, nstrokes=-1, N_EPOCHS=25): ''' Extract sequence features from AutoEncoder. Parameters: ----------- DATASET : str path to the video dataset LABELS : str path containing stroke labels CLASS_IDS : str path to txt file defining classes, similar to THUMOS BATCH_SIZE : int size for batch of clips SEQ_SIZE : int no. of frames in a clip (min. 16 for 3D CNN extraction) STEP : int stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ... partition : str 'all' / 'train' / 'test' / 'val' : Videos to be considered nstrokes : int partial extraction of features (do not execute for entire dataset) Returns: -------- trajectories, stroke_names ''' ########################################################################### attn_utils.seed_everything(1234) if not os.path.isdir(log_path): os.makedirs(log_path) # Read the strokes # Divide the highlight dataset files into training, validation and test sets train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET) print("No. of training videos : {}".format(len(train_lst))) features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames) features2, stroke_names_id2 = attn_utils.read_feats( feat_path2, feat2, snames2) # get matrix of features from dictionary (N, vec_size) vecs, vecs2 = [], [] for key in stroke_names_id: vecs.append(features[key]) vecs2.append(features2[key]) vecs, vecs2 = np.vstack(vecs), np.vstack(vecs2) vecs[np.isnan(vecs)] = 0 vecs[np.isinf(vecs)] = 0 vecs2[np.isnan(vecs2)] = 0 vecs2[np.isinf(vecs2)] = 0 # vecs = traj_utils.apply_PCA(vecs, 10) # vecs2 = traj_utils.apply_PCA(vecs2, 10) # form_lower_dim_dict(features, stroke_names_id, vecs) # form_lower_dim_dict(features2, stroke_names_id2, vecs2) #fc7 layer output size (4096) INP_VEC_SIZE, INP_VEC_SIZE2 = vecs.shape[-1], vecs2.shape[-1] print("INP_VEC_SIZE = {} : INP_VEC_SIZE2 = {}".format( INP_VEC_SIZE, INP_VEC_SIZE2)) km_filepath = os.path.join(log_path, km_filename) # Feats1 if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"): km_model = make_codebook(vecs, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model, open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb")) else: # Load from disk, for validation and test sets. km_model = pickle.load( open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb')) # Feats2 if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + "_2.pkl"): km_model2 = make_codebook(vecs2, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model2, open(km_filepath + "_C" + str(cluster_size) + "_2.pkl", "wb")) else: # Load from disk, for validation and test sets. 
km_model2 = pickle.load( open(km_filepath + "_C" + str(cluster_size) + "_2.pkl", 'rb')) print("Create numpy one hot representation for train features...") onehot_feats = create_bovw_SA(features, stroke_names_id, km_model) print("Create numpy one hot representation for train features2...") onehot_feats2 = create_bovw_SA(features2, stroke_names_id2, km_model2) ft_path = os.path.join(log_path, "onehot_C" + str(cluster_size) + "_train.pkl") ft_path2 = os.path.join(log_path, "onehot_C" + str(cluster_size) + "_train_2.pkl") with open(ft_path, "wb") as fp: pickle.dump(onehot_feats, fp) with open(ft_path2, "wb") as fp: pickle.dump(onehot_feats2, fp) ########################################################################### features_val, stroke_names_id_val = attn_utils.read_feats( feat_path, feat_val, snames_val) features_val2, stroke_names_id_val2 = attn_utils.read_feats( feat_path2, feat_val2, snames_val2) # # get matrix of features from dictionary (N, vec_size) # vecs, vecs2 = [], [] # for key in stroke_names_id: # vecs.append(features[key]) # vecs2.append(features2[key]) # vecs, vecs2 = np.vstack(vecs), np.vstack(vecs2) # # vecs[np.isnan(vecs)] = 0 # vecs[np.isinf(vecs)] = 0 # vecs2[np.isnan(vecs2)] = 0 # vecs2[np.isinf(vecs2)] = 0 # # form_lower_dim_dict(features, stroke_names_id, vecs) # form_lower_dim_dict(features2, stroke_names_id2, vecs2) print("Create numpy one hot representation for val features...") onehot_feats_val = create_bovw_SA(features_val, stroke_names_id_val, km_model) print("Create numpy one hot representation for val features2...") onehot_feats_val2 = create_bovw_SA(features_val2, stroke_names_id_val2, km_model2) ft_path_val = os.path.join(log_path, "onehot_C" + str(cluster_size) + "_val.pkl") ft_path_val2 = os.path.join(log_path, "onehot_C" + str(cluster_size) + "_val_2.pkl") with open(ft_path_val, "wb") as fp: pickle.dump(onehot_feats_val, fp) with open(ft_path_val2, "wb") as fp: pickle.dump(onehot_feats_val2, fp) ########################################################################### # Create a Dataset # ft_path = os.path.join(base_name, ft_dir, feat) train_dataset = StrokeFeatureSequencesDataset(ft_path, ft_path2, train_lst, DATASET, LABELS, CLASS_IDS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2, step_between_clips=STEP, train=True) # ft_path_val = os.path.join(base_name, ft_dir, feat_val) val_dataset = StrokeFeatureSequencesDataset(ft_path_val, ft_path_val2, val_lst, DATASET, LABELS, CLASS_IDS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2, step_between_clips=STEP, train=False) # get labels labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE) # created weighted Sampler for class imbalance samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys, labs_values, train_lst) sampler = WeightedRandomSampler(samples_weight, len(samples_weight)) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, sampler=sampler, worker_init_fn=np.random.seed(12)) val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False) data_loaders = {"train": train_loader, "test": val_loader} num_classes = len(list(set(labs_values))) # vis_clusters(features, onehot_feats, stroke_names_id, 2, DATASET, log_path) ########################################################################### # load model and set loss function model = attn_model.GRUBoWMultiStreamClassifier(INPUT_SIZE, INPUT_SIZE, HIDDEN_SIZE, HIDDEN_SIZE, num_classes, N_LAYERS, bidirectional) # model = load_weights(base_name, model, N_EPOCHS, "Adam") 
# for ft in model.parameters(): # ft.requires_grad = False # Setup the loss fxn criterion = nn.CrossEntropyLoss() model = model.to(device) print("Params to learn:") params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad == True: params_to_update.append(param) # print("\t",name) # Observe that all parameters are being optimized optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Decay LR by a factor of 0.1 every 7 epochs exp_lr_scheduler = StepLR(optimizer_ft, step_size=15, gamma=0.1) ########################################################################### # Training the model start = time.time() model = train_model(features, stroke_names_id, model, data_loaders, criterion, optimizer_ft, exp_lr_scheduler, labs_keys, labs_values, num_epochs=N_EPOCHS) end = time.time() # # # save the best performing model attn_utils.save_model_checkpoint( log_path, model, N_EPOCHS, "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD") # Load model checkpoints model = attn_utils.load_weights( log_path, model, N_EPOCHS, "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD") print("Total Execution time for {} epoch : {}".format( N_EPOCHS, (end - start))) # ########################################################################### acc = predict(features_val, stroke_names_id_val, model, data_loaders, labs_keys, labs_values, SEQ_SIZE, phase='test') # call count_paramters(model) for displaying total no. of parameters print("#Parameters : {} ".format(autoenc_utils.count_parameters(model))) return acc
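###############################################################################
# Illustrative sketch (assumption): inverse-class-frequency sample weights of
# the kind attn_utils.get_sample_weights() is assumed to return for the
# WeightedRandomSampler above. The helper name is hypothetical.
import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler


def sample_weights_sketch(sample_labels):
    """sample_labels: iterable of integer class ids, one per training sample.
    Returns a 1-D double tensor of per-sample weights (1 / class frequency)."""
    labels = np.asarray(list(sample_labels))
    class_counts = np.bincount(labels).astype(float)
    return torch.as_tensor(1.0 / class_counts[labels], dtype=torch.double)


# usage with hypothetical labels:
# weights = sample_weights_sketch([0, 0, 1, 2, 2, 2])
# sampler = WeightedRandomSampler(weights, num_samples=len(weights))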
def main(DATASET, LABELS, SEQ_SIZE=16, STEP=16, nstrokes=-1):
    '''
    Form a BoVW representation over the extracted features and train/evaluate
    a linear SVM on the HMDB51 folds.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)

    Returns:
    --------
    float : classification accuracy on the HMDB51 test fold
    '''
    ###########################################################################
    # attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    hmdb51_train = hmdb.HMDB51(DATASET, LABELS, SEQ_SIZE,
                               step_between_clips=STEP, fold=1, train=True,
                               transform=None)
    hmdb51_test = hmdb.HMDB51(DATASET, LABELS, SEQ_SIZE,
                              step_between_clips=STEP, fold=1, train=False,
                              transform=None)

    # # Extracting training features
    # extract_of_features(feat_path, hmdb51_train.video_list, hmdb51_train.fold, hmdb51_train.train)
    # # Extracting testing features
    # extract_of_features(feat_path, hmdb51_test.video_list, hmdb51_test.fold, hmdb51_test.train)

    features, stroke_names_id = read_feats(feat_path, feat, snames)

    # get matrix of features from dictionary (N, vec_size)
    vecs = []
    for key in sorted(list(features.keys())):
        vecs.append(features[key])
    vecs = np.vstack(vecs)
    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0

    # fc7 layer output size (4096)
    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    km_filepath = os.path.join(log_path, km_filename)
    # # Uncomment only while training.
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"):
        km_model = make_codebook(vecs, cluster_size)  # , model_type='gmm')
        # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model,
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb'))

    df_train, words_train = create_bovw_df(features, stroke_names_id, km_model,
                                           log_path, 'train')

    # read the stroke annotation labels from text file.
    vids_list = list(df_train.index)
    labs_keys = hmdb51_train.video_list
    labs_indx = hmdb51_train.indices
    labs_values = [hmdb51_train.samples[i][1] for i in labs_indx]
    train_labels = np.array(
        [labs_values[labs_keys.index(v)] for v in vids_list])
    num_classes = len(list(set(labs_values)))
    print("Training dataframe formed.")

    ###########################################################################
    # Train a classifier on the features.
    ###########################################################################
    # Train SVM
    clf = LinearSVC(verbose=False, random_state=124, max_iter=3000)
    clf.fit(df_train, train_labels)
    print("Training Complete.")

    ###########################################################################
    # print("Training complete. Saving to disk.")
    # Save model to disk
    joblib.dump(clf, os.path.join(log_path, "clf.pkl"))
    # Load trained model from disk
    clf = joblib.load(os.path.join(log_path, "clf.pkl"))

    # Train a classifier on both the features.
#print("Training with SVM") #df_train = pd.concat([df_train_mag, df_train_ang], axis=1) #clf_both = SVC(kernel="linear",verbose=True) #clf_both = LinearSVC(verbose=True, random_state=123, max_iter=2000) #clf_both.fit(df_train, labels) #print("Training with SVM (ang)") #clf_ang = SVC(kernel="linear",verbose=True) #clf_ang.fit(df_train_ang, labels) ########################################################################## features_test, stroke_names_id_test = read_feats(feat_path, feat_test, snames_test) print("Create dataframe BOVW validation set...") df_test_hoof, words_test = create_bovw_df(features_test, stroke_names_id_test, \ km_model, log_path, "test") vids_list_test = list(df_test_hoof.index) labs_keys = hmdb51_test.video_list labs_indx = hmdb51_test.indices labs_values = [hmdb51_test.samples[i][1] for i in labs_indx] test_labels = np.array( [labs_values[labs_keys.index(v)] for v in vids_list_test]) ########################################################################### # Evaluate the BOW classifier (SVM) confusion_mat = np.zeros((num_classes, num_classes)) pred = clf.predict(df_test_hoof) correct = 0 for i, true_val in enumerate(test_labels): if pred[i] == true_val: correct += 1 confusion_mat[pred[i], true_val] += 1 print('#' * 30) print("BOW Classification Results:") print("%d/%d Correct" % (correct, len(pred))) print("Accuracy = {} ".format(float(correct) / len(pred))) print("Confusion matrix") print(confusion_mat) return (float(correct) / len(pred))
with open(os.path.join(base_name, "feats.pkl"), "rb") as fp:
    features = pickle.load(fp)

#####################################################################
# get matrix of features from dictionary (N, vec_size)
vecs = []
for key in sorted(list(features.keys())):
    vecs.append(features[key])
vecs = np.vstack(vecs)

# fc7 layer output size (4096)
INP_VEC_SIZE = vecs.shape[-1]
print("INP_VEC_SIZE = ", INP_VEC_SIZE)

km_filepath = os.path.join(base_name, km_filename + ".pkl")
# # Uncomment only while training.
km_model = make_codebook(vecs, cluster_size)
# # Save to disk, if training is performed
# print("Writing the KMeans models to disk...")
pickle.dump(km_model, open(km_filepath, "wb"))
# Load from disk, for validation and test sets.
km_model = pickle.load(open(km_filepath, 'rb'))

###########################################################################
# Form the training dataset for supervised classification
# Assign the words (flow frames) to their closest cluster centres and count the
# frequency for each document (video). Create IDF bow dataframe by weighting
# df_train is (nVids, 50) for magnitude, with index as videonames
# print("Create a dataframe for C3D FC7 features...")
# df_train_c3d, words_train = create_bovw_c3d_traindf(features, \
#                                strokes_name_id, km_model, c3dWinSize)
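###############################################################################
# Illustrative sketch (assumption): a KMeans-based stand-in for make_codebook().
# The commented model_type='gmm' argument elsewhere suggests the real helper
# may also support a GMM codebook; only the KMeans case is sketched here.
from sklearn.cluster import KMeans


def make_codebook_sketch(vecs, nclusters, seed=1234):
    """Fit a visual-word vocabulary on a stacked (N, D) feature matrix."""
    print("Clustering {} vectors into {} clusters...".format(vecs.shape[0],
                                                             nclusters))
    return KMeans(n_clusters=nclusters, n_init=10, random_state=seed).fit(vecs)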
def main(DATASET, LABELS, BATCH_SIZE, SEQ_SIZE=16, STEP=16, nstrokes=-1, N_EPOCHS=25): ''' Extract sequence features from AutoEncoder. Parameters: ----------- DATASET : str path to the video dataset LABELS : str path containing stroke labels BATCH_SIZE : int size for batch of clips SEQ_SIZE : int no. of frames in a clip (min. 16 for 3D CNN extraction) STEP : int stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ... partition : str 'all' / 'train' / 'test' / 'val' : Videos to be considered nstrokes : int partial extraction of features (do not execute for entire dataset) Returns: -------- trajectories, stroke_names ''' ########################################################################### attn_utils.seed_everything(1234) if not os.path.isdir(log_path): os.makedirs(log_path) features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames) # get matrix of features from dictionary (N, vec_size) vecs = [] for key in sorted(list(features.keys())): vecs.append(features[key]) vecs = np.vstack(vecs) vecs[np.isnan(vecs)] = 0 vecs[np.isinf(vecs)] = 0 #fc7 layer output size (4096) INP_VEC_SIZE = vecs.shape[-1] print("INP_VEC_SIZE = ", INP_VEC_SIZE) km_filepath = os.path.join(log_path, km_filename) # # Uncomment only while training. if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"): km_model = make_codebook(vecs, cluster_size) #, model_type='gmm') ## # Save to disk, if training is performed print("Writing the KMeans models to disk...") pickle.dump( km_model, open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb")) else: # Load from disk, for validation and test sets. km_model = pickle.load( open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb')) print("Create numpy one hot representation for train features...") onehot_feats = create_bovw_onehot(features, stroke_names_id, km_model) ft_path = os.path.join(log_path, "C" + str(cluster_size) + "_train.pkl") # ft_path = os.path.join(feat_path, feat) with open(ft_path, "wb") as fp: pickle.dump(onehot_feats, fp) with open( os.path.join(log_path, "C" + str(cluster_size) + "_snames_train.pkl"), "wb") as fp: pickle.dump(stroke_names_id, fp) ######################################################################### ######################################################################### features_test, stroke_names_id_test = attn_utils.read_feats( feat_path, feat_test, snames_test) print("Create numpy one hot representation for val features...") onehot_feats_test = create_bovw_onehot(features_test, stroke_names_id_test, km_model) ft_path_test = os.path.join(log_path, "C" + str(cluster_size) + "_test.pkl") # ft_path_test = os.path.join(feat_path, feat_test) with open(ft_path_test, "wb") as fp: pickle.dump(onehot_feats_test, fp) with open( os.path.join(log_path, "C" + str(cluster_size) + "_snames_test.pkl"), "wb") as fp: pickle.dump(stroke_names_id_test, fp) ########################################################################### # Create a Dataset train_dataset = hmdb.HMDB51FeatureSequenceDataset( ft_path, DATASET, LABELS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=16, step_between_clips=STEP, train=True) test_dataset = hmdb.HMDB51FeatureSequenceDataset( ft_path_test, DATASET, LABELS, frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=16, step_between_clips=STEP, train=False) # display_sizes(train_dataset.video_list) # display_sizes(test_dataset.video_list) # # created weighted Sampler for class imbalance samples_weight = get_hmdb_sample_weights(train_dataset) sampler = 
WeightedRandomSampler(samples_weight, len(samples_weight)) train_loader = DataLoader( dataset=train_dataset, batch_size=BATCH_SIZE, #shuffle=True) sampler=sampler, worker_init_fn=np.random.seed(12)) test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False) data_loaders = {"train": train_loader, "test": test_loader} # num_classes = len(list(set(labs_values))) num_classes = 51 ########################################################################### # load model and set loss function model = attn_model.GRUBoWHAClassifier(INPUT_SIZE, HIDDEN_SIZE, num_classes, N_LAYERS, bidirectional) # model = load_weights(base_name, model, N_EPOCHS, "Adam") # for ft in model.parameters(): # ft.requires_grad = False # Setup the loss fxn criterion = nn.CrossEntropyLoss() model = model.to(device) # print("Params to learn:") params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad == True: params_to_update.append(param) # print("\t",name) # Observe that all parameters are being optimized optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # Decay LR by a factor of 0.1 every 7 epochs exp_lr_scheduler = StepLR(optimizer_ft, step_size=10, gamma=0.1) ########################################################################### # Training the model start = time.time() model = train_model(model, data_loaders, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=N_EPOCHS) end = time.time() # save the best performing model attn_utils.save_model_checkpoint( log_path, model, N_EPOCHS, "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD") # Load model checkpoints model = attn_utils.load_weights( log_path, model, N_EPOCHS, "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD") print("Total Execution time for {} epoch : {}".format( N_EPOCHS, (end - start))) ########################################################################### acc = predict(model, data_loaders, SEQ_SIZE, phase='test') # call count_paramters(model) for displaying total no. of parameters print("#Parameters : {} ".format(autoenc_utils.count_parameters(model))) return acc
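###############################################################################
# Illustrative sketch (assumption): the parameter count printed above is
# presumably the number of trainable parameters; autoenc_utils.count_parameters()
# is assumed to behave like the helper below.
def count_parameters_sketch(model):
    """Total number of trainable parameters in a torch.nn.Module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)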