def run_evaluations():
    """Evaluate the saved model "6" on its test file and report accuracy.

    The model and the test data are loaded through the project ``common``
    helpers. The overall evaluation result is written to the JSON summary
    file as ``{"data": [result]}``; afterwards the accuracy of the same model
    is printed for every group returned by
    ``common.read_data_grouped(test_file, ['year'])``.

    Returns:
        None. Results are written to disk and printed.
    """
    # Windows-style relative paths, kept exactly as the project lays them out.
    test_file = "output\\nfl\\test\\6.csv"
    model_file = "models\\nfl\\6_model.pkl"
    output_file = "output\\nfl\\html\\testdata.json"

    model = common.load_model(model_file)
    _, X, y = common.read_data_from_file(
        test_file, "home_win", get_feature_headers())

    # The summary payload used to be bound to the name ``dict``, which
    # shadowed the builtin for the rest of the function.
    summary = {"data": [evaluate.evaluate("6", model, X, y)]}
    with open(output_file, 'w') as summary_file:
        json.dump(summary, summary_file)

    groups = common.read_data_grouped(test_file, ['year'])
    for key in groups:
        group = groups[key]
        X = group[get_feature_headers()]
        y = group["home_win"]
        # Only the first returned accuracy is reported; the second is unused.
        accuracy, _ = evaluate.calculate_accuracy(model, X, y)
        print(f"{key}:{accuracy:.2f}")
def evaluate(self):
    """Run style transfer on the validation text and report its quality.

    Puts ``self.models`` into eval mode, generates transferred samples with
    ``style_transfer``, prints up to ten original/transferred pairs for each
    direction, and then prints and returns three metrics.

    Returns:
        tuple: ``(fed, loss, acc)`` where ``fed`` is the sum of the two
        ``calculate_frechet_distance`` results and ``loss`` / ``acc`` are the
        ``.item()`` values returned by ``calculate_accuracy`` for ``self.clf``
        on the transferred sentences (``outputs0`` labelled 1, ``outputs1``
        labelled 0).
    """
    self.models.eval()

    # generate samples
    inputs0, inputs1, outputs0, outputs1 = style_transfer(
        encoder=self.models['encoder'],
        generator=self.models['generator'],
        text_path=args.val_text_file_path,
        n_samples=args.n_samples,
    )

    # display 10 samples for each direction
    rule = '=' * 30
    showcases = (
        ('\nnegative -> positive\n', inputs0, outputs0),
        ('\npositive -> negative\n', inputs1, outputs1),
    )
    for title, sources, transfers in showcases:
        print(rule + title + rule + '\n')
        for source, transferred in zip(sources[:10], transfers[:10]):
            print(source + ' -> ' + transferred + '\n')

    print("Evaluation from {} samples".format(args.n_samples))

    # Each transferred set is compared against the inputs of the other style.
    fed_to_positive = calculate_frechet_distance(inputs1, outputs0)
    fed_to_negative = calculate_frechet_distance(inputs0, outputs1)
    fed = fed_to_positive + fed_to_negative
    print('FED: {:.4f}'.format(fed))

    # Target labels for the classifier: ones for outputs0, zeros for outputs1.
    targets = torch.cat(
        [torch.ones(len(outputs0)), torch.zeros(len(outputs1))]
    ).long().to(args.device)
    loss, acc = calculate_accuracy(self.clf, outputs0 + outputs1, targets)

    loss_value = loss.item()
    acc_value = acc.item()
    print('Loss: {:.4f}'.format(loss_value))
    print('Accuracy: {:.4f}\n'.format(acc_value))

    return fed, loss_value, acc_value
def run_evaluations(model_file: str,
                    model_name: str,
                    data_file: str,
                    feature_columns: List[str],
                    summary_file: str):
    """Evaluate one saved model on one data file and record the result.

    Args:
        model_file: Path passed to ``common.load_model``.
        model_name: Name under which the evaluation result is produced.
        data_file: Path passed to ``common.read_data_from_file``.
        feature_columns: Feature column names to read from ``data_file``;
            the target column is always ``"home_win"``.
        summary_file: Passed to ``add_to_json_summary`` together with the
            evaluation result.

    Returns:
        The first value returned by ``evaluate.calculate_accuracy``.
    """
    loaded_model = common.load_model(model_file)
    _, features, target = common.read_data_from_file(
        data_file, "home_win", feature_columns)

    # Record the full evaluation in the JSON summary first ...
    add_to_json_summary(
        summary_file,
        evaluate.evaluate(f"{model_name}", loaded_model, features, target),
    )

    # ... then hand only the accuracy back to the caller.
    model_accuracy, _ = evaluate.calculate_accuracy(
        loaded_model, features, target)
    return model_accuracy
def _dump_pickle(path, obj):
    """Pickle ``obj`` to ``path``, closing the file even on error."""
    with open(path, "wb") as fp:
        pickle.dump(obj, fp)


def _load_pickle(path):
    """Load one pickled object from ``path``, closing the file afterwards."""
    # NOTE: pickle is only safe here because these files are caches written
    # by this same function; never point it at untrusted files.
    with open(path, "rb") as fp:
        return pickle.load(fp)


def _accumulate_top_topics(lda_model, bows, labels, map_shape):
    """Credit each document's most probable LDA topic to its true label.

    Returns the (label x topic) score matrix of shape ``map_shape`` and the
    list of top-topic ids, one per document that yielded at least one topic.
    """
    topic_action_map = np.zeros(map_shape)
    top_topics = []
    for bow, label in zip(bows, labels):
        # sort the tuples with descending topic probabilities
        ranked = sorted(lda_model[bow], key=lambda tup: -1 * tup[1])
        if ranked:
            index, score = ranked[0]
            topic_action_map[label][index] += score
            top_topics.append(index)
    return topic_action_map, top_topics


def main(base_name, nbins=10, grid=None, cluster_size=10):
    """Train and evaluate bag-of-visual-words stroke classifiers.

    Extracts optical-flow stroke features for the training videos, builds (or
    loads) a KMeans codebook, forms a BoVW dataframe, trains a ``LinearSVC``
    on it and fits an LDA topic model on the per-video cluster "words". Both
    models are then evaluated on a second set of videos, and intermediate
    results are printed.

    Relies on module-level names not shown here: ``DATASET``, ``LABELS``,
    ``ANNOTATION_FILE``, ``NUM_TOPICS``, ``real_topic``, ``mth``,
    ``km_filename`` and the feature / BoVW helper functions.

    Parameters:
    ------
    base_name: directory for cached features, the KMeans model and other
        outputs; it is created if it does not exist
    nbins: number of bins passed to ``extract_stroke_feats``
    grid: grid size passed to ``extract_stroke_feats``; also part of the
        cache file names (``None`` gives ``"...gridNone.pkl"``)
    cluster_size: number of clusters for the codebook

    Returns:
    ------
    float: fraction of evaluation videos classified correctly by the SVM
    """
    seed = 1234

    print(60 * "#")

    #####################################################################
    # Divide the samples files into training set, validation and test sets
    train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)

    # form the names of the list of label files, should be at destination
    train_lab = [f + ".json" for f in train_lst]
    val_lab = [f + ".json" for f in val_lst]
    test_lab = [f + ".json" for f in test_lst]

    # get complete path lists of label files
    tr_labs = [os.path.join(LABELS, f) for f in train_lab]
    val_labs = [os.path.join(LABELS, f) for f in val_lab]
    #####################################################################

    sizes = [
        utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in train_lst
    ]
    val_sizes = [
        utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in val_lst
    ]

    print("No. of training videos : {}".format(len(train_lst)))
    print("Size : {}".format(sizes))

    #####################################################################
    # Feature Extraction : (GRID OF / HOOF / 2D CNN / 3DCNN / IDT)
    # Get feats for only the training videos. Get ordered histograms of freq
    if grid is not None:
        print("GRID : {}, nClusters : {} ".format(grid, cluster_size))
    else:
        print("mth : {}, nBins : {}, nClusters : {}".format(
            mth, nbins, cluster_size))

    #####################################################################
    # read into dictionary {vidname: np array, ...}
    print("Loading features from disk...")
    if not os.path.exists(base_name):
        os.makedirs(base_name)

    # Extract Grid OF / HOOF features {mth = 2, and vary nbins}
    features, strokes_name_id = extract_stroke_feats(
        DATASET, LABELS, train_lst, nbins, mth, True, grid)

    # Training features are always re-extracted, written to disk and read
    # back (unlike the evaluation features below, which are cached).
    train_feats_path = os.path.join(
        base_name, "of_feats_grid" + str(grid) + ".pkl")
    train_snames_path = os.path.join(
        base_name, "of_snames_grid" + str(grid) + ".pkl")
    _dump_pickle(train_feats_path, features)
    _dump_pickle(train_snames_path, strokes_name_id)
    features = _load_pickle(train_feats_path)
    strokes_name_id = _load_pickle(train_snames_path)

    #####################################################################
    # get matrix of features from dictionary (N, vec_size)
    vecs = np.vstack([features[key] for key in sorted(features.keys())])
    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0

    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    km_filepath = os.path.join(base_name, km_filename)
    km_model_path = km_filepath + "_C" + str(cluster_size) + ".pkl"
    if not os.path.isfile(km_model_path):
        km_model = make_codebook(vecs, cluster_size)
        # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        _dump_pickle(km_model_path, km_model)
    else:
        # Load from disk, for validation and test sets.
        km_model = _load_pickle(km_model_path)

    ###########################################################################
    # Form the training dataset for supervised classification
    # Assign the words (flow frames) to their closest cluster centres and
    # count the frequency for each document (video).
    print("Create a dataframe for HOOF features...")
    df_train, words_train = create_bovw_df(
        features, strokes_name_id, km_model, base_name, "train")

    # read the stroke annotation labels from text file.
    vids_list = list(df_train.index)
    labs_keys, labs_values = get_cluster_labels(ANNOTATION_FILE)
    if min(labs_values) == 1:
        # shift 1-based annotation labels to 0-based
        labs_values = [l - 1 for l in labs_values]
    # NOTE(review): the flattened original does not show whether this line
    # was inside the ``if`` above; stripping a missing suffix is a no-op, so
    # it is applied unconditionally — confirm against the real file.
    labs_keys = [k.replace('.avi', '') for k in labs_keys]
    train_labels = np.array(
        [labs_values[labs_keys.index(v)] for v in vids_list])

    ###########################################################################
    # Train SVM
    clf = LinearSVC(verbose=False, random_state=124, max_iter=3000)
    clf.fit(df_train, train_labels)
    print("Training Complete.")

    ###########################################################################
    print("LDA execution !!! ")
    # Get list of lists. Each sublist contains video cluster strIDs (words).
    # Eg. [["39","29","39","39","0", ...], ...]
    doc_clean = [doc.split() for doc in words_train]
    diction = corpora.Dictionary(doc_clean)  # Form a dictionary
    print("printing dictionary after corp {} ".format(diction))
    doc_term_matrix = [diction.doc2bow(doc) for doc in doc_clean]

    # Inference using the data.
    ldamodel_obj = gensim.models.ldamodel.LdaModel(
        doc_term_matrix,
        num_topics=NUM_TOPICS,
        id2word=diction,
        passes=10,
        random_state=seed)

    # Print all the topics
    for i, topic in enumerate(
            ldamodel_obj.print_topics(num_topics=3, num_words=10)):
        word = topic[1].split("+")
        print("{} : {} ".format(topic[0], word))

    # actions are rows and discovered topics are columns
    topic_action_map, predicted_labels = _accumulate_top_topics(
        ldamodel_obj, doc_term_matrix, train_labels,
        (real_topic, NUM_TOPICS))

    print("Training Time : topic action mapping is : ")
    print("topic0 topic1 topic2")
    # columns are topics and rows are labels
    print(topic_action_map)
    acc_values_tr, perm_tuples_tr, gt_list, pred_list = calculate_accuracy(
        train_labels, predicted_labels)
    acc_perc = [sum(k) / len(predicted_labels) for k in acc_values_tr]

    best_indx = acc_perc.index(max(acc_perc))
    print("Max Acc. : ", max(acc_perc))
    print("Acc values : ", acc_perc)
    print("Acc values : ", acc_values_tr)
    print("perm_tuples : ", perm_tuples_tr)

    ###########################################################################
    # Evaluation on validation set
    print("Validation phase ....")

    val_feats_path = os.path.join(
        base_name, "of_feats_val_grid" + str(grid) + ".pkl")
    val_snames_path = os.path.join(
        base_name, "of_snames_val_grid" + str(grid) + ".pkl")
    if not os.path.isfile(val_feats_path):
        # NOTE(review): this phase is labelled "validation" but extracts
        # features for test_lst, not val_lst — confirm which is intended.
        features_val, strokes_name_id_val = extract_stroke_feats(
            DATASET, LABELS, test_lst, nbins, mth, True, grid)
        _dump_pickle(val_feats_path, features_val)
        _dump_pickle(val_snames_path, strokes_name_id_val)
    else:
        features_val = _load_pickle(val_feats_path)
        strokes_name_id_val = _load_pickle(val_snames_path)

    print("Create dataframe BOVW validation set...")
    df_val_hoof, words_val = create_bovw_df(
        features_val, strokes_name_id_val, km_model, base_name, "val")

    vids_list_val = list(df_val_hoof.index)
    val_labels = np.array(
        [labs_values[labs_keys.index(v)] for v in vids_list_val])

    doc_clean_val = [doc.split() for doc in words_val]
    # Convert with the TRAINING dictionary: the LDA model's token ids come
    # from ``diction``, so a dictionary rebuilt from the validation words
    # would assign different ids and feed the model the wrong words.
    doc_term_matrix_val = [diction.doc2bow(doc) for doc in doc_clean_val]

    topic_action_map_val, predicted_label_val = _accumulate_top_topics(
        ldamodel_obj, doc_term_matrix_val, val_labels,
        (real_topic, NUM_TOPICS))

    print(topic_action_map_val)

    # NOTE(review): ``labels``, ``model_mag`` and ``df_test_mag`` are not
    # defined in this function. Unless they are module-level names, the two
    # statements below raise NameError — confirm whether they are leftovers.
    labs_df = pd.DataFrame(labels, index=vids_list, columns=['label'])
    evaluate(model_mag, df_test_mag, labs_df)

    # Find maximum permutation accuracy using predicted_label_val and
    # val_labels
    acc_values, perm_tuples, gt_list, pred_list = calculate_accuracy(
        val_labels, predicted_label_val)
    acc_perc = [sum(k) / len(predicted_label_val) for k in acc_values]

    best_indx = acc_perc.index(max(acc_perc))
    print("Max Acc. : ", max(acc_perc))
    print("Acc values : ", acc_perc)
    print("Acc values : ", acc_values)
    print("perm_tuples : ", perm_tuples)

    ###########################################################################
    # Evaluate the BOW classifier (SVM)
    confusion_mat = np.zeros((NUM_TOPICS, NUM_TOPICS))
    pred = clf.predict(df_val_hoof)
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred[i] == true_val:
            correct += 1
        # rows are predicted labels, columns are true labels
        confusion_mat[pred[i], true_val] += 1
    print('#' * 30)
    print("BOW Classification Results:")
    print("%d/%d Correct" % (correct, len(pred)))
    print("Accuracy = {} ".format(float(correct) / len(pred)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred))
#vids_list = list(df_train_mag.index) for j,vname in enumerate(vids_list): label_vid = train_labels[j] # sort the tuples with descending topic probabilities for index, score in sorted(ldamodel_obj[doc_term_matrix[j]], key=lambda tup: -1*tup[1]): # print("Score is : {} of Topic: {}".format(score,index)) #if score>0.5: # topic_action_map[label_vid][index]+=1 topic_action_map[label_vid][index]+=score predicted_labels.append(index) break print("topic action mapping is : ") print("topic0 topic1 topic2") #coloumn are topics and rows are labels print(topic_action_map) acc_values_tr, perm_tuples_tr, gt_list, pred_list = calculate_accuracy(train_labels, predicted_labels) acc_perc = [sum(k)/len(predicted_labels) for k in acc_values_tr] best_indx = acc_perc.index(max(acc_perc)) print("Max Acc. : ", max(acc_perc)) print("Acc values : ", acc_perc) print("Acc values : ", acc_values_tr) print("perm_tuples : ", perm_tuples_tr) #model_ang = joblib.load(os.path.join(destpath, mnb_modelname+"_ang.pkl")) #####################################################################################################################################1 # Evaluation on validation set #bgthresh = 0 #target_file = os.path.join(base_name, flow_filename+"_val_BG"+str(bgthresh)+".pkl")