Example #1
0
import json

# project-local helper modules used below (assumed importable alongside this script)
import common
import evaluate


def run_evaluations():
	data = []

	test_file = "output\\nfl\\test\\6.csv"
	model_file = "models\\nfl\\6_model.pkl"
	output_file = "output\\nfl\\html\\testdata.json"

	model = common.load_model(model_file)
	_, X, y = common.read_data_from_file(test_file, "home_win", get_feature_headers())

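	# evaluate model "6" on the test split and store the metrics for the JSON summary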
	data.append(evaluate.evaluate("6", model, X, y))

	summary = {"data": data}

	with open(output_file, 'w') as summary_file:
		json.dump(summary, summary_file)

	groups = common.read_data_grouped(test_file, ['year'])

	for key in groups:

		X = groups[key][get_feature_headers()]
		y = groups[key]["home_win"]
		
		accuracy, _ = evaluate.calculate_accuracy(model, X, y)

		print(f"{key}:{accuracy:.2f}")
Example #2
0
    def evaluate(self):
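        # put the encoder/generator modules into inference mode (disables dropout, etc.)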
        self.models.eval()
        # generate samples
        inputs0, inputs1, outputs0, outputs1 = style_transfer(
            encoder=self.models['encoder'],
            generator=self.models['generator'],
            text_path=args.val_text_file_path,
            n_samples=args.n_samples)

        # display 10 samples for each
        print('=' * 30 + '\nnegative -> positive\n' + '=' * 30 + '\n')
        for original, transfer in zip(inputs0[:10], outputs0[:10]):
            print(original + ' -> ' + transfer + '\n')
        print('=' * 30 + '\npositive -> negative\n' + '=' * 30 + '\n')
        for original, transfer in zip(inputs1[:10], outputs1[:10]):
            print(original + ' -> ' + transfer + '\n')

        print("Evaluation from {} samples".format(args.n_samples))
        fed = (calculate_frechet_distance(inputs1, outputs0) +
               calculate_frechet_distance(inputs0, outputs1))
        print('FED: {:.4f}'.format(fed))

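        # style classifier check: negative->positive transfers (outputs0) are labelled 1 (positive),
        # positive->negative transfers (outputs1) are labelled 0 (negative)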
        loss, acc = calculate_accuracy(
            self.clf, outputs0 + outputs1,
            torch.cat([torch.ones(len(outputs0)),
                       torch.zeros(len(outputs1))]).long().to(args.device))
        print('Loss: {:.4f}'.format(loss.item()))
        print('Accuracy: {:.4f}\n'.format(acc.item()))
        return fed, loss.item(), acc.item()
Example #3
0
from typing import List

# project-local helpers; add_to_json_summary is assumed to be defined in this module
import common
import evaluate


def run_evaluations(model_file: str, model_name: str, data_file: str,
                    feature_columns: List[str], summary_file: str):

    model = common.load_model(model_file)

    _, X, y = common.read_data_from_file(data_file, "home_win",
                                         feature_columns)

    eval_results = evaluate.evaluate(model_name, model, X, y)

    add_to_json_summary(summary_file, eval_results)

    accuracy, _ = evaluate.calculate_accuracy(model, X, y)

    return accuracy
Example #4
0
def main(base_name,
         nbins=10,
         grid=None,
         cluster_size=10):
    """
    Function to extract orientation features and find the directions of strokes, 
    using LDA model/clustering and evaluate on three cluster analysis on highlights.
    The videos can be visualized by writing trimmed class videos into their respective
    classes.
    
    Parameters:
    ------
    
    base_name: path to the wts, losses, predictions and log files
    use_gpu: True if training to be done on GPU, False for CPU
    
    """
    seed = 1234

    print(60 * "#")

    #####################################################################

    # Form dataloaders
    #    train_lst_main_ext = get_main_dataset_files(MAIN_DATASET)   #with extensions
    #    train_lst_main = [t.rsplit('.', 1)[0] for t in train_lst_main_ext]   # remove the extension
    #    val_lst_main_ext = get_main_dataset_files(VAL_DATASET)
    #    val_lst_main = [t.rsplit('.', 1)[0] for t in val_lst_main_ext]

    # Divide the samples files into training set, validation and test sets
    train_lst, val_lst, test_lst = utils.split_dataset_files(DATASET)
    #print("c3dWinSize : {}".format(c3dWinSize))

    # form the names of the list of label files, should be at destination
    train_lab = [f + ".json" for f in train_lst]
    val_lab = [f + ".json" for f in val_lst]
    test_lab = [f + ".json" for f in test_lst]
    #    train_lab_main = [f+".json" for f in train_lst_main]
    #    val_lab_main = [f+".json" for f in val_lst_main]

    # get complete path lists of label files
    tr_labs = [os.path.join(LABELS, f) for f in train_lab]
    val_labs = [os.path.join(LABELS, f) for f in val_lab]
    #    tr_labs_main = [os.path.join(MAIN_LABELS, f) for f in train_lab_main]
    #    val_labs_main = [os.path.join(VAL_LABELS, f) for f in val_lab_main]
    #####################################################################

    sizes = [
        utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in train_lst
    ]
    val_sizes = [
        utils.getNFrames(os.path.join(DATASET, f + ".avi")) for f in val_lst
    ]
    #    sizes_main = [utils.getNFrames(os.path.join(MAIN_DATASET, f)) for f in train_lst_main_ext]
    #    val_sizes_main = [utils.getNFrames(os.path.join(VAL_DATASET, f)) for f in val_lst_main_ext]

    ###########################################################################
    # Merge the training highlights and main dataset variables
    #    train_lab.extend(train_lab_main)
    #    tr_labs.extend(tr_labs_main)
    #    sizes.extend(sizes_main)

    print("No. of training videos : {}".format(len(train_lst)))

    print("Size : {}".format(sizes))
    #    hlDataset = VideoDataset(tr_labs, sizes, seq_size=SEQ_SIZE, is_train_set = True)
    #    print(hlDataset.__len__())

    #####################################################################

    # Feature Extraction : (GRID OF / HOOF / 2D CNN / 3DCNN / IDT)

    # Get feats for only the training videos. Get ordered histograms of freq
    if grid is not None:
        print("GRID : {}, nClusters : {} ".format(grid, cluster_size))
    else:
        print("mth : {}, nBins : {}, nClusters : {}".format(
            mth, nbins, cluster_size))

    #####################################################################
    # read into dictionary {vidname: np array, ...}
    print("Loading features from disk...")
    #features = utils.readAllPartitionFeatures(c3dFC7FeatsPath, train_lst)
    #    mainFeatures = utils.readAllPartitionFeatures(c3dFC7MainFeatsPath, train_lst_main)
    #    features.update(mainFeatures)     # Merge dicts
    # get Nx4096 numpy matrix with columns as features and rows as window placement features
    #    features, strokes_name_id = select_trimmed_feats(c3dFC7FeatsPath, LABELS, \
    #                                    train_lst, c3dWinSize)
    if not os.path.exists(base_name):
        os.makedirs(base_name)
        #    # Extract Grid OF / HOOF features {mth = 2, and vary nbins}
        features, strokes_name_id = extract_stroke_feats(DATASET, LABELS, train_lst, \
                                                     nbins, mth, True, grid)

        #        BATCH_SIZE, SEQ_SIZE, STEP = 16, 16, 1
        #        features, strokes_name_id = extract_feats(DATASET, LABELS, CLASS_IDS, BATCH_SIZE,
        #                                                  SEQ_SIZE, STEP, extractor='3dcnn',
        #                                                  part='train')
        with open(
                os.path.join(base_name, "of_feats_grid" + str(grid) + ".pkl"),
                "wb") as fp:
            pickle.dump(features, fp)
        with open(
                os.path.join(base_name, "of_snames_grid" + str(grid) + ".pkl"),
                "wb") as fp:
            pickle.dump(strokes_name_id, fp)

    with open(os.path.join(base_name, "of_feats_grid" + str(grid) + ".pkl"),
              "rb") as fp:
        features = pickle.load(fp)
    with open(os.path.join(base_name, "of_snames_grid" + str(grid) + ".pkl"),
              "rb") as fp:
        strokes_name_id = pickle.load(fp)

    #####################################################################
    # get matrix of features from dictionary (N, vec_size)
    vecs = []
    for key in sorted(list(features.keys())):
        vecs.append(features[key])
    vecs = np.vstack(vecs)

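    # zero out NaN/Inf feature values before clustering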
    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0

    # input feature vector length (4096 for C3D fc7 features)
    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    km_filepath = os.path.join(base_name, km_filename)
    # Train the KMeans codebook only if it is not already saved to disk.
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"):
        km_model = make_codebook(vecs, cluster_size)  #, model_type='gmm')
        # Save the trained codebook to disk.
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model,
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb'))

    ###########################################################################
    # Form the training dataset for supervised classification
    # Assign the words (flow frames) to their closest cluster centres and count the
    # frequency for each document (video). Create an IDF-weighted BoW dataframe.
    # df_train has shape (nVids, cluster_size), indexed by video name.


#    print("Create a dataframe for C3D FC7 features...")
#    df_train_c3d, words_train = create_bovw_c3d_traindf(features, \
#                                strokes_name_id, km_model, c3dWinSize)

    print("Create a dataframe for HOOF features...")
    df_train, words_train = create_bovw_df(features, strokes_name_id, km_model,\
                                                base_name, "train")

    # read the stroke annotation labels from text file.
    vids_list = list(df_train.index)
    labs_keys, labs_values = get_cluster_labels(ANNOTATION_FILE)
    if min(labs_values) == 1:
        labs_values = [l - 1 for l in labs_values]
        labs_keys = [k.replace('.avi', '') for k in labs_keys]
    train_labels = np.array(
        [labs_values[labs_keys.index(v)] for v in vids_list])

    ###########################################################################

    #    apply_clustering(df_train, DATASET, LABELS, ANNOTATION_FILE, base_name)

    ###########################################################################

    #    print("Training stroke labels : ")
    #    print(train_labels)
    #    print(train_labels.shape)

    # concat dataframe to contain features and corresponding labels
    #df_train = pd.concat([df_train_mag, labs_df], axis=1)

    ###########################################################################
    # Train SVM
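    # fit a linear SVM on the BoVW histograms using the stroke annotation labels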
    clf = LinearSVC(verbose=False, random_state=124, max_iter=3000)
    clf.fit(df_train, train_labels)

    print("Training Complete.")
    ###########################################################################
    # Run LDA topic modelling on the BoVW words.
    print("Running LDA...")
    #
    #    # Get list of lists. Each sublist contains video cluster strIDs (words).
    #    # Eg. [["39","29","39","39","0", ...], ...]
    doc_clean = [doc.split() for doc in words_train]
    #print(doc_clean)
    diction = corpora.Dictionary(doc_clean)  # Form a dictionary
    print("printing dictionary after corp  {} ".format(diction))
    doc_term_matrix = [diction.doc2bow(doc) for doc in doc_clean]
    #dictionary = corpora.Dictionary(diction)

    # Fit the LDA topic model on the bag-of-words corpus.
    ldamodel_obj = gensim.models.ldamodel.LdaModel(doc_term_matrix, \
                    num_topics = NUM_TOPICS, id2word=diction, passes=10, \
                    random_state=seed)
    #    ldamodel_obj = gensim.models.ldaseqmodel.LdaSeqModel(doc_term_matrix, \
    #                                        num_topics=3, time_slice=[351])
    #    ldamodel_obj = gensim.models.LsiModel(doc_term_matrix, num_topics=3, \
    #                                          id2word = diction)

    #    print("training complete saving to disk ")
    ##    #save model to disk
    #    joblib.dump(ldamodel_obj, os.path.join(lda_base, mnb_modelname+".pkl"))
    ##
    ##    # Load trained model from disk
    #    ldamodel_obj = joblib.load(os.path.join(lda_base, mnb_modelname+".pkl"))

    # Print all the topics
    for i, topic in enumerate(
            ldamodel_obj.print_topics(num_topics=3, num_words=10)):
        #print("topic is {}".format(topic))
        words = topic[1].split("+")
        print("{} : {} ".format(topic[0], words))

    # actions are rows and discovered topics are columns
    topic_action_map = np.zeros((real_topic, NUM_TOPICS))

    predicted_labels = []
    #vids_list = list(df_train_mag.index)
    for j, vname in enumerate(vids_list):
        label_vid = train_labels[j]
        # sort the tuples with descending topic probabilities
        for index, score in sorted(ldamodel_obj[doc_term_matrix[j]],
                                   key=lambda tup: -1 * tup[1]):
            #        for index in [ldamodel_obj[doc_term_matrix[j]].argmax(axis=0)]:
            #   print("Score is : {} of Topic: {}".format(score,index))
            #if score>0.5:
            #    topic_action_map[label_vid][index]+=1
            #            score = ldamodel_obj[doc_term_matrix[j]][index]
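            # keep only the most probable topic for this stroke (break after the first tuple)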
            topic_action_map[label_vid][index] += score
            predicted_labels.append(index)
            break
    print("Training Time : topic action mapping is : ")
    print("topic0  topic1  topic2")
    #coloumn are topics and rows are labels
    print(topic_action_map)
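    # calculate_accuracy evaluates label<->topic permutations; the best-matching
    # permutation gives the reported accuracy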
    acc_values_tr, perm_tuples_tr, gt_list, pred_list = calculate_accuracy(train_labels,\
                                                            predicted_labels)
    acc_perc = [sum(k) / len(predicted_labels) for k in acc_values_tr]

    best_indx = acc_perc.index(max(acc_perc))
    print("Max Acc. : ", max(acc_perc))
    print("Acc values : ", acc_perc)
    print("Acc values : ", acc_values_tr)
    print("perm_tuples : ", perm_tuples_tr)

    #model_ang = joblib.load(os.path.join(destpath, mnb_modelname+"_ang.pkl"))
    ##################################################################################

    # Evaluation on validation set
    print("Validation phase ....")

    if not os.path.isfile(
            os.path.join(base_name, "of_feats_val_grid" + str(grid) + ".pkl")):

        #        features_val, strokes_name_id_val = select_trimmed_feats(c3dFC7FeatsPath, \
        #                                                LABELS, val_lst, c3dWinSize)
        features_val, strokes_name_id_val = extract_stroke_feats(DATASET, LABELS, val_lst, \
                                                         nbins, mth, True, grid)
        #        features_val, strokes_name_id_val = extract_feats(DATASET, LABELS, CLASS_IDS, BATCH_SIZE,
        #                                                  SEQ_SIZE, STEP, extractor='3dcnn',
        #                                                  part='val')
        with open(
                os.path.join(base_name,
                             "of_feats_val_grid" + str(grid) + ".pkl"),
                "wb") as fp:
            pickle.dump(features_val, fp)
        with open(
                os.path.join(base_name,
                             "of_snames_val_grid" + str(grid) + ".pkl"),
                "wb") as fp:
            pickle.dump(strokes_name_id_val, fp)
    else:
        with open(
                os.path.join(base_name,
                             "of_feats_val_grid" + str(grid) + ".pkl"),
                "rb") as fp:
            features_val = pickle.load(fp)
        with open(
                os.path.join(base_name,
                             "of_snames_val_grid" + str(grid) + ".pkl"),
                "rb") as fp:
            strokes_name_id_val = pickle.load(fp)

    print("Create dataframe BOVW validation set...")
    df_val_hoof, words_val = create_bovw_df(features_val, strokes_name_id_val, \
                                            km_model, base_name, "val")

    vids_list_val = list(df_val_hoof.index)
    val_labels = np.array(
        [labs_values[labs_keys.index(v)] for v in vids_list_val])

    topic_action_map_val = np.zeros((real_topic, NUM_TOPICS))
    doc_clean_val = [doc.split() for doc in words_val]
    # Reuse the training dictionary so that word ids match those seen by the LDA model
    doc_term_matrix_val = [diction.doc2bow(doc) for doc in doc_clean_val]
    predicted_label_val = []
    for j, vname in enumerate(vids_list_val):
        label_vid = val_labels[j]
        for index, score in sorted(ldamodel_obj[doc_term_matrix_val[j]],
                                   key=lambda tup: -1 * tup[1]):
            #        for index in [ldamodel_obj[doc_term_matrix[j]].argmax(axis=0)]:
            #            score = ldamodel_obj[doc_term_matrix[j]][index]
            #   print("Score is : {} of Topic: {}".format(score,index))
            #if score>0.5:
            #    topic_action_map_val[label_vid][index]+=1
            topic_action_map_val[label_vid][index] += score
            predicted_label_val.append(index)
            break

    print(topic_action_map_val)

    # `labels`, `model_mag` and `df_test_mag` are not defined in this script, so these
    # leftover lines are kept commented out.
    #    labs_df = pd.DataFrame(labels, index=vids_list, columns=['label'])
    #    print("Evaluating on the validation set...")
    #    evaluate(model_mag, df_test_mag, labs_df)

    # Find the maximum permutation accuracy using predicted_label_val and val_labels
    acc_values, perm_tuples, gt_list, pred_list = calculate_accuracy(val_labels, \
                                                        predicted_label_val)
    acc_perc = [sum(k) / len(predicted_label_val) for k in acc_values]

    best_indx = acc_perc.index(max(acc_perc))
    print("Max Acc. : ", max(acc_perc))
    print("Acc values : ", acc_perc)
    print("Acc values : ", acc_values)
    print("perm_tuples : ", perm_tuples)

    ###########################################################################
    # Evaluate the BOW classifier (SVM)
    confusion_mat = np.zeros((NUM_TOPICS, NUM_TOPICS))
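    # confusion_mat[predicted, true]: rows are SVM predictions, columns are ground-truth labels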
    pred = clf.predict(df_val_hoof)
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred[i] == true_val:
            correct += 1
        confusion_mat[pred[i], true_val] += 1
    print('#' * 30)
    print("BOW Classification Results:")
    print("%d/%d Correct" % (correct, len(pred)))
    print("Accuracy = {} ".format(float(correct) / len(pred)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred))
Example #5
0
    #vids_list = list(df_train_mag.index)
    for j,vname in enumerate(vids_list):
        label_vid = train_labels[j]
        # sort the tuples with descending topic probabilities
        for index, score in sorted(ldamodel_obj[doc_term_matrix[j]], key=lambda tup: -1*tup[1]):
         #   print("Score is : {} of Topic: {}".format(score,index))
            #if score>0.5:
            #    topic_action_map[label_vid][index]+=1
            topic_action_map[label_vid][index]+=score
            predicted_labels.append(index)  
            break
    print("topic action mapping is : ")
    print("topic0  topic1  topic2")
    # columns are topics and rows are labels
    print(topic_action_map)
    acc_values_tr, perm_tuples_tr, gt_list, pred_list = calculate_accuracy(train_labels, predicted_labels)
    acc_perc = [sum(k)/len(predicted_labels) for k in acc_values_tr]
    
    best_indx = acc_perc.index(max(acc_perc))
    print("Max Acc. : ", max(acc_perc))
    print("Acc values : ", acc_perc)
    print("Acc values : ", acc_values_tr)
    print("perm_tuples : ", perm_tuples_tr)
    
    #model_ang = joblib.load(os.path.join(destpath, mnb_modelname+"_ang.pkl"))
#############################################################################

    # Evaluation on validation set
    
    #bgthresh = 0
    #target_file = os.path.join(base_name, flow_filename+"_val_BG"+str(bgthresh)+".pkl")