Exemplo n.º 1
0
def main(n_splits=10, max_features=200, indices_path='final_sorted_indices.txt'):
    """
    Print the cross-validated mean score for each top-r feature subset.

    For every fold and every r in 1..max_features, a GaussianNB classifier is
    trained on the first r feature indices read from ``indices_path`` and
    scored with ``fscore`` on the held-out fold.  The per-r scores are then
    averaged over the folds and printed one per line, in order of increasing r.

    :param n_splits: number of folds passed to KFold
    :param max_features: largest number of leading feature indices to evaluate
    :param indices_path: text file holding the feature indices, loaded with
        ``np.loadtxt(..., dtype=int)``
    """
    X, y = loadDataSet()

    kf = KFold(n_splits=n_splits, shuffle=True)
    # Use the splitter's own fold count as the averaging divisor so it can
    # never drift away from the n_splits actually used.
    n_folds = kf.get_n_splits(X)

    sorted_indices = np.loadtxt(indices_path, dtype=int)

    # f1_totals[r - 1] accumulates the score obtained with the first r features.
    f1_totals = [0] * max_features

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        for r in range(1, max_features + 1):
            selected_feature_indices = sorted_indices[:r]
            X_train_selected_features = X_train[:, selected_feature_indices]
            X_test_selected_features = X_test[:, selected_feature_indices]

            clf = GaussianNB()
            y_test_pred = clf.fit(X_train_selected_features, y_train).predict(X_test_selected_features)

            f1_totals[r - 1] += fscore(y_test, y_test_pred)

    for total in f1_totals:
        print(total / n_folds)
Exemplo n.º 2
0
    def evaluate(self, data):
        """
        Score ``self.model`` on every batch yielded by ``data``.

        Each item of ``data`` is an ``(inputs, targets)`` pair.  Predictions
        and targets are both reduced to class indices with ``argmax(axis=1)``
        (so targets are presumably one-hot or per-class scores -- confirm
        against the data generator).

        :param data: iterable of ``(inputs, targets)`` batches
        :returns: ``(accuracy, res[0], res[1], res[2])`` where ``res`` is the
            result of ``fscore(..., average='macro')``; presumably precision,
            recall and F-score -- verify against how ``fscore`` is imported
        """
        predicted_labels = []
        actual_labels = []
        for batch_inputs, batch_targets in data:
            batch_scores = self.model.predict(batch_inputs)
            predicted_labels.extend(batch_scores.argmax(axis=1))
            actual_labels.extend(np.argmax(batch_targets, axis=1))

        predicted_labels = np.array(predicted_labels)
        actual_labels = np.array(actual_labels)

        # Accuracy is the fraction of positions where prediction equals target.
        n_correct = (actual_labels == predicted_labels).sum()
        acc = n_correct / len(predicted_labels)

        macro = fscore(actual_labels, predicted_labels, average='macro')
        return acc, macro[0], macro[1], macro[2]
Exemplo n.º 3
0
def get_metrics(x, y, num_labels):
    """
    Average the per-pair weighted F-score and accuracy over ``x`` and ``y``.

    ``x`` and ``y`` are walked in lockstep; for each pair the F-score is
    computed with ``fscore`` over labels ``0..num_labels-1`` and the accuracy
    with ``get_accuracy``.  Both sums are divided by ``len(x)``.

    :param x: sequence of label sequences (passed as the first argument
        to ``fscore`` and ``get_accuracy``)
    :param y: sequence of label sequences (passed as the second argument)
    :param num_labels: number of unique labels in dataset
    :returns (mean_f1_score: float, mean_accuracy: float)
    :raises ZeroDivisionError: if ``x`` is empty
    """
    f1_sum = 0
    accuracy_sum = 0

    for left, right in zip(x, y):
        f1_sum += fscore(
            left,
            list(right),
            labels=np.arange(num_labels),
            average='weighted',
        )
        accuracy_sum += get_accuracy(left, right)

    n_items = len(x)
    return f1_sum / n_items, accuracy_sum / n_items
def main():
    """
    Rank features per fold and log scores for growing feature subsets.

    For each of 10 shuffled folds:
      * the per-feature mean and variance of the training rows labelled 0
        and 1 are passed to ``computeLGD``, and features are ordered by
        ascending value of its result;
      * that ordering is appended as one space-separated line to
        'sorted_indices.txt';
      * for r = 1..200 a GaussianNB is trained on the first r ranked
        features and its ``fscore`` on the held-out fold is appended as one
        line to 'f1score.txt'.
    """
    X, y = loadDataSet()

    folds = KFold(n_splits=10, shuffle=True)
    folds.get_n_splits(X)
    for train_rows, test_rows in folds.split(X):
        X_train, y_train = X[train_rows], y[train_rows]
        X_test, y_test = X[test_rows], y[test_rows]

        # Training samples split by class label.
        class0_samples = X_train[np.where(y_train == 0)[0]]
        class1_samples = X_train[np.where(y_train == 1)[0]]

        mean0 = np.mean(class0_samples, axis=0)
        var0 = np.var(class0_samples, axis=0)
        mean1 = np.mean(class1_samples, axis=0)
        var1 = np.var(class1_samples, axis=0)

        ranking = np.argsort(computeLGD(mean0, mean1, var0, var1))

        # One line per fold; every index is followed by a single space.
        with open('sorted_indices.txt', 'a+') as ranking_file:
            ranking_file.writelines(str(idx) + ' ' for idx in ranking)
            ranking_file.write('\n')

        for n_features in range(1, 201):
            top_features = ranking[:n_features]
            train_subset = X_train[:, top_features]
            test_subset = X_test[:, top_features]

            fitted = GaussianNB().fit(train_subset, y_train)
            predictions = fitted.predict(test_subset)

            fold_score = fscore(y_test, predictions)
            with open('f1score.txt', 'a+') as score_file:
                score_file.write(str(fold_score) + '\n')
def crossValidate(X, y, nfold):
    """
    Run shuffled K-fold cross-validation of GaussianNB on 16 ranked features.

    The feature ranking is loaded from 'final_sorted_indices.txt' and only
    its first 16 entries are used.  For every fold the classifier is fitted
    on the training part and the held-out predictions are scored with
    ``fscore``, ``accuracy``, ``precision``, ``recall`` and ``specificity``.
    The per-metric sums divided by ``nfold`` are printed.

    :param X: feature matrix, indexed by row index arrays and by column
    :param y: labels, indexed by the same row index arrays
    :param nfold: number of folds; also the divisor used for the averages
    """
    splitter = KFold(n_splits=nfold, shuffle=True)
    splitter.get_n_splits(X)

    ranking = np.loadtxt('final_sorted_indices.txt', dtype=int)
    n_top = 16

    print("K-fold: K=", nfold)

    metric_names = ('fscore', 'accuracy', 'precision', 'recall', 'specificity')
    totals = [0] * len(metric_names)

    for train_rows, test_rows in splitter.split(X):
        X_train, X_test = X[train_rows], X[test_rows]
        y_train, y_test = y[train_rows], y[test_rows]

        top_features = ranking[:n_top]
        train_subset = X_train[:, top_features]
        test_subset = X_test[:, top_features]

        fitted = GaussianNB().fit(train_subset, y_train)
        y_pred = fitted.predict(test_subset)

        # Same evaluation order as the printed report below.
        fold_scores = (
            fscore(y_test, y_pred),
            accuracy(y_test, y_pred),
            precision(y_test, y_pred),
            recall(y_test, y_pred),
            specificity(y_test, y_pred),
        )
        totals = [running + current
                  for running, current in zip(totals, fold_scores)]

    for label, total in zip(metric_names, totals):
        print(label, total / nfold)
def find_fs(model):
    """
    Return ``fscore`` of ``model`` on the test data from the enclosing scope.

    The model is run on ``[test_sent, test_x]`` with ``BATCH_SIZE``; the
    first element of its output and ``test_y`` are both reduced to class
    indices with ``argmax(axis=1)`` before scoring.

    :param model: object whose ``predict`` accepts a list of inputs and a
        ``batch_size`` keyword
    :returns: whatever ``fscore(true_labels, predicted_labels)`` returns
    """
    outputs = model.predict([test_sent, test_x], batch_size=BATCH_SIZE)
    predicted_labels = outputs[0].argmax(axis=1)
    true_labels = test_y.argmax(axis=1)
    return fscore(true_labels, predicted_labels)
def score(preds, targs):
    """
    Return the first three weighted-average values produced by ``fscore``.

    All warnings raised while ``fscore`` runs are suppressed; the previous
    warning filters are restored when the function returns.

    :param preds: predicted labels (passed as the second argument to ``fscore``)
    :param targs: target labels (passed as the first argument to ``fscore``)
    :returns: 3-tuple of the first three items of the 4-item ``fscore``
        result; presumably (precision, recall, F-score) -- confirm against
        how ``fscore`` is imported
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        first, second, third, _unused = fscore(
            targs, preds, pos_label=None, average='weighted'
        )
    return first, second, third
Exemplo n.º 8
0
def f_score(true, pred):
    """
    Return the weighted-average ``fscore`` of ``pred`` against ``true``.

    :param true: reference labels (first argument to ``fscore``)
    :param pred: predicted labels (second argument to ``fscore``)
    :returns: the value returned by ``fscore(..., average='weighted')``
    """
    return fscore(true, pred, average='weighted')