Example no. 1
def get_acc_auc_kfold(X,Y,k=5):
	# Get the train/test indices for each fold, train the classifier on the
	# training split, and report the mean accuracy and mean AUC over all folds.
	kf = KFold(len(Y), k)
	acc=[]
	auc=[]
	for train_index, test_index in kf:
		X_train, X_test = X[train_index], X[test_index]
		Y_train, Y_test = Y[train_index], Y[test_index]
		Y_pred=models.logistic_regression_pred(X_train, Y_train, X_test)
		'''
		false_positive_rate, true_positive_rate, thresholds = roc_curve(Y_test, Y_pred)
		roc_auc = sklearn.metrics.roc_auc_score(Y_test, Y_pred)
		plt.title('Receiver Operating Characteristic')
		plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
		plt.legend(loc='lower right')
		plt.plot([0,1],[0,1],'r--')
		plt.xlim([-0.1,1.2])
		plt.ylim([-0.1,1.2])
		plt.ylabel('True Positive Rate')
		plt.xlabel('False Positive Rate')
		plt.show()
		'''
		
		acc_1=sklearn.metrics.accuracy_score(Y_test, Y_pred)
		auc_1=sklearn.metrics.roc_auc_score(Y_test, Y_pred)
		acc.append(acc_1)
		auc.append(auc_1)
	acc_mean=mean(acc)
	auc_mean=mean(auc)
	
	return acc_mean,auc_mean
Example no. 2
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train/test indices for each fold, train the classifier, and
    # report the mean accuracy and mean AUC over all folds.
    kf = KFold(len(Y), n_folds=k, random_state=545510477)
    acc = []
    auc = []
    for train, test in kf:
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)
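The two k-fold snippets above use the pre-0.18 sklearn.cross_validation.KFold, which takes the number of samples up front; that module was removed in scikit-learn 0.20. A minimal sketch of the same fold loop against the current sklearn.model_selection API, still assuming the same models.logistic_regression_pred helper used throughout these examples (the _modern suffix is only for illustration):

from numpy import mean
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, roc_auc_score

def get_acc_auc_kfold_modern(X, Y, k=5):
    # KFold now infers the sample count from X inside .split(); random_state
    # is only meaningful with shuffle=True, so it is omitted here to mirror
    # the unshuffled folds used above.
    kf = KFold(n_splits=k)
    acc, auc = [], []
    for train, test in kf.split(X):
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)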
Example no. 3
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train/test indices for each random split, train the classifier,
    # and report the mean accuracy and mean AUC over all iterations.
    kf = ShuffleSplit(len(Y),
                      n_iter=iterNo,
                      test_size=test_percent,
                      random_state=545510477)
    acc = []
    auc = []
    for train, test in kf:
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)
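The pre-0.18 ShuffleSplit used here likewise takes n and n_iter directly; in the current sklearn.model_selection API the sample count is inferred in .split() and n_iter has become n_splits. A hedged sketch under the same assumptions as above (the models helper module and the 545510477 seed from the other snippets):

from numpy import mean
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import accuracy_score, roc_auc_score

def get_acc_auc_randomisedCV_modern(X, Y, iterNo=5, test_percent=0.2):
    # n_iter was renamed to n_splits, and the number of samples is now
    # taken from X inside .split().
    ss = ShuffleSplit(n_splits=iterNo, test_size=test_percent,
                      random_state=545510477)
    acc, auc = [], []
    for train, test in ss.split(X):
        Y_pred = models.logistic_regression_pred(X[train], Y[train], X[test])
        acc.append(accuracy_score(Y[test], Y_pred))
        auc.append(roc_auc_score(Y[test], Y_pred))
    return mean(acc), mean(auc)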
Example no. 4
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train/test indices for each fold, train the classifier, and
    # report the mean accuracy and mean AUC over all folds.
    accuracy_arr = []
    auc_arr = []

    kfold = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    for train_i, test_i in kfold:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
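The X.get_shape() call above suggests X is a scipy sparse matrix. A small, hypothetical smoke test of get_acc_auc_kfold on synthetic data might look like the following; it still assumes the course's models module is importable:

import numpy as np
from scipy.sparse import csr_matrix

# 100 samples with 20 random features; alternating labels so every fold
# contains both classes and roc_auc_score stays well defined.
X = csr_matrix(np.random.rand(100, 20))
Y = np.arange(100) % 2
acc_mean, auc_mean = get_acc_auc_kfold(X, Y, k=5)
print("Average accuracy: %.4f, average AUC: %.4f" % (acc_mean, auc_mean))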
Example no. 5
def get_acc_auc_kfold(X, Y, k=5):
    # Get the train/test indices for each fold, train the classifier, and
    # report the mean accuracy and mean AUC over all folds.
    accuracy_arr = []
    auc_arr = []

    kfold = KFold(n=X.get_shape()[0], n_folds=k, random_state=545510477)
    for train_i, test_i in kfold:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(
            Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)
    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
Example no. 6
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train/test indices for each random split, train the classifier,
    # and report the mean accuracy and mean AUC over all iterations.
    accuracy_arr = []
    auc_arr = []

    shuffle_split = ShuffleSplit(n=X.get_shape()[0], n_iter=iterNo, test_size=test_percent, random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)

    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
Example no. 7
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
	# Get the train/test indices for each random split, train the classifier,
	# and report the mean accuracy and mean AUC over all iterations.
	ss = ShuffleSplit(len(Y), iterNo, test_percent)
	acc=[]
	auc=[]
	for train_index, test_index in ss:
		X_train, X_test = X[train_index], X[test_index]
		Y_train, Y_test = Y[train_index], Y[test_index]
		Y_pred=models.logistic_regression_pred(X_train, Y_train, X_test)
		acc_1=sklearn.metrics.accuracy_score(Y_test, Y_pred)
		auc_1=sklearn.metrics.roc_auc_score(Y_test, Y_pred)
		acc.append(acc_1)
		auc.append(auc_1)
	acc_mean=mean(acc)
	auc_mean=mean(auc)
	
	return acc_mean,auc_mean
Example no. 8
def get_acc_auc_randomisedCV(X,Y,iterNo=5,test_percent=0.2):
	
	# Materialise the splits once so both list comprehensions below see the same indices
	randomizedCV = list(ShuffleSplit(n=len(Y), n_iter=iterNo, test_size=test_percent))
	
	# Train logistic regression classifier on each fold train set
	lr_pred = [models.logistic_regression_pred(X[train_index] , Y[train_index], X[test_index]) for train_index,test_index in randomizedCV]
	    
	# Use the shuffle-split object to get the true Y values for each test set
	Y_true_values = [Y[test_index] for train_index, test_index in randomizedCV]

	# Get the various classification metrics for the logistic regression model fitted on each split
	metrics = [models.classification_metrics(Y_pred, Y_true) for Y_pred, Y_true in zip(lr_pred, Y_true_values)]
	
	# Calculate mean accuracy across all iterations
	mean_accuracy = mean([elt[0] for elt in metrics])
	
	# Calculate mean AUC across all iterations
	mean_auc = mean([elt[1] for elt in metrics])
	
	return mean_accuracy,mean_auc
Example no. 9
def get_acc_auc_randomisedCV(X, Y, iterNo=5, test_percent=0.2):
    # Get the train/test indices for each random split, train the classifier,
    # and report the mean accuracy and mean AUC over all iterations.
    accuracy_arr = []
    auc_arr = []

    shuffle_split = ShuffleSplit(n=X.get_shape()[0],
                                 n_iter=iterNo,
                                 test_size=test_percent,
                                 random_state=545510477)
    for train_i, test_i in shuffle_split:
        X_train, X_test = X[train_i], X[test_i]
        Y_train, Y_test = Y[train_i], Y[test_i]
        Y_pred = models.logistic_regression_pred(X_train, Y_train, X_test)
        acc, auc_, precision, recall, f1score = models.classification_metrics(
            Y_pred, Y_test)
        accuracy_arr.append(acc)
        auc_arr.append(auc_)

    return sum(accuracy_arr) / len(accuracy_arr), sum(auc_arr) / len(auc_arr)
Example no. 10
def get_acc_auc_kfold(X,Y,k=5):
	
	# Get the train indices and test indices for each k-fold iteration
	kfold = KFold(n = len(Y) , n_folds = k)
	
	
	# Train logistic regression classifier on each fold train set
	lr_pred = [models.logistic_regression_pred(X[train_index] , Y[train_index], X[test_index]) for train_index,test_index in kfold]
	    
	# Use the kfold object to get the true Y values for each fold test set
	Y_true_values = [Y[test_index] for train_index, test_index in kfold]

	# Get the various classification metrics for the logistic regression model fitted on each fold
	metrics = [models.classification_metrics(Y_pred, Y_true) for Y_pred, Y_true in zip(lr_pred, Y_true_values)]
	
	
	mean_accuracy = mean([elt[0] for elt in metrics])
	
	mean_auc = mean([elt[1] for elt in metrics])
	
	return mean_accuracy,mean_auc
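Every snippet on this page relies on a models module that is not shown. A minimal, hypothetical sketch of what its two helpers presumably do (the real assignment code may differ, for instance in solver settings or in scoring probabilities rather than hard labels):

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_score,
                             recall_score, f1_score)

def logistic_regression_pred(X_train, Y_train, X_test):
    # Fit a logistic regression on the training split and return hard class
    # predictions for the test split.
    clf = LogisticRegression(random_state=545510477)
    clf.fit(X_train, Y_train)
    return clf.predict(X_test)

def classification_metrics(Y_pred, Y_true):
    # Return the (accuracy, auc, precision, recall, f1) tuple that the
    # examples above expect.
    return (accuracy_score(Y_true, Y_pred),
            roc_auc_score(Y_true, Y_pred),
            precision_score(Y_true, Y_pred),
            recall_score(Y_true, Y_pred),
            f1_score(Y_true, Y_pred))

With these two helpers importable as models, any of the get_acc_auc_kfold or get_acc_auc_randomisedCV variants above can be called on a feature matrix X and a label vector Y.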