# Sweep a decision threshold over a one-number-per-sample feature (the
# standard deviation along axis 1 of the training data), compute the true /
# false positive rate at each threshold, and save the curve to 'test.png'.
from models import train_test_split
import numpy as np
from matplotlib import pyplot as plt
import feature_extraction
from scipy.stats import norm
from sklearn.metrics import precision_score


def _roc_points(scores, labels, thresholds):
    """Return ``(fpr, tpr)`` for the decision rule ``score > threshold``.

    Args:
        scores: One score per sample.
        labels: Class labels aligned with ``scores``; 1 marks a positive
            sample and 0 a negative one.
        thresholds: Cut-off values to evaluate.

    Returns:
        Two float arrays shaped like ``thresholds``: the false positive
        rate and the true positive rate at each cut-off. A rate is ``nan``
        (with a NumPy RuntimeWarning) when its class has no samples.
    """
    # Coerce to arrays so the element-wise comparisons below also work when
    # the caller hands over plain Python lists.
    scores = np.asarray(scores)
    labels = np.asarray(labels)
    thresholds = np.asarray(thresholds)

    positives = labels == 1
    negatives = labels == 0
    # Both denominators are explicit class counts (as floats), so the two
    # rates are computed the same way and never use integer division.
    n_pos = float(np.sum(positives))
    n_neg = float(np.sum(negatives))

    tpr = np.zeros(thresholds.shape)
    fpr = np.zeros(thresholds.shape)
    for i, th in enumerate(thresholds):
        predicted = scores > th
        tpr[i] = np.sum(predicted & positives) / n_pos
        fpr[i] = np.sum(predicted & negatives) / n_neg
    return fpr, tpr


data, labels, name_list = feature_extraction.raw_data(two_cat=True)
trainX, testX, trainY, testY = train_test_split(data, labels, name_list)

# Per-sample feature: standard deviation along axis 1.
# NOTE(review): assumes each row of trainX / testX is one sample - confirm.
train_sigs = np.std(trainX, axis=1)
test_sigs = np.std(testX, axis=1)  # held-out counterpart; not used below

# 50 evenly spaced thresholds spanning the observed training feature range.
thresh = np.linspace(np.min(train_sigs), np.max(train_sigs), 50)
fpr, tpr = _roc_points(train_sigs, trainY, thresh)

plt.figure()
plt.plot(fpr, tpr, '-o')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.savefig('test.png')
# Fit an RBF model on the points in 'data_points.csv' and report its loss on
# a held-out split, then draw the model's surface plot for the test inputs.
import numpy as np
from models import RBF, SEED, train_test_split

dataset = np.genfromtxt('data_points.csv', delimiter=',')

# The first parsed row is dropped (presumably a header line - confirm against
# the CSV). The first two columns are the inputs, the third is the target.
rows = dataset[1:]
x = rows[:, :2]
y = rows[:, 2].reshape(-1, 1)  # row -> column vector

# train 70%, test 30%
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=SEED,
)

rbf = RBF(hidden_size=25, _rho=1e-5, _sigma=1)
rbf.fit(x_train, y_train)

print(f'Test error: {rbf.test_loss(x_test, y_test):.4f}')
rbf.surface_plot(x_test)
# Show the Random Forest ROC curve built from the fpr / tpr values computed
# earlier in the script.
plt.plot(fpr, tpr, '-o')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.title('ROC: Random Forest')
plt.show()

# Average the 10 cross-validation fold scores of the forest's estimator.
rf_fold_scores = cross_val_score(rf.clf, data, labels, cv=10)
cvsc = np.mean(rf_fold_scores)
print('Random Forest Accuracy: ', sc[0], sep='')
print('Random Forest Cross Validation Score: ', cvsc, sep='')

# SVM Model
# =======
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    name_list,
    test_size=0.2,
)
svm = models.SVM()
svm.train(X_train, y_train, name_list)
svm_sc, svm_prec = svm.test(X_test, y_test)

# construct ROC curve for SVM (called on the full data set, not the split)
svm.roc_auc(data, labels)
svm.plot_margin(data, labels)

# The per-fold scores are printed as-is here; they are not averaged.
svm_cvsc = cross_val_score(svm.clf, data, labels, cv=5)
print('SVM Accuracy: ', svm_sc, sep='')
print('SVM Precision: ', svm_prec, sep='')
print('SVM Cross Validation Scores: ', svm_cvsc, sep='')