def quadratic_discriminant_analysis_with_log():
    """Fit QDA on the thal data and record its train/test accuracy.

    The frame returned by ``thal_data()`` is used with the ``thal`` column
    as the label; ``thal``, ``pressure``, ``cholestoral``, ``heart_rate``
    and ``age`` are dropped from the feature matrix.  The data is split
    70/30 with a fixed seed so the scores are reproducible.

    Side effects:
        Appends the training accuracy to the module-level ``train_score``
        list and the test accuracy to the module-level ``test_score`` list.
    """
    global train_score, test_score

    frame = thal_data()
    excluded_columns = ['thal', 'pressure', 'cholestoral', 'heart_rate', 'age']
    features = frame.drop(excluded_columns, axis=1).values
    labels = frame['thal'].values

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=5)

    model = QuadraticDiscriminantAnalysis()
    model.fit(x_train, y_train)

    train_score.append(model.score(x_train, y_train))
    test_score.append(model.score(x_test, y_test))
def quadratic_discriminant_analysis_selected_feature():
    """Fit QDA on a hand-picked feature subset of the thal data.

    The frame returned by ``thal_data()`` is used with the ``thal`` column
    as the label; ``sugar``, ``age``, ``cardiographic``, ``angina``,
    ``slope``, ``thal`` and ``log_cholestoral`` are dropped from the
    feature matrix.  The data is split 70/30 with a fixed seed.

    Side effects:
        Appends the training accuracy to the module-level ``train_score``
        list and the test accuracy to the module-level ``test_score`` list.
    """
    global train_score, test_score

    frame = thal_data()
    excluded_columns = [
        'sugar', 'age', 'cardiographic', 'angina', 'slope', 'thal',
        'log_cholestoral',
    ]
    features = frame.drop(excluded_columns, axis=1).values
    labels = frame['thal'].values

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=5)

    model = QuadraticDiscriminantAnalysis()
    model.fit(x_train, y_train)

    train_score.append(model.score(x_train, y_train))
    test_score.append(model.score(x_test, y_test))
def train_l1_qda(x_train, x_test, y_train, y_test):
    """Fit QDA and return its predictions as column vectors.

    Prints the accuracy on the test split when ``y_test`` is given,
    otherwise the accuracy on the training split.

    Returns:
        A two-element list ``[train_predictions, test_predictions]``: the
        predictions for ``x_train`` first, then those for ``x_test``, each
        reshaped to ``(-1, 1)``.
    """
    model = QuadraticDiscriminantAnalysis()
    model.fit(x_train, y_train)

    if y_test is None:
        # No held-out labels available: report the training accuracy.
        print('QuadraticDiscriminantAnalysis:', model.score(x_train, y_train))
    else:
        print('QuadraticDiscriminantAnalysis:', model.score(x_test, y_test))

    train_predictions = np.reshape(model.predict(x_train), (-1, 1))
    test_predictions = np.reshape(model.predict(x_test), (-1, 1))
    return [train_predictions, test_predictions]
def get_QDA(Xtrain, Ytrain, Xtest = None , Ytest = None, verbose = 0):
    """Fit a QDA classifier and optionally print its accuracy.

    Args:
        Xtrain: Training feature matrix.
        Ytrain: Training labels.
        Xtest: Optional test feature matrix.
        Ytest: Optional test labels, used only together with ``Xtest``.
        verbose: When equal to 1, print the training accuracy and, if
            ``Xtest`` is provided, the test accuracy (as percentages).

    Returns:
        The fitted ``QDA`` estimator.
    """
    qda = QDA()
    qda.fit(Xtrain, Ytrain)

    if verbose == 1:
        train_accuracy = qda.score(Xtrain, Ytrain)
        print('QDA, train: {0:.02f}% '.format(train_accuracy * 100))
        # Identity test instead of comparing types: the direct way to ask
        # "was a test set supplied?".
        if Xtest is not None:
            test_accuracy = qda.score(Xtest, Ytest)
            print('QDA, test: {0:.02f}% '.format(test_accuracy * 100))
    return qda
def get_QDA(Xtrain, Ytrain, Xtest = None , Ytest = None, verbose = 0):
    """Fit a QDA classifier and optionally print its accuracy.

    Args:
        Xtrain: Training feature matrix.
        Ytrain: Training labels.
        Xtest: Optional test feature matrix.
        Ytest: Optional test labels, used only together with ``Xtest``.
        verbose: When equal to 1, print the training accuracy and, if
            ``Xtest`` is provided, the test accuracy (as percentages).

    Returns:
        The fitted ``QDA`` estimator.
    """
    qda = QDA()
    qda.fit(Xtrain, Ytrain)

    if verbose == 1:
        train_accuracy = qda.score(Xtrain, Ytrain)
        print('QDA, train: {0:.02f}% '.format(train_accuracy * 100))
        # Identity test instead of comparing types: the direct way to ask
        # "was a test set supplied?".
        if Xtest is not None:
            test_accuracy = qda.score(Xtest, Ytest)
            print('QDA, test: {0:.02f}% '.format(test_accuracy * 100))
    return qda
def quadratic_discriminant_analysis(data, reg_param=0.0, tol=1e-4,
                                    store_covariance=True, plot=False):
    """Train and evaluate a QDA classifier on a frame with a ``target`` column.

    Args:
        data: DataFrame whose ``target`` column holds the class labels; all
            other columns are used as features.  The printed feature list
            is ``data.columns[:-1]``, i.e. it assumes ``target`` is the
            last column.
        reg_param: Forwarded to ``QuadraticDiscriminantAnalysis``.
        tol: Forwarded to ``QuadraticDiscriminantAnalysis``.
        store_covariance: Forwarded to ``QuadraticDiscriminantAnalysis``.
        plot: When truthy and there are exactly two features, call
            ``plot_clf(data, results)``.

    Returns:
        dict with keys ``description``, ``model``, ``score_test``,
        ``score_train``, ``covariance`` and ``means``.  ``covariance`` is
        ``None`` when the fitted model does not expose ``covariance_``
        (i.e. ``store_covariance=False``).

    NOTE(review): ``test_size`` is neither a parameter nor a local; it is
    read from the enclosing (presumably module) scope -- confirm it is
    defined there.
    """
    print('\n***********************************************')
    print('Quadratic Discriminant Analysis')

    # data prep
    features = list(data.columns[:-1])
    print('\nfeatures:', features)
    classes = np.unique(data['target'])
    print('classes:', classes)

    X = data.loc[:, data.columns != 'target'].values
    y = data.loc[:, data.columns == 'target'].values.ravel()

    # shuffle=True spelled out: the original ``shuffle=None`` was treated as
    # "shuffle" by older scikit-learn releases and is rejected by parameter
    # validation in newer ones.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=True, random_state=0)

    # qda model prep
    model = QuadraticDiscriminantAnalysis(
        reg_param=reg_param, tol=tol, store_covariance=store_covariance)

    # qda model training
    model.fit(X_train, y_train)

    score_test = model.score(X_test, y_test)
    score_train = model.score(X_train, y_train)

    # covariance_ only exists when store_covariance=True; do not crash the
    # whole evaluation when the caller turned it off.
    covariance = getattr(model, 'covariance_', None)
    means = model.means_

    results = {
        'description': 'Quadratic Discriminant Analysis',
        'model': model,
        'score_test': score_test,
        'score_train': score_train,
        'covariance': covariance,
        'means': means
    }

    # Plot results
    if plot:
        if len(features) == 2:
            plot_clf(data, results)
        else:
            print(
                'Plot does not work for the number of features larger than two.'
            )

    return results
def Optimization(X,T,Val_X,Val_T):
    """Grid-search PCA dimension and QDA regularization on a validation set.

    For every number of PCA components ``k`` in 1..50 the training data is
    projected, and for every ``i`` in 0..20 a QDA with
    ``reg_param = 2**-i`` is fitted on the projection and scored on both
    the training and the validation data.  The best validation result and
    its settings are printed.

    The data passed in as arguments is what gets used; previously the body
    ignored all four parameters and read the globals ``small_Xtrain``,
    ``small_Ttrain``, ``Xval`` and ``Tval`` instead.

    Args:
        X: Training feature matrix.
        T: Training labels.
        Val_X: Validation feature matrix.
        Val_T: Validation labels.

    Returns:
        list of 50 floats: for each ``k`` the best validation accuracy over
        all regularization values.
    """
    # Parallel lists, one entry per (k, reg_param) combination.
    train_acc, val_acc, reg_paramater, Kvalue, accMaxK = [], [], [], [], []

    for k in range(1, 51):
        # Reduce both data sets with a PCA fitted on the training data only.
        clf_pca = PCA(n_components=k, svd_solver="full")
        clf_pca.fit(X)
        reducedData = clf_pca.transform(X)
        reducedValData = clf_pca.transform(Val_X)

        # Validation accuracies for this k, across all regularizations.
        Acc_list = []

        for i in range(0, 21):
            reg_param = 2**-i
            clf_qda = QuadraticDiscriminantAnalysis(reg_param=reg_param)
            ignore_warnings(clf_qda.fit)(reducedData, T)

            train_accuracy = clf_qda.score(reducedData, T)
            val_accuracy = clf_qda.score(reducedValData, Val_T)

            reg_paramater.append(reg_param)
            train_acc.append(train_accuracy)
            val_acc.append(val_accuracy)
            Acc_list.append(val_accuracy)
            Kvalue.append(k)

        accMaxK.append(max(Acc_list))

    # Index of the best validation accuracy (first one on ties).
    val_index = np.argmax(val_acc)
    accMax = val_acc[val_index]

    print("\n\nQuestion 2(f):")
    print("--------------")
    print("The Max Accuracy, accMax is: ", accMax)
    print("The corresponding training Accuracy is: ", train_acc[val_index])
    print("The corresponding value of the regularization parameter is: ", reg_paramater[val_index])
    print("The corresponding K value is: ", Kvalue[val_index])

    return accMaxK
def call_function():
    """Train QDA on ``med.data`` and print its test accuracy in percent.

    The data set is loaded through ``loadDataset`` from ``DATASET_FOLDER``;
    the last column of every row is the label and all preceding columns
    are features.  Any error is reported as an HTML paragraph on stdout
    instead of propagating.
    """
    try:
        # prepare data
        trainingSet = []
        testSet = []
        split = 0.25
        loadDataset("/".join([DATASET_FOLDER, 'med.data']), split, trainingSet, testSet)

        train = np.array(trainingSet)
        test = np.array(testSet)
        # Index of the label column == number of feature columns.
        columns = train.shape[1] - 1

        # Builtin ``float`` replaces ``np.float``: the alias was removed
        # from NumPy and always meant the same type.
        X = train[:, :columns].astype(float)
        y = train[:, columns].astype(float)

        clf = QDA()
        clf.fit(X, y)

        X_test = test[:, :columns].astype(float)
        y_test = test[:, columns].astype(float)

        accuracy = clf.score(X_test, y_test) * 100
        print("Accuracy %:", accuracy)
    except Exception:
        # Best-effort reporting is deliberate; only the exception type is
        # shown.  KeyboardInterrupt/SystemExit are no longer swallowed.
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
def gbclf_train_test(mu0, mu1, cov0, cov1, N0_train, N1_train, N0_test, N1_test, str_question):
    """Train a Gaussian Bayes (QDA) classifier on generated data and plot it.

    Training and test sets are drawn independently with ``gen_data`` from
    the same two class distributions.  The test accuracy is printed, the
    training points are scattered (class 0 red, class 1 blue) and the
    decision boundary is drawn with ``dfContour``.

    Args:
        mu0, mu1: Passed to ``gen_data`` (class means, by name).
        cov0, cov1: Passed to ``gen_data`` (class covariances, by name).
        N0_train, N1_train: Passed to ``gen_data`` for the training draw.
        N0_test, N1_test: Passed to ``gen_data`` for the test draw.
        str_question: Label inserted into the printed header and the plot
            title.
    """
    # generate train data from 2(a) and test data from 2(f)
    X_train, t_train = gen_data(mu0, mu1, cov0, cov1, N0_train, N1_train)
    X_test, t_test = gen_data(mu0, mu1, cov0, cov1, N0_test, N1_test)

    # train sklearn QuadraticDiscriminantAnalysis
    GBclf = QuadraticDiscriminantAnalysis()
    GBclf.fit(X_train, t_train)

    # compute and print out the accuracy of the classifier with the test
    # data from q2(f).  Single-argument print(...) behaves the same on
    # Python 2 and Python 3.
    accuracy = GBclf.score(X_test, t_test)
    print('\tAccuracy of Gaussian Bayes clf ' + str_question + ':')
    print('\t\t' + str(accuracy))

    # plot the training data; t_train is used as an index into the color
    # table, so it has to be an integer array of 0/1 labels.
    classToColor = np.array(['r', 'b'])
    plt.scatter(X_train[:, 0], X_train[:, 1], color=classToColor[t_train], s=2)

    # plot the decision boundary using dfContour
    dfContour(GBclf)
    # plt.xlim(-3, 6); plt.ylim(-3, 6);
    plt.title('Question ' + str_question + ': Decision boundary and contours')
    plt.show()
def main():
    """Compare LDA and QDA on the shuttle data set.

    Reads ``shuttle.csv`` (no header, last column is the class), plots the
    data, fits an LDA on the two-feature / two-class subset (columns 3 and
    6, classes 4 and 5) for visualization, and finally prints the test
    accuracy of LDA and QDA trained on the first 60% of the rows and
    evaluated on the remaining 40%.
    """
    dataset = pd.read_csv("shuttle.csv", header=None).values.astype(np.int32, copy=False)

    # 60/40 split.  The test part starts exactly where the training part
    # ends; the previous "+ 1" silently dropped one row from both sets.
    split_at = int(len(dataset) * 0.6)
    data_train = dataset[:split_at]
    data_test = dataset[split_at:]

    # Two-class subset (labels 4 and 5, remapped to 0.0 and 1.0) with
    # features 3 and 6, selected with a mask instead of per-row vstack.
    #<class 'list'>: [11478, 13, 39, 2155, 809, 4, 2] => 4, 5
    labels = dataset[:, -1]
    selected = (labels == 4) | (labels == 5)
    x = dataset[selected][:, [3, 6]]
    y = (labels[selected] - 4).astype(np.float64)

    lda = LDA(solver="svd", store_covariance=True)
    visualization(dataset[:, 3], dataset[:, 6], dataset[:, -1])
    plot_data(lda, x, y, lda.fit(x, y).predict(x))
    plt.axis('tight')
    plt.show()

    lda_score = lda.fit(data_train[:, :-1], data_train[:, -1]).score(
        data_test[:, :-1], data_test[:, -1])

    # The covariance matrices were never read, so the keyword
    # ``store_covariances`` (renamed and later removed in scikit-learn) is
    # simply not passed any more.
    qda = QDA()
    qda_score = qda.fit(data_train[:, :-1], data_train[:, -1]).score(
        data_test[:, :-1], data_test[:, -1])

    print("Linear Discriminant Analysis: ", lda_score)
    print("Quadratic Discriminant Analysis: ", qda_score)
class QuadraticDiscriminantAnalysiscls(object):
    """Best-effort wrapper around ``QuadraticDiscriminantAnalysis``.

    Every method catches exceptions, prints the traceback and returns
    ``None`` instead of raising, so callers must be prepared for ``None``
    results.

    Attributes:
        qda_cls: The wrapped estimator.
        train_x, train_y: Data given to the last ``train_model`` call.
        test_x: Features given to the last ``predict`` call.
        prediction: Result of the last successful ``predict`` call.
    """

    def __init__(self):
        self.qda_cls = QuadraticDiscriminantAnalysis()
        self.prediction = None
        self.train_x = None
        self.train_y = None
        # Initialised here so ``accuracy_score`` called before ``predict``
        # sees None instead of a missing attribute.
        self.test_x = None

    def train_model(self, train_x, train_y):
        """Remember the training data and fit the estimator on it."""
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.qda_cls.fit(train_x, train_y)
        except Exception:
            print(traceback.format_exc())

    def predict(self, test_x):
        """Predict labels for ``test_x``; the features are kept for scoring.

        Returns:
            The predictions, or ``None`` if prediction failed.
        """
        try:
            self.test_x = test_x
            self.prediction = self.qda_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        """Score the estimator on the features of the last ``predict`` call.

        Returns:
            The value of the estimator's ``score`` method, or ``None`` if
            scoring failed.
        """
        try:
            return self.qda_cls.score(self.test_x, test_y)
        except Exception:
            print(traceback.format_exc())
def QDA(X_train, y_train, X_test, y_test, weights={0: 1, 1: 1}, folder = "bush_models"):
    """Fit QDA, persist it under ``folder`` and print its test accuracy.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels used for the printed score.
        weights: Accepted but not used by this function.
        folder: Directory the model is written to as ``qda.joblib``.
    """
    model = QuadraticDiscriminantAnalysis().fit(X_train, y_train)

    model_path = folder + '/qda.joblib'
    joblib.dump(model, model_path)

    accuracy = model.score(X_test, y_test)
    print(accuracy)
def QDL(X_train, y_train, X_test, y_test):
    """Fit a QDA classifier with fixed class priors and print its accuracy.

    The priors are hard-coded for five classes, so the training labels
    must contain exactly five classes.

    Args:
        X_train, y_train: Training features and labels (array-likes).
        X_test, y_test: Test features and labels (array-likes).

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` estimator.
    """
    X_train = np.array(X_train)
    y_train = np.array(y_train)

    clf = QuadraticDiscriminantAnalysis(priors=[0.04989, 0.51198, 0.25267, 0.136, 0.049])
    clf.fit(X_train, y_train)

    accuracy = clf.score(np.array(X_test), np.array(y_test), sample_weight=None)
    # print(...) with one argument works on Python 2 and Python 3 alike.
    print(accuracy)
    return clf
class FaceClassifier():
    """Common fit/predict/score facade over a selectable classifier.

    The estimator is chosen from a ``FaceClassifierModels`` member by
    comparing ``.value``; if no known member matches, the wrapped
    estimator stays ``None``.  Note that ``predict`` returns class
    probabilities (``predict_proba``), not hard labels.
    """

    def __init__(self, classifier=FaceClassifierModels.DEFAULT):
        self._clf = None
        self._clf = self._build_estimator(classifier)
        print("classifier={}".format(self._clf))

    @staticmethod
    def _build_rbf_grid_search():
        """Return a 5-fold grid search over a scaled SVC (linear and RBF)."""
        pipeline = make_pipeline(StandardScaler(),
                                 SVC(random_state=1, probability=True))
        param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
        linear_grid = {
            'svc__C': param_range,
            'svc__kernel': ['linear']
        }
        rbf_grid = {
            'svc__C': param_range,
            'svc__gamma': param_range,
            'svc__kernel': ['rbf']
        }
        return GridSearchCV(estimator=pipeline,
                            param_grid=[linear_grid, rbf_grid],
                            scoring='accuracy',
                            cv=5,
                            n_jobs=-1)

    @classmethod
    def _build_estimator(cls, classifier):
        """Instantiate the estimator for ``classifier``, or None if unknown."""
        selected = classifier.value
        if selected == FaceClassifierModels.LINEAR_SVM.value:
            return SVC(C=1.0, kernel="linear", probability=True)
        if selected == FaceClassifierModels.NAIVE_BAYES.value:
            return GaussianNB()
        if selected == FaceClassifierModels.RBF_SVM.value:
            return cls._build_rbf_grid_search()
        if selected == FaceClassifierModels.NEAREST_NEIGHBORS.value:
            return KNeighborsClassifier(1)
        if selected == FaceClassifierModels.DECISION_TREE.value:
            return DecisionTreeClassifier(max_depth=5)
        if selected == FaceClassifierModels.RANDOM_FOREST.value:
            return RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
        if selected == FaceClassifierModels.NEURAL_NET.value:
            return MLPClassifier(solver='lbfgs', alpha=1e-2, hidden_layer_sizes=(512, 100), random_state=1)
        if selected == FaceClassifierModels.ADABOOST.value:
            return AdaBoostClassifier()
        if selected == FaceClassifierModels.QDA.value:
            return QuadraticDiscriminantAnalysis()
        return None

    def fit(self, embeddings, labels):
        """Fit the wrapped estimator on ``embeddings`` and ``labels``."""
        self._clf.fit(embeddings, labels)

    def predict(self, vec):
        """Return the wrapped estimator's class probabilities for ``vec``."""
        return self._clf.predict_proba(vec)

    def score(self, X, y):
        """Return the wrapped estimator's score on ``X`` and ``y``."""
        return self._clf.score(X, y)
def qda_predictor(x_train, y_train, x_test, y_test, give_clf = False):
    """Fit QDA and report accuracy and weighted F1 on the test split.

    The weighted precision/recall/F-score/support tuple is printed as a
    side effect.

    Args:
        x_train, y_train: Training features and labels.
        x_test, y_test: Test features and labels.
        give_clf: When true, return the fitted classifier instead of the
            metrics.

    Returns:
        ``(accuracy, f1)`` by default, or the fitted classifier when
        ``give_clf`` is true.
    """
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(x_train, y_train)
    accuracy = clf.score(x_test, y_test)

    # Predict and compute the metrics once; they were previously computed
    # twice (once for f1, once for the print).
    metrics = precision_recall_fscore_support(
        y_test, clf.predict(x_test), average='weighted')
    f1 = metrics[2]
    print(metrics)

    if give_clf:
        return clf
    return accuracy, f1
def qda(train_size=None):
    """Fit QDA on the project data set and hand its results to the reporters.

    Args:
        train_size: When truthy, the training data is sub-sampled with
            ``train_test_split(..., train_size=train_size)`` before
            fitting.

    The test predictions are passed to ``mae`` and, together with the test
    score, to ``confusion_matrix``; nothing is returned.
    """
    _, _, X_train, X_test, y_train, y_test = dataset()

    if train_size:
        X_train, _, y_train, _ = train_test_split(
            X_train, y_train, train_size=train_size)

    model = QDA()
    model.fit(X_train, y_train)

    mae(y_test, model.predict(X_test))

    predicted = model.predict(X_test)
    test_score = model.score(X_test, y_test)
    confusion_matrix(y_test, predicted, test_score)
class QDA(object):
    """Thin delegating wrapper around ``QuadraticDiscriminantAnalysis``.

    All methods forward to the wrapped estimator stored in ``self.model``.
    """

    def __init__(self, priors=None, reg_param=0., store_covariance=False, tol=1.0e-4):
        """
        :param priors: class priors, array, optional, shape=[n_classes]
        :param reg_param: float, optional, regularizes the covariance estimate
        :param store_covariance: boolean, if true the covariance matrices
            are computed and stored in ``covariance_``
        :param tol: threshold used for rank estimation
        """
        self.model = QuadraticDiscriminantAnalysis(
            priors=priors,
            reg_param=reg_param,
            store_covariance=store_covariance,
            tol=tol)

    def fit(self, x, y):
        """Fit the wrapped estimator; returns None."""
        self.model.fit(X=x, y=y)

    def get_params(self, deep=True):
        """Return the wrapped estimator's parameters."""
        return self.model.get_params(deep=deep)

    def predict(self, x):
        """Return predicted class labels for ``x``."""
        return self.model.predict(X=x)

    def predict_log_dict(self, x):
        """Return log class probabilities for ``x`` (``predict_log_proba``)."""
        return self.model.predict_log_proba(X=x)

    def predict_proba(self, x):
        """Return class probabilities for ``x``."""
        return self.model.predict_proba(X=x)

    def score(self, x, y, sample_weight=None):
        """Return the wrapped estimator's score on ``x`` and ``y``."""
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def set_params(self, **params):
        """Set parameters on the wrapped estimator; returns None."""
        self.model.set_params(**params)

    def decision_function(self, x):
        """Apply the decision function to an array of samples."""
        return self.model.decision_function(X=x)

    def get_attribute(self):
        """Return the fitted attributes of the wrapped estimator.

        Returns:
            Tuple ``(covariance, means, priors, rotations, scalings,
            classes)``.

        Raises:
            AttributeError: If the estimator has not been fitted, or
                ``covariance_`` is missing because ``store_covariance`` was
                false.
        """
        # Per-class covariance matrices,
        # list of array-like of shape (n_features, n_features).
        covariance = self.model.covariance_
        # Class means, array-like of shape (n_classes, n_features).
        # The fitted attribute is ``means_``; ``means`` does not exist.
        means = self.model.means_
        # Class proportions, summing to 1, array-like of shape (n_classes,).
        priors = self.model.priors_
        # Rotation of the Gaussian distribution for each class, list of
        # arrays; n_k = min(n_features, number of elements in class k).
        rotations = self.model.rotations_
        # For each class k an array of shape [n_k] with the scaling of the
        # Gaussian distribution, i.e. the variance in the rotated
        # coordinate system.
        scalings = self.model.scalings_
        # Distinct class labels, array-like of shape (n_classes,).
        classes = self.model.classes_

        return covariance, means, priors, rotations, scalings, classes
def train2d(K,X,T):
    """Project ``X`` onto ``K`` principal components and fit QDA on the result.

    Args:
        K: Number of PCA components to keep.
        X: Training feature matrix.
        T: Training labels.

    Returns:
        Tuple ``(pca, qda, train_acc)``: the fitted PCA, the fitted QDA and
        the QDA score on the projected training data.
    """
    # Dimensionality reduction: fit the projection, then apply it.
    projector = PCA(n_components=K, svd_solver="full")
    projector.fit(X)
    projected = projector.transform(X)

    # Classifier on the reduced data; fit warnings are suppressed.
    classifier = QuadraticDiscriminantAnalysis()
    ignore_warnings(classifier.fit)(projected, T)

    return projector, classifier, classifier.score(projected, T)
def qda_classifier(dir_models, ticket, x, x_test, y, y_test):
    """Train QDA, print a classification report and persist the model.

    The model is dumped to ``dir_models + ticket + '_qda_' + N + '.pkl'``
    where ``N`` is the number of entries currently in ``dir_models``;
    ``dir_models`` is concatenated as-is, so it has to end with a path
    separator.

    Args:
        dir_models: Directory prefix for the stored model.
        ticket: Identifier used as file-name prefix.
        x, y: Training features and labels.
        x_test, y_test: Test features and labels.

    Returns:
        The classifier's score on the test data.
    """
    print('getting model...QuadraticDiscriminantAnalysis')
    model = QuadraticDiscriminantAnalysis()

    print('training...')
    model.fit(x, y)

    print('predicting...')
    report = classification_report(y_test, model.predict(x_test))
    print(report)

    # Number the file after the models already stored in the directory.
    model_index = len(os.listdir(dir_models))
    file_name = ticket + '_qda_' + str(model_index) + '.pkl'
    joblib.dump(model, dir_models + file_name)

    return model.score(x_test, y_test)
def da_classify(X_train, y_train, X_cv, y_cv, X_test, y_test):
    """Fit QDA and print metrics for the train, CV and test splits.

    Args:
        X_train, y_train: Training features and labels.
        X_cv, y_cv: Cross-validation features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` estimator.
    """
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)

    train_predictions = model.predict(X_train)
    cv_predictions = model.predict(X_cv)
    test_predictions = model.predict(X_test)

    # Per-split summary produced by the project's PRF helper.
    train_metrics = PRF(y_train, train_predictions)
    print("da train Metrics : {0}".format(train_metrics))
    cv_metrics = PRF(y_cv, cv_predictions)
    print("da cv Metrics : {0}".format(cv_metrics))
    test_metrics = PRF(y_test, test_predictions)
    print("da test Metrics : {0}".format(test_metrics))

    # Detailed test-set figures.
    per_class = precision_recall_fscore_support(y_test, test_predictions)
    print("Test PRF : {0}".format(per_class))
    print('The Accuracy of da is :', model.score(X_test, y_test))
    print(classification_report(y_test, test_predictions))

    return model
class QDA(object):
    """Minimal train/test/accuracy facade over QuadraticDiscriminantAnalysis."""

    # Class-level default; every instance replaces it in __init__.
    clf = None

    def __init__(self):
        print("QDA Model")
        self.clf = QuadraticDiscriminantAnalysis()

    def train(self, x, y):
        """Fit the wrapped estimator on features ``x`` and labels ``y``."""
        print("Training ...")
        self.clf.fit(x, y)

    def test(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        return self.clf.predict(x)

    def get_accuracy(self, test_data, test_res):
        """Return the wrapped estimator's score on the given data and labels."""
        print("testing ...")
        return self.clf.score(test_data, test_res)
def main():
    """Train QDA on ``Dataset/med.data`` and print its test accuracy in percent.

    The data set is loaded through ``loadDataset``; the last column of
    every row is the label and all preceding columns are features.
    """
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.25
    loadDataset('Dataset/med.data', split, trainingSet, testSet)

    train = np.array(trainingSet)
    test = np.array(testSet)
    # Index of the label column == number of feature columns.
    columns = train.shape[1] - 1

    # Builtin ``float`` replaces ``np.float``: the alias was removed from
    # NumPy and always meant the same type.
    X = train[:, :columns].astype(float)
    y = train[:, columns].astype(float)

    clf = QDA()
    clf.fit(X, y)

    X_test = test[:, :columns].astype(float)
    y_test = test[:, columns].astype(float)

    accuracy = clf.score(X_test, y_test) * 100
    print("Accuracy %:", accuracy)
def parametric_classifications():
    """Fit four parametric classifiers and return their test scores.

    Uses ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    enclosing scope.

    Returns:
        Tuple ``(logreg_acc, lda_acc, qda_acc, gnb_acc)`` with the score
        of multinomial logistic regression, LDA, QDA and Gaussian naive
        Bayes, in that order.
    """
    # Other settings that were tried for logistic regression:
    # solver 'saga' and penalty='none'.
    models = (
        LogisticRegression(multi_class="multinomial", solver="newton-cg"),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis(),
        GaussianNB(),
    )

    for model in models:
        model.fit(X_train, y_train)

    return tuple(model.score(X_test, y_test) for model in models)
def _winner_take_all_colors(proba, class_colors):
    """Turn class probabilities into RGBA colors, one row per sample.

    Each sample gets the color of its most probable class; the alpha
    channel is that probability relative to the largest probability in
    ``proba``.
    """
    row_max = proba.max(axis=1)
    winners = (proba == row_max[:, np.newaxis]).astype('float')
    colors = np.dot(winners, class_colors)
    colors[:, 3] = row_max / proba.max()
    return colors


def _plot_probability_panel(position, colors, mesh_shape, Xrr, cValGood, nClasses, title):
    """Draw one subplot: the probability surface with the data on top."""
    plt.subplot(position)
    Zplot = colors.reshape(mesh_shape[0], mesh_shape[1], 4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        # Only one discriminant axis: spread the points with random jitter
        # on the second axis.
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0, c=cValGood, s=40, zorder=1)
    plt.title(title)
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')


def discriminatePlot(X, y, cVal, titleStr=''):
    """Cross-validate LDA, QDA and random forest on ``X``/``y`` and plot them.

    Frederic's robust wrapper for discriminant analysis.  Classes with
    fewer than MINCOUNT samples are discarded, the data is reduced with
    PCA, the three classifiers are scored with stratified cross-validation
    and their decision surfaces are plotted in the space of the first two
    discriminant functions.

    Args:
        X: np array, n rows x p parameters.
        y: np array of group labels, n rows.
        cVal: np array with the rgb color code of each data point; should
            be the same for all points of one group.
        titleStr: Title prefix for the plots and the printed summary.

    Returns:
        ``(ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE,
        nClasses)``; the scores are the mean and standard deviation across
        folds, in percent.  Seven times ``-1`` when fewer than two classes
        have enough data.
    """
    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5

    # Initialize Variables and clean up data
    # Classes to be discriminated should be same as ldaMod.classes_
    classes, classesCount = np.unique(y, return_counts = True)
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])

    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]

    classes, classesCount = np.unique(yGood, return_counts = True)
    nClasses = classes.size         # Number of classes or groups

    # Do we have enough data?
    if (nClasses < 2):
        print('Error in ldaPLot: Insufficient classes with minimun data (%d) for discrimination analysis' % (MINCOUNT))
        return -1, -1, -1, -1 , -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print('Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS))

    # Data size and color values
    nD = XGood.shape[1]                 # number of features in X
    nX = XGood.shape[0]                 # number of data points in X
    cClasses = []   # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl], 1.0))
    cClasses = np.asarray(cClasses)
    myPrior = np.ones(nClasses)*(1.0/nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance
    # matrix can be fitted.  Floor division keeps the Python 2 integer
    # semantics of the original ``nX/5`` on Python 3 as well.
    nDmax = int(np.fix(np.sqrt(nX // 5)))
    if nDmax < nD:
        print('Warning: Insufficient data for %s parameters. PCA projection to %s dimensions.' % (nD, nDmax))
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_)*100.0))

    # Initialise Classifiers
    ldaMod = LDA(n_components = min(nDmax,nClasses-1), priors = myPrior, shrinkage = None, solver = 'svd')
    qdaMod = QDA(priors = myPrior)
    rfMod = RF()   # by default assumes equal weights

    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0

    for train, test in skf:

        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specity the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        # Only test on samples whose class is present in the training set.
        goodInd = np.array([b in trainClasses for b in yGood[test]])
        if (goodInd.size == 0):
            continue

        # Fit the data
        trainPriors = np.ones(ntrainClasses)*(1.0/ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)

        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])

        iskf += 1

    if (iskf != cvFolds):
        # Some folds were skipped: keep only the scores that were filled
        # in.  (ndarray.reshape cannot shrink an array and its result was
        # discarded, so the original call raised ValueError here.)
        cvFolds = iskf
        ldaScores = ldaScores[:cvFolds]
        qdaScores = qdaScores[:cvFolds]
        rfScores = rfScores[:cvFolds]

    # Refit with all the data for the plots
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)

    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print('Error in ldaPlot: labels do not match')

    # Print the coefficients of first 3 DFA
    print('LDA Weights:')
    print('DFA1: ' + str(ldaMod.coef_[0,:]))
    if nClasses > 2:
        print('DFA2: ' + str(ldaMod.coef_[1,:]))
    if nClasses > 3:
        print('DFA3: ' + str(ldaMod.coef_[2,:]))

    # Obtain fits in this rotated space for display purposes
    ldaMod.fit(Xrr, yGood)
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)

    XrrMean = Xrr.mean(0)

    # Make a mesh for plotting; dimensions beyond the first two are held at
    # their mean.
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:,0] = xm1
    if Xrr.shape[1] > 1 :
        Xm[:,1] = xm2

    for ix in range(2,Xrr.shape[1]):
        Xm[:,ix] = np.squeeze(np.ones((nxm,1)))*XrrMean[ix]

    # Predict values on mesh for plotting based on the first two DFs
    yPredLDA = ldaMod.predict_proba(Xm)
    yPredQDA = qdaMod.predict_proba(Xm)
    yPredRF = rfMod.predict_proba(Xm)

    # Transform the predictions in RGBA color codes (winner takes all;
    # weighted colors do not work).
    XmcLDA = _winner_take_all_colors(yPredLDA, cClasses)
    XmcQDA = _winner_take_all_colors(yPredQDA, cClasses)
    XmcRF = _winner_take_all_colors(yPredRF, cClasses)

    # Plot the surfaces of probability
    mesh_shape = np.shape(x1)
    plt.figure(facecolor='white', figsize=(10,3))
    _plot_probability_panel(131, XmcLDA, mesh_shape, Xrr, cValGood, nClasses,
                            '%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean()*100.0)))
    _plot_probability_panel(132, XmcQDA, mesh_shape, Xrr, cValGood, nClasses,
                            '%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean()*100.0)))
    _plot_probability_panel(133, XmcRF, mesh_shape, Xrr, cValGood, nClasses,
                            '%s: RF pC %.0f %%' % (titleStr, (rfScores.mean()*100.0)))
    plt.show()

    # Results
    ldaScore = ldaScores.mean()*100.0
    qdaScore = qdaScores.mean()*100.0
    rfScore = rfScores.mean()*100.0
    ldaScoreSE = ldaScores.std() * 100.0
    qdaScoreSE = qdaScores.std() * 100.0
    rfScoreSE = rfScores.std() * 100.0

    print("Number of classes %d. Chance level %.2f %%" % (nClasses, 100.0/nClasses))
    print("%s LDA: %.2f (+/- %0.2f) %%" % (titleStr, ldaScore, ldaScoreSE))
    print("%s QDA: %.2f (+/- %0.2f) %%" % (titleStr, qdaScore, qdaScoreSE))
    print("%s RF: %.2f (+/- %0.2f) %%" % (titleStr, rfScore, rfScoreSE))
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
print('SVM accuracy: ', svm.score(X_test, y_test)) # Naive Bayes nb = GaussianNB() nb.fit(X_train, y_train) print('NB accuracy: ', nb.score(X_test, y_test)) # Decision Tree dt = DecisionTreeClassifier(random_state=0) dt.fit(X_train, y_train) print('DT accuracy: ', dt.score(X_test, y_test)) # Quadratic Discriminant qda = QuadraticDiscriminantAnalysis() qda.fit(X_train, y_train) print('QDA accuracy: ', qda.score(X_test, y_test)) # MPL classifier mpl = MLPClassifier(hidden_layer_sizes=(100, ), activation='logistic', max_iter=5000) mpl.fit(X_train, y_train) print('MPL accuracy: ', mpl.score(X_test, y_test)) # Gaussian Process gpc = GaussianProcessClassifier() gpc.fit(X_train, y_train) print('GPC accuracy: ', gpc.score(X_test, y_test)) # Random Forest Classifier rfc = RandomForestClassifier()
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.cross_validation import train_test_split

# Average QDA score over `stop` random 80/20 splits of the students returned
# by win.getStudents(): the features are each student's `grades`, the label
# is the student's `spec`.
total_score = 0
stop = 1000
for x in range(stop):
    clf = QuadraticDiscriminantAnalysis()
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    # Labels have to be extracted before data_train/data_test are rebound
    # to the plain grade lists below.
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print("all")
print(total_score)

# One-vs-rest variant: for every spec the labels are collapsed to
# "<spec>" versus "NOT <spec>".
specs = ["FK", "FM", "MN", "OE"]
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = QuadraticDiscriminantAnalysis()
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        data_train_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_test]
        data_train = [s.grades for s in data_train]
        data_test = [s.grades for s in data_test]
import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib


def removeDuplicateRows(a):
    """Return the distinct rows of the 2-D array ``a``.

    Each row is viewed as one structured record so that ``np.unique`` can
    compare whole rows; the result is viewed back as a plain 2-D array
    with the original dtype and column count.
    """
    rows = np.ascontiguousarray(a)
    n_columns = rows.shape[1]
    record_dtype = [('', rows.dtype)] * n_columns
    distinct = np.unique(rows.view(record_dtype))
    return distinct.view(rows.dtype).reshape((distinct.shape[0], n_columns))


# One text file per color class, named after the class; the class index is
# used as the numeric target.
classes = ['red','yellow','green','orange']
for index, classs in enumerate(classes):
    print(index, classs)
    clsdata = removeDuplicateRows(np.loadtxt(classs))
    clstarget = np.zeros(len(clsdata)) + index
    if index == 0:
        data = clsdata
        target = clstarget
    else:
        data = np.append(data, clsdata, axis=0)
        target = np.append(target, clstarget)
print(len(data), len(target))

# Fixed 60/40 split, fit, report the test score and persist the classifier.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.4, random_state=0)
clf = QuadraticDiscriminantAnalysis()
clf = clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
joblib.dump(clf, 'rgbClassifier.pkl')
def perform_QuadraticDiscriminantAnalysis(self):
    """Fit QDA on the instance's training data and store/print the results.

    Reads ``self.data_train``, ``self.labels_train``, ``self.data_test``
    and ``self.labels_test``.  Stores a dict with the keys ``parameters``,
    ``labels_test_data`` (the test predictions) and ``score`` in
    ``self.QuadraticDiscriminantAnalysis_result``, prints it with
    ``print_dict`` and prints the macro-averaged F1 score.
    """
    model = QuadraticDiscriminantAnalysis()
    model.fit(self.data_train, self.labels_train)

    result = {
        "parameters": model.get_params(),
        "labels_test_data": model.predict(self.data_test),
        "score": model.score(self.data_test, self.labels_test),
    }
    self.QuadraticDiscriminantAnalysis_result = result
    print_dict(result)

    print("f1_score:")
    macro_f1 = f1_score(self.labels_test, result["labels_test_data"], average='macro')
    print(macro_f1)
# ## (e) - Performing QDA from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis # # Define model qda_clf = QuadraticDiscriminantAnalysis() # # Fit model qda_clf.fit(X_train, y_train) # # Predict y_train y_hat = qda_clf.predict(X_test) # # Calculate test error test_error = 1 - qda_clf.score(X_test, y_test) # # Compute confution matrix c_mtx = confusion_matrix(y_test, y_hat) c_mtx, test_error # The test error of the QDA model is $11.06$% with the particular training and testing partitions that were generated in step (c). # ## (f) - Performing Logistic Regression from sklearn.linear_model import LogisticRegression # # Define model lr_clf = LogisticRegression() # # Fit model
def QuadDA(X_train, y_train, X_test, y_test):
    """Fit a ``QDA`` model on the training data and return its test score.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        The value of the fitted model's ``score`` on the test data.
    """
    model = QDA()
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
bl2d.boundaries(clf) plt.title("Question 2(b): decision boundaries for logistic regression") plt.show() plt.close() ###################### ##### PART 2(c) ##### ###################### print("\n\nQuestion 2(c):") print("--------------") # Gaussian Discriminant Analysis, Method 1 for calculating accuracy. clf = QuadraticDiscriminantAnalysis(store_covariance=True) clf.fit(Xtrain, Ttrain) accuracy1 = clf.score(Xtest, Ttest) # Method 2 def accuracyQDA(clf, X, T): """ Compute and return the accuracy of Quadratic Discriminant Analysis classifier Parameters ---------- clf : QDA Classifier. X : Training Data. T : True Labels. Returns -------
# NOTE: every score in this section is computed on the same data the model
# was fitted on, i.e. these are training accuracies.
lda2 = lda2.fit(X2, y2)
print('LDA2 accuracy')
print(lda2.score(X2, y2))

#data set 3
lda3 = LDA(n_components=2)
lda3 = lda3.fit(X3, y3)
print('LDA3 accuracy')
print(lda3.score(X3, y3))

##################################QDA################################
# All three QDA models use tol=0.1.
#data set 1
qda1 = QDA(tol=0.1)
qda1 = qda1.fit(X1, y1)
print('QDA1 accuracy')
print(qda1.score(X1, y1))

#data set 2
qda2 = QDA(tol=0.1)
qda2 = qda2.fit(X2, y2)
print('QDA2 accuracy')
print(qda2.score(X2, y2))

#data set 3
qda3 = QDA(tol=0.1)
qda3 = qda3.fit(X3, y3)
print('QDA3 accuracy')
print(qda3.score(X3, y3))

#6. for each trained classifier, use the test set to determine the probabilities for which each
#classifier believes the dataset belongs to class 1: P(Y=1|X=x), where x is a datapoint observation
def _probToRGBA(yPred, cClasses, dimVal):
    # Turn class probabilities (one row per mesh point) into winner-take-all
    # RGBA colors; alpha scales with the winning probability.
    rowMax = yPred.max(axis=1)
    cWinner = (yPred == rowMax[:, np.newaxis]).astype('float')
    rgba = np.dot(cWinner * yPred, cClasses)
    rgba[:, 3] = (rowMax / yPred.max()) * dimVal
    return rgba


def _plotDiscriminantPanel(subplotCode, rgba, meshShape, Xrr, cValGood, title, removeTickLabels):
    # Draw one probability surface with the projected data points on top.
    plt.subplot(subplotCode)
    Zplot = rgba.reshape(meshShape[0], meshShape[1], 4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if Xrr.shape[1] > 1:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        # Only one discriminant axis: spread the points vertically at random
        # so that they do not all fall on a single line.
        plt.scatter(Xrr, (np.random.rand(Xrr.size)-0.5)*12.0, c=cValGood, s=40, zorder=1)
    plt.title(title)
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    if removeTickLabels:
        ax = plt.gca()
        ax.set_xticklabels([''] * len(ax.get_xticklabels()))
        ax.set_yticklabels([''] * len(ax.get_yticklabels()))


def discriminatePlot(X, y, cVal, titleStr='', figdir='.', Xcolname = None, plotFig = False, removeTickLabels = False, testInd = None):
    """Cross-validated LDA, QDA and random-forest discrimination with optional plots.

    Classes with fewer than MINCOUNT samples are dropped, the remaining data
    is projected with PCA, and the three classifiers are scored either by
    stratified cross-validation (``testInd is None``) or on the single
    train/test split described by ``testInd``.

    Parameters
    ----------
    X : np.array, n rows x p parameters.
    y : np.array of n group labels.
    cVal : np.array with one color per data point (original comment: rgb code,
        the same for every point of a group). 1.0 is appended to a class color
        to build its RGBA plotting color.
    titleStr : title for the plots and printouts; also the saved figure name.
    figdir : directory (folder name) in which the figure is saved.
    Xcolname : optional list/array of column names used in the weight printout.
    plotFig : when True, draw, save and show the three probability surfaces.
    removeTickLabels : when True, blank the tick labels of each panel.
    testInd : optional boolean np.array over the rows of X marking test rows.

    Returns
    -------
    ldaYes, qdaYes, rfYes : number of correctly classified test samples.
    cvCount : number of test samples that were scored.
    ldaP, qdaP, rfP : binomial probability of at least that many correct
        answers at chance level 1/nClasses.
    nClasses : number of classes kept for the analysis.
    weights : LDA scalings (first components) projected back through the PCA.

    Nine values of -1 are returned when fewer than two classes have enough data.
    """
    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5

    # Initialize Variables and clean up data
    classes, classesCount = np.unique(y, return_counts = True)  # Should match ldaMod.classes_
    goodIndClasses = classesCount >= MINCOUNT
    goodInd = np.array([b in classes[goodIndClasses] for b in y], dtype=bool)

    if testInd is not None:
        # Transform the boolean testInd into positions inside XGood and yGood.
        testIndx = testInd.nonzero()[0]
        goodIndx = goodInd.nonzero()[0]
        testInd = np.hstack([np.where(goodIndx == testval)[0] for testval in testIndx])
        trainInd = np.setdiff1d(np.arange(len(goodIndx)), testInd)

    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]

    classes, classesCount = np.unique(yGood, return_counts = True)
    nClasses = classes.size         # Number of classes or groups

    # Do we have enough data?
    if (nClasses < 2):
        print ('Error in ldaPLot: Insufficient classes with minimun data (%d) for discrimination analysis' % (MINCOUNT))
        return -1, -1, -1, -1 , -1, -1, -1, -1, -1

    if testInd is None:
        cvFolds = min(min(classesCount), CVFOLDS)
        if (cvFolds < CVFOLDS):
            print ('Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS))
    else:
        cvFolds = 1

    # Data size and color values
    nD = XGood.shape[1]                 # number of features in X
    nX = XGood.shape[0]                 # number of data points in X
    # Color code for each class: color of its first sample plus alpha 1.0
    cClasses = np.asarray([np.append(cValGood[(yGood == cl).nonzero()[0][0]], 1.0) for cl in classes])

    # Use a uniform prior
    myPrior = np.ones(nClasses)*(1.0/nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX//5)))
    if nDmax < nD:
        print ('Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.' )
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print ('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_)*100.0))

    # Initialise Classifiers
    ldaMod = LDA(n_components = min(nDmax,nClasses-1), priors = myPrior, shrinkage = None, solver = 'svd')
    qdaMod = QDA(priors = myPrior)
    rfMod = RF()   # by default assumes equal weights

    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaYes = 0
    qdaYes = 0
    rfYes = 0
    cvCount = 0

    if testInd is None:
        # NOTE(review): this is the call form of the old sklearn
        # cross_validation module; left unchanged to match the file's imports.
        skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    else:
        skf = [(trainInd,testInd)]

    for train, test in skf:

        # Enforce the MINCOUNTTRAINING in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = trainCount >= MINCOUNTTRAINING
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]], dtype=bool)

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses = np.unique(yTrain)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue

        # Only test samples whose class was trained on can be scored.
        goodIndTest = np.array([b in trainClasses for b in yGood[test]], dtype=bool)
        nTest = int(goodIndTest.sum())
        if nTest == 0:
            continue
        XrTest = Xr[test[goodIndTest]]
        yTest = yGood[test[goodIndTest]]

        # Fit the data
        trainPriors = np.ones(ntrainClasses)*(1.0/ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)

        # score() is a fraction of the nTest samples actually scored, so the
        # correct count is recovered by scaling with nTest (not the fold size).
        ldaYes += np.around(ldaMod.score(XrTest, yTest)*nTest)
        qdaYes += np.around(qdaMod.score(XrTest, yTest)*nTest)
        rfYes += np.around(rfMod.score(XrTest, yTest)*nTest)
        cvCount += nTest

    # Refit with all the data for the plots
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)

    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print ('Error in ldaPlot: labels do not match')

    # Print the five largest coefficients of first 3 DFA
    MAXCOMP = 3        # Maximum number of DFA components
    MAXWEIGHT = 5      # Maximum number of weights printed for each component
    ncomp = min(MAXCOMP, nClasses-1)
    nweight = min(MAXWEIGHT, nD)

    # Combine the LDA scalings with the PCA components to express the
    # discriminant weights in terms of the original parameters.
    weights = np.dot(ldaMod.scalings_[:,0:ncomp].T, pca.components_)

    print('LDA Weights:')
    for ic in range(ncomp):
        idmax = np.argsort(np.abs(weights[ic,:]))[::-1]
        print('DFA %d: '%ic, end = '')
        for iw in range(nweight):
            if Xcolname is None:
                colstr = 'C%d' % idmax[iw]
            else:
                colstr = Xcolname[idmax[iw]]
            print('%s %.3f; ' % (colstr, float(weights[ic, idmax[iw]]) ), end='')
        print()

    if plotFig:
        dimVal = 0.8    # Overall dimming of background so that points can be seen

        # Obtain fits in this rotated space for display purposes
        ldaMod.fit(Xrr, yGood)
        qdaMod.fit(Xrr, yGood)
        rfMod.fit(Xrr, yGood)

        XrrMean = Xrr.mean(0)

        # Make a mesh for plotting: the first two discriminant axes span the
        # grid, any further axis is held at its mean value.
        x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
        Xm = np.zeros((x1.size, Xrr.shape[1]))
        Xm[:,0] = np.reshape(x1, -1)
        if Xrr.shape[1] > 1 :
            Xm[:,1] = np.reshape(x2, -1)
        for ix in range(2,Xrr.shape[1]):
            Xm[:,ix] = XrrMean[ix]

        # Predict values on mesh for plotting based on the first two DFs
        panels = ((131, ldaMod.predict_proba(Xm), 'LDA', ldaYes),
                  (132, qdaMod.predict_proba(Xm), 'QDA', qdaYes),
                  (133, rfMod.predict_proba(Xm), 'RF', rfYes))

        plt.figure(facecolor='white', figsize=(10,4))
        for subplotCode, yPred, label, nYes in panels:
            _plotDiscriminantPanel(subplotCode, _probToRGBA(yPred, cClasses, dimVal), np.shape(x1),
                                   Xrr, cValGood, '%s: %s %d/%d' % (titleStr, label, nYes, cvCount),
                                   removeTickLabels)

        # Save before showing: once show() returns the figure may already be
        # released, which would write an empty image.
        plt.savefig('%s/%s.png' % (figdir,titleStr), format='png', dpi=1000)
        plt.show()

    # Results
    ldaYes = int(ldaYes)
    qdaYes = int(qdaYes)
    rfYes = int(rfYes)

    # Probability of getting at least that many correct answers by chance.
    p = 1.0/nClasses
    ldaP = binom.pmf(np.arange(ldaYes, cvCount+1), cvCount, p).sum()
    qdaP = binom.pmf(np.arange(qdaYes, cvCount+1), cvCount, p).sum()
    rfP = binom.pmf(np.arange(rfYes, cvCount+1), cvCount, p).sum()

    # Every fold can be skipped, leaving cvCount at 0; report nan instead of
    # failing on a division by zero.
    def pctCorrect(nYes):
        return 100.0*nYes/cvCount if cvCount > 0 else float('nan')

    print ("Number of classes %d. Chance level %.2f %%" % (nClasses, 100.0/nClasses))
    print ("%s LDA: %.2f %% (%d/%d p=%.4f)" % (titleStr, pctCorrect(ldaYes), ldaYes, cvCount, ldaP))
    print ("%s QDA: %.2f %% (%d/%d p=%.4f)" % (titleStr, pctCorrect(qdaYes), qdaYes, cvCount, qdaP))
    print ("%s RF: %.2f %% (%d/%d p=%.4f)" % (titleStr, pctCorrect(rfYes), rfYes, cvCount, rfP))
    return ldaYes, qdaYes, rfYes, cvCount, ldaP, qdaP, rfP, nClasses, weights
def class_sf(files):
    """Score six classifiers on each single feature of an anesthesia/wake data set.

    The per-band ``.npy`` files of the chosen family are loaded from the
    current directory, concatenated along axis 2 and flattened so that the
    last axis indexes features. For every feature a fresh random train/test
    split is drawn and an SVC, a random forest, a k-NN, an LDA, a QDA and an
    MLP are fitted and scored on that one feature.

    Parameters
    ----------
    files : 'DA' or 'LA', selecting which family of files is loaded.

    Returns
    -------
    accu_clf, accu_forest, accu_knn, accu_lda, accu_qda, accu_mlp : lists with
        one test score per feature for the respective classifier.

    Raises
    ------
    ValueError : if ``files`` is neither 'DA' nor 'LA'.
    """
    if files not in ('DA', 'LA'):
        # Previously this fell through and failed later on an unbound name.
        raise ValueError("files must be 'DA' or 'LA', got %r" % (files,))

    bands = ['delta', 'theta', 'alpha', 'beta', 'lowgamma']
    file_names = ['s%s%s.npy' % (files, band) for band in bands]
    file_names2 = ['s%sw%s.npy' % (files, band) for band in bands]

    listAnest = []
    listWake = []
    for i, j in zip(file_names, file_names2):
        # SECURITY: allow_pickle=True unpickles file contents; only load
        # files from a trusted source.
        listAnest.append(np.load(i, allow_pickle=True))
        listWake.append(np.load(j, allow_pickle=True))

    listAnest = np.concatenate(listAnest, axis=2)
    listWake = np.concatenate(listWake, axis=2)

    # Collapse the leading axes into one sample axis; last axis = features.
    listAnest = listAnest.reshape((-1, listAnest.shape[2]))
    listWake = listWake.reshape((-1, listWake.shape[2]))

    X = np.concatenate((listAnest, listWake), axis=0)
    # Label 0 for the first (anesthesia) set, 1 for the second (wake) set.
    y = np.concatenate(
        (np.zeros(listAnest.shape[0]), np.ones(listWake.shape[0])))

    accu_clf = []
    accu_forest = []
    accu_knn = []
    accu_lda = []
    accu_qda = []
    accu_mlp = []

    # One (score list, estimator factory) pair per classifier, in the order
    # in which the models are fitted and scored.
    # TODO (from original notes): cross-validate with leave-p-groups-out and
    # a permutation t test instead of a single random split.
    classifiers = (
        (accu_clf, lambda: svm.SVC(gamma='auto')),
        # original note: not to be used with sign features
        (accu_forest, lambda: RandomForestClassifier(
            criterion='entropy', n_estimators=10)),
        (accu_knn, lambda: KNeighborsClassifier(
            n_neighbors=5, p=2, metric='minkowski')),
        (accu_lda, lambda: LinearDiscriminantAnalysis()),
        (accu_qda, lambda: QuadraticDiscriminantAnalysis()),
        (accu_mlp, lambda: MLPClassifier()),
    )

    for feature in X.T:
        # Each feature becomes a single-column design matrix.
        x = feature.reshape((-1, 1))
        X_train, X_test, y_train, y_test = train_test_split(x, y)

        # Fit every model first, then score them all, as before.
        fitted = [(scores, make().fit(X_train, y_train))
                  for scores, make in classifiers]
        for scores, model in fitted:
            scores.append(model.score(X_test, y_test))

    return accu_clf, accu_forest, accu_knn, accu_lda, accu_qda, accu_mlp
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Read the training and test tables and convert them to plain arrays.
train_data = np.array(pd.read_csv('./dm_data2/prob5_moons.tra'))
test_data = np.array(pd.read_csv('./dm_data2/prob5_moons.tes'))

# Column 0 is used as the label, the remaining columns as features.
# NOTE(review): labels are kept as (n, 1) column vectors; scikit-learn
# estimators generally expect 1-D labels - confirm whether this is intended.
train_y = train_data[:, 0].reshape(-1, 1)
train_x = train_data[:, 1:]
test_y = test_data[:, 0].reshape(-1, 1)
test_x = test_data[:, 1:]

# Fit QDA on the training set.
model = QuadraticDiscriminantAnalysis()
model.fit(train_x, train_y)

# Report mean accuracy on both partitions.
train_accuracy = model.score(train_x, train_y)
print('Train Accuracy : {0}'.format(train_accuracy))
test_accuracy = model.score(test_x, test_y)
print('Test Accuracy : {0}'.format(test_accuracy))
print('Linear Discriminant Analysis eigen') linDisc = LinearDiscriminantAnalysis(solver='eigen') linDisc.fit(X_train, y_train) y_test_pred = linDisc.predict(X_test) matrix = confusion_matrix(y_test, y_test_pred) score = linDisc.score(X_test, y_test) no_selection_performance.append( ('Linear Discriminant Analysis eigen', score, matrix)) print('Quadratic Discriminant Analysis') quadDisc = QuadraticDiscriminantAnalysis() quadDisc.fit(X_train, y_train) y_test_pred = quadDisc.predict(X_test) matrix = confusion_matrix(y_test, y_test_pred) score = quadDisc.score(X_test, y_test) no_selection_performance.append( ('Quadratic Discriminant Analysis', score, matrix)) print('Kernel Ridge Regression') kerRid = KernelRidge(alpha=1.0) kerRid.fit(X_train, y_train) y_test_pred = kerRid.predict(X_test) y_test_pred = [int(round(x)) for x in y_test_pred] matrix = confusion_matrix(y_test, y_test_pred) score = kerRid.score(X_test, y_test) no_selection_performance.append(('Kernel Ridge Regression', score, matrix)) print('SVC') svc = svm.SVC(C=1, class_weight=None,