Example #1
def quadratic_discriminant_analysis_with_log():
    raw_frame = thal_data()
    x = raw_frame.drop(['thal', 'pressure', 'cholestoral', 'heart_rate', 'age'], axis=1).values
    y = raw_frame['thal'].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=5)
    clf = QuadraticDiscriminantAnalysis().fit(x_train, y_train)
    global train_score
    train_score.append(clf.score(x_train, y_train))
    global test_score
    test_score.append(clf.score(x_test, y_test))
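This snippet, like Example #2 below, leans on module-level scaffolding the excerpt omits: the imports, the shared train_score/test_score lists, and a thal_data() loader. A minimal sketch of that assumed context (the CSV path is hypothetical):

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

train_score, test_score = [], []  # shared accumulators the functions append to


def thal_data():
    # hypothetical loader; the real project presumably reads a heart-disease dataset
    return pd.read_csv('heart.csv')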
Example #2
def quadratic_discriminant_analysis_selected_feature():
    raw_frame = thal_data()
    x = raw_frame.drop(['sugar', 'age', 'cardiographic', 'angina', 'slope', 'thal', 'log_cholestoral'], axis=1).values
    y = raw_frame['thal'].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=5)
    clf = QuadraticDiscriminantAnalysis().fit(x_train, y_train)
    global train_score
    train_score.append(clf.score(x_train, y_train))
    global test_score
    test_score.append(clf.score(x_test, y_test))
Example #3
def train_l1_qda(x_train, x_test, y_train, y_test):
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(x_train, y_train)

    if y_test is not None:
        print('QuadraticDiscriminantAnalysis:', clf.score(x_test, y_test))
    else:
        print('QuadraticDiscriminantAnalysis:', clf.score(x_train, y_train))
    # predictions on the training and test sets
    train_res = np.reshape(clf.predict(x_train), (-1, 1))
    test_res = np.reshape(clf.predict(x_test), (-1, 1))
    return [train_res, test_res]
Example #4
def get_QDA(Xtrain, Ytrain, Xtest=None, Ytest=None, verbose=0):
    qda = QDA()
    qda.fit(Xtrain, Ytrain)

    scores = np.empty(2)
    if verbose == 1:
        scores[0] = qda.score(Xtrain, Ytrain)
        print('QDA, train: {0:.02f}% '.format(scores[0] * 100))
        if Xtest is not None:
            scores[1] = qda.score(Xtest, Ytest)
            print('QDA, test: {0:.02f}% '.format(scores[1] * 100))
    return qda
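A possible call site for get_QDA on synthetic data; the imports and the QDA alias are assumptions about the surrounding module:

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

rng = np.random.default_rng(0)
Xtrain = rng.normal(size=(100, 3))
Ytrain = (Xtrain[:, 0] > 0).astype(int)
Xtest = rng.normal(size=(40, 3))
Ytest = (Xtest[:, 0] > 0).astype(int)

model = get_QDA(Xtrain, Ytrain, Xtest, Ytest, verbose=1)  # prints both accuracies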
Example #6
def quadratic_discriminant_analysis(data,
                                    reg_param=0.0,
                                    tol=1e-4,
                                    store_covariance=True,
                                    test_size=0.25,
                                    plot=False):
    print('\n***********************************************')
    print('Quadratic Discriminant Analysis')

    # data prep
    features = list(data.columns[:-1])
    print('\nfeatures:', features)
    classes = np.unique(data['target'])
    print('classes:', classes)
    X = data.loc[:, data.columns != 'target'].values
    y = data.loc[:, data.columns == 'target'].values
    y = y.ravel()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size,
                                                        shuffle=True,
                                                        random_state=0)

    # qda model prep
    model = QuadraticDiscriminantAnalysis(reg_param=reg_param,
                                          tol=tol,
                                          store_covariance=store_covariance)

    # qda model training
    model.fit(X_train, y_train)
    score_test = model.score(X_test, y_test)
    score_train = model.score(X_train, y_train)
    covariance = model.covariance_
    means = model.means_
    results = {
        'description': 'Quadratic Discriminant Analysis',
        'model': model,
        'score_test': score_test,
        'score_train': score_train,
        'covariance': covariance,
        'means': means
    }

    # Plot results
    if plot:
        if len(features) == 2:
            plot_clf(data, results)
        else:
            print('Plotting only works when the number of features is exactly two.')
    return results
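plot_clf is not shown in the excerpt. A minimal stand-in that draws a two-feature decision boundary; the name and the results dict layout follow the function above, everything else is an assumption:

import numpy as np
import matplotlib.pyplot as plt


def plot_clf(data, results, target_col='target'):
    # assumes a numeric target column and exactly two feature columns
    X = data.loc[:, data.columns != target_col].values
    y = data[target_col].values
    model = results['model']
    x1, x2 = np.meshgrid(
        np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
        np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200))
    Z = model.predict(np.c_[x1.ravel(), x2.ravel()]).reshape(x1.shape)
    plt.contourf(x1, x2, Z, alpha=0.3)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k')
    plt.title(results['description'])
    plt.show()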
Example #7
def Optimization(X,T,Val_X,Val_T):
	
	# Initialize arrays for keeping track
	train_acc, val_acc, reg_parameter, Kvalue, accMaxK = [], [], [], [], []
	
	# We first want to get the reduced dataset
	for k in range(1,51):
		
		clf_pca = PCA(n_components=k, svd_solver="full")
		clf_pca.fit(X)
		reducedData = clf_pca.transform(X)
		reducedValData = clf_pca.transform(Val_X)
		
		
		# Store accuracy values for max K accuracy
		Acc_list = []
		
		# Then try different values of regularization on that dataset
		for i in range(0,21):
			# Define the QDA classifier
			clf_qda = QuadraticDiscriminantAnalysis(reg_param=2**-i)
			# Fit the reduced data from PCA
			ignore_warnings(clf_qda.fit)(reducedData, T)
			
			# Compute the training and validation accuracy
			train_accuracy = clf_qda.score(reducedData, T)
			val_accuracy = clf_qda.score(reducedValData, Val_T)
			
			# Append to corresponding lists
			reg_parameter.append(2**-i)
			train_acc.append(train_accuracy)
			val_acc.append(val_accuracy)
			Acc_list.append(val_accuracy)
			Kvalue.append(k)
		
		# Store the maximum K from the reg_params 
		accMaxK.append(max(Acc_list))
			
	# Get the index of the highest validation accuracy
	val_index = np.argmax(val_acc)
	accMax = val_acc[val_index]
	
	print("\n\nQuestion 2(f):")
	print("--------------")
	print("The Max Accuracy, accMax is: ", accMax)
	print("The corresponding training Accuracy is: ", train_acc[val_index])
	print("The corresponding value of the regularization parameter is: ", reg_paramater[val_index])
	print("The corresponding K value is: ", Kvalue[val_index])
	
	return accMaxK
Example #8
def call_function():
    try:
        # prepare data
        trainingSet = []
        testSet = []
        accuracy = 0.0
        split = 0.25
        loadDataset("/".join([DATASET_FOLDER, 'med.data']), split, trainingSet,
                    testSet)
        # generate predictions
        predictions = []
        trainData = np.array(trainingSet)[:,
                                          0:np.array(trainingSet).shape[1] - 1]
        columns = trainData.shape[1]
        X = np.array(trainData).astype(float)
        y = np.array(trainingSet)[:, columns].astype(float)
        clf = QDA()
        clf.fit(X, y)
        testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        X_test = np.array(testData).astype(float)
        y_test = np.array(testSet)[:, columns].astype(float)
        accuracy = clf.score(X_test, y_test)
        accuracy *= 100
        print("Accuracy %:", accuracy)
    except Exception:
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
Example #9
def gbclf_train_test(mu0, mu1, cov0, cov1, N0_train, N1_train, N0_test,
                     N1_test, str_question):

    # generate train data from 2(a) and test data from 2(f)
    X_train, t_train = gen_data(mu0, mu1, cov0, cov1, N0_train, N1_train)
    X_test, t_test = gen_data(mu0, mu1, cov0, cov1, N0_test, N1_test)

    # train sklearn QuadraticDiscriminantAnalysis
    GBclf = QuadraticDiscriminantAnalysis()
    GBclf.fit(X_train, t_train)

    # compute and print out the accuracy of your classifier
    # with the test data from q2(f)
    accuracy = GBclf.score(X_test, t_test)
    print('\tAccuracy of Gaussian Bayes clf ' + str_question + ':')
    print('\t\t' + str(accuracy))

    # plot the training data
    classToColor = np.array(['r', 'b'])
    plt.scatter(X_train[:, 0], X_train[:, 1], color=classToColor[t_train], s=2)

    # plot the decision boundary using dfContour
    dfContour(GBclf)
    # plt.xlim(-3, 6); plt.ylim(-3, 6);
    plt.title('Question ' + str_question + ': Decision boundary and contours')
    plt.show()
Example #10
def main():
    dataset = pd.read_csv("shuttle.csv", header=None).values.astype(np.int32,
                                                                    copy=False)
    data_train = dataset[0:int(len(dataset) * 0.6)]
    data_test = dataset[int(len(dataset) * 0.6) + 1:]
    x, y = np.array([]), np.array([])
    for row in dataset:
        if (row[-1] == 4 or row[-1] == 5):
            x = np.vstack(
                (x, [row[3], row[6]])) if len(x) != 0 else [row[3], row[6]]
            y = np.append(y, row[-1] - 4)

#<class 'list'>: [11478, 13, 39, 2155, 809, 4, 2] =>  4, 5

    lda = LDA(solver="svd", store_covariance=True)
    splot = visualization(dataset[:, 3], dataset[:, 6], dataset[:, -1])
    splot = plot_data(lda, x, y, lda.fit(x, y).predict(x))
    plt.axis('tight')
    plt.show()

    lda = lda.fit(data_train[:, :-1], data_train[:, -1])
    lda_score = lda.score(data_test[:, :-1], data_test[:, -1])
    qda = QDA(store_covariance=True)
    qda = qda.fit(data_train[:, :-1], data_train[:, -1])
    qda_score = qda.score(data_test[:, :-1], data_test[:, -1])

    print("Linear Discriminant Analysis: ", lda_score)
    print("Quadratic Discriminant Analysis: ", qda_score)
Example #11
class QuadraticDiscriminantAnalysiscls(object):
    """docstring for ClassName"""
    def __init__(self):
        self.qda_cls = QuadraticDiscriminantAnalysis()
        self.prediction = None
        self.train_x = None
        self.train_y = None

    def train_model(self, train_x, train_y):
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.qda_cls.fit(train_x, train_y)
        except Exception:
            print(traceback.format_exc())

    def predict(self, test_x):
        try:
            self.test_x = test_x
            self.prediction = self.qda_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        try:
            # return r2_score(test_y, self.prediction)
            return self.qda_cls.score(self.test_x, test_y)
        except Exception:
            print(traceback.format_exc())
Example #12
def QDA(X_train, y_train, X_test, y_test, weights={0: 1, 1: 1}, folder = "bush_models"):

    qda = QuadraticDiscriminantAnalysis()
    qda = qda.fit(X_train, y_train)

    joblib.dump(qda, folder+'/qda.joblib')

    print(qda.score(X_test, y_test))
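The dumped estimator can later be restored with joblib.load; the path below mirrors the default folder above, and X_test is assumed to be in scope:

import joblib

clf = joblib.load('bush_models/qda.joblib')
print(clf.predict(X_test[:5]))  # X_test assumed to be prepared elsewhere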
Example #13
def QDL(X_train, y_train, X_test, y_test):
	X_train=np.array(X_train)
	y_train=np.array(y_train)
	clf = QuadraticDiscriminantAnalysis(priors=[0.04989,0.51198,0.25267,0.136,0.049])
	clf.fit(X_train, y_train)
	accuracy=clf.score(np.array(X_test), np.array(y_test), sample_weight=None)
	print(accuracy)
	return clf
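The hard-coded priors above sum to 0.99954 rather than exactly 1, which scikit-learn may complain about; a small defensive sketch that normalizes them first:

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

priors = np.array([0.04989, 0.51198, 0.25267, 0.136, 0.049])
priors = priors / priors.sum()  # rescale so the priors sum to exactly 1
clf = QuadraticDiscriminantAnalysis(priors=priors)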
Example #14
class FaceClassifier():
    def __init__(self, classifier=FaceClassifierModels.DEFAULT):
        self._clf = None
        if classifier.value == FaceClassifierModels.LINEAR_SVM.value:
            self._clf = SVC(C=1.0, kernel="linear", probability=True)
        elif classifier.value == FaceClassifierModels.NAIVE_BAYES.value:
            self._clf = GaussianNB()
        elif classifier.value == FaceClassifierModels.RBF_SVM.value:
            pipe_svc = make_pipeline(StandardScaler(),
                                     SVC(random_state=1, probability=True))

            param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
            param_grid = [{
                'svc__C': param_range,
                'svc__kernel': ['linear']
            }, {
                'svc__C': param_range,
                'svc__gamma': param_range,
                'svc__kernel': ['rbf']
            }]
            self._clf = GridSearchCV(estimator=pipe_svc,
                                     param_grid=param_grid,
                                     scoring='accuracy',
                                     cv=5,
                                     n_jobs=-1)

            # self._clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
        elif classifier.value == FaceClassifierModels.NEAREST_NEIGHBORS.value:
            self._clf = KNeighborsClassifier(1)
        elif classifier.value == FaceClassifierModels.DECISION_TREE.value:
            self._clf = DecisionTreeClassifier(max_depth=5)
        elif classifier.value == FaceClassifierModels.RANDOM_FOREST.value:
            self._clf = RandomForestClassifier(max_depth=5,
                                               n_estimators=10,
                                               max_features=1)
        elif classifier.value == FaceClassifierModels.NEURAL_NET.value:
            # self._clf = MLPClassifier(alpha=1)
            self._clf = MLPClassifier(solver='lbfgs',
                                      alpha=1e-2,
                                      hidden_layer_sizes=(512, 100),
                                      random_state=1)

        elif classifier.value == FaceClassifierModels.ADABOOST.value:
            self._clf = AdaBoostClassifier()
        elif classifier.value == FaceClassifierModels.QDA.value:
            self._clf = QuadraticDiscriminantAnalysis()
        # print("classifier={}".format(FaceClassifierModels(classifier)))
        print("classifier={}".format(self._clf))

    def fit(self, embeddings, labels):
        self._clf.fit(embeddings, labels)

    def predict(self, vec):
        return self._clf.predict_proba(vec)

    def score(self, X, y):
        return self._clf.score(X, y)
Example #15
def qda_predictor(x_train, y_train, x_test, y_test, give_clf = False):
	clf = QuadraticDiscriminantAnalysis()
	clf.fit(x_train, y_train)
	accuracy = clf.score(x_test, y_test)
	f1 = precision_recall_fscore_support(y_test, clf.predict(x_test), average = 'weighted')[2]
	print(precision_recall_fscore_support(y_test, clf.predict(x_test), average = 'weighted'))
	if not give_clf:
		return accuracy, f1
	else:
		return clf
Example #16
def qda(train_size=None):
    _, _, X_train, X_test, y_train, y_test = dataset()
    if train_size:
        X_train, _, y_train, _ = train_test_split(X_train,
                                                  y_train,
                                                  train_size=train_size)

    qda = QDA()
    qda.fit(X_train, y_train)
    mae(y_test, qda.predict(X_test))
    confusion_matrix(y_test, qda.predict(X_test), qda.score(X_test, y_test))
Example #17
class QDA(object):
    def __init__(self,
                 priors=None,
                 reg_param=0.,
                 store_covariance=False,
                 tol=1.0e-4):
        """
        :param priors:  分来优先级, array, 可选项, shape=[n_classes]
        :param reg_param:  float, 可选项,将协方差估计正规化
        :param store_covariance: boolean 如果为真,则计算并存储协方差矩阵到self.covariance_中
        :param tol:  使用排序评估的阈值
        """
        self.model = QuadraticDiscriminantAnalysis(
            priors=priors,
            reg_param=reg_param,
            store_covariance=store_covariance,
            tol=tol)

    def fit(self, x, y):
        self.model.fit(X=x, y=y)

    def get_params(self, deep=True):
        return self.model.get_params(deep=deep)

    def predict(self, x):
        return self.model.predict(X=x)

    def predict_log_dict(self, x):
        return self.model.predict_log_proba(X=x)

    def predict_proba(self, x):
        return self.model.predict_proba(X=x)

    def score(self, x, y, sample_weight=None):
        return self.model.score(X=x, y=y, sample_weight=sample_weight)

    def set_params(self, **params):
        self.model.set_params(**params)

    def decision_function(self, x):  # Apply the decision function to an array of samples.
        return self.model.decision_function(X=x)

    def get_attribute(self):
        covariance = self.model.covariance_  # per-class covariance matrices, list of array-like of shape (n_features, n_features)
        means = self.model.means_  # class means, array-like of shape (n_classes, n_features)
        priors = self.model.priors_  # class proportions, summing to 1, array-like of shape (n_classes,)
        rotations = self.model.rotations_  # list of arrays; n_k = min(n_features, number of elements in class k);
        # rotations of the Gaussians
        scalings = self.model.scalings_  # list of arrays; for each class k, an array of shape [n_k] with the scaling
        # of the Gaussians, i.e. the variance in the rotated coordinate system
        classes = self.model.classes_  # array-like, shape (n_classes,), the distinct class labels

        return covariance, means, priors, rotations, scalings, classes
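A possible round trip through this wrapper on synthetic data; the class and method names come from the snippet above, everything else is illustrative:

import numpy as np

rng = np.random.default_rng(42)
x = rng.normal(size=(200, 4))
y = (x[:, 0] + x[:, 1] > 0).astype(int)

model = QDA(store_covariance=True)  # needed so covariance_ is populated
model.fit(x, y)
print(model.score(x, y))
covariance, means, priors, rotations, scalings, classes = model.get_attribute()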
Example #18
def train2d(K, X, T):

	# Reduce dimensions with PCA
	pca = PCA(n_components=K, svd_solver="full")
	pca.fit(X)
	reducedData = pca.transform(X)
	
	# Train the QDA classifier on the reduced dataset
	qda = QuadraticDiscriminantAnalysis()
	ignore_warnings(qda.fit)(reducedData,T)
	
	# Compute accuracy
	train_acc = qda.score(reducedData,T)
	
	return pca, qda, train_acc
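Because the classifier is fitted in the PCA-reduced space, new samples must be projected with the returned pca object before qda.predict is called; a usage sketch, where X, T and X_new are assumed to be prepared elsewhere:

pca, qda, train_acc = train2d(10, X, T)
X_new_reduced = pca.transform(X_new)  # project into the same reduced space
predictions = qda.predict(X_new_reduced)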
Example #19
def qda_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...QuadraticDiscriminantAnalysis')
    clf = QuadraticDiscriminantAnalysis()

    print('training...')
    clf.fit(x, y)

    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))

    model_id = len(os.listdir(dir_models))

    joblib.dump(clf, dir_models + ticket + '_qda_' + str(model_id) + '.pkl')

    return clf.score(x_test, y_test)
Example #20
def da_classify(X_train, y_train, X_cv, y_cv, X_test, y_test):
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    pre_y_train = clf.predict(X_train)
    pre_y_cv = clf.predict(X_cv)
    pre_y_test = clf.predict(X_test)
    print("da   train Metrics : {0}".format(PRF(y_train, pre_y_train)))
    print("da   cv Metrics : {0}".format(PRF(y_cv, pre_y_cv)))
    print("da   test Metrics : {0}".format(PRF(y_test, pre_y_test)))
    print("Test PRF : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))
    print('The Accuracy of ' + 'da' + ' is :', clf.score(X_test, y_test))
    print(classification_report(y_test, pre_y_test))

    return clf
Example #21
class QDA(object):
    clf = None

    def __init__(self):
        print("QDA Model")
        self.clf = QuadraticDiscriminantAnalysis()

    def train(self, x, y):
        print("Training ...")
        self.clf.fit(x, y)

    def test(self, x):
        res = self.clf.predict(x)
        return res

    def get_accuracy(self, test_data, test_res):
        print("testing ...")
        acc = self.clf.score(test_data, test_res)
        return acc
Example #22
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('Dataset/med.data', split, trainingSet, testSet)
    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData).astype(float)
    y = np.array(trainingSet)[:, columns].astype(float)
    clf = QDA()
    clf.fit(X, y)
    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData).astype(float)
    y_test = np.array(testSet)[:, columns].astype(float)
    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example #23
def parametric_classifications():
    logreg = LogisticRegression(
        multi_class="multinomial",
        solver="newton-cg",  # the 'saga' solver was also tried
        # penalty='none'  # this penalty setting was also tried
    )
    logreg.fit(X_train, y_train)

    lda = LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)

    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)

    gnb = GaussianNB()
    gnb.fit(X_train, y_train)

    logreg_acc = logreg.score(X_test, y_test)
    lda_acc = lda.score(X_test, y_test)
    qda_acc = qda.score(X_test, y_test)
    gnb_acc = gnb.score(X_test, y_test)

    return logreg_acc, lda_acc, qda_acc, gnb_acc
Example #24
def discriminatePlot(X, y, cVal, titleStr=''):
    # Frederic's robust wrapper for discriminant analysis. Performs LDA, QDA and RF after error checking,
    # generates nice plots and returns cross-validated
    # performance, stderr and base line.
    # X: np array, n rows x p parameters
    # y: group labels, n rows
    # cVal: rgb color code for each data point - should be the same for all data belonging to the same group
    # titleStr: title for plots
    # returns: ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
    
    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5 
    
    # Initialize Variables and clean up data
    classes, classesCount = np.unique(y, return_counts = True)  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]


    classes, classesCount = np.unique(yGood, return_counts = True) 
    nClasses = classes.size         # Number of classes or groups  

    # Do we have enough data?  
    if (nClasses < 2):
        print 'Error in ldaPLot: Insufficient classes with minimun data (%d) for discrimination analysis' % (MINCOUNT)
        return -1, -1, -1, -1 , -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print 'Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS)
   
    # Data size and color values   
    nD = XGood.shape[1]                 # number of features in X
    nX = XGood.shape[0]                 # number of data points in X
    cClasses = []   # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl],1.0))
    cClasses = np.asarray(cClasses)
    myPrior = np.ones(nClasses)*(1.0/nClasses)  

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX/5)))
    if nDmax < nD:
        print 'Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.' 
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_)*100.0))
    
    
    # Initialise Classifiers  
    ldaMod = LDA(n_components = min(nDmax,nClasses-1), priors = myPrior, shrinkage = None, solver = 'svd') 
    qdaMod = QDA(priors = myPrior)
    rfMod = RF()   # by default assumes equal weights

        
    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0
    
    for train, test in skf:
        
        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True) 
        ntrainClasses = trainClasses.size
        
        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])    
        if (goodInd.size == 0):
            continue
           
        # Fit the data
        trainPriors = np.ones(ntrainClasses)*(1.0/ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)        
        rfMod.fit(XrTrain, yTrain)
        

        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])

        iskf += 1
     
    if (iskf != cvFolds):
        cvFolds = iskf
        ldaScores = ldaScores[:cvFolds]
        qdaScores = qdaScores[:cvFolds]
        rfScores = rfScores[:cvFolds]
      
# Refit with all the data  for the plots
        
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print('Error in ldaPlot: labels do not match')

    # Print the coefficients of the first 3 DFAs
    print('LDA Weights:')
    print('DFA1:', ldaMod.coef_[0,:])
    if nClasses > 2:
        print('DFA2:', ldaMod.coef_[1,:])
    if nClasses > 3:
        print('DFA3:', ldaMod.coef_[2,:])
        
    # Obtain fits in this rotated space for display purposes   
    ldaMod.fit(Xrr, yGood)    
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)
    
    XrrMean = Xrr.mean(0)
                
    # Make a mesh for plotting
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:,0] = xm1
    if Xrr.shape[1] > 1 :
        Xm[:,1] = xm2
        
    for ix in range(2,Xrr.shape[1]):
        Xm[:,ix] = np.squeeze(np.ones((nxm,1)))*XrrMean[ix]
        
    XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
    XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
    XmcRF = np.zeros((nxm, 4))  # RGBA values for color for RF

    
    # Predict values on mesh for plotting based on the first two DFs     
    yPredLDA = ldaMod.predict_proba(Xm) 
    yPredQDA = qdaMod.predict_proba(Xm) 
    yPredRF = rfMod.predict_proba(Xm)

    
    # Transform the predictions in color codes
    maxLDA = yPredLDA.max()
    for ix in range(nxm) :
        cWeight = yPredLDA[ix,:]                               # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcLDA[ix,:] = np.dot(cWinner, cClasses)
        XmcLDA[ix,3] = cWeight.max()/maxLDA
    
    # Plot the surface of probability    
    plt.figure(facecolor='white', figsize=(10,3))
    plt.subplot(131)
    Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean()*100.0)))
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))    
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')

    
    # Transform the predictions in color codes
    maxQDA = yPredQDA.max()
    for ix in range(nxm) :
        cWeight = yPredQDA[ix,:]                               # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcQDA[ix,:] = np.dot(cWinner, cClasses)
        XmcQDA[ix,3] = cWeight.max()/maxQDA
    
    # Plot the surface of probability    
    plt.subplot(132)
    Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean()*100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    
    
    # Transform the predictions in color codes
    maxRF = yPredRF.max()
    for ix in range(nxm) :
        cWeight = yPredRF[ix,:]           # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses  # Weighted colors does not work
        XmcRF[ix,:] = np.dot(cWinner, cClasses)
        XmcRF[ix,3] = cWeight.max()/maxRF
    
    # Plot the surface of probability    
    plt.subplot(133)
    Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:    
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: RF pC %.0f %%' % (titleStr, (rfScores.mean()*100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    
    plt.show()


    # Results
    ldaScore = ldaScores.mean()*100.0
    qdaScore = qdaScores.mean()*100.0
    rfScore = rfScores.mean()*100.0
    ldaScoreSE = ldaScores.std() * 100.0
    qdaScoreSE = qdaScores.std() * 100.0 
    rfScoreSE = rfScores.std() * 100.0 
    
    print ("Number of classes %d. Chance level %.2f %%") % (nClasses, 100.0/nClasses)
    print ("%s LDA: %.2f (+/- %0.2f) %%") % (titleStr, ldaScore, ldaScoreSE)
    print ("%s QDA: %.2f (+/- %0.2f) %%") % (titleStr, qdaScore, qdaScoreSE)
    print ("%s RF: %.2f (+/- %0.2f) %%") % (titleStr, rfScore, rfScoreSE)
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
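A call-site sketch for discriminatePlot on synthetic two-class data; cVal supplies one RGB triple per row of X. All names below are assumptions, and the function itself still relies on the legacy sklearn cross_validation module:

import numpy as np

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, (50, 6)), rng.normal(1, 1, (50, 6))])
y = np.array([0] * 50 + [1] * 50)
palette = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])  # red / blue per class
cVal = palette[y]                                        # one RGB row per point
ldaScore, ldaSE, qdaScore, qdaSE, rfScore, rfSE, nClasses = discriminatePlot(
    X, y, cVal, titleStr='demo')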
Example #25
print('SVM accuracy: ', svm.score(X_test, y_test))

# Naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
print('NB accuracy: ', nb.score(X_test, y_test))

# Decision Tree
dt = DecisionTreeClassifier(random_state=0)
dt.fit(X_train, y_train)
print('DT accuracy: ', dt.score(X_test, y_test))

# Quadratic Discriminant
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
print('QDA accuracy: ', qda.score(X_test, y_test))

# MLP classifier
mlp = MLPClassifier(hidden_layer_sizes=(100, ),
                    activation='logistic',
                    max_iter=5000)
mlp.fit(X_train, y_train)
print('MLP accuracy: ', mlp.score(X_test, y_test))

# Gaussian Process
gpc = GaussianProcessClassifier()
gpc.fit(X_train, y_train)
print('GPC accuracy: ', gpc.score(X_test, y_test))

# Random Forest Classifier
rfc = RandomForestClassifier()
Example #26
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split


total_score = 0
stop = 1000
for x in range(stop):
    clf = QuadraticDiscriminantAnalysis()
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print("all")
print(total_score)

specs = ["FK", "FM", "MN", "OE"]
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = QuadraticDiscriminantAnalysis()
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        data_train_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_test]
        data_train = [s.grades for s in data_train]
        data_test = [s.grades for s in data_test]
Example #27
import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split
import joblib
def removeDuplicateRows(a):
    a = np.ascontiguousarray(a)
    unique_a = np.unique(a.view([('', a.dtype)]*a.shape[1]))
    return unique_a.view(a.dtype).reshape((unique_a.shape[0], a.shape[1]))

classes = ['red','yellow','green','orange']

for index,classs in enumerate(classes):
    print (index,classs)
    if index == 0:
        data = removeDuplicateRows(np.loadtxt(classs))
        target = np.zeros(len(data))
    else:
        clsdata =  removeDuplicateRows(np.loadtxt(classs))
        data = np.append(data,clsdata,axis=0)
        target=np.append(target,np.zeros(len(clsdata))+index)
            
print (len(data), len(target))
# print(data)

X_train,X_test,y_train,y_test = train_test_split(data,target,test_size=0.4,random_state=0)    
clf = QuadraticDiscriminantAnalysis().fit(X_train,y_train)
print (clf.score(X_test,y_test))
joblib.dump(clf, 'rgbClassifier.pkl') 
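Restoring the persisted color classifier later might look like this; the sample pixel value is illustrative:

import joblib

clf = joblib.load('rgbClassifier.pkl')
print(clf.predict([[220, 40, 30]]))  # hypothetical RGB sample -> class index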
Example #28
def perform_QuadraticDiscriminantAnalysis(self):
    QDA_clf = QuadraticDiscriminantAnalysis()
    QDA_clf.fit(self.data_train, self.labels_train)
    self.QuadraticDiscriminantAnalysis_result = {
        "parameters": QDA_clf.get_params(),
        "labels_test_data": QDA_clf.predict(self.data_test),
        "score": QDA_clf.score(self.data_test, self.labels_test)
    }

    print_dict(self.QuadraticDiscriminantAnalysis_result)
    print("f1_score:")
    print(f1_score(self.labels_test, self.QuadraticDiscriminantAnalysis_result["labels_test_data"], average='macro'))
Example #29
# ## (e) - Performing QDA

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
#
# Define model
qda_clf = QuadraticDiscriminantAnalysis()
#
# Fit model
qda_clf.fit(X_train, y_train)
#
# Predict y_train
y_hat = qda_clf.predict(X_test)
#
# Calculate test error
test_error = 1 - qda_clf.score(X_test, y_test)
#
# Compute confusion matrix
c_mtx = confusion_matrix(y_test, y_hat)
c_mtx, test_error

# The test error of the QDA model is $11.06$% with the particular training and testing partitions that were generated in step (c).

# ## (f) - Performing Logistic Regression

from sklearn.linear_model import LogisticRegression
#
# Define model
lr_clf = LogisticRegression()
#
# Fit model
Example #30
def QuadDA(X_train, y_train, X_test, y_test):
    clf = QDA()
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    return accuracy
Example #31
bl2d.boundaries(clf)
plt.title("Question 2(b): decision boundaries for logistic regression")
plt.show()
plt.close()

######################
##### PART 2(c)  #####
######################

print("\n\nQuestion 2(c):")
print("--------------")

# Gaussian Discriminant Analysis, Method 1 for calculating accuracy.
clf = QuadraticDiscriminantAnalysis(store_covariance=True)
clf.fit(Xtrain, Ttrain)
accuracy1 = clf.score(Xtest, Ttest)


# Method 2
def accuracyQDA(clf, X, T):
    """
	Compute and return the accuracy of a Quadratic Discriminant Analysis classifier.

	Parameters
	----------
	clf : QDA Classifier.
	X : Training Data.
	T : True Labels.

	Returns
	-------
Example #32
lda2 = lda2.fit(X2, y2)
print('LDA2 accuracy')
print(lda2.score(X2, y2))

#data set 3
lda3 = LDA(n_components=2)
lda3 = lda3.fit(X3, y3)
print('LDA3 accuracy')
print(lda3.score(X3, y3))
##################################QDA################################

#data set 1
qda1 = QDA(tol=0.1)
qda1 = qda1.fit(X1, y1)
print('QDA1 accuracy')
print(qda1.score(X1, y1))

#data set 2
qda2 = QDA(tol=0.1)
qda2 = qda2.fit(X2, y2)
print('QDA2 accuracy')
print(qda2.score(X2, y2))

#data set 3
qda3 = QDA(tol=0.1)
qda3 = qda3.fit(X3, y3)
print('QDA3 accuracy')
print(qda3.score(X3, y3))

#6. for each trained classifier, use the test set to determine the probabilities for which each
#classifier believes the dataset belongs to class 1: P(Y=1|X=x), where x is a datapoint observation
Example #33
def discriminatePlot(X, y, cVal, titleStr='', figdir='.', Xcolname = None, plotFig = False, removeTickLabels = False, testInd = None):
    # Frederic's robust wrapper for discriminant analysis. Performs LDA, QDA and RF after error checking,
    # generates nice plots and returns cross-validated
    # performance, stderr and base line.
    # X: np array, n rows x p parameters
    # y: group labels, n rows
    # cVal: rgb color code for each data point - should be the same for all data belonging to the same group
    # titleStr: title for plots
    # figdir: directory name (folder name) for figures
    # Xcolname: np.array or list of strings with column names for printout display
    # returns: ldaYes, qdaYes, rfYes, cvCount, ldaP, qdaP, rfP, nClasses, weights

    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5
    # figdir = '/Users/frederictheunissen/Documents/Data/Julie/Acoustical Analysis/Figures Voice'

    # Initialize Variables and clean up data
    classes, classesCount = np.unique(y, return_counts = True)  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    if testInd is not None:
        # Check for goodInd - should be an np.array of dtype=bool
        # Transform testInd into an index inside xGood and yGood
        testIndx = testInd.nonzero()[0]
        goodIndx = goodInd.nonzero()[0]
        testInd = np.hstack([ np.where(goodIndx == testval)[0] for testval in testIndx])
        trainInd = np.asarray([i for i in range(len(goodIndx)) if i not in testInd])
        
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]
        
    classes, classesCount = np.unique(yGood, return_counts = True) 
    nClasses = classes.size         # Number of classes or groups  

    # Do we have enough data?  
    if (nClasses < 2):
        print ('Error in ldaPLot: Insufficient classes with minimun data (%d) for discrimination analysis' % (MINCOUNT))
        return -1, -1, -1, -1 , -1, -1, -1, -1, -1
    
    if testInd is None:
        cvFolds = min(min(classesCount), CVFOLDS)
        if (cvFolds < CVFOLDS):
            print ('Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS))
    else:
        cvFolds = 1
   
    # Data size and color values   
    nD = XGood.shape[1]                 # number of features in X
    nX = XGood.shape[0]                 # number of data points in X
    cClasses = []   # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl],1.0))
    cClasses = np.asarray(cClasses)
    
    # Use a uniform prior 
    myPrior = np.ones(nClasses)*(1.0/nClasses)  

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX//5)))
    if nDmax < nD:
        print ('Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.' )
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print ('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_)*100.0))
    
    
    # Initialise Classifiers  
    ldaMod = LDA(n_components = min(nDmax,nClasses-1), priors = myPrior, shrinkage = None, solver = 'svd') 
    qdaMod = QDA(priors = myPrior)
    rfMod = RF()   # by default assumes equal weights

        
    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaYes = 0
    qdaYes = 0
    rfYes = 0
    cvCount = 0
    
    if testInd is None:
        skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    else:
        skf = [(trainInd,testInd)]
    
    for train, test in skf:
        
        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True) 
        ntrainClasses = trainClasses.size
        
        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])    
        if (goodInd.size == 0):
            continue
           
        # Fit the data
        trainPriors = np.ones(ntrainClasses)*(1.0/ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)        
        rfMod.fit(XrTrain, yTrain)
        
        
        ldaYes += np.around((ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]]))*goodInd.size)
        qdaYes += np.around((qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]]))*goodInd.size)
        rfYes += np.around((rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]]))*goodInd.size)
        cvCount += goodInd.size


      
# Refit with all the data  for the plots
        
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print ('Error in ldaPlot: labels do not match')
            
# Check the within-group covariance in the rotated space 
#    covs = []
#    for group in classes:
#        Xg = Xrr[yGood == group, :]
#        covs.append(np.atleast_2d(np.cov(Xg,rowvar=False)))
#    withinCov = np.average(covs, axis=0, weights=myPrior)
  
    # Print the five largest coefficients of the first 3 DFAs
    MAXCOMP = 3        # Maximum number of DFA components
    MAXWEIGHT = 5     # Maximum number of weights printed for each component
    
    ncomp = min(MAXCOMP, nClasses-1)
    nweight = min(MAXWEIGHT, nD)
    
    # scalings_ has the eigenvectors of the LDA in columns, and pca.components_ has the eigenvectors of the PCA in rows
    weights = np.dot(ldaMod.scalings_[:,0:ncomp].T, pca.components_)
    
    print('LDA Weights:')
    for ic in range(ncomp):
        idmax = np.argsort(np.abs(weights[ic,:]))[::-1]
        print('DFA %d: '%ic, end = '')
        for iw in range(nweight):
            if Xcolname is None:
                colstr = 'C%d' % idmax[iw]
            else:
                colstr = Xcolname[idmax[iw]]
            print('%s %.3f; ' % (colstr, float(weights[ic, idmax[iw]]) ), end='')
        print()
        
    if plotFig:
        dimVal = 0.8    # Overall dimming of the background so that points can be seen
        # Obtain fits in this rotated space for display purposes   
        ldaMod.fit(Xrr, yGood)    
        qdaMod.fit(Xrr, yGood)
        rfMod.fit(Xrr, yGood)
    
        XrrMean = Xrr.mean(0)
                
        # Make a mesh for plotting
        x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
        xm1 = np.reshape(x1, -1)
        xm2 = np.reshape(x2, -1)
        nxm = np.size(xm1)
        Xm = np.zeros((nxm, Xrr.shape[1]))
        Xm[:,0] = xm1
        if Xrr.shape[1] > 1 :
            Xm[:,1] = xm2
        
        for ix in range(2,Xrr.shape[1]):
            Xm[:,ix] = np.squeeze(np.ones((nxm,1)))*XrrMean[ix]
        
        XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
        XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
        XmcRF = np.zeros((nxm, 4))  # RGBA values for color for RF

    
        # Predict values on mesh for plotting based on the first two DFs     
        yPredLDA = ldaMod.predict_proba(Xm) 
        yPredQDA = qdaMod.predict_proba(Xm) 
        yPredRF = rfMod.predict_proba(Xm)

    
        # Transform the predictions in color codes
        maxLDA = yPredLDA.max()
        for ix in range(nxm) :
            cWeight = yPredLDA[ix,:]                               # Prob for all classes
            cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
            # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
            XmcLDA[ix,:] = np.dot(cWinner*cWeight, cClasses)
            XmcLDA[ix,3] = (cWeight.max()/maxLDA)*dimVal
    
        # Plot the surface of probability    
        plt.figure(facecolor='white', figsize=(10,4))
        plt.subplot(131)
        Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
        plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
        if nClasses > 2:
            plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
        else:
            plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
        plt.title('%s: LDA %d/%d' % (titleStr, ldaYes, cvCount))
        plt.axis('square')
        plt.xlim((-6, 6))
        plt.ylim((-6, 6))    
        plt.xlabel('DFA 1')
        plt.ylabel('DFA 2')

        if removeTickLabels:
            ax = plt.gca()
        
            labels = [item.get_text() for item in ax.get_xticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_xticklabels(empty_string_labels)
            
            labels = [item.get_text() for item in ax.get_yticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_yticklabels(empty_string_labels)
        
    
        # Transform the predictions in color codes
        maxQDA = yPredQDA.max()
        for ix in range(nxm) :
            cWeight = yPredQDA[ix,:]                               # Prob for all classes
            cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
            # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
            XmcQDA[ix,:] = np.dot(cWinner*cWeight, cClasses)
            XmcQDA[ix,3] = (cWeight.max()/maxQDA)*dimVal
    
        # Plot the surface of probability  

        plt.subplot(132)
        Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
        plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
        if nClasses > 2:
            plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
        else:
            plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
        plt.title('%s: QDA %d/%d' % (titleStr, qdaYes, cvCount))
        plt.xlabel('DFA 1')
        plt.ylabel('DFA 2')
        plt.axis('square')
        plt.xlim((-6, 6))
        plt.ylim((-6, 6))
           
        if removeTickLabels:
            ax = plt.gca()
            labels = [item.get_text() for item in ax.get_xticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_xticklabels(empty_string_labels)
        
            labels = [item.get_text() for item in ax.get_yticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_yticklabels(empty_string_labels)
   
        # Transform the predictions in color codes
        maxRF = yPredRF.max()
        for ix in range(nxm) :
            cWeight = yPredRF[ix,:]           # Prob for all classes
            cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
            # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses  # Weighted colors does not work
            XmcRF[ix,:] = np.dot(cWinner*cWeight, cClasses)
            XmcRF[ix,3] = (cWeight.max()/maxRF)*dimVal
    
    # Plot the surface of probability    
        plt.subplot(133)
        Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
        plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
        if nClasses > 2:    
            plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
        else:
            plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
            
        plt.title('%s: RF %d/%d' % (titleStr, rfYes, cvCount))
        plt.xlabel('DFA 1')
        plt.ylabel('DFA 2')
        plt.axis('square')
        plt.xlim((-6, 6))
        plt.ylim((-6, 6))
        
        if removeTickLabels:
            ax = plt.gca()
                        
            labels = [item.get_text() for item in ax.get_xticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_xticklabels(empty_string_labels)
        
            labels = [item.get_text() for item in ax.get_yticklabels()]
            empty_string_labels = ['']*len(labels)
            ax.set_yticklabels(empty_string_labels)
        
        plt.savefig('%s/%s.png' % (figdir, titleStr), format='png', dpi=1000)  # save before show so the figure is not blank
        plt.show()


    # Results
    ldaYes = int(ldaYes)
    qdaYes = int(qdaYes)
    rfYes = int(rfYes)
    
    p = 1.0/nClasses
    ldaP = 0
    qdaP = 0
    rfP = 0
    
    for k in range(ldaYes, cvCount+1):
        ldaP += binom.pmf(k, cvCount, p)
        
    for k in range(qdaYes, cvCount+1):
        qdaP += binom.pmf(k, cvCount, p)
        
    for k in range(rfYes, cvCount+1):
        rfP += binom.pmf(k, cvCount, p)
        
    print ("Number of classes %d. Chance level %.2f %%" % (nClasses, 100.0/nClasses))
    print ("%s LDA: %.2f %% (%d/%d p=%.4f)" % (titleStr, 100.0*ldaYes/cvCount, ldaYes, cvCount, ldaP))
    print ("%s QDA: %.2f %% (%d/%d p=%.4f)" % (titleStr, 100.0*qdaYes/cvCount, qdaYes, cvCount, qdaP))
    print ("%s RF: %.2f %% (%d/%d p=%.4f)" % (titleStr, 100.0*rfYes/cvCount, rfYes, cvCount, rfP))
    return ldaYes, qdaYes, rfYes, cvCount, ldaP, qdaP, rfP, nClasses, weights
Example #34
def class_sf(files):

    accu_clf = []
    accu_forest = []
    accu_knn = []
    accu_lda = []
    accu_qda = []
    accu_mlp = []

    file_namesDA = [
        'sDAdelta.npy', 'sDAtheta.npy', 'sDAalpha.npy', 'sDAbeta.npy',
        'sDAlowgamma.npy'
    ]
    file_namesDAw = [
        'sDAwdelta.npy', 'sDAwtheta.npy', 'sDAwalpha.npy', 'sDAwbeta.npy',
        'sDAwlowgamma.npy'
    ]

    file_namesLA = [
        'sLAdelta.npy', 'sLAtheta.npy', 'sLAalpha.npy', 'sLAbeta.npy',
        'sLAlowgamma.npy'
    ]
    file_namesLAw = [
        'sLAwdelta.npy', 'sLAwtheta.npy', 'sLAwalpha.npy', 'sLAwbeta.npy',
        'sLAwlowgamma.npy'
    ]

    listAnest = []
    listWake = []

    if files == 'DA':
        file_names = file_namesDA
        file_names2 = file_namesDAw

    elif files == 'LA':
        file_names = file_namesLA
        file_names2 = file_namesLAw

    for i, j in zip(file_names, file_names2):

        listAnest.append(np.load(i, allow_pickle=True))
        listWake.append(np.load(j, allow_pickle=True))

    listAnest = np.concatenate(listAnest, axis=2)
    listWake = np.concatenate(listWake, axis=2)

    listAnest = listAnest.reshape((-1, listAnest.shape[2]))
    listWake = listWake.reshape((-1, listWake.shape[2]))

    X = np.concatenate((listAnest, listWake), axis=0)
    y = np.concatenate(
        (np.zeros(listAnest.shape[0]), np.ones(listWake.shape[0])))

    X = X.T

    for i in range(X.shape[0]):

        x = X[i, :]
        x = x.reshape((-1, 1))

        X_train, X_test, y_train, y_test = train_test_split(
            x, y)  # cross-validation --> leave p groups out

        # permutation t-test --> choose the cross-validation scheme

        clf = svm.SVC(gamma='auto')
        clf.fit(X_train, y_train)

        forest = RandomForestClassifier(
            criterion='entropy',
            n_estimators=10)  # not to be used with the sign features
        forest.fit(X_train, y_train)

        knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
        knn.fit(X_train, y_train)

        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)

        qda = QuadraticDiscriminantAnalysis()
        qda.fit(X_train, y_train)

        mlp = MLPClassifier()
        mlp.fit(X_train, y_train)

        accu_clf.append(clf.score(X_test, y_test))
        accu_forest.append(forest.score(X_test, y_test))
        accu_knn.append(knn.score(X_test, y_test))
        accu_lda.append(lda.score(X_test, y_test))
        accu_qda.append(qda.score(X_test, y_test))
        accu_mlp.append(mlp.score(X_test, y_test))

    return accu_clf, accu_forest, accu_knn, accu_lda, accu_qda, accu_mlp
Example #35
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

train_data = pd.read_csv('./dm_data2/prob5_moons.tra')
test_data = pd.read_csv('./dm_data2/prob5_moons.tes')

train_data = np.array(train_data)
test_data = np.array(test_data)

train_x, train_y = train_data[:, 1:], train_data[:, 0]
test_x, test_y = test_data[:, 1:], test_data[:, 0]

model = QuadraticDiscriminantAnalysis()
model.fit(train_x, train_y)  # sklearn expects 1-D label arrays

print('Train Accuracy : {0}'.format(model.score(train_x, train_y)))
print('Test Accuracy : {0}'.format(model.score(test_x, test_y)))
Example #36
print('Linear Discriminant Analysis eigen')
linDisc = LinearDiscriminantAnalysis(solver='eigen')
linDisc.fit(X_train, y_train)
y_test_pred = linDisc.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = linDisc.score(X_test, y_test)
no_selection_performance.append(
    ('Linear Discriminant Analysis eigen', score, matrix))

print('Quadratic Discriminant Analysis')
quadDisc = QuadraticDiscriminantAnalysis()
quadDisc.fit(X_train, y_train)
y_test_pred = quadDisc.predict(X_test)
matrix = confusion_matrix(y_test, y_test_pred)
score = quadDisc.score(X_test, y_test)
no_selection_performance.append(
    ('Quadratic Discriminant Analysis', score, matrix))

print('Kernel Ridge Regression')
kerRid = KernelRidge(alpha=1.0)
kerRid.fit(X_train, y_train)
y_test_pred = kerRid.predict(X_test)
y_test_pred = [int(round(x)) for x in y_test_pred]
matrix = confusion_matrix(y_test, y_test_pred)
score = kerRid.score(X_test, y_test)
no_selection_performance.append(('Kernel Ridge Regression', score, matrix))

print('SVC')
svc = svm.SVC(C=1,
              class_weight=None,