def naive_bayes_with_lda():
    train, train_target, test, test_target = load_polluted_spambase()

    print "Train data: %s, Train Label: %s" % (train.shape, train_target.shape)
    print "Test data: %s, Test Label: %s" % (test.shape, test_target.shape)

    start = timeit.default_timer()

    lda = LDA(n_components=100)
    train = lda.fit_transform(train, train_target)
    test = lda.transform(test)

    print lda
    print "Train data: %s, Train Label: %s" % (train.shape, train_target.shape)
    print "Test data: %s, Test Label: %s" % (test.shape, test_target.shape)

    cf = GaussianNaiveBayes()
    cf.fit(train, train_target)
    raw_predicts = cf.predict(test)
    predict_class = cf.predict_class(raw_predicts)

    cm = confusion_matrix(test_target, predict_class)
    print "confusion matrix: TN: %s, FP: %s, FN: %s, TP: %s" % (
        cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1])
    er, acc, fpr, tpr = confusion_matrix_analysis(cm)
    print 'Error rate: %f, accuracy: %f, FPR: %f, TPR: %f' % (er, acc, fpr,
                                                              tpr)

    stop = timeit.default_timer()
    print "Total Run Time: %s secs" % (stop - start)
Example #3
def runLDA(all_kmer_vectors_array,labels):
    sklearn_lda = LDA(n_components=4)
    X = np.array(all_kmer_vectors_array)
    y = np.array(labels)
    X_lda_sklearn = sklearn_lda.fit_transform(X,y)
    print(X_lda_sklearn)
    return X_lda_sklearn
Example #4
def lda_scikit():
    df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None)
    df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash', 
    'Alcalinity of ash', 'Magnesium', 'Total phenols', 
    'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 
    'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']
    X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
    X_train, X_test, y_train, y_test = \
          train_test_split(X, y, test_size=0.3, random_state=0)
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    X_test_std = sc.transform(X_test)
    
    lda = LDA(n_components=2)  # wine has 3 classes, so at most 2 discriminant directions
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit.png', dpi=300)
    plt.close()
    
    X_test_lda = lda.transform(X_test_std)
    
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.tight_layout()
    plt.savefig(PL5 + 'lda_scikit_test.png', dpi=300)
def combine_lda_pca(X, y):
    sklearn_lda = LDA(n_components=2)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    sklearn_pca = sklearnPCA(n_components=2)  #PCA
    X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
    plot_scikit_lda(X_ldapca_sklearn,
                    title='LDA+PCA via scikit-learn',
                    mirror=(-1))
Example #6
 def _lda(self):
     """ performs linear discriminant analysis of the data"""
     if not self.target:
         print "target has not been set, it is required for LDA"
         return
     lda = LDA(n_components=self.n)
     self.transformed_data = lda.fit_transform(self.data, self.target)
Example #7
def reduceDimensionLDA(mat, k):
	print mat.shape
	labels = mat[:, -1]
	mat = mat[:, :-1]
	lda = LDA(n_components = k)
	data = lda.fit_transform(mat, labels)
	data = addLabels(data, labels)
	print data
	return data
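addLabels is not defined in this excerpt; a plausible minimal version (an assumption, not the original helper) simply re-attaches the label column to the reduced matrix:

def addLabels(data, labels):
	# hypothetical helper: append the label column back onto the LDA output
	return np.column_stack((data, labels))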
Example #8
def lda(df, samples, sample_labels, plot_name='lda_plot.png'):
    df = df.copy()
    df = df.transpose()
    df = df.ix[samples]
    df_nrm = normalize_min_max(df)
    X = df_nrm.values
    label_dict, y = encode_labels(sample_labels)
    ldas = LDA(n_components=2)
    X_lda = ldas.fit_transform(X, y)
    plot_scikit_lda(X_lda, y, label_dict, samples)
def lda_decompose(dataset, n):
    lda = LDA(n_components=n)
    reduced_features = lda.fit_transform(dataset.all.features,
                                         dataset.all.target)
    training_size = dataset.training_size
    training = Data(reduced_features[:training_size, :],
                    dataset.all.target[:training_size])
    testing = Data(reduced_features[training_size:, :],
                   dataset.all.target[training_size:])
    return DataSet(training, testing)
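Note that lda_decompose fits the LDA on dataset.all, i.e. on the training and testing rows together, which leaks test labels into the projection. A leakage-free sketch, assuming the same Data/DataSet containers used above:

def lda_decompose_heldout(dataset, n):
    # fit the projection on the training rows only, then apply it to the rest
    lda = LDA(n_components=n)
    k = dataset.training_size
    training = Data(lda.fit_transform(dataset.all.features[:k, :],
                                      dataset.all.target[:k]),
                    dataset.all.target[:k])
    testing = Data(lda.transform(dataset.all.features[k:, :]),
                   dataset.all.target[k:])
    return DataSet(training, testing)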
Example #11
    def execute(self,i,j):
        # dim_red = LDA()
        # dim_red.fit_transform(self.x_train, self.y_train)
        # with open('dumped_dim_red_'+str(i)+'.pkl', 'wb') as fid:
        #     cPickle.dump(dim_red, fid)

        # x_train = dim_red.transform(self.x_train)
        # x_test = dim_red.transform(self.y_train)    
        # stat_obj = self.stat_class() # reflection bitches
        # stat_obj.train(x_train, x_test)
        # print len(x_train)
        # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'wb') as fid:
        #     cPickle.dump(stat_obj, fid)

        kf = KFold(len(self.x_train), n_folds=self.k_cross)
        own_kappa = []
        for train_idx, test_idx in kf:
    		# print train_idx,test_idx
		# exit(0)
            x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
            y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
            dim_red = LDA()
            x_train = dim_red.fit_transform(x_train, y_train)
			
			
            # with open('dumped_dim_red_'+str(i)+'.pkl', 'wb') as fid:
            #     cPickle.dump(dim_red, fid)

            # with open('dumped_dim_red_'+str(i)+'.pkl', 'rb') as fid:
                # dim_red=cPickle.load(fid)
            x_test = dim_red.transform(x_test)
                
            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'rb') as fid:
            #     stat_obj=cPickle.load(fid)
            # x_train = dim_red.transform(x_train)
            # x_test = dim_red.transform(x_test)

            stat_obj = self.stat_class() # reflection bitches
            stat_obj.train(x_train,y_train)
            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'wb') as fid:
                # cPickle.dump(stat_obj, fid)
            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'rb') as fid:
                # stat_obj=cPickle.load(fid)
            y_pred = [ 0 for i in xrange(len(y_test)) ]
            for i in range(len(x_test)):
                # print len(x_test[i])
                val = int(np.round(stat_obj.predict(x_test[i])))
                if val > self.range_max: val = self.range_max
                if val < self.range_min: val = self.range_min
                y_pred[i] = [val]
            y_pred = np.matrix(y_pred)
            cohen_kappa_rating = own_wp.quadratic_weighted_kappa(y_test,y_pred,self.range_min,self.range_max)
            self.values.append(cohen_kappa_rating)
        return str(sum(self.values)/self.k_cross)
Example #12
def get_LDA_performance(test_df, X_std, y):
    X_test = test_df.ix[:, 'x.1':'x.10'].values
    X_std_test = StandardScaler().fit_transform(X_test)
    y_test = test_df.ix[:, 'y'].values

    lda_scores_training = []
    lda_scores_test = []

    qda_scores_training = []
    qda_scores_test = []

    knn_scores_training = []
    knn_scores_test = []

    for d in range(1, 11):
        lda = LDA(n_components=d)
        Xred_lda_training = lda.fit_transform(X_std, y)
        Xred_lda_test = lda.transform(X_std_test)

        lda_model = LDA()
        lda_model.fit(Xred_lda_training, y)

        qda_model = QDA()
        qda_model.fit(Xred_lda_training, y)

        knn_model = KNeighborsClassifier(n_neighbors=10)
        knn_model.fit(Xred_lda_training, y)

        lda_scores_training.append(1 - lda_model.score(Xred_lda_training, y))
        lda_scores_test.append(1 - lda_model.score(Xred_lda_test, y_test))

        qda_scores_training.append(1 - qda_model.score(Xred_lda_training, y))
        qda_scores_test.append(1 - qda_model.score(Xred_lda_test, y_test))

        knn_scores_training.append(1 - knn_model.score(Xred_lda_training, y))
        knn_scores_test.append(1 - knn_model.score(Xred_lda_test, y_test))

    plt.plot(range(10), lda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), lda_scores_test, 'b--', label="Test data")
    plt.title("LDA vs LDA")
    plt.xlabel('d (number of LDA components)')
    plt.ylabel('Error rate')
    plt.show()

    plt.plot(range(10), qda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), qda_scores_test, 'b--', label="Test data")
    plt.title("QDA vs LDA")
    plt.show()

    plt.plot(range(10), knn_scores_training, 'r--', label="Train data")
    plt.plot(range(10), knn_scores_test, 'b--', label="Test data")
    plt.title("KNN vs LDA")
    plt.show()
Example #13
class LDA(AbstractProjection):
    def __init__(self, **kw):
        super(LDA, self).__init__()
        self.lda = ScikitLDA(**kw)

    def train(self, features, labels):
        red_feats = self.lda.fit_transform(features, labels)
        self.V = np.std(red_feats, axis=0)

    def project(self, feats, whiten=True):
        lda_feats = self.lda.transform(feats)
        if whiten:
            lda_feats /= self.V
        return lda_feats
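A hedged usage sketch for the wrapper above (features, labels, and new_feats are placeholder arrays, not names from the original project):

proj = LDA(n_components=2)           # the AbstractProjection subclass defined above
proj.train(features, labels)         # fits the LDA and records the per-dimension std
embedded = proj.project(new_feats)   # projected and whitened coordinates
raw = proj.project(new_feats, whiten=False)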
Example #14
    def execute(self,i,j):
        x_train= self.x_train
        y_train= self.y_train
        dim_red = LDA()
        x_train = dim_red.fit_transform(x_train, y_train)
        with open('dumped_dim_red_'+str(i)+'.pkl', 'wb') as fid:
            cPickle.dump(dim_red, fid)

        stat_obj = self.stat_class() # reflection bitches
        stat_obj.train(x_train,y_train)
        with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'wb') as fid:
            cPickle.dump(stat_obj, fid)
        
        kf = KFold(len(self.x_train), n_folds=self.k_cross)
        own_kappa = []
        for train_idx, test_idx in kf:
		# print train_idx,test_idx
		# exit(0)
            x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
            y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
            dim_red = LDA()
            x_train = dim_red.fit_transform(x_train, y_train)
            x_test = dim_red.transform(x_test)

            stat_obj = self.stat_class() # reflection bitches
            stat_obj.train(x_train,y_train)
          
            y_pred = [ 0 for i in xrange(len(y_test)) ]
            for i in range(len(x_test)):
                val = int(np.round(stat_obj.predict(x_test[i])))
                if val > self.range_max: val = self.range_max
                if val < self.range_min: val = self.range_min
                y_pred[i] = [val]
            y_pred = np.matrix(y_pred)
            cohen_kappa_rating = own_wp.quadratic_weighted_kappa(y_test,y_pred,self.range_min,self.range_max)
            self.values.append(cohen_kappa_rating)
        return sum(self.values)/self.k_cross
Example #15
def lda_data(X, y, n_components=2, num_data_points=-1):

    lda = LDA(n_components=n_components)

    if num_data_points > 0:
        X = X[:num_data_points, :]
        y = y[:num_data_points]

    print "Performing mapping"
    start = timeit.default_timer()
    mapped = lda.fit_transform(X, y)
    end = timeit.default_timer()
    print "Mapping completed in %f seconds" % (end - start)

    return mapped, lda
Example #16
def get_LDA(X_std, y):
    sklearn_lda = LDA(n_components=2)
    Xred_lda = sklearn_lda.fit_transform(X_std, y)
    cmap = plt.cm.get_cmap('Accent')
    mclasses = (1, 2, 3, 4, 5, 6, 7, 8, 9)
    mcolors = [cmap(i) for i in np.linspace(0, 1, 10)]
    plt.figure(figsize=(12, 8))

    for lab, col in zip(mclasses, mcolors):
        plt.scatter(Xred_lda[y == lab, 0],
                    Xred_lda[y == lab, 1],
                    label=lab,
                    c=col)

    plt.xlabel('LDA/Fisher Direction 1')
    plt.ylabel('LDA/Fisher Direction 2')
    leg = plt.legend(loc='upper right', fancybox=True)
    plt.show()
Example #17
def with_lda(X_train_std, y_train, X_test_std, y_test):
    from sklearn.lda import LDA
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train_std, y_train)
    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)
    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()

    X_test_lda = lda.transform(X_test_std)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
Example #18
 def execute(self):
     kf = KFold(len(self.x_train), n_folds=self.k_cross)
     own_kappa = []
     for train_idx, test_idx in kf:
         x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
         y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
         dim_red = LDA()
         x_train = dim_red.fit_transform(x_train, y_train)
         x_test = dim_red.transform(x_test)
         stat_obj = self.stat_class() # reflection bitches
         stat_obj.train(x_train,y_train)
         y_pred = [ 0 for i in xrange(len(y_test)) ]
         for i in range(len(x_test)):
             val = int(np.round(stat_obj.predict(x_test[i])))
             if val > self.range_max: val = self.range_max
             if val < self.range_min: val = self.range_min
             y_pred[i] = [val]
         y_pred = np.matrix(y_pred)
         cohen_kappa_rating = own_wp.quadratic_weighted_kappa(y_test,y_pred,self.range_min,self.range_max)
         self.values.append(cohen_kappa_rating)
     return str(sum(self.values)/self.k_cross)
Example #19
from Wine import getWineData
from Util import plot_decision_regions
from sklearn.lda import LDA
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt

X_train_std, X_test_std, y_train, y_test = getWineData()

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
Example #20
# newdata = normdata
# for i in range(5):
# 	print newdata[i]

print "data done"
print "logistic initialized"
# clf.fit(data[:,:-1], data[:,-1])
print "fitted data"
skf = StratifiedKFold(data[:,-1], n_folds=10, shuffle=True)
output =[]
finalscore = 0
counter = 0
for train, test in skf:
	counter = counter + 1
	newdata = prj.fit_transform([ normdata[i][:] for i in train ],[ data[i][-1] for i in train ])
	newtestdata = prj.transform([ normdata[i][:] for i in test ])
	clf = GradientBoostingClassifier(warm_start = True)
	clf = clf.fit(newdata, [ data[i][-1] for i in train ])
	prediction = clf.predict(newtestdata)
	# pred = []
	# for i in prediction:
	# 	if(i > 1.5):
	# 		pred.append(2)
	# 	else:
	# 		pred.append(1)
	finalscore = finalscore + score.get_score( prediction , [ data[i][-1] for i in test ])
	print "done"
# score = cross_val_score(clf, newdata[:,:], data[:,-1], cv = 5, scoring = 'get_score')
# print "in scores"
# for i in score:
Example #21
		except:
			pass
for i in range(len(s2.split('\n'))):
	Test.append([])
	for j in s2.split('\n')[i].split(','):
		try:
			Test[i].append(float(j))
		except:
			pass
pca=PCA(n_components=2)
x=pca.fit_transform(numpy.array(Data[0:len(Data)-1]))
y=pca.transform(numpy.array(Test[0:len(Test)-1]))  # reuse the PCA fitted on Data; refitting on Test would leak
regr=linear_model.LinearRegression()
regr.fit(x,1000*[0]+1000*[1])
clf=LDA(n_components=2)
z=clf.fit_transform(numpy.array(Data[0:len(Data)-1]),1000*[0]+1000*[1])
New=clf.predict(Test[0:len(Test)-1])
A=numpy.transpose(x)
B=numpy.dot(A,x)
C=numpy.dot(inv(B),A)
D=numpy.dot(C,1000*[-1]+1000*[1])
E=numpy.transpose(D)
F=numpy.dot(y,D)
outx=[]
for i in F:
	if i>=0:
		outx.append(1)
	else:
		outx.append(0)
outp=[]
for i in regr.predict(y):
Example #23
import pandas as pd 
from sklearn import datasets
from sklearn.decomposition import PCA 
from sklearn.lda import LDA 

# Load data 
iris = datasets.load_iris()
idata = iris.data 
itarget = iris.target
species = iris.target_names

iris_df = pd.DataFrame(idata, columns = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'])
iris_df['Species'] = itarget

# PCA 
pca = PCA(n_components = 2)
reduced_idata = pca.fit_transform(idata) 

# LDA 
lda = LDA(n_components = 2)
reduced_idata_lda = lda.fit_transform(idata, itarget)  # LDA-reduced data, not targets
Example #24
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
print 'iris.data[-10:]\n', iris.data[-10:]
print 'iris.target[-10:]\n', iris.target[-10:]
print 'iris.data.shape:', iris.data.shape

from sklearn.lda import LDA

# Data dimension reduction
lda = LDA() # Default n_components setting, max C-1
lda_result1 = lda.fit_transform(iris.data, iris.target)
print 'LDA result 1:', lda_result1.shape
lda = LDA(n_components=1)
lda_result2 = lda.fit_transform(iris.data, iris.target)
print 'LDA result 2:', lda_result2.shape

# Visualization
import matplotlib.pyplot as plt
plt.subplot(1,2,1)
plt.scatter(lda_result1[iris.target==0, 0], lda_result1[iris.target==0, 1], color='r')
plt.scatter(lda_result1[iris.target==1, 0], lda_result1[iris.target==1, 1], color='g') 
plt.scatter(lda_result1[iris.target==2, 0], lda_result1[iris.target==2, 1], color='b') 
plt.title('LDA on iris (1)')

plt.subplot(1,2,2)
plt.stem(lda_result2)
plt.title('LDA on iris (2)')

plt.show()
Example #25
#	Each data point gets its class and a color so the classes can be told apart
for lab, col in zip(mclasses,mcolors):
	plt.scatter(Xred_pca[y==lab, 0],Xred_pca[y==lab, 1],label=lab,c=col)

#	Set up the axis labels
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
leg = plt.legend(loc='upper right', fancybox=True)

######## Question (d) ############################################################

#	Use LDA with two dimensions
sklearn_lda = LDA(n_components=2)

#	Fit to the training data
Xred_lda = sklearn_lda.fit_transform(X_std,y)

#	Choose the color palette
cmap = plt.cm.get_cmap('hsv')

#	Define the classes
mclasses=(1,2,3,4,5,6,7,8,9)
mcolors = [cmap(i) for i in np.linspace(0,1,10)]

#	Set the figure size
plt.figure(figsize=(12, 8))

#	Each data point gets its class and a color so the classes can be told apart
for lab, col in zip(mclasses,mcolors):
	plt.scatter(Xred_lda[y==lab, 0],Xred_lda[y==lab, 1],label=lab,c=col)
Example #26
File: lda.py  Project: jugovich/CSC424
with closing(open(path.join(path.dirname(__file__), 'data', 'X_train.pkl'), 'rb')) as pkl:
    data = cPickle.load(pkl)

with closing(open(path.join(path.dirname(__file__), 'data', 'y_train.pkl'), 'rb')) as pkl:
    target = cPickle.load(pkl)

vectorizer = CountVectorizer(max_df=.75, min_df = 2, ngram_range=(1,2))

X=vectorizer.fit_transform(data, array(target))
transformer = TfidfTransformer()
X = transformer.fit_transform(X, array(target))

#hv = HashingVectorizer()
clf = LDA()
clf.fit_transform(X.toarray(), array(target), store_covariance=True)

for name,obj in [('scalings.txt', clf.scalings_), ('coef.txt', clf.coef_),
                 ('covariance.txt', clf.covariance_),('xbar.txt', clf.xbar_),
                 ('means.txt', clf.means_)]:
    with closing(open(path.join(opt['out_path'], name), 'wb')) as out:
        print 'saving %s' % name
        for row in obj:
            out.write(str(row)+'\r\n')

print 'priors'
print clf.priors_

del X
del data
del target
#kernel PCA keeping 300 components
kpca = KernelPCA(kernel="rbf",n_components=300 , gamma=1)
X_kpca = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)
print (kpca)
print(X_kpca.shape)





#lda for dimensionality reduction. It should keep [classes-1] components.
lda = LDA()
print (lda)

X_lda = lda.fit_transform(X_kpca,y_train)
X_test = lda.transform(X_test)
print(X_lda.shape)



#kNN classification
start = int(round(time.time() * 1000))
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
clf.fit(X_lda, y_train)

print (clf)

print("---------(5) Cross validation accuracy--------")
print(cross_validation.cross_val_score(clf, X_lda,y_train, cv=5))
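Because the KernelPCA and LDA above are fit on all of X_train before the cross-validation, the folds share information through those projections. A sketch of a leak-free alternative with a Pipeline, refit inside each fold (an assumption on this codebase, reusing the old-style modules already imported here):

from sklearn.pipeline import make_pipeline
pipe = make_pipeline(
    KernelPCA(kernel="rbf", n_components=300, gamma=1),
    LDA(),
    neighbors.KNeighborsClassifier(n_neighbors=5))
print(cross_validation.cross_val_score(pipe, X_train, y_train, cv=5))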
Example #28
File: predictor.py  Project: ananthbv/A3
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with ICA", n, "components, clusters =", clusters, "silhouette score =", acc

        
if dralg == 'rp':
    #######################################################
    ######## KMeans after Sparse Random Projection ########
    #######################################################
    for n in range(1, len(df.columns) + 1):
        # create the random projection
        sp = SparseRandomProjection(n_components = n)
        X_new = sp.fit_transform(X)
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with Random Projectsion", n, "components, clusters =", clusters, "silhouette score =", acc

        
if dralg == 'lda':
    ##################################
    ######## KMeans after LDA ########
    ##################################
    for n in range(1, len(df.columns) + 1):
        for solver in ['svd', 'eigen']:
            # create the LDA projection
            lda = LDA(n_components = n, solver = solver)
            X_new = lda.fit_transform(X, y)
            acc, clusters = run_clustering(X_new)
            print "average EM score after X modified with LDA", n, "components, clusters =", clusters, "silhouette score =", acc


plt.show()
Example #29
    def execute(self, i, j):
        global save1
        global save2
        jk = i
        # print type(jk)
        # dim_red = LDA()
        # dim_red.fit_transform(self.x_train, self.y_train)
        # with open('dumped_dim_red_'+str(i)+'.pkl', 'wb') as fid:
        #     cPickle.dump(dim_red, fid)

        # x_train = dim_red.transform(self.x_train)
        # x_test = dim_red.transform(self.y_train)
        # stat_obj = self.stat_class() # reflection bitches
        # stat_obj.train(x_train, x_test)
        # print len(x_train)
        # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'wb') as fid:
        #     cPickle.dump(stat_obj, fid)
        # save1=None
        # save2=None
        kf = KFold(len(self.x_train), n_folds=self.k_cross)
        own_kappa = []
        for train_idx, test_idx in kf:
            # print train_idx,test_idx
            # exit(0)
            x_train, x_test = self.x_train[train_idx], self.x_train[test_idx]
            y_train, y_test = self.y_train[train_idx], self.y_train[test_idx]
            dim_red = LDA()
            x_train = dim_red.fit_transform(x_train, y_train)

            # with open('dumped_dim_red_'+str(i)+'.pkl', 'wb') as fid:
            #     cPickle.dump(dim_red, fid)

            # with open('dumped_dim_red_'+str(i)+'.pkl', 'rb') as fid:
            # dim_red=cPickle.load(fid)
            x_test = dim_red.transform(x_test)

            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'rb') as fid:
            #     stat_obj=cPickle.load(fid)
            # x_train = dim_red.transform(x_train)
            # x_test = dim_red.transform(x_test)

            stat_obj = self.stat_class()  # reflection bitches
            stat_obj.train(x_train, y_train)
            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'wb') as fid:
            # cPickle.dump(stat_obj, fid)
            # with open('dumped_'+str(j)+'_'+str(i)+'.pkl', 'rb') as fid:
            # stat_obj=cPickle.load(fid)
            y_pred = [0 for i in xrange(len(y_test))]
            if (int(jk) == 1):
                # print "test_idx"
                save1 = stat_obj
                save2 = dim_red
            for i in range(len(x_test)):
                # print len(x_test[i])
                val = int(np.round(stat_obj.predict(x_test[i])))
                if val > self.range_max: val = self.range_max
                if val < self.range_min: val = self.range_min
                y_pred[i] = [val]
            y_pred = np.matrix(y_pred)
            cohen_kappa_rating = own_wp.quadratic_weighted_kappa(
                y_test, y_pred, self.range_min, self.range_max)
            self.values.append(cohen_kappa_rating)
            # print stat_obj.predict(x_train)
            # linear_k_cross = k_fold_cross_validation(cross_valid_k,linear_regression,X_train,Y_train,range_min,range_max)
            # linesar_accuracy.append(linear_k_cross.execute(i,0))
            # logistic_k_cross = k_fold_cross_validation(cross_valid_k,logistic_regression,X_train,Y_train,range_min,range_max)
            # logistic_accuracy.append(logistic_k_cross.execute(i,1))
            # svr_k_cross = k_fold_cross_validation(cross_valid_k,support_vector_regression,X_train,Y_train,range_min,range_max)
            # svr_accuracy.append(svr_k_cross.execute(i,2))
            # svm_k_cross = k_fold_cross_validation(cross_valid_k,support_vector_machine,X_train,Y_train, range_min,range_max)
            # svm_accuracy.append(svm_k_cross.execute(i,3))
        return str(sum(self.values) / self.k_cross)
Example #30
importance_w, Xw_cols = zip(*sorted(zip(importance_w, Xw_cols)))

fig = plt.figure(figsize=(6, 4), dpi=80).add_subplot(111)
plt.bar(range(len(Xw_cols)), importance_w, align='center')
plt.xticks(range(len(Xw_cols)), Xw_cols, rotation='vertical')

plt.xlabel('Features')
plt.ylabel('Importance of features')
plt.title("PCA for white wine")
plt.show()

# PCA for red wine
pca_transf_r = pca.fit_transform(Xr_minmax)
importance_r = pca.explained_variance_ratio_
print importance_r
importance_r, Xr_cols = zip(*sorted(zip(importance_r, Xr_cols)))

fig = plt.figure(figsize=(6, 4), dpi=80).add_subplot(111)
plt.bar(range(len(Xr_cols)), importance_r, color='red', align='center')
plt.xticks(range(len(Xr_cols)), Xr_cols, rotation='vertical')

plt.xlabel('Features')
plt.ylabel('Importance of features')
plt.title("PCA for red wine")
plt.show()

# LDA for white wine
from sklearn.lda import LDA
lda = LDA(n_components=None)
transf_lda = lda.fit_transform(Xw_minmax, Yw)
Example #31
def fit_lda(df, active_features, y_col, k=2):
    lda = LDA(n_components=k)
    X = lda.fit_transform(df[active_features], df[y_col])
    return X
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.axis('tight')
    plt.axis('off')
    plt.tight_layout()

plot_estimator(svc, X, y)
plt.show()
plt.clf()

###LDA###
sklearn_lda = LDA(n_components=2)
X_lda_sklearn = sklearn_lda.fit_transform(X, y)
print X_lda_sklearn

#4. Petal Length X Sepal Width for all 3
plt.scatter(iris.data[:, 1], iris.data[:, 2], c=iris.target)
plt.xlabel(iris.feature_names[1])
plt.ylabel(iris.feature_names[2])

plt.scatter(iris.data[0:150, 1], iris.data[0:150, 2], c=iris.target[0:150])
plt.xlabel(iris.feature_names[1])
plt.ylabel(iris.feature_names[2])

from sklearn import svm
svc = svm.SVC(kernel='linear', C=10) # the larger C, the narrower the margin (fewer violations tolerated)
from sklearn import datasets
X = X_lda_sklearn #reassigning to LDA
Example #34
File: ch05.py  Project: 1iyiwei/pyml
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower right')
# plt.tight_layout()
# plt.savefig('./figures/lda2.png', dpi=300)
plt.show()


#############################################################################
print(50 * '=')
print('Section: LDA via scikit-learn')
print(50 * '-')

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)

plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
# plt.tight_layout()
# plt.savefig('./images/lda3.png', dpi=300)
plt.show()

X_test_lda = lda.transform(X_test_std)

plot_decision_regions(X_test_lda, y_test, classifier=lr)
 
 #Construct dictionary for saving array
 final_data = {'M1_d_data':M1_d_data,'S1_d_data':S1_d_data,'pmd_d_data':pmd_d_data,'pmv_d_data':pmv_d_data,'M1_c_data':M1_c_data,'S1_c_data':S1_c_data,'pmd_c_data':pmd_c_data,'pmv_c_data':pmv_c_data,'targets':targets}

 #Construct temparary dictionary for figure generation
 final_data_no_targets = {'M1 at Delivery':M1_d_data,'S1 at Delivery':S1_d_data,'PmD at Delivery':pmd_d_data,'PmV at Delivery':pmv_d_data,'M1 at Cue':M1_c_data,'S1 at Cue':S1_c_data,'PmD at Cue':pmd_c_data,'PmV at Cue':pmv_c_data}

 np.save("multi_reward"+filename[-15:-4]+"_hists_"+name_of_bin,(final_data,unit_names))

 #Perform PCA on PSTH followed By LDA on PCA transform of PSTH data and save figure showing results for each bin
 for key,value in final_data_no_targets.iteritems():
     print key
     lda = LDA(n_components=2)
     pca = RandomizedPCA(n_components=20)
     proj = pca.fit_transform(value)
     proj = lda.fit_transform(proj,targets)
     print proj.shape
     plt.clf()
     plt.scatter(proj[:, 0], proj[:, 1], c=targets)
     plt.title(key+" from "+name_of_bin)
     plt.xlabel("LD1")
     plt.ylabel("LD2")
     plt.colorbar()
     plt.savefig(key+" from "+name_of_bin+"s.png")
     plt.clf()
Example #36
    for line in fileLines:
        result.append(line.split())
    return result

## II.1.1 LDA using sklearn package ##
trainingData = fileParser("IrisTrain2014.dt") # Parse iris files
testData = fileParser("IrisTest2014.dt")

x = []
y = []
for entry in trainingData:
    x.append([float(entry[0]), float(entry[1])])
    y.append(int(entry[2]))

lda = LDA(n_components = 2) # initialize LDA for 2 parameter entries
transformedLda = lda.fit_transform(x,y)

# Function to plot the transformed data
def plotLda(lda):
    colors = ["red", "green", "blue"]
    i=0
    while(i < 3):
        xs = []
        ys = []
        n = 0
        while(n < len(lda)):
            if(y[n] == i):
                xs.append(lda[n][0])
                ys.append(lda[n][1])
            n += 1
        plotter.scatter(x=xs, y=ys, color = colors[i])
Example #37
    plt.scatter(Xred_pca[y==lab, 0],Xred_pca[y==lab, 1],label=lab,c=col)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
leg = plt.legend(loc='upper right', fancybox=True)
plt.show()



####################################################
########## Part d: LDA ############################
####################################################



sklearn_lda = LDA(n_components=2)
Xred_lda = sklearn_lda.fit_transform(X_std,y)
cmap = plt.cm.get_cmap('Set1')
mclasses=(1,2,3,4,5,6,7,8,9)
mcolors = [cmap(i) for i in np.linspace(0,1,10)]
plt.figure(figsize=(12, 8))
for lab, col in zip(mclasses,mcolors):
    plt.scatter(Xred_lda[y==lab, 0],Xred_lda[y==lab, 1],label=lab,c=col)

plt.xlabel('LDA/Fisher Direction 1')
plt.ylabel('LDA/Fisher Direction 2')
leg = plt.legend(loc='upper right', fancybox=True)
plt.show()

#
# ####################################################
# ########## Part f: Building the Classifier ######
 first_half = np.vstack(first_half)
 second_half = np.hstack(Data[no_mappings/2:no_mappings,:,:,:])
 second_half = np.vstack(second_half)
 Data = np.vstack([first_half,second_half])
 # for true targets uncomment next line
 #targets = np.hstack([targets,targets])

 #for random targets uncomment next line
 targets = np.random.randint(1,no_locations+1,no_mappings*no_locations*no_thwacks)


 lda = LDA(n_components=14)
 pca = RandomizedPCA(n_components = 125)
 classifier =  KNeighborsClassifier(8)
 proj = pca.fit_transform(Data)
 proj = lda.fit_transform(proj,targets)
 proj1 = pca.fit_transform(Data)
 proj1 = lda.fit_transform(proj1,mapping_targets)
 print(file)
 plt.clf()
 plt.scatter(proj[0:proj.shape[0]/2,0],proj[0:proj.shape[0]/2,1],c=targets[0:targets.shape[0]/2])
 plt.title(file.rsplit('_')[0]+'_'+file.rsplit('_')[1]+" Before "+file.rsplit('_')[2]+" injection")
 plt.colorbar()
 plt.ylabel("LD1")
 plt.xlabel("LD2")
 plt.savefig(file.rsplit('_')[0]+'_'+file.rsplit('_')[1]+" Before "+file.rsplit('_')[2]+file[-11:-4]+" injection.svg")
 plt.show()
 plt.clf()

 
 plt.scatter(proj[proj.shape[0]/2:proj.shape[0],0],proj[proj.shape[0]/2:proj.shape[0],1],c=targets[targets.shape[0]/2:targets.shape[0]])
Example #39
eigen_vals = pca.explained_variance_ratio_ # eigenvalue share (explained variance) of each PC
# --- Fitting Model with PCA
pca = PCA(n_components=2) # Only take first 2 PCs
lr = LogisticRegression()
X_train_pca = pca.fit_transform(X_train_std) # fit with trainset
X_test_pca = pca.transform(X_test_std) # only transform with testset
lr.fit(X_train_pca, Y_train)



# (2) Linear Discriminant Analysis (LDA) - linearly separable case
# --- Evaluate Importance of LDA
from sklearn.linear_model import LogisticRegression
from sklearn.lda import LDA
lda = LDA(n_components=None)
X_train_lda = lda.fit_transform(X_train_std, Y_train) # fit with trainset, (x, y) supervized
eigen_vals = lda.explained_variance_ratio_ # eigenvalue share of each discriminant


# --- Fitting Model with LDA
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, Y_train) # fit with trainset, (x, y) supervized
X_test_lda = lda.transform(X_test_std) # only transform with testset
lr.fit(X_train_lda, Y_train)


# (3) Kernel Principal Component Analysis (K-PCA) - non-linearly separable case
from sklearn.decomposition import KernelPCA
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15) # can choose other kernel methods, 2 PCAs = features
X_skernpca = scikit_kpca.fit_transform(X_train_std)
Example #40
from sklearn.svm import LinearSVC
kappa_scorer = make_scorer(cohen_kappa_score)
grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, scoring=kappa_scorer)

#EXAMPLE 20 - LATENT FACTOR ANALYSIS (LFA)
from sklearn.decomposition import FactorAnalysis
fact_2c = FactorAnalysis(n_components=2)
X_factor = fact_2c.fit_transform(iris.data)
plt.scatter(X_factor[:,0], X_factor[:,1], c=iris.target,
alpha=0.8, s=60, marker='o', edgecolors='white')
plt.show()

#EXAMPLE 21 - LINEAR DISCRIMINANT ANALYSIS (LDA)
from sklearn.lda import LDA
lda_2c = LDA(n_components=2)
X_lda_2c = lda_2c.fit_transform(iris.data, iris.target)
plt.scatter(X_lda_2c[:,0], X_lda_2c[:,1], c=iris.target,
alpha=0.8, edgecolors='none'); plt.show()

#EXAMPLE 22 - KERNEL PCA
from sklearn.decomposition import KernelPCA
kpca_2c = KernelPCA(n_components=2, kernel='rbf')
X_kpca_2c = kpca_2c.fit_transform(fake_circular_data)
plt.scatter(X_kpca_2c[:,0], X_kpca_2c[:,1], c=fake_circular_target,
alpha=0.8, s=60, marker='o', edgecolors='white')
plt.show()

#EXAMPLE 22 - MARS
import numpy
from pyearth import Earth
from matplotlib import pyplot
Example #41
plt.plot(transformedXg[:x1g.shape[0], 0], transformedXg[:x1g.shape[0], 1], 'o')
plt.plot(transformedXg[x1g.shape[0]:, 0], transformedXg[x1g.shape[0]:, 1], 'x')
plt.show()

# <h2 style="color:purple"> LDA </h2>

# In[25]:

from sklearn.lda import LDA
lda = LDA(n_components=5)

xg = pd.concat([x1g, x2g])
yg = np.zeros(xg.shape[0])
yg[:x1g.shape[0]] = np.ones(x1g.shape[0])
transformedXg2 = lda.fit_transform(xg, yg)
print transformedXg2.shape
plt.plot(transformedXg2[:x1g.shape[0], 0], np.zeros(x1g.shape[0]), 'o')
plt.plot(transformedXg2[x1g.shape[0]:, 0], np.zeros(x2g.shape[0]), 'x')
plt.show()

# <h2 style="color:purple"> Supervised Graph</h2>

# In[ ]:

# <h2 style="color:purple"> Unsupervised Graph</h2>

# In[ ]:

# <h2 style="color:green"> Classify </h2>
Example #42
class DotProduct(DP1):
    name = 'LDA'
    LDA_components = 2

    def __init__(self, X, Y, room, bin_size):
        assert (room[0][1] - room[0][0]) % bin_size == 0
        assert (room[1][1] - room[1][0]) % bin_size == 0
        self.bin_size = bin_size
        self.room = room
        self.xblen = (room[0][1] - room[0][0]) / bin_size
        self.yblen = (room[1][1] - room[1][0]) / bin_size
        self.bins = self.xblen * self.yblen
        self.labels = np.unique(Y)

        newX = np.zeros([X.shape[0], self.LDA_components + self.bins])
        newX[:, -self.bins:] = X[:, -self.bins:]

        self.lda = LDA(n_components=self.LDA_components)
        tmp = self.lda.fit_transform(X[:, :-self.bins], Y)
        newX[:, :self.LDA_components] = tmp

        # This is if X = [cell1, cell2, ..., celln, binfrac1,...,binfrac k^2]
        self.train(newX, Y, room, bin_size)

    def classify(self, X):
        bin_frac = X[-self.bins:].reshape([self.xblen, self.yblen])
        X = X[:-self.bins]

        X = np.squeeze(self.lda.transform(X))

        #self.base[cell id, lbl, xbin, ybin] = rate
        cntxt0 = np.einsum('cxy,c,xy', self.base[:, 0, :, :], X, bin_frac)
        cntxt1 = np.einsum('cxy,c,xy', self.base[:, 1, :, :], X, bin_frac)

        if logging.getLogger().level <= 5:
            tmp0 = 0
            for cell in range(len(X)):
                tmp0 += np.sum(X[cell] * bin_frac * self.base[cell, 0, :, :])

            tmp1 = 0
            for cell in range(len(X)):
                tmp1 += np.sum(X[cell] * bin_frac * self.base[cell, 1, :, :])

            assert np.allclose(tmp0, cntxt0)
            assert np.allclose(tmp1, cntxt1)

        #import pdb; pdb.set_trace()

        if cntxt0 > cntxt1:
            return {self.labels[0]: 1, self.labels[1]: 0}
        else:
            return {self.labels[0]: 0, self.labels[1]: 1}
        '''
        # Normalize
        if cntxt0 != 0 or cntxt1 != 0:
            mag = cntxt0+cntxt1
        else:
            mag = 1
        
        cntxt0 /= mag
        cntxt1 /= mag
        
        assert (round(cntxt0 + cntxt1,5) in [0,1])'''

        return {self.labels[0]: cntxt0, self.labels[1]: cntxt1}
labels = set(df.columns.values)
labels.remove('y')
X_raw = df[list(labels)]
X_train, _, _ = one_hot_dataframe(X_raw, [
    'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
    'month', 'poutcome'
],
                                  replace=True)
y_train = [1 if i == 'yes' else 0 for i in df.y]

reductions = []
pca = PCA(n_components=2)
reductions.append(pca.fit_transform(X_train, y_train))
lda = LDA(n_components=2)
reductions.append(lda.fit_transform(X_train, y_train))
isomap = Isomap(n_components=2)
reductions.append(isomap.fit_transform(X_train, y_train))
lle = LocallyLinearEmbedding(n_components=2, method='standard')
reductions.append(lle.fit_transform(X_train, y_train))

for reduced_X in reductions:
    plt.figure()
    red_x = []
    red_y = []
    blue_x = []
    blue_y = []
    green_x = []
    green_y = []

    for i in range(len(reduced_X)):
Example #44
    n1 = Y[Y==0].shape[0]
    n2 = Y[Y==1].shape[0]

    print "PCA:"
    pca = PCA(n_components=Components)
    transformed = pca.fit_transform(X)

    plt.plot(transformed[:n1,0],transformed[:n1,1],'o')
    plt.plot(transformed[n1:,0],transformed[n1:,1],'x')
    plt.show()

    print "LDA:"
    lda = LDA()

    transformed2 = lda.fit_transform(X,Y)
    plt.plot(transformed2[:n1,0],np.zeros(n1),'o')
    plt.plot(transformed2[n1:,0],np.zeros(n2),'x')
    plt.show()

    print "unsupervised:"
    numFeature = X.shape[1]
    numData = X.shape[0]
    numNode = numFeature + numData
    A = np.zeros((numNode,numNode))

    # construct feature-data
    for i in range(numData):
        for j in xrange(numFeature):
            A[i+numFeature,j] = X.iloc[i,j]
            A[j,i+numFeature] = X.iloc[i,j]
Example #45
    # remove axis spines
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)

    plt.grid()
    plt.tight_layout()
    plt.show()
    
    
    
os.chdir("F:\Analytics\ISB Study\Capstone\dir_data\dir_data")



X_train, y_train, X_test, y_test, X_val, y_val = load_svmlight_files(("train\\vision_cuboids_histogram.txt", "test\\vision_cuboids_histogram.txt","validation\\vision_cuboids_histogram.txt"))
np.unique(y_train)

sklearn_lda = LDA(n_components=30)
X_lda_sklearn = sklearn_lda.fit_transform(X_train.todense(), y_train)
plot_scikit_lda(X_lda_sklearn, title='LDA vision_cuboids_histogram')
# PCA
sklearn_pca = sklearnPCA(n_components=30)
X_pca = sklearn_pca.fit_transform(X_train.todense())
plot_pca(title = 'PCA vision_cuboids_histogram')
#
X_ldapca_sklearn = sklearn_pca.fit_transform(X_lda_sklearn)
plot_scikit_lda(X_ldapca_sklearn, title='LDA+PCA LDA vision_cuboids_histogram', mirror=(-1))
Example #46
x_test = sc.transform(x_test)

import numpy as np
cov_mat = np.cov(x_train.T)
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)
print('eigenvals', eigen_vals)

tot = sum(eigen_vals)
var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)

import matplotlib.pyplot as plt
plt.figure(figsize=(6, 4))
plt.bar(range(1, 14),
        var_exp,
        alpha=0.5,
        align='center',
        label='individual explained variance')
plt.step(range(1, 14),
         cum_var_exp,
         where='mid',
         label='cum explained variance')
plt.ylabel('Explained Variance')
plt.xlabel('Principal Components')
plt.legend(loc=0)
plt.show()

from sklearn.lda import LDA
lda = LDA(n_components=2)
x_train_lda = lda.fit_transform(x_train, y_train)
Example #48
def lda(X, y, components=10):
    lda = LDA(n_components=components)
    return lda.fit_transform(X, y)
Example #49
np.mean(score)
#0.913

#Convert array to dataframe
df = pd.DataFrame(x, columns = ['C1', 'C2', 'C3'])

#3 dimensional plot of principal components (eigenvectors)
fig = plt.figure(1)
ax = fig.add_subplot(111, projection = '3d')
ax.scatter(df['C1'].tolist(), df['C2'].tolist(), df['C3'].tolist(), c = 'b', marker = '*')
ax.set_xlabel('C1')
ax.set_ylabel('C2')
ax.set_zlabel('C3')
ax.legend()

#Linear discriminant analysis
#Initialize LDA with 3 components
da = LDA(n_components = 3)
#Fit model and transform data
a = da.fit_transform(X, Y)

#Perform KNN algorithm
neigh1 = KNeighborsClassifier(n_neighbors = 3)
neigh1.fit(a, Y) 

#Get cross validation metrics
#Create validation set with k-fold cross validation
#Test for accuracy of validation set
score1 = cross_val_score(neigh1, a, Y, scoring = 'accuracy', cv = 10)
np.mean(score1)
#0.973
Example #51
def FLD_r(X, y):
    # can change n_components to desired value
    fld = LDA()
    return fld.fit_transform(X, y)
Example #53
def lda_projected_X(X, y, n):
    lda = LDA(n_components=n)
    return lda.fit_transform(X, y)
Example #54
# Project the tranformed points
w_lda = np.dot(np.linalg.inv(s_within), mean_1 - mean_2)
w_lda = w_lda / np.linalg.norm(w_lda)

label_1_projected = np.dot(label_1_transformed, w_lda)
label_2_projected = np.dot(label_2_transformed, w_lda)

# plot the transformed points
plt.plot(label_1_projected, np.zeros_like(label_1_projected), 'o', c='r')
plt.plot(label_2_projected, np.zeros_like(label_2_projected), 'o', c='b')
plt.show()

# LDA using sklearn
from sklearn.lda import LDA
points = np.concatenate((label_1, label_2), axis=0)
label = np.concatenate((np.zeros((20, 1)), np.zeros((20, 1)) + 1))

lda = LDA()
skl_transform = lda.fit_transform(points, label)

f, ax = plt.subplots(1, 2)

ax[0].plot(skl_transform[:20], np.zeros_like(label_1_projected), 'o', c='r')
ax[0].plot(skl_transform[20:], np.zeros_like(label_2_projected), 'o', c='b')
ax[0].set_title('SKlearn Projection')
ax[1].plot(label_1_projected, np.zeros_like(label_1_projected), 'o', c='r')
ax[1].plot(label_2_projected, np.zeros_like(label_2_projected), 'o', c='b')
ax[1].set_title('Manual Projection')
plt.show()
plt.title('Projection onto first 2 PC space')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
fig1.savefig('./Plots/2_PCA1.png')

# Generate scatter plot on the second 2 PC space
fig2 = plt.figure(figsize=(16.0, 9.0))
plt.scatter( x=pc_data[:,2], y=pc_data[:,3], c=[color_dict[c] for c in df.iloc[:,0]] )
plt.title('Projection onto second 2 PC space')
plt.xlabel('Principal Component 3')
plt.ylabel('Principal Component 4')
fig2.savefig('./Plots/2_PCA2.png')

# Apply LDA to project PC space onto first 2 LD space
lda = LDA()
ld_data = lda.fit_transform(X=pc_data, y=df.iloc[:,0])
fig3 = plt.figure(figsize=(16.0, 9.0))
plt.scatter( x=ld_data[:,0], y=ld_data[:,1], c=[color_dict[c] for c in df.iloc[:,0]] )
plt.title('Projection onto first 2 LD space')
plt.xlabel('Linear Discriminant 1')
plt.ylabel('Linear Discriminant 2')
fig3.savefig('./Plots/2_LDA1.png')

# Apply LDA to project PC space onto second 2 LD space
fig4 = plt.figure(figsize=(16.0, 9.0))
plt.scatter( x=ld_data[:,2], y=ld_data[:,3], c=[color_dict[c] for c in df.iloc[:,0]] )
plt.title('Projection onto second 2 LD space')
plt.xlabel('Linear Discriminant 3')
plt.ylabel('Linear Discriminant 4')
fig4.savefig('./Plots/2_LDA2.png')
train.head()

# In[ ]:

lda = LDA(n_components=2)
pca = PCA(n_components=2)

# In[ ]:

scaler = StandardScaler()
trains = scaler.fit_transform(train)

trans = pca.fit_transform(trains, labels)
dfp = pd.DataFrame(trans, columns=["pca1", "pca2"], index=train.index)  # named so the scatter below can use pca1/pca2

trans = lda.fit_transform(trains, labels)
df = pd.DataFrame(trans, columns=["lda"], index=train.index)

# In[ ]:

df["labels"] = labels
dfp["labels"] = labels

# In[ ]:

df.plot(kind='scatter', x='lda', y='labels')

# In[ ]:

dfp.plot(kind='scatter', x='pca1', y='pca2', c="labels")