def apply_LDA(self, X, y, solver, shrinkage):
    # shrinkage is only supported by the 'lsqr' and 'eigen' solvers,
    # so it is omitted when the 'svd' solver is requested
    if solver == 'svd':
        model = lda(solver=solver)
    else:
        model = lda(solver=solver, shrinkage=shrinkage)
    model.fit(X, y)

    return model.transform(X)
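
A minimal usage sketch for the helper above (synthetic data; self is unused in the body, so None is passed in its place):

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

X_demo = np.random.rand(60, 5)
y_demo = np.repeat([0, 1, 2], 20)
X_proj = apply_LDA(None, X_demo, y_demo, solver='eigen', shrinkage='auto')
print(X_proj.shape)  # at most (60, n_classes - 1) columns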
Example #2
def plot_lda_decision_boundaries(two_lda_dimensions, label_matrix):

    # Create mesh_matrix, a mesh of points in space of first two linear discriminants
    ldone_min, ldone_max = two_lda_dimensions[:, 0].min() - 1, two_lda_dimensions[:, 0].max() + 1
    ldtwo_min, ldtwo_max = two_lda_dimensions[:, 1].min() - 1, two_lda_dimensions[:, 1].max() + 1

    ldoneone, ldtwotwo = np.meshgrid(np.linspace(ldone_min, ldone_max, 500),
                                     np.linspace(ldtwo_min, ldtwo_max, 500))

    mesh_matrix = np.c_[ldoneone.ravel(), ldtwotwo.ravel()]

    # Instantiate LDA model and fit LDA model on two_lda_dimensions
    lda_model = lda(n_components=2)
    lda_model.fit(two_lda_dimensions, label_matrix)

    # Use LDA model to make categorical predictions on mesh_matrix
    mesh_predictions = lda_model.predict(mesh_matrix)

    # Map categorical predictions into numerical values for contour plotting
    speciesmap = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    f = lambda x: speciesmap[x]
    fv = np.vectorize(f)
    Z = fv(mesh_predictions).reshape(ldoneone.shape)

    # Make contour plot
    plt.contourf(ldoneone,
                 ldtwotwo,
                 Z,
                 levels=[-0.5, 0.5, 1.5, 2.5],
                 colors=('orange', 'black', 'grey'),
                 alpha=0.4)
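
A hypothetical driver for the plotting function above, assuming Iris-style labels that match the 'Iris-*' names hard-coded in speciesmap:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

iris = load_iris()
species = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])[iris.target]
two_dims = lda(n_components=2).fit_transform(iris.data, species)
plot_lda_decision_boundaries(two_dims, species)
plt.show()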
Example #3
def lda_(values, class1, class2, labels, features):
    # use fisher for finding best line
    clf = lda()
    clf.fit(features, labels)
    w_ = clf.coef_
    b_ = clf.intercept_

    class1_projected = np.dot(class1, w_.T) + b_
    class2_projected = np.dot(class2, w_.T) + b_
    class1_mean_projected = np.mean(class1_projected)
    class2_mean_projected = np.mean(class2_projected)

    class1_dist_to_mean = class1_projected - class1_mean_projected
    class2_dist_to_mean = class2_projected - class2_mean_projected

    means_dist = class2_mean_projected - class1_mean_projected

    mems1 = (means_dist - class1_dist_to_mean) / means_dist
    mems1[mems1 > 1] = 1
    mems1[mems1 < 0] = 0
    mems2 = (means_dist + class2_dist_to_mean) / means_dist
    mems2[mems2 > 1] = 1
    mems2[mems2 < 0] = 0

    mems = np.concatenate((mems1, mems2))

    return testLeaveOneOut(features, labels, mems.ravel())
Example #4
def exeML(mlmethod, xtr, ytr, xte, yte, islog=True, isfeatureselection=True):
    if islog:
        xtr = np.log(np.abs(xtr)).tolist()
        ytr = np.log(np.abs(ytr)).tolist()
        xte = np.log(np.abs(xte)).tolist()
        yte = np.log(np.abs(yte)).tolist()

    if isfeatureselection:
        estimator = SVR(kernel="linear")
        selector = RFE(estimator, n_features_to_select=100, step=1)
        selector = selector.fit(xtr, ytr)
        xtr = np.array(xtr)[:, selector.support_].tolist()
        xte = np.array(xte)[:, selector.support_].tolist()

    np.random.seed(1000)
    if mlmethod == "SVM":
        clf = svm.SVR(kernel='poly')
    elif mlmethod == "NeaNei":
        clf = NearestCentroid()
    elif mlmethod == "dtree":
        clf = tree.DecisionTreeClassifier()
    elif mlmethod == "lda":
        clf = lda(solver="svd")

    predval = []
    clf.fit(xtr, ytr)

    for i in range(len(xte)):
        # predict expects a 2-D array, so wrap each sample in a list
        predval.append(float(clf.predict([xte[i]])[0]))

    return predval
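
A hedged call sketch with synthetic data; the log transform and the RFE feature selection are switched off so the example stays small:

import numpy as np

xtr = np.random.rand(40, 10).tolist()
ytr = np.random.randint(0, 2, 40).tolist()
xte = np.random.rand(10, 10).tolist()
yte = np.random.randint(0, 2, 10).tolist()
preds = exeML("lda", xtr, ytr, xte, yte, islog=False, isfeatureselection=False)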
Example #5
File: OCR.py Project: xancros/TFG
def usarLDA(array, e):
    Lda = lda()
    caracteres = np.vstack(array)
    E = np.array(e)
    # fit and project in a single step; the fitted model is returned alongside the projection
    CR = Lda.fit_transform(caracteres, E)
    CR = CR.astype(np.float32, copy=True)
    return CR, Lda
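
A minimal usage sketch (an assumption about the expected inputs): array is a list of flattened character samples and e holds their labels:

import numpy as np

caracteres_demo = [np.random.rand(64) for _ in range(30)]
etiquetas_demo = np.repeat([0, 1, 2], 10)
CR, modelo = usarLDA(caracteres_demo, etiquetas_demo)
print(CR.shape, CR.dtype)  # projected features as float32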
Example #6
def LDA_DR(X, y):
    # Linear Discriminant Analysis (LDA): project the 64-dimensional data down to 2-3 dimensions
    logging.info("Computing LDA projection")
    X = np.array(X)
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    X_lda = lda(n_components=3).fit_transform(X2, y)
    plot_embedding_2d(X_lda[:, 0:2], y, "LDA of Kmeans")
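
A hypothetical call with the scikit-learn digits data (64 features per sample), assuming logging, time, lda and plot_embedding_2d are imported/defined elsewhere in the original project:

from sklearn.datasets import load_digits

digits = load_digits()
LDA_DR(digits.data, digits.target)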
Example #7
def plot_simple_demo_lda():
    pylab.clf()
    fig = pylab.figure(num=None, figsize=(10, 4))
    pylab.subplot(121)

    title = "Original feature space"
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    good = x1 > x2
    bad = ~good

    x1g = x1[good]
    x2g = x2[good]
    pylab.scatter(x1g, x2g, edgecolor="blue", facecolor="blue")

    x1b = x1[bad]
    x2b = x2[bad]
    pylab.scatter(x1b, x2b, edgecolor="red", facecolor="white")

    pylab.grid(True)

    pylab.subplot(122)

    X = np.c_[(x1, x2)]

    lda_inst = lda(n_components=1)
    Xtrans = lda_inst.fit_transform(X, good)

    Xg = Xtrans[good]
    Xb = Xtrans[bad]

    pylab.scatter(Xg[:, 0],
                  np.zeros(len(Xg)),
                  edgecolor="blue",
                  facecolor="blue")
    pylab.scatter(Xb[:, 0],
                  np.zeros(len(Xb)),
                  edgecolor="red",
                  facecolor="white")
    title = "Transformed feature space"
    pylab.title(title)
    pylab.xlabel("$X'$")
    fig.axes[1].get_yaxis().set_visible(False)

    pylab.grid(True)

    pylab.autoscale(tight=True)
    filename = "lda_demo.png"
    pylab.savefig(os.path.join(CHART_DIR, filename), bbox_inches="tight")
Example #8
    def train(self, labels, erps):
        print "training..."

        self.clf = lda()
        self.clf.fit(erps, labels)

        scores = cross_validation.cross_val_score(self.clf,
                                                  erps,
                                                  labels,
                                                  cv=10)
        print("Accuracy: %0.2f (+/- %0.2f)" %
              (scores.mean(), scores.std() * 2))

        joblib.dump(self.clf, 'model/lda.pkl')
Example #9
    def train(self, labels, erps):
        if self.factor != 1:
            erps = convert.erp.decimate(erps, self.factor)

        self.frame_length = len(erps[0]) / 8

        ( b, se, pval, inmodel, stats, nextstep, history ) = stepwisefit( erps, labels, maxiter = 60, penter = 0.1, premove = 0.15)
        self.index = inmodel
        erps = [np.array(erp)[self.index] for erp in erps]
        self.clf = lda()
        self.clf.fit(erps, labels)

        scores = cross_validation.cross_val_score(self.clf, erps, labels, cv=6)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        self.show_feature()

        joblib.dump(self.clf, 'model/swlda-%s.pkl' % self.name)
        np.save("model/swlda-index-%s.npy" % self.name, self.index)
Example #10
def StratifiedShuffleSplit_cross_validate_func_lda(X, y, partitioner):
    runs = 4
    lDA = np.empty([runs])
    accuracy_list = []
    error_rate_list = []
    for i in range(runs):
        lda_results = cross_validate(lda(), X, y, scoring="accuracy", cv=partitioner)
        lDA[i] = np.mean(lda_results["test_score"])
        error_rate_lda = 1 - lDA[i]
        print("accuracy:", lDA[i])
        print("error rate:", error_rate_lda)
        accuracy_list.append(lDA[i])
        error_rate_list.append(error_rate_lda)
    plt.plot(error_rate_list)
    plt.show()
    plt.plot(accuracy_list)
    plt.show()
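
A hypothetical driver for the function above, assuming cross_validate, lda, numpy and matplotlib are imported as elsewhere on this page; the data here is synthetic:

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

X_demo = np.random.rand(100, 4)
y_demo = np.tile([0, 1], 50)
splitter = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
StratifiedShuffleSplit_cross_validate_func_lda(X_demo, y_demo, splitter)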
Example #11
X = dataset.iloc[:, 1:5].values
y = dataset.iloc[:, 5].values
# c) Data Transforms
# 4. Evaluate Algorithms
# a) Split-out validation dataset
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# b) Test options and evaluation metric
# c) Spot Check Algorithms
# Now evaluate several algorithms that are appropriate for this problem.
models = []
models.append(('Log-Reg', LogisticRegression()))
models.append(('LDA', lda()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))

# evaluate each model in turn

# d) Compare Algorithms
results = []

names = []

for name, model in models:
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_results = cross_val_score(model,
Example #12
    def optimize_cluster(inds1, inds2, log=None):

        ###### CONDITION
        inds1, inds_on_wall1, mask1 = inds1
        inds2, inds_on_wall2, mask2 = inds2

        if not np.any(np.logical_and(mask1, mask2)):
            return mask1, mask2, 0.0

        mask1_erode = cv2.erode(mask1, np.ones((3, 3), dtype=np.uint8))
        mask2_erode = cv2.erode(mask2, np.ones((3, 3), dtype=np.uint8))

        mps2d_pix1 = mps2d_pix[inds_on_wall1]
        mps2d_pix2 = mps2d_pix[inds_on_wall2]

        num_error1 = np.sum(mask2_erode[mps2d_pix1[:, 1], mps2d_pix1[:, 0]])
        num_error2 = np.sum(mask1_erode[mps2d_pix2[:, 1], mps2d_pix2[:, 0]])

        total_error = (num_error1 + num_error2) / (mps2d_pix1.shape[0] +
                                                   mps2d_pix2.shape[0])

        # print(num_error1)
        # print(num_error2)
        # print()
        #
        # exit()
        if num_error1 / mps2d_pix1.shape[0] < 0.01 and num_error2 / mps2d_pix2.shape[0] < 0.01:
            return mask1, mask2, total_error

        mps2d1 = mps2d[inds_on_wall1]
        mps2d2 = mps2d[inds_on_wall2]

        kfs2d1 = np.stack([kfs2d_uni[dict_mp2kf[i]] for i in inds_on_wall1])
        kfs2d2 = np.stack([kfs2d_uni[dict_mp2kf[i]] for i in inds_on_wall2])

        ds1 = mps2d1 - kfs2d1
        ds2 = mps2d2 - kfs2d2

        d_norms1 = np.expand_dims(np.linalg.norm(ds1, axis=1), 1)
        d_norms2 = np.expand_dims(np.linalg.norm(ds2, axis=1), 1)

        nrms = np.matmul(d_norms1, d_norms2.T)
        dots = np.matmul(ds1, ds2.T)
        dots = np.where(nrms > 0.0, dots / nrms, 0.0)

        inds_overlap = np.where(dots < -0.5)

        # np.where returns a tuple of index arrays, so check the index array itself
        if not inds_overlap[0].size > 0:
            s = np.sum(mask1 * mask2)
            a = np.sum(mask1 + mask2 > 0)
            return mask1, mask2, total_error
            # print(s / a, np.min(dots), np.max(dots))

        x1 = np.where(mask1 > 0)
        x2 = np.where(mask2 > 0)

        x1 = np.stack([x1[0], x1[1]], axis=1)
        x2 = np.stack([x2[0], x2[1]], axis=1)

        # plt.scatter(x1[:, 0], x1[:, 1], c="red")
        # plt.scatter(x2[:, 0], x2[:, 1], c="green")
        # plt.show()

        ###### GRADIENT
        x = np.concatenate([x1, x2], axis=0)
        y = np.asarray([0] * x1.shape[0] + [1] * x2.shape[0])

        model = lda()
        model.fit(x, y)
        r = np.fliplr(model.coef_)
        if np.sum(r) == 0:
            raise ValueError
        r = r / np.linalg.norm(r)
        # r = np.asarray([[1.0, 0.0]]).astype(np.float32)

        lambda1 = 0.05

        avg1 = np.average(x1, axis=0)
        avg2 = np.average(x2, axis=0)

        d12 = avg2 - avg1
        norm_d12 = np.linalg.norm(d12)

        ##### UPDATE
        switch = (np.dot(r, d12) > 0).astype(np.float32) * 2 - 1

        mps2d[inds_on_wall1] += switch * r * lambda1
        mps2d[inds_on_wall2] -= switch * r * lambda1
        kfs2d[inds_on_wall1] += switch * r * lambda1
        kfs2d[inds_on_wall2] -= switch * r * lambda1

        mps2d_pix1, kfs2d_pix1 = pts_float2pixel(mps2d[inds_on_wall1],
                                                 kfs2d[inds_on_wall1], w, h,
                                                 xm, xM, ym, yM)
        mps2d_pix2, kfs2d_pix2 = pts_float2pixel(mps2d[inds_on_wall2],
                                                 kfs2d[inds_on_wall2], w, h,
                                                 xm, xM, ym, yM)

        mps2d_pix1[:, 0] = np.clip(mps2d_pix1[:, 0], a_min=0, a_max=w)
        mps2d_pix2[:, 0] = np.clip(mps2d_pix2[:, 0], a_min=0, a_max=w)
        kfs2d_pix1[:, 0] = np.clip(kfs2d_pix1[:, 0], a_min=0, a_max=w)
        kfs2d_pix2[:, 0] = np.clip(kfs2d_pix2[:, 0], a_min=0, a_max=w)

        mps2d_pix1[:, 1] = np.clip(mps2d_pix1[:, 1], a_min=0, a_max=h)
        mps2d_pix2[:, 1] = np.clip(mps2d_pix2[:, 1], a_min=0, a_max=h)
        kfs2d_pix1[:, 1] = np.clip(kfs2d_pix1[:, 1], a_min=0, a_max=h)
        kfs2d_pix2[:, 1] = np.clip(kfs2d_pix2[:, 1], a_min=0, a_max=h)

        for i in inds_on_wall1:
            kfs2d_uni[dict_mp2kf[i]] = kfs2d[i]

        for i in inds_on_wall2:
            kfs2d_uni[dict_mp2kf[i]] = kfs2d[i]

        mps2d_pix[inds_on_wall1] = mps2d_pix1
        mps2d_pix[inds_on_wall2] = mps2d_pix2

        mask1 = make_only_gridmap(mps2d_pix1, kfs2d_pix1, w, h, xm, xM, ym, yM)
        mask2 = make_only_gridmap(mps2d_pix2, kfs2d_pix2, w, h, xm, xM, ym, yM)
        mask1 = (mask1 != 127).astype(np.uint8)
        mask2 = (mask2 != 127).astype(np.uint8)

        return mask1, mask2, total_error
# plt.figure()
# plt.scatter(all_reduced[:,0],all_reduced[:,1],
#                c =opto, s=20)
# plt.colorbar()
# plt.xlabel('PCA1');plt.ylabel('PCA2')
# 
# 
# fig = plt.figure()
# ax = Axes3D(fig)
# p = ax.scatter(all_reduced[:,0],all_reduced[:,1],all_reduced[:,2],
#                c =opto ,s=20)
# fig.colorbar(p)
# =============================================================================

## LDA
clf = lda()
clf.fit(all_reduced, opto)
fit_coefs = clf.coef_[0]
best_sep = np.argsort(np.abs(fit_coefs))[-3:]
plt.figure()
plt.scatter(all_reduced[:,best_sep[2]],all_reduced[:,best_sep[1]],c=opto)
plt.colorbar()

# =============================================================================
# fig = plt.figure()
# ax = Axes3D(fig)
# p = ax.scatter(all_reduced[:,best_sep[0]],all_reduced[:,best_sep[1]],all_reduced[:,best_sep[2]],
#                c =opto,s=20)
# fig.colorbar(p)
# =============================================================================
Example #14
#| |  | | |_| | | |_ \ V / (_| | |    | |_| | \__ \ (__
#|_|  |_|\__,_|_|\__| \_/ \__,_|_|    |____/|_|___/\___|
#

# Extract the non-responsive population and check whether we can
# use those neurons to discriminate tastes using a multivariate analysis

non_taste_firing = data.all_normal_off_firing[taste_p_vec>0.05,:,\
        time_bounds[0]:time_bounds[1]]

# At every time-point, check accuracy of classification using LDA
labels = np.sort(list(range(4)) * 15)

# Use shuffle splits to estimate accuracy predictions for each component
bootstrap_iters = 10
score_array = np.zeros((non_taste_firing.shape[-1], bootstrap_iters))
cv = ShuffleSplit(n_splits=bootstrap_iters, test_size=0.25, random_state=0)
clf = lda(solver='eigen', shrinkage='auto')
for t_bin in trange(non_taste_firing.shape[-1]):
    score_array[t_bin] = cross_val_score(clf,
                                         non_taste_firing[:, :, t_bin].T,
                                         labels,
                                         cv=cv)
dat_imshow(score_array)
plt.show()

plt.errorbar(x=np.arange(score_array.shape[0]),
             y=np.mean(score_array, axis=-1),
             yerr=np.std(score_array, axis=-1))
plt.show()
Example #15
data_type = 'cropped_roi'

# file paths
test_data_path = f'networks/data/sgp/{data_id}/{data_type}/*'
img_save_path = 'networks/reconstructed_roi/lda'
# mkdir if not exists
Path(f'{img_save_path}').mkdir(parents=True, exist_ok=True)

# prepare training set
print('Prepare training data..')

channel_train, y_true, channel_len = load_raw_labeled_data()

# fit lda
print('Model training..')
classifier = lda()
classifier.fit(channel_train, y_true)
precision_clf = classifier.score(channel_train, y_true)
prediction = classifier.predict(channel_train)
balanced_acc = balanced_accuracy_score(y_true, prediction)
kappa = cohen_kappa_score(y_true, prediction)
# plot learning curve
plot_learning_curve(classifier, 'learning curve of LDA', channel_train, y_true)

print('train-accuracy: ', precision_clf)
print('balanced-accuracy: ', balanced_acc)
print('kappa: ', kappa)

# prepare test data
print('Prepare test data..')
imgs = load_raw_images_data(test_data_path,
Example #16
            c_idx.append(idx)
            
    for i in range(len(w)):
        if i in c_idx:
            w[i] = w[i]*np.exp(alpha)
        else:
            w[i] = w[i]*np.exp(-1*alpha)
            
    zt = np.sum(w)
    
    w = np.array(w) / zt
    return w


clf1 = LogisticRegression(solver='sag')
clf2 = lda()
clf3 = DecisionTreeClassifier(max_depth=2)
clf4 = KNeighborsClassifier(n_neighbors=1)
clf5 = KNeighborsClassifier(n_neighbors=10)
clfs = [clf1,clf2,clf3,clf4,clf5]

layer2_tr = None
layer2_te = None
for idx,model in enumerate(clfs):
    for i in range(5):
        x,y = Bootstrap(x_tr,y_tr,weight=weight,factor = 0.8)
        model.fit(x, y)
        prdtr = model.predict(x_tr)
        prdte = model.predict(x_te)
        
        if layer2_tr is None:
Example #17
# computation of PCA projection
print('Computation of PCA Projection')

X_pca = (decomposition
		 .PCA(svd_solver='randomized', 
			  n_components=2)
		 .fit_transform(X))

plot_embedding(X_pca, 'PCA '+title, j=1)

# computation of the LDA projection
print('Computation of the LDA Projection')
X2 = X.copy()
X2.flat[::X.shape[1] + 1] += 0.01  # make X invertible

X_lda = lda(n_components=2).fit_transform(X2, y)
plot_embedding(X_lda, 'LDA '+title, j=2)

# computation of the ISOMAP projection
print('Computation of the ISOMAP Projection')
X_iso = manifold.Isomap(K, n_components=2).fit_transform(X)

plot_embedding(X_iso, 'ISOMAP '+title, j=3)

# computation of the LLE projection
print('Computation of the LLE Projection')
clf = manifold.LocallyLinearEmbedding(K, n_components=2, method='standard')

X_lle = clf.fit_transform(X)
plot_embedding(X_lle, 'LLE '+title, j=4)
Example #18
def individual(df, label1, label2):
    '''
      df: first-last data (17 HAM-D feature columns plus one 'DRUG' column of drug names)
      label1, label2: the two DRUG labels to compare (e.g. a drug vs. Placebo)
    '''

    df['DRUG'].replace(label1, 1, inplace=True)

    df['DRUG'].replace(label2, 0, inplace=True)

    df = df[(df['DRUG']==1) | (df['DRUG']==0)].reset_index(drop=True)

    #fit LDA model
    y = df['DRUG'].astype(int)
    X = df.drop(['DRUG'],axis=1)

    
    clf = lda()
    model = clf.fit(X,y)
    df_pn = pd.DataFrame(model.coef_,index=['PN']).T

    #get weightings
    df_w = pd.DataFrame(np.absolute(model.coef_),index=['Weightings']).sort_values(by='Weightings', axis=1,ascending=False).T
    
    #sort values and add descriptions
    df_w['Polarity'] = ''
    df_w['Description'] = ''
    df_w['True_Weightings'] = ''
    df_w['HAMD_Name'] = ''

    idx = list(df_w.index)
    
    for i in idx:
        
        #add corresponding descriptions
        df_w.loc[i,'Description'] = desc[str(i)]
        df_w.loc[i,'HAMD_Name'] = 'HAM-D '+str(i+1)
        
        #get polarity
        if df_pn.loc[i,'PN'] > 0:
            df_w.loc[i,'Polarity'] = 'positive' 
            df_w.loc[i,'True_Weightings'] = df_w.loc[i,'Weightings']
        else:
            df_w.loc[i,'Polarity'] = 'negative'
            df_w.loc[i,'True_Weightings'] = -1*df_w.loc[i,'Weightings']

    pvals = pd.Series([])
    sigs = pd.Series([])
    ci_up = pd.Series([])
    ci_lo = pd.Series([])
    ci = pd.Series([])
    sig_2 = pd.Series([])
    drug = df.loc[df['DRUG']!=0]
    placebo = df.loc[df['DRUG']==0]
    
    for i in range(len(df_w.Weightings)):
        N1=len(drug[drug.columns[i]])
        N2=len(placebo[placebo.columns[i]])
        d_f = (N1 + N2 - 2)
        std1 = drug[drug.columns[i]].std()
        std2 = placebo[placebo.columns[i]].std()
        std_N1N2 = sqrt( ((N1 - 1)*(std1)**2 + (N2 - 1)*(std2)**2) / d_f)
        diff_mean = drug[drug.columns[i]].mean() - placebo[placebo.columns[i]].mean()
        MoE = t.ppf(0.975, d_f) * std_N1N2 * sqrt(1/N1 + 1/N2)
        ci_up[i] = diff_mean + MoE
        ci_lo[i] = diff_mean - MoE
        sig_2[i] = ((diff_mean + MoE) * (diff_mean - MoE)>0)
        ci[i] = (diff_mean - MoE, diff_mean + MoE)
        tset, pval = ttest_1samp(df_w.Weightings, df_w.Weightings[i])
        sig = ttest_ind(drug[drug.columns[i]], placebo[placebo.columns[i]], equal_var = False)
        pvals[i]=pval
        sigs[i]=sig[1]
    df_w.insert(3, 'weightings p-value', pvals)
    df_w.insert(4, '2-sample t-test p-value', sigs)
    df_w.insert(5, '2-sample t-test ci', ci)
    df_w.insert(6, '2-sample t-test ci upper', ci_up)
    df_w.insert(7, '2-sample t-test ci lower', ci_lo)
    df_w.insert(8, 'Significant', sig_2)
    return (df_w, df)
Example #19
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
import numpy as np

pkl_dir = '/home/user/Documents/Kaggle/CreditDefaultRisk/EngineeredData'
# _, pkl_dir = directory_table.get_paths(station='Subgraph')
training_df = np.load(pkl_dir + '/train_df.npy')
target = np.load(pkl_dir + '/target.npy')
predicting_df = np.load(pkl_dir + '/predict_df.npy')

model = lda()
model.fit(training_df, target[:, 0])
pred = model.predict_proba(predicting_df)

Example #20
# all_spikes = all_spikes[:,nrn,:,2000:4000]
# if not (np.sum((np.sum(all_spikes,axis=2) == 0).flatten()) > 0):
# =============================================================================
    
this_off = np.asarray(data.all_normal_off_firing)
this_off = this_off[:,:,80:160]
total_this_off = this_off[0,:,:]
for nrn in range(1,this_off.shape[0]):
    total_this_off = np.concatenate((total_this_off,this_off[int(nrn),:,:]),axis=1)

reduced_stim_pca = pca(n_components = 45).fit(total_this_off)
reduced_stim = reduced_stim_pca.transform(total_this_off)
plt.plot(np.cumsum(reduced_stim_pca.explained_variance_ratio_))

## Identity
clf = lda()
clf.fit(reduced_stim, tastes)
fit_coefs = clf.coef_[0]
best_sep = np.argsort(np.abs(fit_coefs))[-3:]

plt.figure()
plt.scatter(reduced_stim[:,best_sep[2]],reduced_stim[:,best_sep[1]],c=tastes)
plt.colorbar()

clf.score(reduced_stim, tastes)

fig = plt.figure()
ax = Axes3D(fig)
p = ax.scatter(reduced_stim[:,best_sep[0]],reduced_stim[:,best_sep[1]],reduced_stim[:,best_sep[2]],
               c =tastes,s=20)
fig.colorbar(p)
Example #21
import pandas as pd
import numpy as np
df = pd.read_csv("/home/shaury/Downloads/nptel/Iris.csv", delimiter=",")
x, y = df[["SepalLengthCm", "SepalWidthCm", "PetalLengthCm",
           "PetalWidthCm"]], df["Species"]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    random_state=1,
                                                    test_size=0.15)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
l1 = lda(n_components=1)
x_train = l1.fit_transform(x_train, y_train)
x_test = l1.transform(x_test)  # transform only; refitting on the test set would leak its labels

from sklearn.ensemble import RandomForestClassifier as RFC
cl = RFC(max_depth=2, random_state=0)
cl.fit(x_train, y_train)
y_pred = cl.predict(x_test)

from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print('Accuracy: ' + str(accuracy_score(y_test, y_pred)))
Example #22
tuning_param = [{
    'C': [0.01, 0.1, 1, 5, 10, 100],
    'gamma': [0.01, 0.1, 1, 5, 10, 100]
}]

svm_fit = GridSearchCV(SVC(kernel='rbf'), tuning_param, cv=10)
svm_fit.fit(data_x, data_y)

svm_fit.best_params_
#{'C': 0.01, 'gamma': 0.1}

#Fit the model using the parameters found
svm_best_fit = SVC(kernel='rbf', C=0.01, gamma=0.1)
svm_best_fit.fit(x_clas_train, y_clas_train)
np.mean(svm_best_fit.predict(x_clas_cv) - y_clas_cv)
#0.5833333333333334

#LDA
lda_fit = lda()
lda_fit.fit(x_clas_train, y_clas_train)

np.mean(lda_fit.predict(x_clas_cv) - y_clas_cv)
#0.20833333333333334

#QDA
qda_fit = qda()
qda_fit.fit(x_clas_train, y_clas_train)

np.mean(qda_fit.predict(x_clas_cv) - y_clas_cv)
#0.4583333333333333
        groups = gmm.predict(reduced_stim)
        all_groups.append(sum(groups))
        trial_order = np.argsort(groups)

        # Train LDA classifier on firing from both clusters

        repeats = 500
        stim_acc = []

        for i in range(repeats):
            test_stim = np.random.choice(np.arange(15), size=1,
                                         replace=False)[0]
            train_stim = np.arange(15)
            train_stim = np.delete(train_stim, test_stim)

            stim_lda = lda()
            stim_lda.fit(reduced_stim[train_stim, :], groups[train_stim])
            stim_acc.append(
                sum(
                    stim_lda.predict(reduced_stim[test_stim, :][np.newaxis, :])
                    == groups[test_stim]))
            #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_stim,accuracy))

        class_acc.append(np.mean(stim_acc))
        # =============================================================================
        # =============================================================================

        # Pull out and cluster distance matrices
        this_dist = off_stim_dists[taste]
        clust_dist = this_dist[trial_order, :]
        clust_dist = clust_dist[:, trial_order]
Example #24
# LDA - 2 - PCA/LDA (Can't handle 100k)
t = time.time()
A3 = LDA(n_components=1, method='twostage').fit(X, Y)[1]
Z1 = np.dot(X, A3[:, 0])
print('PCA +  LDA:              %.2f ms' % ((time.time() - t) * 1000))
# Z1 = X[:, 0]

# LDA - 3 - QR-LDA - big & fast, max output dim = k
t = time.time()
A4 = LDA(n_components=1, method='qrsvd').fit(X, Y)[1]
Z2 = np.dot(X, A4[:, 0])
print('QR LDA:                  %.2f ms' % ((time.time() - t) * 1000))

# LDA - SK - big & med, max output dim = k-1
t = time.time()
Z3 = lda(n_components=1).fit_transform(X, Y)
print('SciKit LDA:              %.2f ms' % ((time.time() - t) * 1000))

# LDA - SRDA - big & med, max output dim = k+1
t = time.time()
A5 = LDA(n_components=1, method='srda').fit(X, Y)
Z5 = np.dot(X, A5)
print('SRDA:                    %.2f ms' % ((time.time() - t) * 1000))

if np.min(Z[Y == 1]) < np.min(Z[Y == -1]):
    F0 = np.sum(Z[Y == 1] < np.min(Z[Y == -1])) + np.sum(
        Z[Y == -1] > np.max(Z[Y == 1]))
else:
    F0 = np.sum(Z[Y == 1] > np.max(Z[Y == -1])) + np.sum(
        Z[Y == -1] < np.min(Z[Y == 1]))
if np.min(Z0[Y == 1]) < np.min(Z0[Y == -1]):
Example #25
import numpy as np
import math
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

names = [
    'Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Class Label'
]
df = pd.io.parsers.read_csv("iris.data")
df.columns = names
df.head()

features = df.drop('Class Label', axis=1)
classlabels = df['Class Label']

sklearn_lda = lda(n_components=2)
sklearn_lda_features = sklearn_lda.fit_transform(features, classlabels)


def plot_lda(two_lda_dimensions, label_matrix, title):

    # Make scatter plot, with labels and colors
    for label, marker, color in zip(
        ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'), ('^', 's', 'o'),
        ('orange', 'black', 'grey')):

        plt.scatter(x=two_lda_dimensions[:, 0][label_matrix == label],
                    y=two_lda_dimensions[:, 1][label_matrix == label],
                    marker=marker,
                    color=color,
                    label=label)
all_off_firing = data.all_normal_off_firing
all_off_firing_long = all_off_firing[0, :, :]
for nrn in range(1, all_off_firing.shape[0]):
    all_off_firing_long = np.concatenate(
        (all_off_firing_long, all_off_firing[int(nrn), :, :]), axis=1)

all_off_red_pca = pca(n_components=20).fit(all_off_firing_long)
all_off_red = all_off_red_pca.transform(all_off_firing_long)

plt.imshow(exposure.equalize_hist(all_off_red))
groups = np.sort(np.asarray([0, 1, 2, 3] * 15))
plt.figure()
plt.scatter(all_off_red[:, 0], all_off_red[:, 1], c=groups)
plt.colorbar()

taste_lda = lda().fit(all_off_red, groups)
print(np.mean(taste_lda.predict(all_off_red) == groups))

trial_dist = dist_mat(all_off_firing_long, all_off_firing_long)
plt.figure()
plt.imshow(exposure.equalize_hist(trial_dist))

##

n_components = 3
taste = 1
pre_inds = np.arange(0, 80)
post_inds = np.arange(80, 160)

this_off = data.normal_off_firing[taste]
this_off_pre = this_off[:, :, pre_inds]
Example #27
lr_y_test_pred = lr.predict(X_test_pca)
print( "With PCA, Logistic Regression accurancy score for testing: ",metrics.accuracy_score(y_test, lr_y_test_pred) )

## SVM
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_pca, y_train)
svm_train_predict = svm.predict(X_train_pca)
svm_test_predict = svm.predict(X_test_pca)
print("With PCA, SVM Accuracy Score for Training: ", metrics.accuracy_score(y_train, svm_train_predict))
print("With PCA, SVM Accuracy Score for Testing: ", metrics.accuracy_score(y_test, svm_test_predict))
print(" ")

## Part 4 LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
lda_model = lda(n_components=2)  # use a distinct name so the imported 'lda' alias is not shadowed
X_train_lda = lda_model.fit_transform(X_train_std, y_train)
X_test_lda = lda_model.transform(X_test_std)

## Logistic Regression
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
lr_y_train_pred = lr.predict(X_train_lda)
print( "Part 4\n", "With LDA, Logistic Regression accurancy score for training: ",metrics.accuracy_score(y_train, lr_y_train_pred) )
lr_y_test_pred = lr.predict(X_test_lda)
print( "With LDA, Logistic Regression accurancy score for testing: ",metrics.accuracy_score(y_test, lr_y_test_pred) )
## SVM
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1.0, random_state=0)
svm.fit(X_train_lda, y_train)
Example #28
def performance_evaluation(args, output_array, folds, label_list,
                           best_parameter_pair):
    if args.method == 'SVM':
        temp_str = 'The best parameter for SVM is: cost = ' + str(
            best_parameter_pair['cost']) + ', gamma = ' + str(
                best_parameter_pair['gamma'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = svm.SVC(C=2**best_parameter_pair['cost'],
                                     gamma=2**best_parameter_pair['gamma'],
                                     probability=True)
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

    elif args.method == 'LinearSVM':
        temp_str = 'The best parameter for Linear SVM is: cost = ' + str(
            best_parameter_pair['cost'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = svm.SVC(C=2**best_parameter_pair['cost'],
                                     kernel="linear",
                                     probability=True)
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

    elif args.method == 'RF':
        temp_str = 'The best parameter for RF is: tree = ' + str(
            best_parameter_pair['tree'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = RandomForestClassifier(
                random_state=42, n_estimators=best_parameter_pair['tree'])
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

    elif args.method == 'KNN':
        temp_str = 'The best parameter for KNN is: neighbors = ' + str(
            best_parameter_pair['ngb'])
        # print(temp_str.center(40, '+'))
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            classification = KNeighborsClassifier(
                n_neighbors=best_parameter_pair['ngb'])
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

    elif args.method in ('AdaBoost', 'NB', 'LDA', 'QDA'):
        results = []
        true_labels = []
        predict_labels = []
        predict_probability = []
        for train, test in folds:
            x_train = output_array[train]
            x_test = output_array[test]
            y_train = label_list[train]
            y_test = label_list[test]
            if args.method == 'AdaBoost':
                classification = AdaBoostClassifier()
            elif args.method == 'NB':
                classification = GaussianNB()
            elif args.method == 'LDA':
                classification = lda()
            elif args.method == 'QDA':
                classification = qda()
            classification.fit(x_train, y_train)
            y_test_predict = classification.predict(x_test)
            y_test_prob_predict = classification.predict_proba(x_test)[:, 1]
            result = evaluation(y_test, y_test_predict)
            results.append(result)
            true_labels.append(y_test)
            predict_labels.append(y_test_predict)
            predict_probability.append(y_test_prob_predict)
        plot_roc_curve(true_labels, predict_probability, args.result_dir)
        plot_pr_curve(true_labels, predict_probability, args.result_dir)
        final_result = np.array(results).mean(axis=0)
        result_print(final_result)

    all_predict = classification.predict(output_array)
    with open(args.result_dir + 'prediction result', 'w') as f:
        space = '          '
        f.write('No.' + space + 'True Label' + space + 'Predict Label\n')
        for i in range(len(all_predict)):
            f.write(
                str(i) + space + str(label_list[i]) + space +
                str(all_predict[i]))
            f.write('\n')
def main():
    st.title("Binary Classification Web App")
    st.sidebar.title("Binary Classification Web App")
    st.markdown("Are your mushrooms edible or poisonous? 🍄")
    st.sidebar.markdown("Are your mushrooms edible or poisonous? 🍄")

    @st.cache(persist=True)
    def load_data():
        data = pd.read_csv("mushrooms.csv")
        labelencoder = LabelEncoder()
        for col in data.columns:
            data[col] = labelencoder.fit_transform(data[col])
        return data

    @st.cache(persist=True)
    def split(df):
        y = df.iloc[:, 0]
        x = df.iloc[:, 1:]
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=0)
        return x_train, x_test, y_train, y_test

    def plot_metrics(metrics_list):
        if 'Confusion Matrix' in metrics_list:
            st.subheader("Confusion Matrix")
            plot_confusion_matrix(model,
                                  X_test,
                                  y_test,
                                  display_labels=class_names)
            st.pyplot()

        if 'ROC Curve' in metrics_list:
            st.subheader("ROC Curve")
            plot_roc_curve(model, X_test, y_test)
            st.pyplot()

        if 'Precision-Recall Curve' in metrics_list:
            st.subheader('Precision-Recall Curve')
            plot_precision_recall_curve(model, X_test, y_test)
            st.pyplot()

    # st.sidebar.subheader("Choose Dataset")
    # file_name = st.sidebar.selectbox("file_name",("mushroom.csv", "dataset_1.csv", "dataset_2.csv", "dataset_3.csv")

    # df = load_data(file_name)
    class_names = ['value_0', 'value_1']
    df = load_data()

    if st.sidebar.checkbox("Show raw data", False):
        st.subheader("Mushroom Data Set (Classification)")
        st.write(df)

    X_train, X_test, y_train, y_test = split(df)

    st.sidebar.subheader("Dimenstion Reduction Technique")
    method = st.sidebar.selectbox(
        "method",
        ("Principal Component Analysis (PCA)",
         "Linear Discriminant Analysis (LDA)", "KernelPCA", "NO REDUCTION"))

    if (method == "Principal Component Analysis (PCA)"):
        no_of_components = st.sidebar.number_input("no. of input feature",
                                                   1,
                                                   5,
                                                   step=1,
                                                   key='n_components')
        red_tech = PCA(n_components=no_of_components)
        X_train = red_tech.fit_transform(X_train)
        X_test = red_tech.transform(X_test)

    if (method == "Linear discriminant Analysis (LDA)"):
        no_of_components = st.sidebar.number_input("no. of input feature",
                                                   1,
                                                   5,
                                                   step=1,
                                                   key='n_components')
        red_tech = lda(n_components=no_of_components)
        X_train = red_tech.fit_transform(X_train, y_train)
        X_test = red_tech.transform(X_test)

    if (method == "KernalPCA"):
        no_of_components = st.sidebar.number_input("no. of input feature",
                                                   1,
                                                   5,
                                                   step=1,
                                                   key='n_components')
        ker = st.sidebar.radio("kernel_selection", ("linear", "rbf"),
                               key='kern')
        red_tech = KernelPCA(n_components=no_of_components, kernel=ker)
        X_train = red_tech.fit_transform(X_train)
        X_test = red_tech.transform(X_test)

    st.sidebar.subheader("Choose Classifier")
    classifier = st.sidebar.selectbox(
        "Classifier", ("Support Vector Machine (SVM)", "Logistic Regression",
                       "Random Forest", "Auto select acco. to Dataset"))

    if classifier == 'Support Vector Machine (SVM)':
        st.sidebar.subheader("Model Hyperparameters")
        #choose parameters
        parameter = st.sidebar.radio("parameter_selection",
                                     ("Manual", "Auto"))
        if (parameter == "Manual"):
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_SVM')
            kernel = st.sidebar.radio("Kernel", ("rbf", "linear"),
                                      key='kernel')
            gamma = st.sidebar.radio("Gamma (Kernel Coefficient)",
                                     ("scale", "auto"),
                                     key='gamma')
            model = SVC(C=C, kernel=kernel, gamma=gamma)
            model.fit(X_train, y_train)

        if (parameter == "Auto"):
            model_a = SVC(kernel='rbf')
            model_a.fit(X_train, y_train)
            parameters = {
                'kernel': ('linear', 'rbf'),
                'C': [1, 10],
                'gamma': [1, 10]
            }
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)

        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Support Vector Machine (SVM) Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write(
                "Precision: ",
                precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)

    if classifier == 'Logistic Regression':
        st.sidebar.subheader("Model Hyperparameters")
        parameter = st.sidebar.radio("parameter_selection",
                                     ("Manual", "Auto"))
        if (parameter == "Manual"):
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_LR')
            max_iter = st.sidebar.slider("Maximum number of iterations",
                                         100,
                                         500,
                                         key='max_iter')
            model = LogisticRegression(C=C, max_iter=max_iter)
            model.fit(X_train, y_train)

        if (parameter == "Auto"):
            model_a = LogisticRegression()
            model_a.fit(X_train, y_train)
            parameters = [{'C': [1, 10], 'max_iter': [100, 500]}]
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)

        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Logistic Regression Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write(
                "Precision: ",
                precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)

    if classifier == 'Random Forest':
        st.sidebar.subheader("Model Hyperparameters")
        parameter = st.sidebar.radio("parameter_selection",
                                     ("Manual", "Auto"))
        if (parameter == "Manual"):
            n_estimators = st.sidebar.number_input(
                "The number of trees in the forest",
                100,
                5000,
                step=10,
                key='n_estimators')
            max_depth = st.sidebar.number_input(
                "The maximum depth of the tree",
                1,
                20,
                step=1,
                key='max_depth')
            bootstrap = st.sidebar.radio(
                "Bootstrap samples when building trees", ('True', 'False'),
                key='bootstrap')
            model = RandomForestClassifier(n_estimators=n_estimators,
                                           max_depth=max_depth,
                                           # the radio returns a string, so convert it to a real bool
                                           bootstrap=(bootstrap == 'True'),
                                           n_jobs=-1)
            model.fit(X_train, y_train)

        if (parameter == "Auto"):
            model_a = RandomForestClassifier(n_estimators=100)
            model_a.fit(X_train, y_train)
            parameters = {
                'n_estimators': [100, 300, 10],
                'criterion': ['gini', 'entropy'],
                'max_depth': [1, 20],
                'bootstrap': [True, False]
            }
            model = GridSearchCV(model_a, parameters, n_jobs=-1)
            model.fit(X_train, y_train)

        metrics = st.sidebar.multiselect(
            "What metrics to plot?",
            ('Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'))

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Random Forest Results")
            accuracy = model.score(X_test, y_test)
            y_pred = model.predict(X_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write(
                "Precision: ",
                precision_score(y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ",
                     recall_score(y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics)
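
The snippet ends without invoking main(); a conventional Streamlit entry point (an assumption, not shown in the original) would be:

if __name__ == '__main__':
    main()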
Example #30
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as qda
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata

if __name__ == '__main__':
    from data.data_reader import get_training_data
    from data.data_combinator import get_full_combinations

    x_train, y_train, x_val, y_val = get_training_data(validation=True)
    x_train = get_full_combinations(x_train)
    x_val = get_full_combinations(x_val)

    LDA = lda()
    LDA.fit(x_train, y_train)
    LDA_prob = LDA.predict_proba(x_val)
    LDA_prob

    QDA = qda()
    QDA.fit(x_train, y_train)
    QDA_prob = QDA.predict_proba(x_val)
    QDA_prob

    GNB = GaussianNB()
    GNB.fit(x_train, y_train)
    GaussianNB_prob = GNB.predict_proba(x_val)
    GaussianNB_prob

    # alpha = 1.0
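    # Illustrative follow-up only (an assumption; the intended 'alpha' weighting is unknown):
    # average the three class-probability estimates and take the most likely class.
    blended_prob = (LDA_prob + QDA_prob + GaussianNB_prob) / 3.0
    blended_pred = LDA.classes_[blended_prob.argmax(axis=1)]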
Example #31
# print(y)
y = [np.where(all_id_in_y == a)[0].tolist()[0] for a in y]
y = np.array(y)

# print(y)

# print(y2)
# print(all_id_in_y)

# clf = lda(n_components=2)
# x_new = clf.fit_transform(X, y)

# # pca = PCA(n_components=2)
# # x_new = pca.fit_transform(X, y)

# plt.scatter(x_new[:, 0], x_new[:, 1], c=y)
# plt.show()

# 3d
clf = lda(n_components=3)
x_new = clf.fit_transform(X, y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

ax.scatter(x_new[:, 0], x_new[:, 1], x_new[:, 2], c=y, marker='o')

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

plt.show()
    reduced_base = base_pca.transform(base_long)
    reduced_stim = stim_pca.transform(stim_long)
    
    repeats = 500
    
    base_acc = []
    stim_acc = []
    
    for i in range(repeats):
        # Note: the train/test subsets are sampled independently, so they may overlap
        train_base = np.random.choice(np.arange(60),size=45,replace=False)
        test_base = np.random.choice(np.arange(60),size=15,replace=False)
        train_stim = np.random.choice(np.arange(60),size=45,replace=False)
        test_stim = np.random.choice(np.arange(60),size=15,replace=False)
            
        base_lda = lda()
        base_lda.fit(reduced_base[train_base,:], groups[train_base])
        base_acc.append(sum(base_lda.predict(reduced_base[test_base,:]) == groups[test_base]) / len(groups[test_base]))
        #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_base,accuracy))
        
        stim_lda = lda()
        stim_lda.fit(reduced_stim[train_stim,:], groups[train_stim])
        stim_acc.append(sum(stim_lda.predict(reduced_stim[test_stim,:]) == groups[test_stim]) / len(groups[test_stim]))
        #print('explained_var = %.3f, accuracy = %.3f' % (explained_var_stim,accuracy))
    
    plt.figure()
    plt.title(os.path.basename(file_list[file]))
    plt.show(plt.hist(base_acc))
    plt.show(plt.hist(stim_acc))
# =============================================================================
# =============================================================================