Exemplo n.º 1
0
#   Infer trajectory                                                        ####
# run topslam
from sklearn.manifold import TSNE, LocallyLinearEmbedding, SpectralEmbedding, Isomap
from sklearn.decomposition import FastICA, PCA

# Target dimensionality shared by every reduction method below.
n_components = p["n_components"]

# Candidate dimensionality-reduction methods, keyed by display name.
methods = {
    't-SNE':
    TSNE(n_components=n_components),
    'PCA':
    PCA(n_components=n_components),
    'Spectral':
    SpectralEmbedding(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'Isomap':
    Isomap(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'ICA':
    FastICA(n_components=n_components)
}
# p["dimreds"] is a boolean mask aligned with the *sorted* method names;
# keep only the methods the caller selected.
method_names = sorted(methods.keys())
method_names_selected = [
    method_names[i] for i, selected in enumerate(p["dimreds"]) if selected
]
methods = {
    method_name: method
    for method_name, method in methods.items()
    if method_name in method_names_selected
}

# dimensionality reduction
# BUG FIX: a stray bare `y` expression followed this call in the original;
# it referenced an undefined name and would raise NameError at runtime.
X_init, dims = run_methods(expression, methods)

# We see here that there are 1,797 samples and 64 features.

# ### Unsupervised learning: Dimensionality reduction
#
# We'd like to visualize our points within the 64-dimensional parameter space
# - it's difficult to effectively visualize points in such a high-dimensional space.
# - Instead we'll reduce the dimensions to 2, using an unsupervised method.
#
# Here, we'll make use of a manifold learning algorithm called *Isomap* (see **In-Depth: Manifold Learning**), and transform the data to two dimensions:

# In[32]:

from sklearn.manifold import Isomap

# Embed the 64-dimensional digits data into two dimensions with Isomap.
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(digits.data)
data_projected.shape

# We see that the projected data is now two-dimensional.
# Let's plot this data to see if we can learn anything from its structure:

# In[112]:

# Scatter the 2-D projection, one color per digit class (10 discrete colors).
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolor='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap('nipy_spectral', 10))
# NOTE(review): the ax.text(...) call below is indented as if inside a loop,
# but no enclosing `for i ...` loop or `ax` definition is visible here — as
# written this is a syntax error. It looks like a fragment pasted from a
# per-subplot digit-annotation loop; confirm against the original notebook.
    ax.text(0.05,
            0.05,
            str(digits.target[i]),
            transform=ax.transAxes,
            color='green')

X = digits.data
print(X.shape)  # each row is a flattened image: a 64-element pixel array
y = digits.target
print(y.shape)  # in total: 1797 samples with 64 features each

# 1. Unsupervised learning: dimensionality reduction

# Transform the data into a two-dimensional representation
from sklearn.manifold import Isomap  # manifold-learning algorithm
iso = Isomap(n_components=2)  # reduce the number of dimensions to 2
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
print(data_projected.shape)
# Plot the projected data, one color per digit class (10 discrete colors)
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolors='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap("Spectral", 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)  # center each of the 10 colors on an integer label

# 2. Digit classification
Exemplo n.º 4
0
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne

    Parameters
    ----------
    dmatDf : pd.DataFrame or np.ndarray
        Square pairwise distance matrix.
    method : str
        One of 'isomap', 'mds', 'pca', 'kpca', 'lle', 'sklearn-tsne'.
    n_components : int
        Number of embedding dimensions returned.
    **kwargs
        'perplexity' is required when method='sklearn-tsne'.

    Returns
    -------
    pd.DataFrame of shape (N, n_components), indexed like dmatDf, with
    integer column labels. Returns None (after printing a message) when
    `method` is not recognized. For 'kpca' an `explained_variance_`
    attribute is attached to the result."""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        # MDS solutions are rotation-invariant; rotate onto principal axes
        # so repeated runs have a stable orientation.
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0],
                           kernel='precomputed',
                           eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            # Best-effort fallback when dist2kernel rejects the matrix.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            print(
                'Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead'
            )
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = LocallyLinearEmbedding(n_neighbors=30,
                                     n_components=n_components,
                                     method='standard')
        # BUG FIX: was `lle.fit_transform(dist)` — `dist` is undefined in
        # this scope; the distance matrix is `dmat`.
        xy = lle.fit_transform(dmat)
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components,
                       metric='precomputed',
                       random_state=0,
                       perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    else:
        print(('Method unknown: %s' % method))
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components],
                        index=dmatDf.index,
                        columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are usually
        small so it shouldn't make a big difference"""
        # NOTE(review): KernelPCA.lambdas_ was renamed eigenvalues_ in
        # scikit-learn >= 1.0 — confirm which version this targets.
        xyDf.explained_variance_ = pcaObj.lambdas_[:n_components] / pcaObj.lambdas_[
            pcaObj.lambdas_ > 0].sum()
    return xyDf


def scatter_3d(X, y):
    """Render a 3-D scatter plot of the first three columns of X, colored by y."""
    figure = plt.figure(figsize=(6, 5))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    # Fixed viewpoint: 10 deg elevation, -70 deg azimuth.
    axes.view_init(10, -70)
    for label, setter in zip(("$x_1$", "$x_2$", "$x_3$"),
                             (axes.set_xlabel, axes.set_ylabel,
                              axes.set_zlabel)):
        setter(label, fontsize=18)
    plt.show()


if __name__ == '__main__':
    # S-curve toy dataset; Y is the position along the curve (used for color).
    X, Y = make_s_curve(n_samples=500, noise=0.1, random_state=42)

    # Custom Isomap implementation (defined elsewhere in this project):
    # presumably (data, n_components, n_neighbors) — matches the sklearn
    # call below; TODO confirm the signature of my_Isomap.
    data_1 = my_Isomap(X, 2, 10)

    # Reference embedding from scikit-learn with the same parameters.
    data_2 = Isomap(n_neighbors=10, n_components=2).fit_transform(X)

    # Side-by-side comparison of the two embeddings.
    plt.figure(figsize=(8, 4))
    plt.subplot(121)
    plt.title("my_Isomap")
    plt.scatter(data_1[:, 0], data_1[:, 1], c=Y)

    plt.subplot(122)
    plt.title("sklearn_Isomap")
    plt.scatter(data_2[:, 0], data_2[:, 1], c=Y)
    plt.savefig("Isomap1.png")
    plt.show()
Exemplo n.º 6
0
def makeRingManifold(spikes, ep, angle, bin_size=200):
    """
    spikes : dict of hd spikes
    ep : epoch to restrict
    angle : tsd of angular direction
    bin_size : in ms

    Bins spike counts of every neuron over the first interval of `ep`,
    smooths the sqrt-transformed rates with a Gaussian kernel along time,
    and embeds the resulting population vectors with a 2-D Isomap. The
    scatter of the embedding is colored by the binned head-direction
    angle (hue = angle / 2*pi), with a small polar inset as color legend.

    Returns (imap, bin_centers): the Nx2 Isomap coordinates and the bin
    center times in ms.
    """
    neurons = np.sort(list(spikes.keys()))
    inputs = []
    angles = []
    sizes = []
    # Bin edges in ms spanning the first start/end of the epoch.
    bins = np.arange(
        ep.as_units('ms').start.iloc[0],
        ep.as_units('ms').end.iloc[0] + bin_size, bin_size)
    # One column per neuron; rows indexed by bin-center times.
    spike_counts = pd.DataFrame(index=bins[0:-1] + np.diff(bins) / 2,
                                columns=neurons)

    for i in neurons:
        spks = spikes[i].as_units('ms').index.values
        spike_counts[i], _ = np.histogram(spks, bins)

    # Square root of the rate — presumably a variance-stabilizing
    # transform for Poisson-like counts; TODO confirm.
    rates = np.sqrt(spike_counts / (bin_size))

    # Mean angular direction per time bin, re-indexed onto bin centers.
    epi = nts.IntervalSet(ep.loc[0, 'start'], ep.loc[0, 'end'])
    angle2 = angle.restrict(epi)
    newangle = pd.Series(index=np.arange(len(bins) - 1))
    tmp = angle2.groupby(
        np.digitize(angle2.as_units('ms').index.values, bins) - 1).mean()
    tmp = tmp.loc[np.arange(len(bins) - 1)]
    newangle.loc[tmp.index] = tmp
    newangle.index = pd.Index(bins[0:-1] + np.diff(bins) / 2.)

    # Gaussian smoothing along the time axis (window=200 bins, std=2 bins).
    tmp = rates.rolling(window=200,
                        win_type='gaussian',
                        center=True,
                        min_periods=1,
                        axis=0).mean(std=2).values
    sizes.append(len(tmp))
    inputs.append(tmp)
    angles.append(newangle)

    inputs = np.vstack(inputs)

    # 2-D Isomap embedding of the smoothed population activity.
    imap = Isomap(n_neighbors=20, n_components=2,
                  n_jobs=-1).fit_transform(inputs)

    # Color each time bin by head direction: hue = angle / (2*pi),
    # full saturation and value.
    H = newangle.values / (2 * np.pi)
    HSV = np.vstack((H, np.ones_like(H), np.ones_like(H))).T
    RGB = hsv_to_rgb(HSV)

    fig, ax = subplots()
    ax = subplot(111)
    ax.set_aspect(aspect=1)
    ax.scatter(imap[:, 0],
               imap[:, 1],
               c=RGB,
               marker='o',
               alpha=0.5,
               zorder=2,
               linewidth=0,
               s=40)

    # hsv
    # Small polar inset acting as a circular color legend for the angle.
    display_axes = fig.add_axes([0.2, 0.25, 0.05, 0.1], projection='polar')
    colormap = plt.get_cmap('hsv')
    norm = mpl.colors.Normalize(0.0, 2 * np.pi)
    xval = np.arange(0, 2 * pi, 0.01)
    yval = np.ones_like(xval)
    display_axes.scatter(xval,
                         yval,
                         c=xval,
                         s=20,
                         cmap=colormap,
                         norm=norm,
                         linewidths=0,
                         alpha=0.8)
    display_axes.set_yticks([])
    display_axes.set_xticks(np.arange(0, 2 * np.pi, np.pi / 2))
    display_axes.grid(False)

    show()

    return imap, bins[0:-1] + np.diff(bins) / 2
Exemplo n.º 7
0
def isomap(X=None, W=None, num_vecs=None, k=None):
    """Embed X into `num_vecs` dimensions via Isomap with `k` neighbors.

    W is accepted for interface compatibility with sibling embedders but
    is not used by Isomap.
    """
    model = Isomap(n_neighbors=k, n_components=num_vecs)
    return model.fit_transform(X)
Exemplo n.º 8
0
    fa_projected_data = FactorAnalysis(
        n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
    color_3D_projection(fa_projected_data, variable_data, 'FA; ' + Title)

    # ICA

    ICA_projected_data = FastICA(
        n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
    color_3D_projection(ICA_projected_data, variable_data, 'ICA; ' + Title)

    # Isomap

    N_NEIGHBORS = 30
    Isomap_projected_data = Isomap(
        n_components=PROJECTED_DIMENSIONS,
        n_neighbors=N_NEIGHBORS).fit_transform(neural_data)
    color_3D_projection(Isomap_projected_data, variable_data,
                        'Isomap; ' + Title)

    # tSNE

    PERPLEXITY = 30  # normally ranges 5-50
    TSNE_projected_data = TSNE(
        n_components=PROJECTED_DIMENSIONS,
        perplexity=PERPLEXITY).fit_transform(neural_data)
    color_3D_projection(TSNE_projected_data, variable_data, 'tSNE; ' + Title)

    # Multidimensional scaling

    MDS_projected_data = MDS(
Exemplo n.º 9
0
# NOTE(review): this fragment opens mid-figure — the ylabel below finishes a
# preceding MLLE subplot that is not visible in this chunk.
plt.ylabel("MLLE2")

# KERNEL PRINCIPAL COMPONENT ANALYSIS (KPCA)
print("Performing Kernel Principal Component Analysis (KPCA) ...")
plt.subplot(333)
# Cosine-kernel KPCA projection of X down to 2 components.
kpca = KernelPCA(n_components=2, kernel='cosine').fit_transform(X)
plt.scatter(kpca[:, 0], kpca[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Kernel PCA')
#plt.colorbar()
plt.xlabel("KPCA1")
plt.ylabel("KPCA2")

# ISOMAP
print("Performing Isomap Plotting ...")
plt.subplot(334)
model = Isomap(n_components=2)
isomap = model.fit_transform(X)
plt.scatter(isomap[:, 0], isomap[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Isomap')
#plt.colorbar()
plt.xlabel("ISO1")
plt.ylabel("ISO2")

# LAPLACIAN EIGENMAP
print("Performing Laplacian Eigenmap (Spectral Embedding) ...")
plt.subplot(335)
model = SpectralEmbedding(n_components=2, n_neighbors=50)
se = model.fit_transform(X)
plt.scatter(se[:, 0], se[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Laplacian Eigenmap')
#plt.colorbar()
Exemplo n.º 10
0
def _report_scores(title, y_true, y_pred):
    """Print one ARI/NMI/ACC report block under `title`; return (acc, nmi, ari)."""
    acc = np.round(cluster_acc(y_true, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y_true, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y_true, y_pred), 5)
    print(title)
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")
    return acc, nmi, ari


def eval_other_methods(x, y):
    """Benchmark GMM / K-Means / Spectral clustering on the raw features and
    on a manifold embedding of them, printing ARI/NMI/ACC for each run.

    Parameters
    ----------
    x : array-like
        Feature matrix, one row per sample.
    y : array-like
        Ground-truth labels, used only for scoring.

    Relies on the module-level `args` namespace for dataset name, cluster
    count, and manifold-learner configuration, and saves a scatter of the
    first two embedding dimensions under `args.save_dir`.
    """
    # --- clustering directly on the raw features -------------------------
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(x)
    y_pred = gmm.predict_proba(x).argmax(1)
    _report_scores(args.dataset + " | GMM clustering on raw data", y, y_pred)

    y_pred = KMeans(n_clusters=args.n_clusters, random_state=0).fit_predict(x)
    _report_scores(args.dataset + " | K-Means clustering on raw data", y,
                   y_pred)

    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(x)
    _report_scores(args.dataset + " | Spectral Clustering on raw data", y,
                   y_pred)

    # --- manifold embedding of the raw features --------------------------
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(x)
    elif args.manifold_learner == 'LLE':
        from sklearn.manifold import LocallyLinearEmbedding
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(x)
    elif args.manifold_learner == 'tSNE':
        # NOTE: the original assigned an unused local `method = 'exact'`
        # here — possibly intended as TSNE(method='exact'); not passed, to
        # avoid changing behavior.
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(x)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(x)
    else:
        # BUG FIX: an unknown learner previously fell through and crashed
        # later with an opaque NameError on `hle`; fail fast instead.
        raise ValueError('Unknown manifold learner: %s' %
                         args.manifold_learner)

    # --- clustering on the embedding -------------------------------------
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(hle)
    y_pred = gmm.predict_proba(hle).argmax(1)
    _report_scores(
        args.dataset + " | GMM clustering on " + str(args.manifold_learner) +
        " embedding", y, y_pred)

    # Scatter of the first two embedding dimensions, colored by true label.
    plt.scatter(*zip(*hle[:, :2]), c=y, label=y)
    plt.savefig(args.save_dir + '/' + args.dataset + '-' +
                str(args.manifold_learner) + '.png')
    plt.clf()

    y_pred = KMeans(n_clusters=args.n_clusters,
                    random_state=0).fit_predict(hle)
    _report_scores(
        args.dataset + " | K-Means " + str(args.manifold_learner) +
        " embedding", y, y_pred)

    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(hle)
    _report_scores(
        args.dataset + " | Spectral Clustering on " +
        str(args.manifold_learner) + " embedding", y, y_pred)
Exemplo n.º 11
0
#Set seed
np.random.seed(42)

#-------------------------------FEATURE EXTRACTION---------------------------------------------------------

#Transform the images in the images folder into a 2D numpy array with one image per row and one pixel per column
data = aux.images_as_matrix()

#Extract 6 features using Principal Component Analysis
PCA_features = PCA(n_components=6).fit_transform(data)

#Extract 6 features using t-Distributed Stochastic Neighbor Embedding
#(method="exact" because barnes_hut only supports n_components < 4)
TSNE_features = TSNE(n_components=6, method="exact").fit_transform(data)

#Extract 6 features using Isometric mapping with Isomap
ISOMAP_features = Isomap(n_components=6).fit_transform(data)

#Save the 18 extracted features into one feature matrix
matrix = np.concatenate((PCA_features, TSNE_features, ISOMAP_features), axis=1)
# NOTE(review): the filename looks like a typo for "featureextraction.npz";
# kept as-is because downstream loaders may reference this exact name.
np.savez('featureextration.npz', matrix)

#-------------------------------FEATURE SELECTION---------------------------------------------------------


def scatter_plot(features):
    """ 
    Another method to check the correlation between features:
    renders a pandas scatter matrix of *features* (KDE plots on the
    diagonal) and saves it to scatter_plot.png.
    """
    plt.figure()
    scatter_matrix(features, alpha=0.5, figsize=(15, 10), diagonal='kde')
    plt.savefig("scatter_plot.png")
Exemplo n.º 12
0
def cluster_manifold_in_embedding(hl, y, n_clusters, save_dir, visualize):
    """Learn a manifold on the autoencoded embedding `hl`, cluster it, and
    score the result against the ground-truth labels `y`.

    Parameters
    ----------
    hl : array-like
        Autoencoder latent representation, one row per sample.
    y : array-like
        Ground-truth labels, used only for scoring (ARI/NMI/ACC).
    n_clusters : int
        Number of clusters for the chosen clustering algorithm.
    save_dir : str
        Directory where the 2-D visualization is written when `visualize`.
    visualize : bool
        If True, scatter the first two manifold dimensions colored by `y`.

    Returns
    -------
    (y_pred, acc, nmi, ari)

    Raises
    ------
    ValueError
        If `args.manifold_learner` or `args.cluster` is not recognized
        (previously this crashed later with an opaque NameError).
    """
    # find manifold on autoencoded embedding
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(hl)
    elif args.manifold_learner == 'LLE':
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(hl)
    elif args.manifold_learner == 'tSNE':
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(hl)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(hl)
    else:
        raise ValueError('Unknown manifold learner: %s' %
                         args.manifold_learner)

    # clustering on new manifold of autoencoded embedding
    if args.cluster == 'GMM':
        gmm = mixture.GaussianMixture(covariance_type='full',
                                      n_components=n_clusters,
                                      random_state=0)
        gmm.fit(hle)
        y_pred_prob = gmm.predict_proba(hle)
        y_pred = y_pred_prob.argmax(1)
    elif args.cluster == 'KM':
        km = KMeans(init='k-means++',
                    n_clusters=n_clusters,
                    random_state=0,
                    n_init=20)
        y_pred = km.fit_predict(hle)
    elif args.cluster == 'SC':
        sc = SpectralClustering(n_clusters=n_clusters,
                                random_state=0,
                                affinity='nearest_neighbors')
        y_pred = sc.fit_predict(hle)
    else:
        raise ValueError('Unknown clustering algorithm: %s' % args.cluster)

    # Flatten predictions and labels to 1-D before scoring.
    y_pred = np.asarray(y_pred).reshape(-1)
    y = np.asarray(y).reshape(-1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | " + args.manifold_learner +
          " on autoencoded embedding with " + args.cluster + " - N2D")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if visualize:
        # Plot only the first two manifold dimensions, colored by label.
        plt.scatter(*zip(*hle[:, :2]), c=y, label=y)

        plt.savefig(save_dir + '/' + args.dataset + '-n2d.png')
        plt.clf()

    return y_pred, acc, nmi, ari
Exemplo n.º 13
0
    ax.yaxis.set_major_formatter(plt.NullFormatter())
    ax.set_xlabel('feature 1', color='gray')
    ax.set_ylabel('feature 2', color='gray')
    ax.set_title(title, color='gray')


# make data
# Two-dimensional slice (x and z axes) of a noisy swiss roll; y is the
# latent position along the roll.
X, y = make_swiss_roll(200, noise=0.5, random_state=42)
X = X[:, [0, 2]]

# visualize data
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], color='gray', s=30)

# format the plot
# format_plot is defined earlier in this file (outside this chunk).
format_plot(ax, 'Input Data')

# Recover the single latent coordinate along the roll with Isomap.
model = Isomap(n_neighbors=8, n_components=1)
y_fit = model.fit_transform(X).ravel()

# visualize data
# Color each input point by its learned latent coordinate.
fig, ax = plt.subplots()
pts = ax.scatter(X[:, 0], X[:, 1], c=y_fit, cmap='viridis', s=30)
cb = fig.colorbar(pts, ax=ax)

# format the plot
format_plot(ax, 'Learned Latent Parameter')
cb.set_ticks([])
cb.set_label('Latent Variable', color='gray')

plt.show()
Exemplo n.º 14
0
	Spearman_svd = []
	Cluster = []
	Cluster_svd = []
	for _ in range(iters):
		Phi = random_phi(m,X.shape[0])
		Y,noise = get_observations(X,Phi,snr=snr,return_noise=True)
		pearson_dist,spearman_dist = compare_distances(X,Y,pvalues=True)
		cluster_similarity = compare_clusters(X,Y)
		Pearson.append(pearson_dist[0])
		Spearman.append(spearman_dist[0])
		Pearson_p.append(pearson_dist[1])
		Spearman_p.append(spearman_dist[1])
		Cluster.append(cluster_similarity)
		X_mds = MDS().fit_transform(X.T).T
		Y_mds = MDS().fit_transform(Y.T).T
		X_iso = Isomap().fit_transform(X.T).T
		Y_iso = Isomap().fit_transform(Y.T).T
		pearson_mds,spearman_mds = compare_distances(X_mds,Y_mds,pvalues=True)
		pearson_iso,spearman_iso = compare_distances(X_iso,Y_iso,pvalues=True)
		Pearson_MDS.append(pearson_mds[0])
		Pearson_Iso.append(pearson_iso[0])
		Pearson_MDS_p.append(pearson_mds[1])
		Pearson_Iso_p.append(pearson_mds[1])
		ua,sa,vta = np.linalg.svd(X+noise,full_matrices=False)
		Vt = np.diag(sa).dot(vta)
		pearson_svd,spearman_svd = compare_distances(Vt[:m],Y,pvalues=False)
		cluster_similarity_svd = compare_clusters(Vt[:m],Y)
		Pearson_svd.append(pearson_svd)
		Spearman_svd.append(spearman_svd)
		Cluster_svd.append(cluster_similarity_svd)
	print prefix,m,np.average(Pearson),np.average(Pearson_p),np.average(Spearman),np.average(Spearman_p),np.average(Pearson_MDS),np.average(Pearson_MDS_p),np.average(Pearson_Iso),np.average(Pearson_Iso_p),np.average(Cluster),np.average(Pearson_svd),np.average(Spearman_svd),np.average(Cluster_svd)
Exemplo n.º 15
0
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_olivetti_faces
from sklearn.manifold import Isomap

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load the Olivetti faces dataset
    faces = fetch_olivetti_faces()

    # Fit Isomap and embed the face images into two dimensions
    isomap = Isomap(n_neighbors=5, n_components=2)
    X_isomap = isomap.fit_transform(faces['data'])

    # Scatter the first 100 embedded faces, annotating each with its
    # subject id slightly offset from the point
    fig, ax = plt.subplots(figsize=(18, 10))

    for i, (x0, x1) in enumerate(X_isomap[:100]):
        ax.scatter(x0, x1, marker='o', s=100)
        ax.annotate('%d' % faces['target'][i], xy=(x0 + 0.5, x1 + 0.5))

    ax.set_xlabel(r'$x_0$')
    ax.set_ylabel(r'$x_1$')
    ax.grid()

    plt.show()
def isomap(x):
    """Fit a 2-component Isomap on x.

    Returns the fitted estimator together with the transformed coordinates.
    """
    model = Isomap(n_components=2)
    transformed = model.fit_transform(x)
    return model, transformed