Example #1
def test_spectral_embedding_unknown_affinity(seed=36):
    # Test that SpectralEmbedding fails with an unknown affinity type
    se = SpectralEmbedding(n_components=1,
                           affinity="<unknown>",
                           random_state=np.random.RandomState(seed))
    with pytest.raises(ValueError):
        se.fit(S)
Example #2
def cluster_responses(response_mat, n_clusters, corr_cut=0.6):
    """
    Clusters the neuron responses using spectral clustering
    :param response_mat: The response matrix with all neuron responses
    :param n_clusters: The desired number of clusters
    :param corr_cut: The correlation cutoff to consider a given neuron to be part of a cluster
    :return:
        [0]: The cluster ids
        [1]: 3D embedding coordinates for plotting
    """
    # create trial average
    response_mat = trial_average(response_mat, 3)
    # compute pairwise correlations
    pw_corrs = np.corrcoef(response_mat.T)
    pw_corrs[np.isnan(pw_corrs)] = 0
    pw_corrs[pw_corrs < 0.2] = 0
    # perform spectral clustering
    spec_clust = SpectralClustering(n_clusters, affinity="precomputed")
    clust_ids = spec_clust.fit_predict(pw_corrs)
    spec_emb = SpectralEmbedding(3, affinity="precomputed")
    coords = spec_emb.fit_transform(pw_corrs)
    # use correlation to cluster centroids to determine final cluster membership
    regressors = np.zeros((response_mat.shape[0], n_clusters))
    for i in range(n_clusters):
        regressors[:, i] = np.mean(response_mat[:, clust_ids == i], 1)
    for i in range(response_mat.shape[1]):
        max_ix = -1
        max_corr = 0
        for j in range(n_clusters):
            c = np.corrcoef(response_mat[:, i], regressors[:, j])[0, 1]
            if c >= corr_cut and c > max_corr:
                max_ix = j
                max_corr = c
        clust_ids[i] = max_ix
    return clust_ids, coords
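
The core of the routine is turning thresholded pairwise correlations into a precomputed affinity. A minimal self-contained sketch of that step on synthetic data (`trial_average` is a project-specific helper, so plain synthetic responses stand in here):

import numpy as np
from sklearn.cluster import SpectralClustering

rng = np.random.default_rng(0)
latent = rng.normal(size=(200, 3))                      # 3 shared response motifs
membership = rng.integers(0, 3, size=30)                # which motif each neuron follows
responses = latent[:, membership] + 0.5 * rng.normal(size=(200, 30))

pw_corrs = np.corrcoef(responses.T)                     # neuron-by-neuron correlations
pw_corrs[np.isnan(pw_corrs)] = 0
pw_corrs[pw_corrs < 0.2] = 0                            # threshold into a sparse affinity

clust_ids = SpectralClustering(3, affinity="precomputed").fit_predict(pw_corrs)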
Example #3
def se(X_train_scaled, X_test_scaled, num_components):
    # Spectral Embedding (Laplacian eigenmaps)
    embedding = SpectralEmbedding(n_components=num_components)
    X_train_se = embedding.fit_transform(X_train_scaled)
    X_test_se = embedding.fit_transform(X_test_scaled)

    return X_train_se, X_test_se
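
One caveat worth noting: `SpectralEmbedding` has no out-of-sample `transform`, so fitting train and test separately as above yields two unrelated coordinate systems. A hedged sketch of the usual workaround, embedding the concatenated data once and splitting afterwards (the same pattern Example #13 below uses):

import numpy as np
from sklearn.manifold import SpectralEmbedding

def se_joint(X_train_scaled, X_test_scaled, num_components):
    # Embed train and test together so both live in the same coordinate system.
    stacked = np.vstack([X_train_scaled, X_test_scaled])
    embedded = SpectralEmbedding(n_components=num_components).fit_transform(stacked)
    return embedded[:len(X_train_scaled)], embedded[len(X_train_scaled):]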
Example #4
class LaplacianEigenmaps(AbstractReducer):
    def __init__(self, d: int = 2, random_state: int = 0, **kwargs):
        super().__init__(d, random_state)
        self._main = SpectralEmbedding(n_components=d,
                                       random_state=random_state,
                                       **kwargs)

    def fit_transform(self, x: np.ndarray, **kwargs) -> np.ndarray:
        return self._main.fit_transform(x)

    def fit(self, x: np.ndarray, **kwargs):
        return self._main.fit(x)

    def transform(self, x: np.ndarray, **kwargs) -> np.ndarray:
        raise NotImplementedError

    def set_random_state(self, random_state: int = 0):
        self.random_state = random_state
        self._main.random_state = random_state

    @property
    def is_deterministic(self) -> bool:
        return False

    @property
    def is_stateful(self) -> bool:
        return True

    @staticmethod
    def get_parameter_ranges() -> dict:
        return {'n_neighbors': (int, 1, 20)}
Example #5
def plot_md_scaling(mdist, nd=3):
    """
    Plots the dimensionality rescaling of the distance matrix

    :param mdist: the precomputed distance matrix
    :param nd: the number of embedding dimensions
    :return:
    """
    #mds = MDS(n_components=3, dissimilarity='precomputed')

    mds = SpectralEmbedding(n_components=nd,
                            affinity='precomputed',
                            n_neighbors=3)
    pdata = mds.fit_transform(mdist)

    fig = plt.figure(figsize=(10, 10))
    if nd == 2:
        ax = fig.add_subplot(111)
        ax.scatter(pdata[:, 0], pdata[:, 1], cmap=plt.get_cmap("Blues"))
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(pdata[:, 0],
                   pdata[:, 1],
                   pdata[:, 2],
                   depthshade=False,
                   cmap=plt.get_cmap("Blues"))

    plt.show()
Example #6
def spectral_embedding(file_name, dimension, num_neighbors, label):
    # aka Laplacian eigenmaps
    # finds a low dimensional representation of the data using a spectral decomposition of the graph Laplacian
    # graph = discrete approximation of the low dimensional manifold in the high dimensional space
    # minimization of the cost function based on the graph preserves local distances

    # 1. weighted graph construction
    # 2. graph Laplacian construction: unnormalized: L = D-A, normalized: L=D^{-1/2}(D-A)D^{-1/2}
    # 3. partial eigenvalue decomposition

    balls = np.loadtxt(file_name)
    matrix = balls[:, 0:dimension]
    new_matrix = convert_angles_to_cos_sin(matrix)
    s_embedding = SpectralEmbedding(n_components=2, affinity='nearest_neighbors', gamma=None, random_state=None,
                                    eigen_solver=None, n_neighbors=num_neighbors)
    transformed_matrix = s_embedding.fit_transform(new_matrix)
    ball_coords = np.zeros((balls.shape[0], dimension+3))
    for i in range(balls.shape[0]):
        ball_coords[i, 0:dimension] = balls[i, 0:dimension].tolist()
        ball_coords[i, dimension:dimension+2] = transformed_matrix[i]
        if label == 'cluster':
            ball_coords[i, dimension+2] = balls[i, dimension].tolist()
        elif label == 'eq':
            ball_coords[i, dimension+2] = (-0.0019872041*300*np.log(abs(balls[i, dimension+1]))).tolist()
        elif label == 'committor':
            ball_coords[i, dimension+2] = (balls[i, dimension+2]/abs(balls[i, dimension+1])).tolist()
        print(' '.join([str(x) for x in ball_coords[i, :]]))
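
The three numbered steps in the comments can be spelled out directly. A minimal NumPy sketch of the normalized-Laplacian route (assuming a connected kNN graph; sklearn's own solver adds diffusion-map scaling and other details omitted here):

import numpy as np
from sklearn.neighbors import kneighbors_graph

def laplacian_eigenmaps(X, n_components=2, n_neighbors=10):
    # 1. weighted graph construction (symmetrized kNN connectivity)
    A = kneighbors_graph(X, n_neighbors=n_neighbors, mode='connectivity').toarray()
    A = 0.5 * (A + A.T)
    # 2. normalized graph Laplacian: L = D^{-1/2} (D - A) D^{-1/2}
    d = A.sum(axis=1)
    D_inv_sqrt = np.diag(1.0 / np.sqrt(d))
    L = D_inv_sqrt @ (np.diag(d) - A) @ D_inv_sqrt
    # 3. partial eigenvalue decomposition; skip the trivial eigenvector at eigenvalue 0
    eigvals, eigvecs = np.linalg.eigh(L)
    return eigvecs[:, 1:n_components + 1]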
Example #7
def test_spectral_embedding_unknown_eigensolver(seed=36):
    # Test that SpectralEmbedding fails with an unknown eigen_solver
    se = SpectralEmbedding(n_components=1, affinity="precomputed",
                           random_state=np.random.RandomState(seed),
                           eigen_solver="<unknown>")
    with pytest.raises(ValueError):
        se.fit(S)
Example #8
def test_spectral_embedding_amg_solver(seed=36):
    # Test spectral embedding with amg solver
    pytest.importorskip('pyamg')

    se_amg = SpectralEmbedding(n_components=2,
                               affinity="nearest_neighbors",
                               eigen_solver="amg",
                               n_neighbors=5,
                               random_state=np.random.RandomState(seed))
    se_arpack = SpectralEmbedding(n_components=2,
                                  affinity="nearest_neighbors",
                                  eigen_solver="arpack",
                                  n_neighbors=5,
                                  random_state=np.random.RandomState(seed))
    embed_amg = se_amg.fit_transform(S)
    embed_arpack = se_arpack.fit_transform(S)
    assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 1e-5)

    # same with special case in which amg is not actually used
    # regression test for #10715
    # affinity between nodes
    row = [0, 0, 1, 2, 3, 3, 4]
    col = [1, 2, 2, 3, 4, 5, 5]
    val = [100, 100, 100, 1, 100, 100, 100]

    affinity = sparse.coo_matrix((val + val, (row + col, col + row)),
                                 shape=(6, 6)).toarray()
    se_amg.affinity = "precomputed"
    se_arpack.affinity = "precomputed"
    embed_amg = se_amg.fit_transform(affinity)
    embed_arpack = se_arpack.fit_transform(affinity)
    assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 1e-5)
Example #9
def dimension_reduce():
    ''' This compares a few different methods of
    dimensionality reduction on the current dataset.
    '''
    pca = PCA(n_components=2)                             # initialize a dimensionality reducer
    pca.fit(digits.data)                                  # fit it to our data
    X_pca = pca.transform(digits.data)                    # apply our data to the transformation
    plt.subplot(1, 3, 1)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target)# plot the manifold
    
    se = SpectralEmbedding()
    X_se = se.fit_transform(digits.data)
    plt.subplot(1, 3, 2)
    plt.scatter(X_se[:, 0], X_se[:, 1], c=digits.target)
    
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(digits.data)
    X_iso = isomap.transform(digits.data)
    plt.subplot(1, 3, 3)
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=digits.target)
    plt.show()

    plt.matshow(pca.mean_.reshape(8, 8))                  # plot the mean components
    plt.matshow(pca.components_[0].reshape(8, 8))         # plot the first principal component
    plt.matshow(pca.components_[1].reshape(8, 8))         # plot the second principal component
    plt.show()
Example #10
def test_spectral_embedding():
    # Test chaining KNeighborsTransformer and SpectralEmbedding
    n_neighbors = 5

    n_samples = 1000
    centers = np.array([
        [0.0, 5.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 4.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 5.0, 1.0],
    ])
    S, true_labels = make_blobs(n_samples=n_samples,
                                centers=centers,
                                cluster_std=1.,
                                random_state=42)

    # compare the chained version and the compact version
    est_chain = make_pipeline(
        KNeighborsTransformer(n_neighbors=n_neighbors, mode='connectivity'),
        SpectralEmbedding(n_neighbors=n_neighbors,
                          affinity='precomputed',
                          random_state=42))
    est_compact = SpectralEmbedding(n_neighbors=n_neighbors,
                                    affinity='nearest_neighbors',
                                    random_state=42)
    St_compact = est_compact.fit_transform(S)
    St_chain = est_chain.fit_transform(S)
    assert_array_almost_equal(St_chain, St_compact)
Example #11
 def __init__(self, n_nodes=None, 
              npcs=0.8, 
              embedding_dims=2,
              cov_estimator='corpcor', 
              cov_reg=None, 
              cov_indices=None,
              max_iter=10,
              sigma=0.01, 
              lam=1., 
              gamma=1.,
              n_neighbors=30,
              just_tree=False):
     self.n_nodes = n_nodes
     self.cov_reg = cov_reg
     self.cov_estimator = cov_estimator
     self.cov_indices = cov_indices
     self.max_iter = max_iter
     self.sigma = sigma
     self.lam = lam
     self.gamma = gamma
     self.npcs = npcs
     self.n_neighbors = n_neighbors
     self.embedding = SpectralEmbedding(n_components=embedding_dims, 
                                        affinity='precomputed')
     self.pca = PCA(n_components=self.npcs)
     self.just_tree = just_tree
Example #12
def draw_feature_vecs(X, model, n_samples):
    # create data from same and different classes
    c, data1 = create_1_data(n_samples, X)
    _, data0 = create_0_data(n_samples, X, category=c)

    # isolate last layer of model before dense layer
    layer_output = model.layers[-2].output
    activation_model = keras.models.Model(inputs=model.input,
                                          outputs=layer_output)

    features1 = activation_model.predict(data1)
    features0 = activation_model.predict(data0)
    features = np.concatenate((features1, features0), axis=0)

    # compute a spectral embedding (Laplacian eigenmaps) of the features
    embedding = SpectralEmbedding(n_components=2)
    features_transformed = embedding.fit_transform(features)

    # plot classes
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    ax.scatter(features_transformed[:n_samples, 0],
               features_transformed[:n_samples, 1],
               c='r',
               s=10,
               label='same class (1)')
    ax.scatter(features_transformed[n_samples:, 0],
               features_transformed[n_samples:, 1],
               c='b',
               s=10,
               label='diff class (0)')
    plt.title('Feature vectors for inputs from same/different classes')
    ax.legend()
    plt.show()
Example #13
 def embed_spectral(train, test):
     traintest = np.concatenate((train, test))
     from sklearn.manifold import SpectralEmbedding
     se = SpectralEmbedding(n_components=2, eigen_solver="arpack")
     X2d = se.fit_transform(traintest)
     X2d = MinMaxScaler().fit_transform(X2d)
     return X2d[:train.shape[0]], X2d[train.shape[0]:]
Example #14
 def sub_window_creation(self, images, kernels):
     gb_all_sw = []
     label = []
     for i in range(0, 100, 11):
         for j in range(0, 50, 11):
             for k in range(len(images)):
                 image = images[k]
                 sw_image = image[i:i+50, j:j+50]
                 sw_image = cv2.resize(sw_image, dsize=(12, 12), interpolation=cv2.INTER_NEAREST)
                 # print('sw size', sw_image.shape)
                 gabored_image = Preprocessing.process(self, sw_image, kernels)
                 # print('gab size', gabored_image.shape)
                 # model = SpectralEmbedding(n_components=100, n_neighbors=10)
                 # reduced_sw = model.fit_transform(gabored_image.reshape(-1, 1))
                 # print('gab size', gabored_image.reshape(1, -1).shape)
                 # gb_all_sw.append(gabored_image)
                 gb_all_sw.append(gabored_image)
                 label.append(int(k/4))
                 # print('red size', reduced_sw.reshape(-1, 1).shape)
                 # plt.imshow(image[i:i+50, j:j+50], cmap='gray')
                 # plt.show()
                 # plt.imshow(gabored_image, cmap='gray')
                 # plt.show()
     print(len(gb_all_sw))
     print(len(gb_all_sw[0]))
      # LEM dimension reduction
     model = SpectralEmbedding(n_components=100, n_neighbors=10)
     # reduced_sw = model.fit_transform(gb_all_sw)
     reduced_sw = model.fit_transform(gb_all_sw)
     print('final', len(reduced_sw))
     print('final', reduced_sw[0].shape)
     print(label)
Example #15
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        y : Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        logging.debug('Computing locally adjusted affinities.')
        d = dist.squareform(dist.pdist(X, metric=self.distance))

        if 0 <= self.n_components <= 1:
            n_components = max(int(self.n_components * X.shape[1]), 1)
        else:
            n_components = self.n_components

        logging.debug('Computing embedding of affinities.')
        embedder = SpectralEmbedding(n_components=n_components,
                                     affinity='precomputed_nearest_neighbors',
                                     gamma=None,
                                     random_state=self.random_state,
                                     eigen_solver=self.eigen_solver,
                                     n_neighbors=self.n_neighbors,
                                     n_jobs=self.n_jobs)
        self.embedding_ = embedder.fit_transform(d)
        return self
Example #16
def timeline_overlapped_exp(clpeaks, ncl, timepeaks, nfiles, gap=300, step=50 * 300, length=300 * 300, laplace=0.0):
    ldiffs = []
    for exp, _, nfile in nfiles:
        lmtrans = compute_timeline_pmatrices(exp, clpeaks, ncl, timepeaks, gap=gap, step=step, length=length,
                                             laplace=laplace)
        print(len(lmtrans))
        ldiffs.append(lmtrans)

    # for i in range(len(ldiffs)-1):
    #     for j in range(i+1, len(ldiffs)):
    md = []
    y = []
    for l in ldiffs:
        md.extend(l)
    i = 0
    for l in ldiffs:
        y.extend([nfiles[i][1]] * len(l))
        i += 1
    #print len(ldiffs[i]), len(ldiffs[j]), len(y), len(md)
    mdist = compute_trans_dist(md)
    fig = plt.figure()
    #mds = MDS(n_components=3, dissimilarity='precomputed')
    #mds = TSNE(n_components=3, perplexity=50.0, early_exaggeration=2.0, learning_rate=50.0, n_iter=2000, metric='precomputed')
    mds = SpectralEmbedding(n_components=3, affinity='precomputed', n_neighbors=5)
    X_new = mds.fit_transform(mdist)
    #print 'STRESS = ', mds.stress_
    ax = fig.add_subplot(projection='3d')
    plt.scatter(X_new[:, 1], X_new[:, 2], zs=X_new[:, 0], c=y, s=25)
    #plt.scatter(X_new[:, 0], X_new[:, 1], c=y)
    plt.show()
Example #17
def make_spectral_plot():
    file_out = "spectral"
    methods = {
        "Spectral NN": SpectralEmbedding(affinity="nearest_neighbors"),
        "Spectral RBF": SpectralEmbedding(affinity="rbf"),
    }
    make_plot_labeled(methods, file_out=file_out)
Example #18
 def _spectral_dbscan(fcd, n_dim=2, eps=0.3, min_samples=50):
     fcd = fcd - fcd.min()
     se = SpectralEmbedding(n_dim, affinity="precomputed")
     xi = se.fit_transform(fcd)
     pd = pdist(xi)
     eps = np.percentile(pd, int(100 * eps))
     db = DBSCAN(eps=eps, min_samples=min_samples).fit(xi)
     return xi.T, db.labels_
Example #19
def LapEigenmap(affinity_matrix, dim, random_state):
    if random_state is None:
        random_state = np.random.RandomState()
    component_embedding = SpectralEmbedding(
        n_components=dim, affinity="precomputed",
        random_state=random_state).fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()
    return component_embedding
Example #20
def spectralembed(input, finaldim):
    from sklearn.manifold import SpectralEmbedding
    # - 'nearest_neighbors' : construct affinity matrix by knn graph
    # - 'rbf' : construct affinity matrix by rbf kernel
    # - 'precomputed' : interpret X as precomputed affinity matrix
    embedding = SpectralEmbedding(n_components=finaldim, affinity='rbf')
    X_transformed = embedding.fit_transform(input.todense().transpose())
    return X_transformed, input.todense() * X_transformed
Example #21
def component_layout(data,
                     n_components,
                     component_labels,
                     dim,
                     metric="euclidean",
                     metric_kwds={}):
    """Provide a layout relating the separate connected components. This is done
    by taking the centroid of each component and then performing a spectral embedding
    of the centroids.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- required so we can generate centroids for each
        connected component of the graph.

    n_components: int
        The number of distinct components to be laid out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph the label of the component to
        which the vertex belongs.

    dim: int
        The chosen embedding dimension.

    metric: string or callable (optional, default 'euclidean')
        The metric used to measure distances among the source data points.

    metric_kwds: dict (optional, default {})
        Keyword arguments to be passed to the metric function.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components.
    """

    component_centroids = np.empty((n_components, data.shape[1]),
                                   dtype=np.float64)

    for label in range(n_components):
        component_centroids[label] = data[component_labels == label].mean(
            axis=0)

    distance_matrix = pairwise_distances(component_centroids,
                                         metric=metric,
                                         **metric_kwds)
    affinity_matrix = np.exp(-distance_matrix**2)

    component_embedding = SpectralEmbedding(
        n_components=dim,
        affinity="precomputed").fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()

    return component_embedding
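
Since `component_layout` only needs the data and per-vertex component labels, here is a small usage sketch with fabricated blobs (the labels stand in for real connected-component assignments):

import numpy as np

rng = np.random.default_rng(42)
data = np.vstack([rng.normal(loc=c, size=(50, 4)) for c in (0.0, 4.0, 8.0, 12.0)])
component_labels = np.repeat(np.arange(4), 50)

layout = component_layout(data, 4, component_labels, dim=2)
print(layout.shape)  # (4, 2): one 2-D position per connected component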
Example #22
def plotSpectralEmbedding(X, Y, out_p, is_regr=False):
    X, Y = deepcopy(X), deepcopy(Y)
    pca = PCA(n_components=10)
    X = pca.fit_transform(X)
    sm = SpectralEmbedding(n_components=3, eigen_solver="arpack", n_neighbors=10, n_jobs=-1)
    X = sm.fit_transform(X)
    title = "Spectral Embedding"
    out_p += "spectral_embedding.png"
    plotClusterPairGrid(X, Y, out_p, 3, 1, title, is_regr)
Example #23
def test_error_pyamg_not_available():
    se_precomp = SpectralEmbedding(
        n_components=2,
        affinity="rbf",
        eigen_solver="amg",
    )
    err_msg = "The eigen_solver was set to 'amg', but pyamg is not available."
    with pytest.raises(ValueError, match=err_msg):
        se_precomp.fit_transform(S)
Example #24
 def runSpectralEmbedding_KMeans(self):
     """
     Run sklearn-SpectralEmbedding to reduce the dimensionality of the data
     Cluster embedding with K-Means
     """
     spE = SpectralEmbedding(n_components=2)
     self.dspE = spE.fit_transform(self.dataset)
     self.kmeansSpE = KMeans(n_clusters=self.n_clusters,
                             random_state=0).fit_predict(self.dspE)
     return self.dspE, self.kmeansSpE
Example #25
def my_se(X, y=None, l1=.1, n_components=2, **kwargs):
    rrfs = RRFS(X.shape[1], hidden=n_components)
    model = SpectralEmbedding(n_components=n_components)
    codes = model.fit_transform(X)
    codes = (codes - np.min(codes)) / (np.max(codes) - np.min(codes))
    #rrfs.train_representation_network(x_train, name=dataset+'_rep.hd5', epochs=1000)
    score = rrfs.train_fs_network(X, rep=codes, l1=l1, epochs=300, loss='mse')
    # sort the features by score in descending order
    idx = np.argsort(score)[::-1]
    return idx
Example #26
def vis_se(feature):
    ''' visualize extracted features using Spectral Embedding '''
    if len(feature.shape) == 5:
        feature = feature.reshape(feature.shape[:-4] +
                                  (-1, feature.shape[3], feature.shape[4]))
        feature = feature.reshape((feature.shape[0], -1))
    elif len(feature.shape) == 4:
        feature = feature.reshape((feature.shape[0], -1))
    se = SpectralEmbedding(n_neighbors=7, n_components=2)
    projected = se.fit_transform(feature)
    return projected
Example #27
def get_spectral_embedding_prod(gram_matrix, n_dims=3, normalize=True, random_state=SEED):
    # The gram matrix is already an affinity;
    # but it has the undesirable quality of making high energy chords more similar than low energy chords
    # we normalise accordingly
    # Alternatively: RBF. See next fn
    inv_root_energy = 1.0 / np.maximum(np.sqrt(np.diagonal(gram_matrix)), 1)
    affinity = gram_matrix * np.outer(inv_root_energy, inv_root_energy)
    transformer = SpectralEmbedding(n_components=n_dims, affinity="precomputed", random_state=random_state)
    transformed = transformer.fit_transform(affinity)
    if normalize:
        transformed = normalize_var(transformed)
    return transformed
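
For chords with energy above 1, the normalization in the comments is exactly cosine similarity computed from the Gram matrix, K_ij / sqrt(K_ii * K_jj). A quick check on raw vectors (`normalize_var` and `SEED` are project helpers and omitted here):

import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(5, 8))
gram = X @ X.T

inv_root_energy = 1.0 / np.maximum(np.sqrt(np.diagonal(gram)), 1)
affinity = gram * np.outer(inv_root_energy, inv_root_energy)

# When every row has energy >= 1, this matches plain cosine similarity.
norms = np.linalg.norm(X, axis=1)
cosine = gram / np.outer(norms, norms)
print(np.abs(affinity - cosine).max())  # ~0 for these vectors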
Example #28
def affinity_matrix1(eigen_solver,n):
    n = int(n) # n value for nearest neighbor
    
    # 3D embedding transform created using default Euclidean metric
    embedding = SpectralEmbedding(n_components=3, affinity='nearest_neighbors', n_jobs=n_cpus, eigen_solver=eigen_solver, n_neighbors=n)
    data_transformed = embedding.fit_transform(data_df) # Transform data using embedding
    
    data_affinity = scipy.sparse.csr_matrix.toarray(embedding.affinity_matrix_) # Affinity matrix as a numpy array
    
    plot = hv.Image(data_affinity).opts(width=500, height=400, colorbar=True, cmap='Greys', title='Euclidean Affinity k='+str(n)) # Plot array as image
    
    return plot
Example #29
def get_spectral_embedding(adj, d):
    """
    Given adj is N*N, return its feature mat N*D, D is fixed in model
    :param adj:
    :return:
    """

    adj_ = adj.data.cpu().numpy()
    emb = SpectralEmbedding(n_components=d)
    res = emb.fit_transform(adj_)
    x = torch.from_numpy(res).float().cuda()
    return x
Example #30
def md_scaling(mdist, nd=3):
    """
    Performs dimensionality reduction of the distance matrix
    :param mdist: the precomputed distance matrix
    :param nd: the number of embedding dimensions
    :return: the embedded coordinates
    """
    nneig = int(np.sqrt(mdist.shape[0]))
    mds = SpectralEmbedding(n_components=nd,
                            affinity='precomputed',
                            n_neighbors=nneig)
    return mds.fit_transform(mdist)
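
A usage sketch, assuming `mdist` is already a square similarity matrix; note that `affinity='precomputed'` expects similarities, so a raw distance matrix is usually converted first (e.g. with an RBF kernel, as in Example #32):

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
dist = squareform(pdist(X))
sim = np.exp(-dist ** 2 / (2 * dist.std() ** 2))  # distances -> similarities

coords = md_scaling(sim, nd=3)
print(coords.shape)  # (100, 3)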
Example #31
def handle_cannot_link_constraints(X,
                                   d_transform,
                                   cannot_link_constraints,
                                   n,
                                   norm_p,
                                   spectral_embedding_components=None,
                                   sc_sigma=1):
    alpha = d_transform.max()
    d_p = np.power(d_transform, norm_p)

    if spectral_embedding_components is None:
        spectral_embedding_components = len(X[0])

    sc_embedding = SpectralEmbedding(
        n_components=spectral_embedding_components, affinity="precomputed")
    affinity_mat = np.exp(-np.power(squareform(d_transform), 2) /
                          (2 * (sc_sigma**2)))
    embedding = sc_embedding.fit_transform(affinity_mat)

    for c in cannot_link_constraints:
        i, j = c
        # make sure that always i > j
        if i < j:
            i, j = j, i

        e_i = embedding[i]
        e_j = embedding[j]

        for x in range(1, n):
            for y in range(x):

                if x == i and y == j:
                    val = 2
                else:
                    e_x = embedding[x]
                    e_y = embedding[y]

                    # use l2 norm distance here.
                    d_ex_ej = norm(e_x - e_j)
                    d_ex_ei = norm(e_x - e_i)
                    v_x = (d_ex_ej - d_ex_ei) / (d_ex_ej + d_ex_ei)

                    d_ey_ej = norm(e_y - e_j)
                    d_ey_ei = norm(e_y - e_i)
                    v_y = (d_ey_ej - d_ey_ei) / (d_ey_ej + d_ey_ei)

                    val = np.abs(v_x - v_y)

                d_p[square_to_condensed(x, y,
                                        n)] += np.power(val * alpha, norm_p)

    return np.power(d_p, 1 / float(norm_p))
Example #32
def get_spectral_embedding_dist(dist_matrix, n_dims=3, gamma=0.0625, normalize=True, random_state=SEED):
    # see previous fn
    # this needs to be 64 bit for stability
    dist_matrix = dist_matrix.astype("float64")
    affinity = np.exp(-gamma * dist_matrix * dist_matrix)
    transformer = SpectralEmbedding(n_components=n_dims, affinity="precomputed", random_state=random_state)
    transformed = transformer.fit_transform(affinity)
    # natural scale is dicey on this one. rescale to uni-ish variance
    var = np.var(transformed, 0)
    mean = np.mean(transformed, 0)
    if normalize:
        transformed = normalize_var(transformed)
    return transformed
Example #33
def se(features):
    """
	Adds the two SE components to the features table.
	"""

    X = features.values
    se = SpectralEmbedding(n_components=2)
    Y = se.fit_transform(X)

    features['se1'] = Y[:, 0]
    features['se2'] = Y[:, 1]

    return features
Example #34
def get_spectral_emb(adj, max_size):
    """
    Given an N*N adjacency matrix, return its N*D feature matrix (D is fixed by the model)
    :param adj: adjacency matrix
    :param max_size: the number of embedding dimensions
    :return: spectral embedding of every node in this graph
    """

    adj_ = adj.data.cpu().numpy()
    emb = SpectralEmbedding(n_components=max_size)
    res = emb.fit_transform(adj_)
    x = torch.from_numpy(res).float().cuda()
    return x
Example #35
def init_hybrid(exp_mat, epi_mat, thresh, nclust):
    num_components = epi_mat.shape[1] - 1
    corr_dist = 1.0 - squareform(pdist(exp_mat.transpose(), metric="cosine"))
    spec = SpectralEmbedding(n_components=num_components,
                             affinity="precomputed")
    spec_coord = spec.fit_transform(corr_dist)
    epi_bin = (epi_mat > 0.0).astype(float)
    full_coord = np.concatenate((spec_coord, epi_bin), 1)
    kmeans = KMeans(n_clusters=nclust).fit(full_coord)
    tf_matrix = (kmeans.cluster_centers_[:, num_components:] >
                 thresh).astype(float).transpose()
    gene_matrix = binarize_vector(kmeans.labels_, nclust)
    return tf_matrix, gene_matrix
Example #36
def swiss_roll_test():


    n_points = 1000
    X, color = datasets.make_s_curve(n_points, random_state=0)
    n_neighbors=20
    n_components=2

    # original LE algorithm


    t0 = time()
    ml_model = SpectralEmbedding(n_neighbors=n_neighbors,
                                 n_components=n_components)
    Y = ml_model.fit_transform(X)
    t1 = time()

    # 2d projection
    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(5,10))
    ax[0].scatter(Y[:,0], Y[:,1], c=color, label='scikit')
    ax[0].set_title('Sklearn-LE: {t:.2g}'.format(t=t1-t0))


    # Jakes LPP Algorithm

    t0 = time()
    ml_model = LocalityPreservingProjection(n_components=n_components)
    ml_model.fit(X)
    Y = ml_model.transform(X)
    t1 = time()

    ax[1].scatter(Y[:,0], Y[:,1], c=color, label='Jakes Algorithm')
    ax[1].set_title('Jakes LPP: {t:.2g}'.format(t=t1-t0))

    # my SSSE algorithm

    t0 = time()
    ml_model = LocalityPreservingProjections(weight='angle',
                                             n_components=n_components,
                                             n_neighbors=n_neighbors,
                                             sparse=True,
                                             eig_solver='dense')
    ml_model.fit(X)
    Y = ml_model.transform(X)
    t1 = time()

    ax[2].scatter(Y[:,0], Y[:,1], c=color, label='My LPP Algorithm')
    ax[2].set_title('My LPP: {t:.2g}'.format(t=t1-t0))

    plt.show()
Example #37
def demo(k):
    X, t = make_swiss_roll(noise=1)

    le = SpectralEmbedding(n_components=2, n_neighbors=k)
    le_X = le.fit_transform(X)

    ler = LER(n_components=2, n_neighbors=k, affinity='rbf')
    ler_X = ler.fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=3, figsize=plt.figaspect(0.33))
    axes[0].set_axis_off()
    axes[0] = plt.subplot(131, projection='3d')
    axes[0].scatter(*X.T, c=t, s=50)
    axes[0].set_title('Swiss Roll')
    axes[1].scatter(*le_X.T, c=t, s=50)
    axes[1].set_title('LE Embedding')
    axes[2].scatter(*ler_X.T, c=t, s=50)
    axes[2].set_title('LER Embedding')
    plt.show()
Example #38
def swiss_roll_test():

    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    from time import time

    from sklearn import manifold, datasets
    from sklearn.manifold import SpectralEmbedding

    n_points = 1000
    X, color = datasets.make_s_curve(n_points, random_state=0)
    n_neighbors=10
    n_components=2

    # original scikit-learn LE algorithm
    t0 = time()
    ml_model = SpectralEmbedding(affinity='nearest_neighbors',
                                 n_neighbors=n_neighbors,
                                 n_components=n_components)
    Y = ml_model.fit_transform(X)
    t1 = time()

    # 2d projection
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(5,10))
    ax[0].scatter(Y[:,0], Y[:,1], c=color, label='scikit')
    ax[0].set_title('Sklearn LE: {t:.2g}'.format(t=t1-t0))

    # my Laplacian Eigenmaps algorithm

    t0 = time()
    ml_model = LaplacianEigenmaps(n_components=n_components,
                                  n_neighbors=n_neighbors)
    ml_model.fit(X)
    Y = ml_model.fit_transform(X)
    t1 = time()

    ax[1].scatter(Y[:,0], Y[:,1], c=color, label='My LE Algorithm')
    ax[1].set_title('My LE: {t:.2g}'.format(t=t1-t0))

    plt.show()
Example #39
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
	"""
	Plot data transformed into two dimensions by PCA. 
	PCA transforms into a new embedding dimension such that 
	the first dimension contains the maximal variance and following 
	dimensions maximal remaining variance. 
	This shoudl spread the observed n-dimensional data maximal. This 
	is unsupervised and will not consider target values. 
	"""
	if (scale): 
		scaler = StandardScaler()
		X = scaler.fit_transform(X)

	if (normalize): 
		normalizer = Normalizer(norm='l2')
		X = normalizer.fit_transform(X)
		
	if (embedding == 'pca'):
		pca = PCA(n_components=2)
		X_transformed = pca.fit_transform(X)
	elif (embedding == 'isomap'):
		isomap = Isomap(n_components=2, n_neighbors=20)
		X_transformed = isomap.fit_transform(X)
	elif (embedding == 'lle'):
		lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
		X_transformed = lle.fit_transform(X)
	elif (embedding == 'tsne'):
		t_sne = TSNE(n_components=2)
		X_transformed = t_sne.fit_transform(X)
	elif (embedding == 'spectral'):
		se = SpectralEmbedding(n_components=2)
		X_transformed = se.fit_transform(X)
	elif (embedding == 'mds'):
		mds = MDS(n_components=2)
		X_transformed = mds.fit_transform(X)
	elif (embedding == 'gallery'):
		plt.figure(1)
		
		plt.subplot(231)
		plt.title('pca')
		X_t = PCA(n_components=2).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(232)
		plt.title('isomap')
		X_t = Isomap(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(233)
		plt.title('lle')
		X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(234)
		plt.title('tsne')
		X_t = TSNE().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(235)
		plt.title('spectral')
		X_t = SpectralEmbedding().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(236)
		plt.title('mds')
		X_t = MDS().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.suptitle('Gallery transforms ' + title)

		return plt
	else:
		raise ValueError("Choose one of pca, isomap, lle, tsne, spectral, mds or gallery")

	plt.title(title + ' ' + embedding + ' plot')
	sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
	plt.colorbar(sc)
	return plt
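
A usage sketch on a labeled dataset (the digits set here is just an example):

from sklearn.datasets import load_digits

digits = load_digits()
plot2d(digits.data, digits.target, embedding='spectral', title='digits').show()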
Example #40
# 2 circles
img = circle1 + circle2
mask = img.astype(bool)
img = img.astype(float)

img += 1 + 0.1 * np.random.randn(*img.shape)

graph = image.img_to_graph(img, mask=mask)
graph.data = np.exp(-graph.data / graph.data.std())

se = SpectralEmbedding(n_components=5, affinity='precomputed')
Y = se.fit_transform(graph)

for j in range(0, se.n_components):
    pl.subplot(1, se.n_components, j + 1)
    label_im = -np.ones(mask.shape)
    label_im[mask] = Y[:, j]
    pl.title('Eigen Vec. %i' % (j + 1), size=18)
    pl.imshow(label_im, cmap=pl.cm.Spectral)
pl.show()

pl.figure()
pl.scatter(Y[:, 1], Y[:, 2], c=Y[:, 0], cmap=pl.cm.Spectral)

Example #41
def apply_spectral_embedding(proj_data, proj_weights=None):
    model = SpectralEmbedding(n_components=2, random_state=RANDOM_SEED)
    norm_data = normalize_columns(proj_data)
    result = model.fit_transform(norm_data.T)
    return result
Example #42
import numpy as np
from scipy.sparse.linalg import eigsh

app = service.prodbox.CinemaService()

X = app.getWeightedSearchFeatures(15)

graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components = 30, algorithm="arpack")
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)

eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)

eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)

print(eigenvalues2)

se = SpectralEmbedding(n_components = 30, eigen_solver='arpack', affinity="nearest_neighbors")
se.fit(X)

app.quit()

# TODO : check budget distribution, draw budget conditionnaly
out = connected_components(graph)

Example #43
import pickle
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.decomposition import PCA
from sklearn.manifold import SpectralEmbedding
(X, Y, frac0, frac1) = pickle.load( open( "mydataset.p", "rb" ) )
σ = 0.4
γ = 1.0/(2*σ*σ)

#	PCA
pca = PCA(n_components=1, svd_solver='full')
pca_out = pca.fit_transform(X)                 


#	Kernel PCA (via SpectralEmbedding on the centered RBF kernel)
embedding = SpectralEmbedding(n_components=1)
K = sklearn.metrics.pairwise.rbf_kernel(X, gamma=γ)
H = np.eye(800) - (1.0/800)*np.ones((800,800))
K = H.dot(K).dot(H)
Xout = embedding.fit_transform(K)



plt.figure(figsize=(10,3))
plt.subplot(131)
plt.plot(X[Y == 0][:,0], X[Y == 0][:,1], 'go')
plt.plot(X[Y == 1][:,0], X[Y == 1][:,1], 'bx')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Data in original space')
Example #44
km_model = KMeans(n_clusters = K, n_init = 1) 
results_KM = np.zeros((trials, 3))
for i in range(trials):
    ypred = km_model.fit_predict(train_x)
    nmi = metrics.adjusted_mutual_info_score(train_y, ypred)
    ari = metrics.adjusted_rand_score(train_y, ypred)
    ac  = acc(ypred, train_y)
    results_KM[i] = np.array([nmi, ari, ac])

KM_mean = np.mean(results_KM, axis = 0)
KM_std  = np.std(results_KM, axis = 0)   
# Perform SC    
print('SC started...')
results_SC = np.zeros((trials, 3))
se_model = SpectralEmbedding(n_components=K, affinity='rbf', gamma = 0.1)
se_vec = se_model.fit_transform(train_x)
for i in range(trials):
    ypred = km_model.fit_predict(se_vec)
    nmi = metrics.adjusted_mutual_info_score(train_y, ypred)
    ari = metrics.adjusted_rand_score(train_y, ypred)
    ac  = acc(ypred, train_y)
    results_SC[i] = np.array([nmi, ari, ac])

SC_mean = np.mean(results_SC, axis = 0)
SC_std  = np.std(results_SC, axis = 0)   

# for PenDigits, perform DCN and SAE+KM
config = {'Init': '',
          'lbd': .5, 
          'beta': 1, 
Example #45
        geomMeanOfD = np.array(np.sqrt(val1 * val2)).reshape(1, -1)
        tmpAdj = (groupV * geomMeanOfD).dot(groupV.T)
        tmpRowMatrix.append(tmpAdj)
    tmpRowMatrix = np.hstack(tmpRowMatrix)
    simMatAllTimes.append(tmpRowMatrix)
simMatAllTimes = np.vstack(simMatAllTimes)

simMatAllTimes = setDiagToOne(forcePosDef(simMatAllTimes))

# tsneModel = TSNE(n_components=2, metric='precomputed',
#                 method='barnes_hut', perplexity=30.0)
# #method='barnes_hut'
# #method='exact'
# distMat = 1.0 / (simMatAllTimes + 1.01)

tsneModel = SpectralEmbedding(n_components=2, affinity='precomputed',
                              gamma=1.0, n_neighbors=6)

distMat = simMatAllTimes + 1.0


tsneModel = tsneModel.fit(distMat)
sizeScale = np.abs(tsneModel.embedding_.ravel()).max()
tsneModel.embedding_ /= sizeScale

#############################
#
# Plotting
#

# set plot parameters
Example #46
class BranchedEmbeddedGaussians(object):

    def __init__(self, n_nodes=None, 
                 npcs=0.8, 
                 embedding_dims=2,
                 cov_estimator='corpcor', 
                 cov_reg=None, 
                 cov_indices=None,
                 max_iter=10,
                 sigma=0.01, 
                 lam=1., 
                 gamma=1.,
                 n_neighbors=30,
                 just_tree=False):
        self.n_nodes = n_nodes
        self.cov_reg = cov_reg
        self.cov_estimator = cov_estimator
        self.cov_indices = cov_indices
        self.max_iter = max_iter
        self.sigma = sigma
        self.lam = lam
        self.gamma = gamma
        self.npcs = npcs
        self.n_neighbors = n_neighbors
        self.embedding = SpectralEmbedding(n_components=embedding_dims, 
                                           affinity='precomputed')
        self.pca = PCA(n_components=self.npcs)
        self.just_tree = just_tree

    def fit(self, data_array):
        n_samples, n_dims = data_array.shape
        if self.n_nodes is None:
            self.n_nodes = 0.1
        if type(self.n_nodes) == float:
            self.n_nodes = int(max(2, np.round(n_samples * self.n_nodes)))

        self._pca_tx = self.pca.fit_transform(data_array)
        self._affinity = adaptive_rbf_matrix(data_array,
                                             n_neighbors=self.n_neighbors)
        self._embedding_tx = self.embedding.fit_transform(self._affinity)

        pt = PrincipalGraph(gstruct='span-tree', gamma=self.gamma, 
                            sigma=self.sigma, max_iter=self.max_iter,
                            lam=self.lam, n_nodes=self.n_nodes)
        pt.fit(self._embedding_tx)
        self._pt = pt
        self.graph = pt.graph
        self.node_positions = self._pt.node_positions

        if self.cov_indices is None:
            cov_indices = np.arange(0, n_samples)
        else:
            cov_indices = self.cov_indices
        if not self.just_tree:
            self.means, self.covariances = self._calculate_gaussian_params(
                data_array, self._pt._probabilities, cov_indices)


    def _map_samples_to_nodes(self, 
                              data_array, 
                              means, 
                              covs):
        mapping = np.zeros([data_array.shape[0]]) - 1
        mapping_probs = np.zeros([data_array.shape[0], means.shape[0]])
        for i in range(means.shape[0]):
            mapping_probs[:, i] = stats.multivariate_normal.pdf(data_array[:, self.cov_indices], 
                                                                mean=means[i, self.cov_indices], 
                                                                cov=covs[i, :, :], allow_singular=True)
        
        for i in range(data_array.shape[0]):
            mapping[i] = np.argmax(mapping_probs[i, :])
        return mapping, mapping_probs

    def _calculate_gaussian_params(self, 
                                   data_array, 
                                   mapping_probs, 
                                   cov_indices):
        means = np.zeros([self.n_nodes, data_array.shape[1]])
        cov_dim = len(cov_indices)
        covariances = np.zeros([self.n_nodes, cov_dim, cov_dim])
        for i in range(self.n_nodes):
            weights = mapping_probs[:, i]
            weights = np.reshape(weights, [len(weights), 1])
            weighted_data = weights * data_array
            means[i, :] = weighted_data.sum(axis=0)
            if self.cov_reg is None:
                covariances[i, :, :] = np.copy(corpcor.cov_shrink(data_array[:, cov_indices], 
                                                                  weights=weights))
            else:
                covariances[i, :, :] = np.copy(corpcor.cov_shrink(data_array[:, cov_indices],
                                                                  weights=weights, 
                                                                  **{'lambda':self.cov_reg}))
        return means, covariances


    def predict_proba(self, data_array):
        mapping, mapping_probs = self._map_samples_to_nodes(data_array, 
                                                            self.means, 
                                                            self.covariances)
        return mapping, mapping_probs

    def predict(self, data_array):
        mapping, _ = self.predict_proba(data_array)
        return mapping