示例#1
0
def clusterize_mat(X, n_clusters, reord_mat=False, reord_method='eta-trick'):
    """
    Project X on a block-diagonal matrix made of contiguous index blocks.

    The blocks are obtained by cutting the index range [0, N) at the
    (n_clusters - 1) places where the first coordinate of the spectral
    embedding of ``X - X.min()`` jumps the most between two consecutive
    indices. Entries of X outside the diagonal blocks are set to zero.

    Parameters
    ----------
    X : array, shape (N, N)
        Dense square matrix (it is shifted by its minimum before being handed
        to ``spectral_embedding``).

    n_clusters : int
        Number of contiguous blocks requested. If it equals 1, X is returned
        untouched and no embedding is computed.

    reord_mat : boolean, default False
        If True, each diagonal block is additionally passed to a seriation
        solver and the concatenated per-block orderings are returned.

    reord_method : string, default 'eta-trick'
        Solver used when reord_mat is True : 'eta-trick' selects
        SpectralEtaTrick(n_iter=10), 'mdso' selects SpectralOrdering(), any
        other value selects SpectralBaseline().

    Returns
    -------
    S_clus : array, shape (N, N)
        Block-diagonal projection of X (X itself when n_clusters == 1).

    permu : array of int, only if reord_mat is True
        Concatenation, block after block, of the original indices reordered
        by the solver (np.arange(N) when n_clusters == 1).
    """
    N = X.shape[0]

    # Single block: nothing to cut, so skip the (costly) embedding and the
    # solver construction altogether.
    if n_clusters == 1:
        if reord_mat:
            return (X, np.arange(N))
        return (X)

    ebd = spectral_embedding(X - X.min(),
                             norm_laplacian='random_walk',
                             norm_adjacency=False)
    fied_vec = ebd[:, 0]
    fied_diff = abs(fied_vec[1:] - fied_vec[:-1])

    # fied_diff[i] is the jump between items i and i+1, hence the block that
    # follows the cut must start at i+1 (using i would leave item i on the
    # wrong side of the cut, and a jump at i=0 would create an empty block).
    cuts = np.argsort(-fied_diff)[:n_clusters - 1] + 1
    bps = np.sort(np.concatenate(([0], cuts, [N])))

    if reord_mat:
        if reord_method == 'eta-trick':
            my_method = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            my_method = SpectralOrdering()
        else:
            my_method = SpectralBaseline()
        permu = np.zeros(0, dtype='int32')

    S_clus = np.zeros((N, N))
    # There are at most N-1 possible cuts: bound the loop by the number of
    # break-points actually found so that n_clusters > N does not overflow.
    for k_ in range(min(n_clusters, len(bps) - 1)):
        lo, hi = bps[k_], bps[k_ + 1]
        if lo == hi:
            print("empty cluster!")
            continue

        # Projection on block matrices
        S_clus[lo:hi, lo:hi] = X[lo:hi, lo:hi]

        if reord_mat:
            in_clst = np.arange(lo, hi)
            sub_mat = X[lo:hi, lo:hi]
            sub_perm = my_method.fit_transform(sub_mat - sub_mat.min())
            # map the within-block ordering back to global indices
            permu = np.append(permu, in_clst[sub_perm])

    if reord_mat:
        return (S_clus, permu)
    return (S_clus)
示例#2
0
def ser_dupli_alt_clust2(A,
                         C,
                         seriation_solver='eta-trick',
                         n_iter=100,
                         n_clusters=8,
                         do_strong=False,
                         include_main_diag=True,
                         do_show=True,
                         Z_true=None):
    """
    Alternate between reordering, projection with ``proj2Rmat`` and
    projection with ``proj2dupli`` on a matrix lifted from A.

    A (n_ x n_) is lifted to an N x N matrix S_t = Z.T @ dc @ A @ dc @ Z,
    where N = sum(C), Z is the n_ x N 0/1 matrix whose row ii holds C[ii]
    consecutive ones, and dc = diag(1 / C). Each iteration then
      1. reorders S_t with the seriation solver,
      2. projects the reordered matrix with ``proj2Rmat`` and shifts the
         result so that its minimum is 0,
      3. keeps the diagonal blocks delimited by the break-points ``bps``,
      4. projects back with ``proj2dupli`` to get the next S_t.

    Parameters
    ----------
    A : array, shape (n_, n_)
        Square input matrix.

    C : sequence of int, length n_
        Number of columns of Z assigned to each row of A.

    seriation_solver : string, default 'eta-trick'
        'mdso' selects SpectralOrdering(norm_laplacian='random_walk'),
        'eta-trick' selects SpectralEtaTrick(n_iter=10), any other value
        selects SpectralBaseline().

    n_iter : int, default 100
        Number of iterations.

    n_clusters : int, default 8
        Currently has no effect : break-point detection is disabled and a
        single block [0, N) is always used.

    do_strong : boolean, default False
        Forwarded to ``proj2Rmat``.

    include_main_diag : boolean, default True
        Forwarded to ``proj2Rmat`` and ``proj2dupli``.

    do_show : boolean, default True
        If True, call ``visualize_mat`` at every iteration.

    Z_true : array or None, default None
        If provided (and do_show is True), the mean distance returned by
        ``eval_assignments(Z, Z_true)`` is appended to the plot title.

    Returns
    -------
    (S_t, Z, R_t) : the last lifted matrix, the permuted assignment matrix
        and the last output of ``proj2Rmat`` (shifted to be non-negative).
        R_t is None when n_iter <= 0.
    """
    (n_, n1) = A.shape
    n2 = len(C)
    assert (n_ == n1 and n_ == n2)

    counts = np.asarray(C, dtype=int)
    N = int(counts.sum())

    # Initialization : row ii of Z has counts[ii] consecutive ones.
    Z = np.repeat(np.eye(n_), counts, axis=1)
    dc = np.diag(1. / np.asarray(C, dtype=float))
    S_t = Z.T @ dc @ A @ dc @ Z
    max_val = A.max()

    R_t = None
    if n_iter <= 0:
        # nothing to iterate on : return the initial lifted matrix
        return (S_t, Z, R_t)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:  # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    # Iterate
    for it in range(n_iter):
        # Reorder the matrix
        permu = my_solver.fit_transform(S_t)
        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        R_t = proj2Rmat(S_tp,
                        do_strong=do_strong,
                        include_main_diag=include_main_diag,
                        verbose=0,
                        u_b=max_val)
        print(R_t.min())
        R_t -= R_t.min()

        # only used for visualization below
        ebd = spectral_embedding(R_t, norm_laplacian=False)

        # Break-point detection is disabled for now : one block spanning
        # the whole index range, whatever n_clusters is.
        bps = np.array([0, N])
        print(bps)

        Z = Z[:, permu]

        # Projection on block matrices.
        # TODO: adapt to the sparse case
        S_clus = np.zeros((N, N))
        permu2 = np.zeros(0, dtype='int32')
        # Loop over the blocks that bps actually defines (looping over
        # range(n_clusters) would index past the end of bps).
        for k_ in range(len(bps) - 1):
            lo, hi = bps[k_], bps[k_ + 1]
            # blocks are kept in place (no within-block reordering yet)
            permu2 = np.append(permu2, np.arange(lo, hi))
            S_clus[lo:hi, lo:hi] = R_t[lo:hi, lo:hi]

        # alpha_ mixes the block projection with the previous iterate;
        # it is fixed to 0, i.e., only the block projection is kept.
        alpha_ = 0.
        S_clus = (1 - alpha_) * S_clus + alpha_ * S_t
        S_tp = S_clus.copy()[permu2, :]
        S_tp = S_tp.T[permu2, :].T

        Z = Z[:, permu2]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, _ = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
            visualize_mat(S_t, S_tp, R_t, Z, ebd, title, Z_true=Z_true)

        S_t = proj2dupli(S_tp,
                         Z,
                         A,
                         u_b=max_val,
                         k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z, R_t)
示例#3
0
def spectral_eta_trick2(X, n_iter=50, dh=1, p=1, return_score=False,
                       do_plot=False, circular=False, norm_laplacian=None,
                       norm_adjacency=None, eigen_solver=None,
                       scale_embedding=False,
                       add_momentum=None):
    """
    Variant of the Spectral Eta-trick Algorithm from
    https://arxiv.org/pdf/1806.00664.pdf
    where the eta weights are computed from several dimensions of the
    spectral embedding instead of the ordering alone.

    At each iteration, X is divided entry-wise by the current eta weights,
    the result is embedded with ``spectral_embedding`` (default arguments),
    and the candidate ordering is the argsort of the first embedding
    coordinate. The ordering with the lowest p-SUM score is kept.

    Parameters
    ----------
    X : array or sparse matrix, shape (n, n)
        Similarity matrix.

    n_iter : int, default 50
        Maximum number of iterations.

    p : int, default 1
        Exponent forwarded to ``p_sum_score``.

    return_score : boolean, default False
        Whether to also return the best score.

    do_plot : boolean, default False
        If True, call ``plot_mat`` on X with the current ordering at each
        iteration.

    dh, circular, norm_laplacian, norm_adjacency, eigen_solver,
    scale_embedding, add_momentum :
        Accepted for signature compatibility with the other eta-trick
        functions ; they are not used by this variant.

    Returns
    -------
    best_perm : array of int, shape (n,)
        Ordering with the lowest score found (np.arange(n) if n < 3).

    best_score : only if return_score is True
        Score of best_perm. It is -1 if n < 3, and the initial bound
        n**(p+2) if no candidate ordering scored below it.
    """

    (n, n2) = X.shape
    assert(n == n2)

    if n < 3:
        best_perm = np.arange(n)
        if return_score:
            return(best_perm, -1)
        else:
            return(best_perm)

    best_perm = np.arange(n)
    best_score = n**(p+2)

    if issparse(X):
        if not isinstance(X, coo_matrix):
            X = coo_matrix(X)

        r, c = X.row, X.col
        eta_vec = np.ones(len(X.data))
        d_ = 3  # number of embedding dimensions used for the eta weights

        for it in range(n_iter):

            X_w = X.copy()
            X_w.data /= eta_vec

            embedding = spectral_embedding(X_w)
            new_perm = np.argsort(embedding[:, 0])

            if np.all(new_perm == best_perm):
                break
            # NOTE(review): this flip maps the ordering to (n-1) - new_perm
            # and reverses the rows of the embedding, while r and c below
            # still refer to the original row labels -- confirm this is the
            # intended orientation fix (new_perm[::-1] may have been meant).
            if new_perm[0] > new_perm[-1]:
                embedding = embedding[::-1, :]
                new_perm *= -1
                new_perm += (n-1)

            new_score = p_sum_score(X, permut=new_perm, p=p)
            if new_score < best_score:
                best_perm = new_perm
                # remember the score too, otherwise any later (worse)
                # candidate below the initial bound would replace best_perm
                best_score = new_score

            # l1 distance between the embedded end-points of each entry
            eta_vec = np.sum(abs(embedding[r, :d_] - embedding[c, :d_]),
                             axis=1)

            if do_plot:
                title = "it %d, %d-SUM: %1.5e" % (it, p, new_score)
                plot_mat(X, permut=new_perm, title=title)

    else:
        eta_mat = np.ones((n, n))
        d_ = min(n-1, 5)  # number of embedding dimensions used

        for it in range(n_iter):

            X_w = np.divide(X, eta_mat)
            embedding = spectral_embedding(X_w)
            new_perm = np.argsort(embedding[:, 0])

            new_score = p_sum_score(X, permut=new_perm, p=p)
            if new_score < best_score:
                best_perm = new_perm
                best_score = new_score

            # eta_ij = 1_{i=j} + sum_k |rank_k(i) - rank_k(j)| / (1 + k),
            # where rank_k is the rank along the k-th embedding dimension.
            # The identity keeps the diagonal away from zero.
            eta_mat = np.identity(n)
            for dim in range(d_):
                ranks = np.argsort(np.argsort(embedding[:, dim]))
                scaled = (1./(1 + dim)) * ranks
                eta_mat = eta_mat + abs(scaled[np.newaxis, :] -
                                        scaled[:, np.newaxis])

            if do_plot:
                title = "it %d, %d-SUM: %1.5e" % (it, p, new_score)
                plot_mat(X, permut=new_perm, title=title)

    if return_score:
        return(best_perm, best_score)
    else:
        return(best_perm)
示例#4
0
def _rank_gap_eta(embedding, rows, cols, start, n, n_dims, dh, circular,
                  avg_scaling):
    """
    Add to `start` the eta weights built from the n_dims first dimensions of
    `embedding` : for each dimension k, |rank_k[rows] - rank_k[cols]|
    (wrapped around n if circular), floored at dh, and divided by sqrt(1 + k)
    if avg_scaling. rank_k is the rank of each item along dimension k.
    """
    eta = start
    for dim in range(n_dims):
        ranks = np.argsort(np.argsort(embedding[:, dim]))
        gap = abs(ranks[rows] - ranks[cols])
        if circular:
            gap = np.minimum(gap, n - gap)

        gap = np.maximum(dh, gap)

        if avg_scaling:
            gap = gap * 1./np.sqrt(1 + dim)

        eta = eta + gap
    return eta


def spectral_eta_trick3(X, n_iter=50, dh=1, score_function='Huber', return_score=False,
                        do_plot=False, circular=False, norm_laplacian=None,
                        norm_adjacency=None, eigen_solver=None,
                        scale_embedding=False,
                        add_momentum=None,
                        avg_dim=1, avg_scaling=True):
    """
    Performs Spectral Eta-trick Algorithm from
    https://arxiv.org/pdf/1806.00664.pdf
    which calls several instances of the Spectral Ordering baseline (Atkins) to
    try to minimize 1-SUM or Huber-SUM (instead of 2-SUM)
    with the so-called eta-trick.

    At each iteration, X is divided entry-wise by the current eta weights,
    embedded with ``spectral_embedding``, and the candidate ordering is the
    argsort of the first embedding coordinate. The ordering with the best
    (lowest) score according to ``compute_score`` is kept.

    Parameters
    ----------
        X : array or sparse matrix, shape (n, n)
            Similarity matrix.

        n_iter : int, default 50
            Number of iterations.

        dh : int, default 1
            Parameter forwarded to ``compute_score`` (Huber / R2S scores).
            It is also the lower bound applied to each rank gap when the
            eta weights are updated.

        score_function : string, default 'Huber'
            Which score is used to select the best permutation ; forwarded to
            ``compute_score``. Either '1SUM', '2SUM', 'Huber', 'R2S'
            (robust 2SUM function from the paper).
            If Huber or R2S, it is computed with the parameter dh provided.
            By design, the algorithm seeks to minimize the Huber loss. However,
            we keep the permutation that yields the best score amongst all,
            according to the score computed with score_function.

        return_score : boolean, default False.
            Whether to return the best score (computed with score function) or
            not.

        do_plot : boolean, default False
            If True, call ``plot_mat`` on X with the current ordering at each
            iteration.

        circular : boolean, default False
            Whether we wish to find a circular or a linear ordering. If True,
            a rank gap g is replaced by min(g, n - g).

        norm_laplacian, norm_adjacency, eigen_solver, scale_embedding :
            default None, None, None, False
            Forwarded unchanged to ``spectral_embedding`` ; see that function
            for the accepted values and for how None is interpreted.

        add_momentum : Nonetype or float, default None.
            gamma parameter in the paper, meant to set
            eta_{t+1} = gamma * eta_t + (1-gamma) * eta^*.
            NOTE : momentum is not implemented in this function ; the value
            is accepted but has no effect.

        avg_dim : int, default 1.
            Number of dimensions to use in the spectral embedding.
            If d = 1, it is the regular eta trick with eta = |pi_i - pi_j|.
            If d > 1, instead we sum |pi^k_i - pi^k_j| over the d first
            dimensions, where pi^k is the permutation that sorts the
            coordinates of the k-th dimension of the spectral embedding (not
            just the first, which is the Fiedler vector).
            At most n-1 dimensions are used.

        avg_scaling : boolean, default True.
            If True, the k-th term of the previous sum (k starting at 0) is
            weighted by 1/sqrt(1+k).

    Returns
    -------
        best_perm : array of int, shape (n,)
            Ordering with the lowest score found (np.arange(n) if n < 3).

        best_score : only if return_score is True
            Score of best_perm (-1 if n < 3).
    """

    (n, n2) = X.shape
    assert(n == n2)

    if n < 3:
        best_perm = np.arange(n)
        if return_score:
            return(best_perm, -1)
        else:
            return(best_perm)

    best_perm = np.arange(n)
    best_score = compute_score(X, score_function=score_function, dh=dh, perm=None)

    # Embedding dimension requested from spectral_embedding : 8 unless more
    # dimensions are needed for the averaging.
    n_components = avg_dim + 1 if avg_dim > 8 else 8
    d_ = min(avg_dim, n-1)

    if issparse(X):
        if not isinstance(X, coo_matrix):
            X = coo_matrix(X)

        r, c = X.row, X.col
        eta_vec = np.ones(len(X.data))

        for it in range(n_iter):

            X_w = X.copy()
            X_w.data /= eta_vec

            embedding = spectral_embedding(X_w, norm_laplacian=norm_laplacian,
                                           norm_adjacency=norm_adjacency,
                                           eigen_solver=eigen_solver,
                                           scale_embedding=scale_embedding,
                                           n_components=n_components)

            new_perm = np.argsort(embedding[:, 0])

            if np.all(new_perm == best_perm):
                break
            # NOTE(review): this flip maps the ordering to (n-1) - new_perm
            # and reverses the rows of the embedding, while r and c below
            # still refer to the original row labels -- confirm this is the
            # intended orientation fix (new_perm[::-1] may have been meant).
            if new_perm[0] > new_perm[-1]:
                embedding = embedding[::-1, :]
                new_perm *= -1
                new_perm += (n-1)

            new_score = compute_score(X, score_function=score_function, dh=dh, perm=new_perm)
            if new_score < best_score:
                best_perm = new_perm
                # remember the score too, otherwise any later candidate
                # below the initial score would replace best_perm
                best_score = new_score

            eta_vec = _rank_gap_eta(embedding, r, c, np.zeros(len(r)), n, d_,
                                    dh, circular, avg_scaling)

            if do_plot:
                title = "it %d, score: %1.5e" % (it, new_score)
                plot_mat(X, permut=new_perm, title=title)

    else:
        eta_mat = np.ones((n, n))
        # index grids so that the dense case goes through the same helper
        rows, cols = np.indices((n, n))

        for it in range(n_iter):

            X_w = np.divide(X, eta_mat)

            embedding = spectral_embedding(X_w, norm_laplacian=norm_laplacian,
                                           norm_adjacency=norm_adjacency,
                                           eigen_solver=eigen_solver,
                                           scale_embedding=scale_embedding,
                                           n_components=n_components)

            new_perm = np.argsort(embedding[:, 0])

            new_score = compute_score(X, score_function=score_function, dh=dh, perm=new_perm)
            if new_score < best_score:
                best_perm = new_perm
                best_score = new_score

            # start from the identity, as in the original accumulation
            eta_mat = _rank_gap_eta(embedding, rows, cols, np.identity(n), n,
                                    d_, dh, circular, avg_scaling)

            if do_plot:
                title = "it %d, score: %1.5e" % (it, new_score)
                plot_mat(X, permut=new_perm, title=title)

    if return_score:
        return(best_perm, best_score)
    else:
        return(best_perm)
示例#5
0
def get_embedding(adjacency,
                  n_components=8,
                  eigen_solver=None,
                  random_state=None,
                  eigen_tol=1e-15,
                  norm_laplacian=False,
                  drop_first=True,
                  norm_adjacency=False,
                  scale_embedding=False,
                  verb=0,
                  method='spectral'):
    """
    Compute an embedding of `adjacency` with the routine selected by
    `method`, forwarding the other keyword arguments to it.

    Parameters
    ----------
    method : string, default 'spectral'
        'cMDS' calls classical_MDS_embedding,
        'MDS' calls metric_MDS_embedding with metric=True,
        'NMDS' calls metric_MDS_embedding with metric=False,
        'TSNE' calls tSNE_embedding,
        any other value calls spectral_embedding.

    drop_first : boolean, default True
        NOTE : the value passed by the caller is ignored. The underlying
        routine always receives drop_first=False for 'cMDS', 'MDS', 'NMDS'
        and 'TSNE', and drop_first=True for the spectral embedding.

    n_components, eigen_solver, random_state, eigen_tol, norm_laplacian,
    norm_adjacency, scale_embedding, verb :
        Forwarded unchanged to the selected routine.

    Returns
    -------
    embedding : whatever the selected routine returns.
    """
    # Keyword arguments shared by every back-end ; drop_first is forced
    # (see the note in the docstring) rather than taken from the caller.
    options = dict(n_components=n_components,
                   eigen_solver=eigen_solver,
                   random_state=random_state,
                   eigen_tol=eigen_tol,
                   norm_adjacency=norm_adjacency,
                   norm_laplacian=norm_laplacian,
                   drop_first=False,
                   scale_embedding=scale_embedding,
                   verb=verb)

    if method == 'cMDS':
        return classical_MDS_embedding(adjacency, **options)

    if method == 'MDS':
        return metric_MDS_embedding(adjacency, metric=True, **options)

    if method == 'NMDS':
        return metric_MDS_embedding(adjacency, metric=False, **options)

    if method == 'TSNE':
        return tSNE_embedding(adjacency, **options)

    # default : spectral embedding, which drops the first eigenvector
    options['drop_first'] = True
    return spectral_embedding(adjacency, **options)