def run_one_exp(n,
                k,
                dim,
                ampl,
                type_matrix,
                scaled,
                n_avrg,
                type_lap=None,
                type_noise='gaussian'):
    """
    Repeat a seriation experiment n_avrg times and summarize the scores.

    For each repetition the numpy random generator is seeded with the
    repetition index, a new permuted noisy matrix is drawn with
    MatrixGenerator, it is re-ordered with SpectralOrdering, and the ordering
    found is scored against the true permutation with evaluate_ordering
    (the kendall-tau score).

    Parameters
    ----------
    n : forwarded to MatrixGenerator.gen_matrix as the matrix size argument.
    k : forwarded to SpectralOrdering as k_nbrs.
    dim : forwarded to SpectralOrdering as dim.
    ampl : noise amplitude (noise_ampl of gen_matrix).
    type_matrix : name of the matrix type; its first letter decides between
        linear ('L') and circular ('C') seriation.
    scaled : forwarded to SpectralOrdering as scaled.
    n_avrg : number of repetitions.
    type_lap : Laplacian type; 'random_walk' is used when falsy.
    type_noise : noise law (law of gen_matrix).

    Returns
    -------
    (mean, std, scores) where scores is the array of the n_avrg scores.

    Raises
    ------
    ValueError
        If type_matrix starts with neither 'L' nor 'C'.
    """
    # Default Laplacian
    type_lap = type_lap or 'random_walk'

    # The first letter of the matrix type tells linear from circular
    first_letter = type_matrix[0]
    if first_letter not in ('L', 'C'):
        raise ValueError("type matrix must be in ['LinearBanded',"
                         "'CircularBanded', 'LinearStrongDecrease',"
                         "'CircularStrongDecrease']")
    circular = first_letter == 'C'

    generator = MatrixGenerator()
    solver = SpectralOrdering(dim=dim,
                              k_nbrs=k,
                              circular=circular,
                              scaled=scaled,
                              type_laplacian=type_lap,
                              verb=1)

    scores = np.zeros(n_avrg)
    for seed in range(n_avrg):
        # One seed per repetition, so that each draw is reproducible
        np.random.seed(seed)
        generator.gen_matrix(n,
                             type_matrix=type_matrix,
                             apply_perm=True,
                             noise_ampl=ampl,
                             law=type_noise)
        found_perm = solver.fit_transform(generator.sim_matrix)
        scores[seed] = evaluate_ordering(found_perm,
                                         generator.true_perm,
                                         circular=circular)
        print('.', end='')  # progress marker
    print('')

    return (scores.mean(), scores.std(), scores)
Exemplo n.º 2
0
def clusterize_mat(X, n_clusters, reord_mat=False, reord_method='eta-trick'):
    """
    Project X onto block-diagonal matrices with n_clusters contiguous blocks.

    The blocks are contiguous index ranges of the rows/columns of X. They
    are delimited by the n_clusters - 1 largest jumps between consecutive
    entries of the first coordinate of spectral_embedding(X - X.min()).
    Entries of X lying outside the diagonal blocks are set to zero.
    Optionally, each block is also re-ordered with a seriation solver.

    Parameters
    ----------
    X : dense square array (it is indexed as an (N, N) matrix with
        N = X.shape[0]).
    n_clusters : number of blocks. With n_clusters == 1, X is returned as
        is and no embedding is computed.
    reord_mat : if True, also return the concatenation of the orderings
        found inside each block.
    reord_method : 'eta-trick' uses SpectralEtaTrick(n_iter=10), 'mdso' uses
        SpectralOrdering(), anything else uses SpectralBaseline().

    Returns
    -------
    S_clus when reord_mat is False, (S_clus, permu) otherwise. permu holds
    indices into the rows/columns of X.
    """
    N = X.shape[0]

    # Single cluster: nothing to cut, so skip the (costly) embedding and the
    # construction of the solver.
    if n_clusters == 1:
        if reord_mat:
            return (X, np.arange(N))
        return X

    if reord_mat:
        if reord_method == 'eta-trick':
            my_method = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            my_method = SpectralOrdering()
        else:
            my_method = SpectralBaseline()

    ebd = spectral_embedding(X - X.min(),
                             norm_laplacian='random_walk',
                             norm_adjacency=False)
    fied_diff = np.abs(np.diff(ebd[:, 0]))
    # fied_diff[i] is the gap between items i and i + 1, so the cluster on
    # the right of that gap starts at i + 1 (not at i).
    cuts = np.argsort(-fied_diff)[:n_clusters - 1] + 1
    bps = np.sort(np.concatenate(([0], cuts, [N])))

    S_clus = np.zeros((N, N))
    sub_perms = []
    # There can be fewer gaps than requested clusters (small N); never read
    # past the end of bps.
    for k_ in range(min(n_clusters, len(bps) - 1)):
        start, stop = bps[k_], bps[k_ + 1]
        if start == stop:
            print("empty cluster!")
            continue
        # Projection on block matrices
        S_clus[start:stop, start:stop] = X[start:stop, start:stop]

        if reord_mat:
            sub_mat = X[start:stop, start:stop]
            # The subtraction allocates a new array: X is left untouched
            sub_perm = my_method.fit_transform(sub_mat - sub_mat.min())
            sub_perms.append(np.arange(start, stop)[sub_perm])

    if reord_mat:
        if sub_perms:
            permu = np.concatenate(sub_perms)
        else:
            permu = np.zeros(0, dtype='int32')
        return (S_clus, permu)
    return S_clus
Exemplo n.º 3
0
def clusterize_from_bps(X, bps, reord_clusters=True, reord_method=None):
    """
    Keep only the diagonal blocks of X delimited by the breakpoints bps.

    Cluster k is the index range [bps[k], bps[k + 1]). Entries of X whose
    row and column both fall in the same cluster are copied to the output;
    every other entry is zero. Optionally, each cluster with at least three
    items is also re-ordered with a seriation solver (smaller clusters keep
    their order).

    Parameters
    ----------
    X : square 2-D array (squareness is asserted).
    bps : sequence of cluster boundaries, defining len(bps) - 1 clusters.
    reord_clusters : if True, also return the concatenated per-cluster
        orderings.
    reord_method : 'eta-trick' uses SpectralEtaTrick(n_iter=10), 'mdso' uses
        SpectralOrdering(), anything else uses SpectralBaseline().

    Returns
    -------
    S_clus when reord_clusters is False, (S_clus, permu) otherwise.
    """
    n_rows, n_cols = X.shape
    assert (n_rows == n_cols)
    N = n_rows
    n_clusters = len(bps) - 1

    if reord_clusters:
        permu = np.zeros(0, dtype='int32')
        if reord_method == 'eta-trick':
            my_method = SpectralEtaTrick(n_iter=10)
        elif reord_method == 'mdso':
            my_method = SpectralOrdering()
        else:
            my_method = SpectralBaseline()

    flat_in = X.flatten()
    flat_out = np.zeros(N**2)
    for k_ in range(n_clusters):
        members = np.arange(bps[k_], bps[k_ + 1])
        if members.size == 0:
            print("empty cluster!")
            continue

        # Flat positions of all the (row, col) pairs inside the cluster
        rows, cols = np.meshgrid(members, members, indexing='ij')
        flat_pos = np.ravel_multi_index((rows.ravel(), cols.ravel()), (N, N))
        flat_out[flat_pos] = flat_in[flat_pos]  # Projection on block matrices

        if not reord_clusters:
            continue

        if len(members) < 3:
            # Too few items to seriate: keep the current order
            local_order = np.arange(len(members))
        else:
            block = X.copy()[members, :].T[members, :].T
            lowest = block.min()
            # Shift to non-negative values only when needed
            shifted = block - lowest if lowest < 0 else block
            local_order = my_method.fit_transform(shifted)
        permu = np.append(permu, members[local_order])

    S_clus = np.reshape(flat_out, (N, N))

    if reord_clusters:
        return (S_clus, permu)
    return S_clus
Exemplo n.º 4
0
def ser_dupli_alt_clust2(A,
                         C,
                         seriation_solver='eta-trick',
                         n_iter=100,
                         n_clusters=8,
                         do_strong=False,
                         include_main_diag=True,
                         do_show=True,
                         Z_true=None):
    """
    Alternate between seriation and projections, with a block projection.

    Each iteration:
      1. re-orders the current matrix S_t with the seriation solver;
      2. projects the re-ordered matrix with proj2Rmat and shifts the result
         so that its minimum is zero (R_t);
      3. keeps the entries of R_t inside the diagonal blocks delimited by
         the breakpoints bps;
      4. projects the result with proj2dupli, given the assignment matrix Z
         and the input matrix A, to get the next S_t.

    The detection of the breakpoints is currently disabled: bps is always
    [0, N], i.e. there is one block spanning the whole matrix, whatever
    n_clusters is.

    Parameters
    ----------
    A : square array of shape (n_, n_) (asserted).
    C : integer array of length n_ (asserted); N = sum(C). Row ii of the
        initial Z has ones in C[ii] consecutive columns.
    seriation_solver : 'mdso' uses
        SpectralOrdering(norm_laplacian='random_walk'), 'eta-trick' uses
        SpectralEtaTrick(n_iter=10), anything else uses SpectralBaseline().
    n_iter : number of iterations.
    n_clusters : kept for backward compatibility; it has no effect while the
        breakpoint detection is disabled.
    do_strong, include_main_diag : forwarded to proj2Rmat (and
        include_main_diag to proj2dupli).
    do_show : if True, call visualize_mat at each iteration.
    Z_true : optional reference assignment, used only for the plot title.

    Returns
    -------
    (S_t, Z, R_t); R_t is None when n_iter is 0.
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:  # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    # Initialization: row ii of Z has C[ii] consecutive ones
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO : make this faster ?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z

    max_val = A.max()

    R_t = None  # stays None if the loop below does not run

    # Iterate
    for it in range(n_iter):
        # Reorder the matrix
        permu = my_solver.fit_transform(S_t)
        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        R_t = proj2Rmat(S_tp,
                        do_strong=do_strong,
                        include_main_diag=include_main_diag,
                        verbose=0,
                        u_b=max_val)
        print(R_t.min())
        R_t -= R_t.min()

        # The embedding is only used for display
        ebd = spectral_embedding(R_t, norm_laplacian=False)

        # Breakpoint detection is disabled: one block spanning everything
        bps = np.array([0, N])
        print(bps)

        Z = Z[:, permu]

        # Projection on block matrices
        s_clus = np.zeros(N**2)  # TODO: adapt to the sparse case
        s_flat = R_t.flatten()
        permu2 = np.zeros(0, dtype='int32')

        # Loop over the clusters actually described by bps. Looping over
        # range(n_clusters) reads past the end of bps as soon as
        # n_clusters > len(bps) - 1.
        for k_ in range(len(bps) - 1):
            in_clst = np.arange(bps[k_], bps[k_ + 1])
            # No re-ordering inside the clusters for now
            permu2 = np.append(permu2, in_clst)

            iis = np.repeat(in_clst, len(in_clst))
            jjs = np.tile(in_clst, len(in_clst))
            sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
            s_clus[sub_idx] = s_flat[sub_idx]

        # Mix between the block projection and S_t (alpha_ = 0: projection
        # only)
        alpha_ = 0.
        S_clus = (1 - alpha_) * np.reshape(s_clus, (N, N)) + alpha_ * S_t
        S_tp = S_clus.copy()[permu2, :]
        S_tp = S_tp.T[permu2, :].T

        Z = Z[:, permu2]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
            visualize_mat(S_t, S_tp, R_t, Z, ebd, title, Z_true=Z_true)

        S_t = proj2dupli(S_tp,
                         Z,
                         A,
                         u_b=max_val,
                         k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z, R_t)
Exemplo n.º 5
0
def ser_dupli_alt_clust(A,
                        C,
                        seriation_solver='eta-trick',
                        n_iter=100,
                        n_clusters=8,
                        do_strong=False,
                        include_main_diag=True,
                        do_show=True,
                        Z_true=None):
    """
    Alternate between seriation and projections, with periodic clustering.

    Each iteration re-orders S_t with the seriation solver, projects the
    result with proj2Rmat (R_t), permutes the columns of Z accordingly, and
    computes the next S_t with proj2dupli from R_t, Z and A.

    Every 10th iteration (starting at iteration 10), the items are first
    clustered with SpectralClustering applied to R_t.max() - R_t, where R_t
    is the matrix from the previous iteration. Each cluster is re-ordered
    separately, and only the entries of S_t within a same cluster are kept
    before the projection.

    Parameters
    ----------
    A : square array of shape (n_, n_) (asserted).
    C : integer array of length n_ (asserted); N = sum(C). Row ii of the
        initial Z has ones in C[ii] consecutive columns.
    seriation_solver : 'mdso' uses
        SpectralOrdering(norm_laplacian='random_walk'), 'eta-trick' uses
        SpectralEtaTrick(n_iter=10), anything else uses SpectralBaseline().
    n_iter : number of iterations.
    n_clusters : number of clusters of the SpectralClustering step.
    do_strong, include_main_diag : forwarded to proj2Rmat (and
        include_main_diag to proj2dupli).
    do_show : if True, call visualize_mat at each iteration.
    Z_true : optional reference assignment, used only for the plot title.

    Returns
    -------
    (S_t, Z)
    """

    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    else:  # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    cluster_solver = SpectralClustering(n_clusters=n_clusters,
                                        affinity='precomputed')

    # Initialization: row ii of Z has C[ii] consecutive ones
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO : make this faster ?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z

    max_val = A.max()
    # max_val = S_t.max()

    # Composition of all the permutations applied so far (tracked, but not
    # returned)
    perm_tot = np.arange(N)

    # Iterate
    for it in range(n_iter):
        # S_old
        # S_t -= S_t.min()  # to make sure it is non-negative after linprog

        # Global re-ordering; S_t itself is overwritten by its permuted
        # version
        permu1 = my_solver.fit_transform(S_t)
        S_t = S_t[permu1, :]
        S_t = S_t.T[permu1, :].T

        # Cluster the similarity matrix
        # (only at iterations 10, 20, ...; R_t below is the one computed at
        # the previous iteration)
        if (it % 10 == 0) and (it > 9):
            labels_ = cluster_solver.fit_predict(R_t.max() - R_t)

            # Reorder each cluster
            s_clus = np.zeros(N**2)  # TODO: adapt to the sparse case
            s_flat = S_t.flatten()
            permu = np.zeros(0, dtype='int32')
            # permu = np.arange(N)
            for k_ in range(n_clusters):
                in_clst = np.where(labels_ == k_)[0]
                # Sub-matrix of S_t restricted to the cluster
                sub_mat = S_t[in_clst, :]
                sub_mat = sub_mat.T[in_clst, :].T
                sub_perm = my_solver.fit_transform(sub_mat)
                sub_cc = in_clst[sub_perm]

                # inv_sub_perm = np.argsort(sub_perm)
                # permu[in_clst] = sub_cc  # in_clst[inv_sub_perm]
                # permu[in_clst] = in_clst[inv_sub_perm]
                permu = np.append(permu, sub_cc)

                # (iis, jjs) = np.meshgrid(in_clst, in_clst)
                # iis = iis.flatten()
                # jjs = jjs.flatten()
                # Flat positions of all the (row, col) pairs of the cluster
                iis = np.repeat(in_clst, len(in_clst))
                jjs = np.tile(in_clst, len(in_clst))
                sub_idx = np.ravel_multi_index((iis, jjs), (N, N))
                #
                # (iord, jord) = np.meshgrid(sub_cc, sub_cc)
                # iord = iord.flatten()
                # jord = jord.flatten()
                # sub_ord = np.ravel_multi_index((iord, jord), (N, N))
                #
                s_clus[sub_idx] = s_flat[
                    sub_idx]  # Projection on block matrices
                # S_clus[in_clst, :][:, in_clst] += sub_mat

            # NOTE(review): is_identity is computed but never used (the
            # early exit below is commented out)
            is_identity = (np.all(permu == np.arange(N))
                           or np.all(permu == np.arange(N)[::-1]))
            # if is_identity:
            #     break

            # Mix between the block projection and S_t (alpha_ = 0:
            # projection only)
            alpha_ = 0.
            S_clus = (1 - alpha_) * np.reshape(s_clus, (N, N)) + alpha_ * S_t
            # S_clus = np.reshape(s_clus, (N, N))
            S_tp = S_clus.copy()[permu, :]
            # S_tp = S_t.copy()[permu, :]
            S_tp = S_tp.T[permu, :].T
            # S_tp = S_tp.T[permu, :].T

        else:
            # No clustering at this iteration
            permu = np.arange(N)
            S_tp = S_t

        # Compose the global ordering with the per-cluster one
        permu = permu1[permu]

        R_t = proj2Rmat(S_tp,
                        do_strong=do_strong,
                        include_main_diag=include_main_diag,
                        verbose=0,
                        u_b=max_val)
        # R_t = S_tp

        # Apply the same re-ordering to the columns of Z
        Z = Z[:, permu]

        perm_tot = perm_tot[permu]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
                # if is_inv:
                #     Z = Z[:, ::-1]
            visualize_mat(S_t, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_t,
                         Z,
                         A,
                         u_b=max_val,
                         k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z)
Exemplo n.º 6
0
def ser_dupli_alt_clust3(A,
                         C,
                         seriation_solver='eta-trick',
                         n_iter=100,
                         n_clusters=1,
                         do_strong=False,
                         include_main_diag=True,
                         do_show=True,
                         Z_true=None,
                         cluster_interval=10,
                         enforce_sparsity=False):
    """
    Alternate between seriation and projections, with periodic clustering.

    Each iteration re-orders S_t (shifted so that its minimum is zero) with
    the seriation solver, projects the result with proj2Rmat (R_t), permutes
    the columns of Z accordingly and computes the next S_t with proj2dupli.

    Every cluster_interval iterations (except the first one), R_t is split
    into clusters with simple_clusters, which also returns an ordering of
    the items and the cluster breakpoints. When Z_true is given, each
    cluster is then flipped whenever that reduces the mean distance
    returned by eval_assignments.

    Parameters
    ----------
    A : square array of shape (n_, n_) (asserted).
    C : integer array of length n_ (asserted); N = sum(C). Row ii of the
        initial Z has ones in C[ii] consecutive columns.
    seriation_solver : 'mdso' uses
        SpectralOrdering(norm_laplacian='random_walk'), 'eta-trick' uses
        SpectralEtaTrick(n_iter=20), anything else uses SpectralBaseline().
    n_iter : number of iterations.
    n_clusters : number of clusters requested from simple_clusters.
    do_strong, include_main_diag : forwarded to proj2Rmat (and
        include_main_diag to proj2dupli).
    do_show : if True, call visualize_mat at each iteration.
    Z_true : optional reference assignment. It enables the flipping of the
        clusters and the evaluation messages.
    cluster_interval : period (in iterations) of the clustering step.
    enforce_sparsity : forwarded to proj2dupli as k_sparse.

    Returns
    -------
    (S_t, Z, R_clus, S_tp); R_clus and S_tp are None when n_iter is 0.
    """
    (n_, n1) = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    if seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    elif seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=20)
    else:  # use basic spectral Algorithm from Atkins et. al.
        my_solver = SpectralBaseline()

    # Initialization: row ii of Z has C[ii] consecutive ones
    Z = np.zeros((n_, N))
    jj = 0
    for ii in range(n_):  # TODO : make this faster ?
        Z[ii, jj:jj + C[ii]] = 1
        jj += C[ii]
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z

    max_val = A.max()

    # Stay None if the loop below does not run
    R_clus = None
    S_tp = None
    # Breakpoints of the latest clustering (None until the first one)
    bps = None

    # Iterate
    for it in range(n_iter):
        permu = my_solver.fit_transform(S_t - S_t.min())

        S_tp = S_t.copy()[permu, :]
        S_tp = S_tp.T[permu, :].T

        # Clustering before the projection. NOTE(review): the `it > 10000`
        # threshold disables this branch for any usual n_iter.
        if (it % cluster_interval == 0) and (it > 10000):
            R_clus, p2 = simple_clusters(S_tp, n_clusters, reord_clusters=True)
            R_clus = R_clus[p2, :]
            R_clus = R_clus.T[p2, :].T
        else:
            R_clus = S_tp
            p2 = np.arange(N)

        permu = permu[p2]

        R_t = proj2Rmat(R_clus,
                        do_strong=do_strong,
                        include_main_diag=include_main_diag,
                        verbose=0,
                        u_b=max_val)
        print(R_t.min())

        # Clustering after the projection
        if (it % cluster_interval == 0) and (it > 0):
            (R_clus, p2, bps) = simple_clusters(R_t,
                                                n_clusters,
                                                reord_clusters=True,
                                                return_breakpoints=True)
            permu = permu[p2]
        else:
            R_clus = R_t

        Z = Z[:, permu]

        # Flip sub-orderings in clusters if Z_true provided. The evaluation
        # needs Z_true, so nothing is evaluated (nor printed) without it.
        if Z_true is not None:
            mean_dist, _, is_inv = eval_assignments(Z, Z_true)
            print("before rearranging clusters, mean dist : %1.2f" %
                  (mean_dist))
            if bps is not None:
                # Local count: the n_clusters argument must not be
                # overwritten, since it is passed again to simple_clusters
                # at later iterations.
                n_found = len(bps) - 1
                for k_ in range(n_found):
                    Zbis = Z.copy()
                    in_clst = np.arange(bps[k_], bps[k_ + 1])
                    if not in_clst.size:
                        print("empty cluster!")
                        continue
                    mean_dist1, _, _ = eval_assignments(Zbis, Z_true)
                    Zbis[:, in_clst] = Zbis[:, in_clst[::-1]]
                    mean_dist2, _, _ = eval_assignments(Zbis, Z_true)
                    # Keep the flipped cluster only if it is closer to
                    # Z_true
                    if mean_dist2 < mean_dist1:
                        Z[:, in_clst] = Z[:, in_clst[::-1]]
                        permu[in_clst] = permu[in_clst[::-1]]

                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                print("after rearranging clusters, mean dist : %1.2f" %
                      (mean_dist))

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
                # NOTE(review): this flip of Z only happens when do_show is
                # True, so displaying changes the iterates.
                if is_inv:
                    Z = Z[:, ::-1]
            visualize_mat(R_clus, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_clus,
                         Z,
                         A,
                         u_b=max_val,
                         k_sparse=enforce_sparsity,
                         include_main_diag=include_main_diag)

    return (S_t, Z, R_clus, S_tp)
Exemplo n.º 7
0
def ser_dupli_alt(A,
                  C,
                  seriation_solver='eta-trick',
                  n_iter=100,
                  do_strong=False,
                  include_main_diag=True,
                  do_show=True,
                  Z_true=None):
    """
    Alternate between seriation and the two projections.

    Each iteration re-orders S_t with the seriation solver, projects the
    re-ordered matrix with proj2Rmat (R_t), applies the same re-ordering to
    the columns of Z, and computes the next S_t with proj2dupli from R_t, Z
    and A.

    Parameters
    ----------
    A : square array of shape (n_, n_) (asserted).
    C : integer array of length n_ (asserted); N = sum(C). Row ii of the
        initial Z has ones in C[ii] consecutive columns.
    seriation_solver : 'mdso' uses
        SpectralOrdering(norm_laplacian='random_walk'), 'eta-trick' uses
        SpectralEtaTrick(n_iter=10), anything else uses SpectralBaseline().
    n_iter : number of iterations.
    do_strong, include_main_diag : forwarded to proj2Rmat (and
        include_main_diag to proj2dupli).
    do_show : if True, call visualize_mat at each iteration. When Z_true is
        also given and eval_assignments reports an inversion, the columns of
        Z are reversed.
    Z_true : optional reference assignment.

    Returns
    -------
    (S_t, Z)
    """
    n_, n1 = A.shape
    n2 = len(C)
    N = int(np.sum(C))
    assert (n_ == n1 and n_ == n2)

    # Pick the seriation solver; the default is the basic spectral
    # Algorithm from Atkins et. al.
    if seriation_solver == 'eta-trick':
        my_solver = SpectralEtaTrick(n_iter=10)
    elif seriation_solver == 'mdso':
        my_solver = SpectralOrdering(norm_laplacian='random_walk')
    else:
        my_solver = SpectralBaseline()

    # Initial assignment: row `row` owns C[row] consecutive columns
    Z = np.zeros((n_, N))
    first_col = 0
    for row in range(n_):
        last_col = first_col + C[row]
        Z[row, first_col:last_col] = 1
        first_col = last_col
    dc = np.diag(1. / C)

    S_t = Z.T @ dc @ A @ dc @ Z
    max_val = A.max()

    # Composition of all the permutations applied so far
    perm_tot = np.arange(N)

    for it in range(n_iter):
        permu = my_solver.fit_transform(S_t)

        # Whether the solver left the order unchanged, up to a reversal
        # (computed, but not used to stop the iterations)
        is_identity = (np.all(permu == np.arange(N))
                       or np.all(permu == np.arange(N)[::-1]))

        # Symmetric re-ordering of the rows and columns of S_t
        S_tp = S_t.copy()[permu, :].T[permu, :].T

        R_t = proj2Rmat(S_tp,
                        do_strong=do_strong,
                        include_main_diag=include_main_diag,
                        verbose=0,
                        u_b=max_val)
        print(R_t.min())

        Z = Z[:, permu]
        perm_tot = perm_tot[permu]

        if do_show:
            title = "iter {}".format(int(it))
            if Z_true is not None:
                mean_dist, _, is_inv = eval_assignments(Z, Z_true)
                title += " mean dist {}".format(mean_dist)
                if is_inv:
                    Z = Z[:, ::-1]
            visualize_mat(S_t, S_tp, R_t, Z, permu, title, Z_true=Z_true)

        S_t = proj2dupli(R_t,
                         Z,
                         A,
                         u_b=max_val,
                         k_sparse=None,
                         include_main_diag=include_main_diag)

    return (S_t, Z)
Exemplo n.º 8
0
    ax = fig.add_subplot(111)
    plt.scatter(true_inv_perm[iis], true_inv_perm[jjs])

# Parameters for Spectral Ordering
apply_perm = False  # whether to randomly permute the matrix, so that the
# ground truth is not the trivial permutation (1, ..., n).
circular = True

# Call Spectral Ordering method
reord_method = SpectralOrdering(dim=dim,
                                k_nbrs=k_nbrs,
                                circular=circular,
                                scaled=scaled,
                                type_laplacian=type_lap,
                                verb=1,
                                type_new_sim='exp',
                                norm_local_diss=False,
                                norm_sim=False,
                                merge_if_ccs=False,
                                min_cc_len=min_cc_len,
                                do_eps_graph=False,
                                eps_val=95)
# Run the spectral ordering method on the DNA reads similarity matrix
global_perm = reord_method.fit_transform(new_mat)
# sim_mat = new_mat

if do_plots:
    # 3d scatter plot of the first three coordinates of the embedding
    ebd = reord_method.embedding
    fig = plt.figure()
    # Axes3D(fig) no longer attaches the axes to the figure in recent
    # Matplotlib versions (deprecated in 3.4, then removed), which leaves
    # the figure empty. Requesting the '3d' projection works with old and
    # new versions (old ones need mpl_toolkits.mplot3d to be imported).
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(ebd[:, 0], ebd[:, 1], ebd[:, 2])
Exemplo n.º 9
0
# The seriation is circular when the name of the similarity type starts
# with 'C', and linear otherwise
circular = True if type_similarity[0] == 'C' else False  # circular or linear
scaled = 'heuristic'  # whether or not to scale the coordinates of the
# embedding so that the larger dimensions have less importance

# Build data matrix
data_gen = SimilarityMatrix()
data_gen.gen_matrix(n,
                    type_matrix=type_similarity,
                    apply_perm=apply_perm,
                    noise_ampl=ampl_noise,
                    law=type_noise)

# Call Spectral Ordering method
reord_method = SpectralOrdering(n_components=n_components,
                                k_nbrs=k_nbrs,
                                circular=circular,
                                scale_embedding=scaled,
                                norm_laplacian='random_walk')
my_perm = reord_method.fit_transform(data_gen.sim_matrix)
# Replace the new similarity matrix computed by the method with a dense
# array (it exposes .toarray(), so it is presumably a scipy sparse matrix)
reord_method.new_sim = reord_method.new_sim.toarray()
# reord_method.fit(data_gen.sim_matrix)

# Compare the ordering found with the ground truth
score = evaluate_ordering(my_perm, data_gen.true_perm, circular=circular)
print("Kendall-Tau score = {}".format(score))

# Inverse of the true permutation
inv_perm = np.argsort(data_gen.true_perm)
# Display some results
fig, axes = plt.subplots(2, 2)
axes[0, 0].tick_params(
    axis='x',  # changes apply to the x-axis
    which='both',  # both major and minor ticks are affected
Exemplo n.º 10
0
circular = False  # whether we are running Circular or Linear Seriation
scaled = True  # whether or not to scale the coordinates of the embedding so
# that the larger dimensions have less importance

# Build data matrix
data_gen = MatrixGenerator()
data_gen.gen_matrix(n,
                    type_matrix=type_similarity,
                    apply_perm=apply_perm,
                    noise_ampl=ampl_noise,
                    law=type_noise)

# Call Spectral Ordering method
reord_method = SpectralOrdering(dim=dim,
                                k_nbrs=k_nbrs,
                                circular=circular,
                                scaled=scaled,
                                type_laplacian='random_walk')
my_perm = reord_method.fit_transform(data_gen.sim_matrix)
# reord_method.fit(data_gen.sim_matrix)

# Compare the ordering found with the ground truth
score = evaluate_ordering(my_perm, data_gen.true_perm, circular=circular)
print("Kendall-Tau score = {}".format(score))

# Inverse of the true permutation
inv_perm = inverse_perm(data_gen.true_perm)
# Display some results
fig, axes = plt.subplots(2, 2)
axes[0, 0].tick_params(
    axis='x',  # changes apply to the x-axis
    which='both',  # both major and minor ticks are affected
    bottom=False,  # ticks along the bottom edge are off
Exemplo n.º 11
0
def run_one_exp(n,
                k,
                dim,
                ampl,
                type_matrix,
                n_avrg,
                type_noise='gaussian',
                norm_laplacian='unnormalized',
                norm_adjacency=False,
                scale_embedding='heuristic',
                embedding_method='spectral'):
    """
    Repeat a seriation experiment n_avrg times and summarize the scores.

    For each repetition the numpy random generator is seeded with the
    repetition index, a new permuted noisy matrix is drawn with
    SimilarityMatrix, it is re-ordered with SpectralOrdering, and the
    ordering found is scored against the true permutation with
    evaluate_ordering (the kendall-tau score).

    Parameters
    ----------
    n : forwarded to SimilarityMatrix.gen_matrix as the matrix size
        argument.
    k : forwarded to SpectralOrdering as k_nbrs.
    dim : number of components of the embedding; ignored (2 is used
        instead) when embedding_method is 'TSNE'.
    ampl : noise amplitude (noise_ampl of gen_matrix).
    type_matrix : name of the matrix type; its first letter decides between
        linear ('L') and circular ('C') seriation.
    n_avrg : number of repetitions.
    type_noise : noise law (law of gen_matrix).
    norm_laplacian, norm_adjacency, scale_embedding, embedding_method :
        forwarded to SpectralOrdering.

    Returns
    -------
    (mean, std, scores) where scores is the array of the n_avrg scores.

    Raises
    ------
    ValueError
        If type_matrix starts with neither 'L' nor 'C'.
    """
    # The first letter of the matrix type tells linear from circular
    first_letter = type_matrix[0]
    if first_letter not in ('L', 'C'):
        raise ValueError("type matrix must be in ['LinearBanded',"
                         "'CircularBanded', 'LinearStrongDecrease',"
                         "'CircularStrongDecrease']")
    circular = first_letter == 'C'

    generator = SimilarityMatrix()

    # The TSNE embedding is always 2-dimensional; the other methods use dim
    n_components = 2 if embedding_method == 'TSNE' else dim
    solver = SpectralOrdering(n_components=n_components,
                              k_nbrs=k,
                              norm_adjacency=norm_adjacency,
                              norm_laplacian=norm_laplacian,
                              scale_embedding=scale_embedding,
                              circular=circular,
                              merge_if_ccs=True,
                              embedding_method=embedding_method)

    scores = np.zeros(n_avrg)
    for seed in range(n_avrg):
        # One seed per repetition, so that each draw is reproducible
        np.random.seed(seed)
        generator.gen_matrix(n,
                             type_matrix=type_matrix,
                             apply_perm=True,
                             noise_ampl=ampl,
                             law=type_noise)
        found_perm = solver.fit_transform(generator.sim_matrix)
        scores[seed] = evaluate_ordering(found_perm,
                                         generator.true_perm,
                                         circular=circular)

    return (scores.mean(), scores.std(), scores)
Exemplo n.º 12
0
dim = 5  # number of dimensions of the embedding
circular = True  # whether we are running Circular or Linear Seriation
scaled = 'CTD'  # whether or not to scale the coordinates of the embedding so
# that the larger dimensions have less importance
type_lap = 'unnormalized'  # Remark : we have observed strange (and poor)
# results with the normalized Laplacians
min_cc_len = 10  # Drop the tiny connected components

# Call Spectral Ordering method
reord_method = SpectralOrdering(dim=dim,
                                k_nbrs=k_nbrs,
                                circular=circular,
                                scaled=scaled,
                                type_laplacian=type_lap,
                                verb=1,
                                type_new_sim='exp',
                                norm_local_diss=False,
                                norm_sim=False,
                                merge_if_ccs=True,
                                min_cc_len=min_cc_len,
                                do_eps_graph=True,
                                preprocess_only=True)
# Run the spectral ordering method on the DNA reads similarity matrix
# (the fit is timed, to report how long the embedding takes)
t0 = time()
reord_method.fit(new_mat)
my_ebd = reord_method.embedding
tme = time() - t0

print("my embedding in {}s".format(tme))

# SpectralEmbedding on a precomputed affinity, with the same number of
# components
skl_method = SpectralEmbedding(n_components=dim, affinity='precomputed')
Exemplo n.º 13
0
# Inverse of the true permutation, used to color the points of the plot
true_inv_perm = np.argsort(true_perm)

# Set parameters for Spectral Ordering method
scale_embedding = False
k_nbrs = 20
circular = True
eigen_solver = 'amg'  # faster than arpack on large sparse matrices.
# requires pyamg package (conda install pyamg or pip install pyamg)
norm_adjacency = 'coifman'  # yields better results in practice
norm_laplacian = False  # normalization of the laplacian seems to mess things
# up for large sparse matrices
merge_if_ccs = True  # the new similarity matrix may be disconnected
reord_method = SpectralOrdering(scale_embedding=scale_embedding,
                                k_nbrs=k_nbrs,
                                circular=circular,
                                eigen_solver=eigen_solver,
                                norm_adjacency=norm_adjacency,
                                norm_laplacian=norm_laplacian,
                                merge_if_ccs=merge_if_ccs,
                                n_components=8)
# Run the method
reord_method.fit(new_mat)

# Plot the laplacian embedding
embedding = reord_method.embedding
fig = plt.figure()
# Axes3D(fig) no longer attaches the axes to the figure in recent Matplotlib
# versions (deprecated in 3.4, then removed), which leaves the figure empty.
# Requesting the '3d' projection works with old and new versions (old ones
# need mpl_toolkits.mplot3d to be imported).
ax = fig.add_subplot(111, projection='3d')
ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=true_inv_perm)
# plt.title("3d embedding of DNA overlap based similarity matrix")
ax.set_xlabel(r'$f_1$', fontsize=18)
ax.set_ylabel(r'$f_2$', fontsize=18)
ax.set_zlabel(r'$f_3$', fontsize=18)