Пример #1
0
def optimize_global_layout(
    P,
    Z,
    a,
    b,
    alpha,
    n_epochs,
    verbose=False,
    savefig=False,
    label=None
):
    """Refine the embedding ``Z`` with ``n_epochs`` gradient-descent steps.

    Each epoch squares the entries of ``adjacency_matrix(Z)``, derives a
    row-normalised matrix ``Q`` from them, combines ``P`` and ``Q`` into a
    per-pair coefficient and moves ``Z`` against the summed direction with
    step size ``alpha``.

    Args:
        P: Matrix of target weights. It scales the first gradient term and,
            through ``1 - P``, mixes the rows of ``Q``.
            NOTE(review): assumed dense and shaped like
            ``adjacency_matrix(Z)`` -- confirm against the caller.
        Z: Embedding to optimise. It is updated with ``-=``, so a NumPy
            array passed in is modified in place.
        a: Curve parameter used in ``1 + a * d**b`` and in the gradient.
        b: Curve parameter (exponent) used alongside ``a``.
        alpha: Step size applied to every update.
        n_epochs: Number of update steps to run.
        verbose: When true, evaluate ``get_CE`` after each step and print it.
        savefig: When true, call ``plot_tmptmp`` on epochs 0-9 and on epoch 30.
        label: Forwarded unchanged to ``plot_tmptmp``.

    Returns:
        ``Z`` after the final update.
    """

    loss_history = []

    for epoch in range(n_epochs):

        sq_dist = np.square(adjacency_matrix(Z))
        # Pairwise differences: entry [i, j] holds Z[i] - Z[j].
        pair_diff = Z[:, np.newaxis] - Z[np.newaxis]
        inv_kernel = ((1 + a * sq_dist ** b) ** -1)[:, :, np.newaxis]

        # Q starts as the inverse squared distances (offset by 0.001 so that
        # coincident points stay finite), has its diagonal cleared, is mixed
        # through (1 - P) and is finally normalised per row.
        Q = (0.001 + sq_dist) ** -1
        np.fill_diagonal(Q, 0)
        Q = np.dot(1 - P, Q)
        Q /= Q.sum(axis=1, keepdims=True)

        # The 1e-12 offset keeps the power finite on zero distances.
        p_term = 2 * a * b * P * (1e-12 + sq_dist) ** (b - 1)
        q_term = 2 * b * Q
        coeff = (p_term - q_term)[:, :, np.newaxis]

        step = np.sum(coeff * pair_diff * inv_kernel, axis=1)
        Z -= alpha * step

        if verbose:
            # The loss is evaluated on the updated Z (rows divided by their
            # sum) but with the distances computed before the update.
            row_scaled = Z / np.sum(Z, axis=1, keepdims=True)
            loss = get_CE(P, row_scaled, sq_dist, a, b)
            loss_history.append(loss)
            print(
                f"[INFO] Current loss: {loss:.6f}, @ iteration: {epoch+1}/{n_epochs}"
            )

        if savefig and (epoch < 10 or epoch == 30):
            # Imported lazily, only on the epochs that actually plot.
            from umato.umato_ import plot_tmptmp

            plot_tmptmp(data=Z, label=label, name=f"pic1_global{epoch}")

    return Z
Пример #2
0
        for i in range(len(learning_rate)):
            for j in range(len(perplexity)):

                # run TSNE
                y = TSNE(n_components=args.dim,
                         perplexity=perplexity[j],
                         learning_rate=learning_rate[i],
                         init="pca",
                         n_jobs=-1,
                         random_state=0,
                         verbose=2).fit_transform(x)

                # save as csv
                path = os.path.join(os.getcwd(), "visualization", "public",
                                    "results", args.data)
                save_csv(path,
                         alg_name=f"tsne_{perplexity[j]}_{learning_rate[i]}",
                         data=y,
                         label=label)
    else:
        y = TSNE(n_components=args.dim,
                 random_state=0,
                 verbose=2,
                 init="pca",
                 n_jobs=-1).fit_transform(x)
        path = os.path.join(os.getcwd(), "visualization", "public", "results",
                            args.data)
        save_csv(path, alg_name="tsne", data=y, label=label)
        plot_tmptmp(y, label, "tsne")
Пример #3
0
def nn_layout_optimize(
    head_embedding,
    tail_embedding,
    head,
    tail,
    hub_info,
    n_epochs,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    learning_rate=1.0,
    negative_sample_rate=5.0,
    parallel=False,
    verbose=False,
    k=0,
    label=None,
):
    """Run ``n_epochs`` passes of the jitted single-epoch layout kernel.

    ``_nn_layout_optimize_single_epoch`` is wrapped with ``numba.njit`` on
    every call of this function and then invoked once per epoch. The step
    size handed to the kernel is ``learning_rate`` for the first pass and is
    recomputed after each pass as ``learning_rate * (1 - n / n_epochs)``.

    Args:
        head_embedding: 2-D array of positions; its second dimension is
            passed to the kernel as ``dim``. Returned at the end.
        tail_embedding: Second embedding array, forwarded to the kernel.
        head, tail, hub_info, n_vertices, a, b, rng_state, gamma: Forwarded
            to the kernel unchanged.
        n_epochs: Number of kernel invocations.
        epochs_per_sample: Array forwarded to the kernel; also the basis for
            the per-sample scheduling arrays built here.
        learning_rate: Initial step size.
        negative_sample_rate: Divisor applied to ``epochs_per_sample`` to
            obtain the negative-sampling schedule.
        parallel: Passed to ``numba.njit`` as its ``parallel`` option.
        verbose: When true, plot and print progress every 10th epoch.
        k: Only used in the name of the saved plots.
        label: Forwarded to ``plot_tmptmp``.

    Returns:
        ``head_embedding``, the same object that was passed in.
    """

    n_head_rows, dim = head_embedding.shape
    # True when both embeddings have the same number of rows.
    move_other = n_head_rows == tail_embedding.shape[0]

    # Scheduling state; the copies are handed to the kernel on every epoch.
    epochs_per_negative_sample = epochs_per_sample / negative_sample_rate
    next_negative_sample_epoch = epochs_per_negative_sample.copy()
    next_sample_epoch = epochs_per_sample.copy()

    run_epoch = numba.njit(
        _nn_layout_optimize_single_epoch, fastmath=True, parallel=parallel
    )

    step_size = learning_rate
    for epoch in range(n_epochs):
        run_epoch(
            head_embedding,
            tail_embedding,
            head,
            tail,
            hub_info,
            n_vertices,
            epochs_per_sample,
            a,
            b,
            rng_state,
            gamma,
            dim,
            move_other,
            step_size,
            epochs_per_negative_sample,
            next_negative_sample_epoch,
            next_sample_epoch,
            epoch,
        )

        # Linear decay, applied to the following epoch.
        step_size = learning_rate * (1.0 - float(epoch) / float(n_epochs))

        if not verbose or epoch % 10 != 0:
            continue

        # Imported lazily, only when a progress snapshot is requested.
        from umato.umato_ import plot_tmptmp

        plot_tmptmp(data=head_embedding, label=label, name=f"pic3_{k}_local{epoch}")
        print("\tcompleted ", epoch, " / ", n_epochs, "epochs")

    return head_embedding
Пример #4
0
from umato.utils import init_position
import argparse

# Command-line options for the UMATO demo run.
parser = argparse.ArgumentParser(description="args for umato")
parser.add_argument(
    "--data",
    default="allen",
    type=str,
    help="choose data: spheres, mnist, fmnist, kmnist, flow, swissroll, scurve, single-cell",
)
parser.add_argument(
    "--hub_num",
    default=400,
    type=int,
    help="choose number of hubs",
)
parser.add_argument(
    "--n_samples",
    default=1500,
    type=int,
    help="choose number of samples",
)
parser.add_argument(
    "--init",
    default="pca",
    type=str,
    help="choose initialization method",
)
# Parsed at module level, so ``args`` is a module global.
args = parser.parse_args()


if __name__ == "__main__":

    # Load the requested dataset, embed it with UMATO, then plot the result
    # and write it out as CSV in the current directory.
    x, label = get_data(args.data, n_samples=args.n_samples)
    y = umato.UMATO(
        verbose=True,
        ll=label,
        hub_num=args.hub_num,
    ).fit_transform(x)
    plot_tmptmp(y, label, f"umato")
    save_csv('./', alg_name=f"umato", data=y, label=label)

    # x = x[np.arange(0, 10000, 50)]
    # label = label[np.arange(0, 10000, 50)]

    # for epoch in [200, 500, 1000, 2000, 5000]:
    #     x, label = get_data(args.data, n_samples=args.n_samples)  # spheres, mnist, fmnist, kmnist
    #     y = umato.UMATO(verbose=True, ll=label, hub_num=args.hub_num, global_n_epochs=epoch).fit_transform(x)
    #     plot_tmptmp(y, label, f"umato_{args.data}_{epoch}")
    #     save_csv('./', alg_name=f"umato_{args.data}_{epoch}", data=y, label=label)

    # # UMATO
    # for dt in ['fmnist', 'mnist', 'kmnist']:
    #     # x = load_digits()  # (1797, 64 dim)
    #     x, label = get_data(dt, n_samples=args.n_samples)  # spheres, mnist, fmnist, kmnist
Пример #5
0
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is not usable with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy. This converter keeps the
    ``--hp <value>`` calling convention but interprets the value.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised
            true/false spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Command-line options for the UMAP baseline run.
parser = argparse.ArgumentParser(description="UMAP embedding")
parser.add_argument("--data", type=str, help="choose dataset", required=True)
# The dimension is handed to UMAP(n_components=...), so it must be parsed as
# an integer; with type=str a user-supplied "--dim 3" arrived as the string "3".
parser.add_argument("--dim", type=int, help="choose embedding dimension", default=2)
parser.add_argument("--init", type=str, help="choose initialization method", default="pca")
# Parsed with _str2bool so that "--hp False" really yields False.
parser.add_argument("--hp", type=_str2bool, help="whether to explore hyperparameter settings", default=False)
parser.add_argument("--n_samples", type=int, help="choose number of samples", default=1500)

# Parsed at module level, so ``args`` is a module global.
args = parser.parse_args()


if __name__ == "__main__":

    # Load the requested dataset, embed it with UMAP, then plot the result
    # and write it out as CSV in the current directory.
    x, label = get_data(args.data, n_samples=args.n_samples)
    y = UMAP(n_components=args.dim, verbose=True).fit_transform(x)
    plot_tmptmp(y, label, "umap")
    save_csv('./', alg_name="umap", data=y, label=label)


    # if args.hp:
    #     # read data
    #     x, label = get_data(args.data, n_samples=args.n_samples)

    #     n_neighbor = np.arange(5, 55, 5)
    #     min_dist = np.arange(0, 1.1, 0.1)

    #     for i in range(len(n_neighbor)):
    #         for j in range(len(min_dist)):

    #             # run UMAP
    #             y = UMAP(n_components=args.dim, n_neighbors=n_neighbor[i], min_dist=min_dist[j], verbose=True).fit_transform(x)
Пример #6
0
import os
from .dataset import get_data, save_csv
from umato.umato_ import plot_tmptmp

# Command-line options for the PCA baseline run.
parser = argparse.ArgumentParser(description="PCA embedding")
parser.add_argument("--data", type=str, help="choose dataset", required=True)
# The dimension is handed to PCA(n_components=...), so it must be parsed as
# an integer; with type=str a user-supplied "--dim 3" arrived as the string "3".
parser.add_argument("--dim",
                    type=int,
                    help="choose embedding dimension",
                    default=2)
parser.add_argument("--n_samples",
                    type=int,
                    help="choose number of samples",
                    default=1500)

# Parsed at module level, so ``args`` is a module global.
args = parser.parse_args()

if __name__ == "__main__":

    # read data
    x, label = get_data(args.data, n_samples=args.n_samples)

    # run PCA
    y = PCA(n_components=args.dim).fit_transform(x)

    # plot the embedding, then save it as csv under
    # <cwd>/visualization/public/results/<dataset>
    path = os.path.join(os.getcwd(), "visualization", "public", "results",
                        args.data)
    plot_tmptmp(y, label, "pca")
    save_csv(path, alg_name="pca", data=y, label=label)