def optimize_global_layout(
    P, Z, a, b, alpha, n_epochs, verbose=False, savefig=False, label=None
):
    """Apply ``n_epochs`` gradient steps to the embedding ``Z`` and return it.

    Every epoch squares the output of ``adjacency_matrix(Z)``, derives a
    row-normalised weight matrix from ``1 - P`` and the inverse squared
    values, and subtracts ``alpha`` times the resulting gradient from ``Z``.

    Parameters
    ----------
    P : array
        Matrix multiplied element-wise with the squared values and used as
        ``1 - P`` in a matrix product, so it has to be square with one
        row/column per row of ``Z``.
        NOTE(review): presumably the high-dimensional affinities -- confirm.
    Z : array
        Embedding, one row per point.  Updated with ``-=``, so a NumPy array
        passed by the caller is modified in place.
    a, b : float
        Curve parameters used in ``1 / (1 + a * d2 ** b)`` and in the
        gradient coefficients.
    alpha : float
        Step size of each update.
    n_epochs : int
        Number of update steps.
    verbose : bool
        When true, evaluate ``get_CE`` each epoch and print the value.
    savefig : bool
        When true, call ``plot_tmptmp`` for epochs 0-9 and epoch 30.
    label : optional
        Forwarded to ``plot_tmptmp``.

    Returns
    -------
    The updated ``Z``.
    """
    loss_history = []

    for epoch in range(n_epochs):
        sq_dist = np.square(adjacency_matrix(Z))
        # Pairwise row differences: pair_diff[i, j] = Z[i] - Z[j].
        pair_diff = np.expand_dims(Z, axis=1) - np.expand_dims(Z, axis=0)
        kernel = np.expand_dims((1 + a * sq_dist ** b) ** -1, axis=2)

        # Inverse squared values (0.001 avoids division by zero), diagonal
        # cleared, mixed through (1 - P) and normalised so rows sum to one.
        repulsion = (0.001 + sq_dist) ** -1
        np.fill_diagonal(repulsion, 0)
        repulsion = np.dot(1 - P, repulsion)
        repulsion /= np.sum(repulsion, axis=1, keepdims=True)

        # 1e-12 keeps the base strictly positive before raising to (b - 1).
        attract_term = 2 * a * b * P * (1e-12 + sq_dist) ** (b - 1)
        repel_term = 2 * b * repulsion
        coeff = np.expand_dims(attract_term - repel_term, axis=2)

        step = np.sum(coeff * pair_diff * kernel, axis=1)
        Z -= alpha * step

        if verbose:
            row_scaled = Z / np.sum(Z, axis=1, keepdims=True)
            loss = get_CE(P, row_scaled, sq_dist, a, b)
            loss_history.append(loss)
            print(
                f"[INFO] Current loss: {loss:.6f}, @ iteration: {epoch+1}/{n_epochs}"
            )

        if savefig and (epoch < 10 or epoch == 30):
            from umato.umato_ import plot_tmptmp

            plot_tmptmp(data=Z, label=label, name=f"pic1_global{epoch}")

    return Z
for i in range(len(learning_rate)): for j in range(len(perplexity)): # run TSNE y = TSNE(n_components=args.dim, perplexity=perplexity[j], learning_rate=learning_rate[i], init="pca", n_jobs=-1, random_state=0, verbose=2).fit_transform(x) # save as csv path = os.path.join(os.getcwd(), "visualization", "public", "results", args.data) save_csv(path, alg_name=f"tsne_{perplexity[j]}_{learning_rate[i]}", data=y, label=label) else: y = TSNE(n_components=args.dim, random_state=0, verbose=2, init="pca", n_jobs=-1).fit_transform(x) path = os.path.join(os.getcwd(), "visualization", "public", "results", args.data) save_csv(path, alg_name="tsne", data=y, label=label) plot_tmptmp(y, label, "tsne")
def nn_layout_optimize(
    head_embedding,
    tail_embedding,
    head,
    tail,
    hub_info,
    n_epochs,
    n_vertices,
    epochs_per_sample,
    a,
    b,
    rng_state,
    gamma,
    learning_rate=1.0,
    negative_sample_rate=5.0,
    parallel=False,
    verbose=False,
    k=0,
    label=None,
):
    """Run the per-epoch layout kernel ``n_epochs`` times and return the result.

    ``_nn_layout_optimize_single_epoch`` is compiled with ``numba.njit``
    (``fastmath=True``) and called once per epoch.  The step size passed to
    epoch 0 is ``learning_rate``; after epoch ``n`` finishes it is reset to
    ``learning_rate * (1 - n / n_epochs)`` for the following call.

    Parameters
    ----------
    head_embedding, tail_embedding : 2-D arrays
        Embeddings handed to the kernel.  The kernel is told to move the
        other end of an edge as well when both have the same number of rows.
    head, tail, hub_info, n_vertices, a, b, rng_state, gamma
        Passed to the kernel unchanged.
    n_epochs : int
        Number of kernel invocations.
    epochs_per_sample : array
        Per-sample schedule; must support division by a scalar and ``copy()``.
    learning_rate : float
        Initial step size.
    negative_sample_rate : float
        Divisor applied to ``epochs_per_sample`` to obtain the negative
        sampling schedule.
    parallel : bool
        Forwarded to ``numba.njit``.
    verbose : bool
        When true, plot and print progress every 10th epoch.
    k, label
        Only used to name / annotate the progress plots.

    Returns
    -------
    ``head_embedding`` as left by the kernel.
    """
    _, dim = head_embedding.shape
    move_other = head_embedding.shape[0] == tail_embedding.shape[0]

    # Working copies of the schedules that are handed to every kernel call.
    epochs_per_negative_sample = epochs_per_sample / negative_sample_rate
    epoch_of_next_negative_sample = epochs_per_negative_sample.copy()
    epoch_of_next_sample = epochs_per_sample.copy()

    run_epoch = numba.njit(
        _nn_layout_optimize_single_epoch, fastmath=True, parallel=parallel
    )

    alpha = learning_rate
    for n in range(n_epochs):
        run_epoch(
            head_embedding,
            tail_embedding,
            head,
            tail,
            hub_info,
            n_vertices,
            epochs_per_sample,
            a,
            b,
            rng_state,
            gamma,
            dim,
            move_other,
            alpha,
            epochs_per_negative_sample,
            epoch_of_next_negative_sample,
            epoch_of_next_sample,
            n,
        )

        # Linear decay, computed after the call so it applies to the next one.
        fraction_done = float(n) / float(n_epochs)
        alpha = learning_rate * (1.0 - fraction_done)

        if verbose and n % 10 == 0:
            from umato.umato_ import plot_tmptmp

            plot_tmptmp(data=head_embedding, label=label, name=f"pic3_{k}_local{n}")
            print("\tcompleted ", n, " / ", n_epochs, "epochs")

    return head_embedding
from umato.utils import init_position
import argparse

# Command-line options for a single UMATO run.
parser = argparse.ArgumentParser(description="args for umato")
parser.add_argument(
    "--data",
    default="allen",
    type=str,
    help="choose data: spheres, mnist, fmnist, kmnist, flow, swissroll, scurve, single-cell",
)
parser.add_argument(
    "--hub_num",
    default=400,
    type=int,
    help="choose number of hubs",
)
parser.add_argument(
    "--n_samples",
    default=1500,
    type=int,
    help="choose number of samples",
)
parser.add_argument(
    "--init",
    default="pca",
    type=str,
    help="choose initialization method",
)
args = parser.parse_args()

if __name__ == "__main__":
    # Load the dataset, embed it, then plot and store the embedding.
    x, label = get_data(args.data, n_samples=args.n_samples)
    reducer = umato.UMATO(verbose=True, ll=label, hub_num=args.hub_num)
    y = reducer.fit_transform(x)
    plot_tmptmp(y, label, f"umato")
    save_csv('./', alg_name=f"umato", data=y, label=label)

    # x = x[np.arange(0, 10000, 50)]
    # label = label[np.arange(0, 10000, 50)]

    # for epoch in [200, 500, 1000, 2000, 5000]:
    #     x, label = get_data(args.data, n_samples=args.n_samples)  # spheres, mnist, fmnist, kmnist
    #     y = umato.UMATO(verbose=True, ll=label, hub_num=args.hub_num, global_n_epochs=epoch).fit_transform(x)
    #     plot_tmptmp(y, label, f"umato_{args.data}_{epoch}")
    #     save_csv('./', alg_name=f"umato_{args.data}_{epoch}", data=y, label=label)

    # # UMTO
    # for dt in ['fmnist', 'mnist', 'kmnist']:
    #     # x = load_digits()  # (1797, 64 dim)
    #     x, label = get_data(dt, n_samples=args.n_samples)  # spheres, mnist, fmnist, kmnist
def _parse_bool(text):
    """Convert a command-line word such as ``true`` / ``False`` / ``0`` to a bool.

    ``type=bool`` cannot be used for this: argparse would call ``bool(text)``,
    and every non-empty string (including ``"False"``) is truthy.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``text`` is not a recognised spelling of true or false.
    """
    lowered = text.strip().lower()
    if lowered in {"1", "true", "t", "yes", "y"}:
        return True
    if lowered in {"", "0", "false", "f", "no", "n"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")


# Command-line options for a single UMAP run.
parser = argparse.ArgumentParser(description="UMAP embedding")
parser.add_argument("--data", type=str, help="choose dataset", required=True)
# Parsed as int: the value is passed straight to UMAP(n_components=...), and
# with type=str a user-supplied "--dim 3" arrived there as the string "3".
parser.add_argument("--dim", type=int, help="choose embedding dimension", default=2)
parser.add_argument("--init", type=str, help="choose initialization method", default="pca")
parser.add_argument("--hp", type=_parse_bool, help="whether to explore hyperparameter settings", default=False)
parser.add_argument("--n_samples", type=int, help="choose number of samples", default=1500)
args = parser.parse_args()

if __name__ == "__main__":
    # Load the dataset, embed it, then plot and store the embedding.
    x, label = get_data(args.data, n_samples=args.n_samples)
    y = UMAP(n_components=args.dim, verbose=True).fit_transform(x)
    plot_tmptmp(y, label, f"umap")
    save_csv('./', alg_name=f"umap", data=y, label=label)

    # if args.hp:
    #     # read data
    #     x, label = get_data(args.data, n_samples=args.n_samples)
    #     n_neighbor = np.arange(5, 55, 5)
    #     min_dist = np.arange(0, 1.1, 0.1)
    #     for i in range(len(n_neighbor)):
    #         for j in range(len(min_dist)):
    #             # run UMAP
    #             y = UMAP(n_components=args.dim, n_neighbors=n_neighbor[i], min_dist=min_dist[j], verbose=True).fit_transform(x)
import os
from .dataset import get_data, save_csv
from umato.umato_ import plot_tmptmp


def _parse_dim(text):
    """Convert the ``--dim`` value to an int when it is an integer literal.

    With ``type=str`` a user-supplied ``--dim 3`` reached
    ``PCA(n_components=...)`` as the string ``"3"`` instead of the number 3.
    Text that is not an integer is returned unchanged, so any non-numeric
    value that worked before is still passed through as a string.
    """
    try:
        return int(text)
    except ValueError:
        return text


# Command-line options for a single PCA run.
parser = argparse.ArgumentParser(description="PCA embedding")
parser.add_argument("--data", type=str, help="choose dataset", required=True)
parser.add_argument("--dim", type=_parse_dim, help="choose embedding dimension", default=2)
parser.add_argument("--n_samples", type=int, help="choose number of samples", default=1500)
args = parser.parse_args()

if __name__ == "__main__":
    # read data
    x, label = get_data(args.data, n_samples=args.n_samples)

    # run PCA
    y = PCA(n_components=args.dim).fit_transform(x)

    # save as csv (results directory is resolved relative to the working dir)
    path = os.path.join(os.getcwd(), "visualization", "public", "results", args.data)
    plot_tmptmp(y, label, "pca")
    save_csv(path, alg_name="pca", data=y, label=label)