def display_latent_code(
    latent_code: np.ndarray, labels: np.ndarray, title: str, seed: int
) -> None:
    """
    Project a latent-code matrix to 2-D with t-SNE, scatter-plot it, and
    save the figure under ``data/<title>.png``.

    Parameters
    ----------
    latent_code: np.ndarray
        The latent code representation for features.
    labels: np.ndarray
        The labels for the dataset features.
    title: str
        The plot title to use (also used as the output file name).
    seed: int
        The pseudorandom seed to use for reproducible t-SNE visualization.
    """
    # Reduce the latent codes to two dimensions for plotting.
    encoder = TSNE(random_seed=seed, perplexity=50, learning_rate=10, n_iter=5000)
    projection = encoder.fit_transform(latent_code)

    sns.set_style("darkgrid")
    plt.scatter(projection[:, 0], projection[:, 1], c=labels, marker="o")
    plt.title(title)
    plt.grid()
    plt.savefig(fname=f"data/{title}.png", dpi=150)
    plt.show()
def apply_tsne(act, perplexity, n_iter):
    """Embed the activation matrix `act` into 2-D with t-SNE.

    Prints the elapsed wall-clock time and returns the embedding.
    """
    time_start = time.time()
    reducer = TSNE(n_components=2, verbose=1, perplexity=perplexity, n_iter=n_iter)
    embedded = reducer.fit_transform(act)
    # del act
    print(f"Time elapsed: {time.time() - time_start} seconds")
    return embedded
def compute_tsne(X, plot=True):
    """
    Embed `X` into 2-D with t-SNE and optionally scatter-plot the result.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature vectors to embed.
    plot : bool, optional
        When True, draw a scatter plot of the embedding coloured by the
        module-level `tsne_labels`.

    Returns
    -------
    ndarray of shape (n_samples, 2)
        The computed t-SNE embedding.
    """
    tsne_model = TSNE(n_components=2, perplexity=40, learning_rate=100, verbose=10)
    # BUG FIX: the original called fit_transform(tsne_vectors) — a module
    # global — silently ignoring the `X` argument passed by the caller.
    tsne_Y = tsne_model.fit_transform(X)
    if plot:
        fig = plt.figure(figsize=(10, 10))
        ax = fig.gca()
        # NOTE(review): axes are deliberately swapped (column 1 on x) and the
        # colours come from the module-level `tsne_labels` — confirm callers.
        ax.scatter(tsne_Y[:, 1], tsne_Y[:, 0], c=tsne_labels, s=1, cmap='hsv')
    # Return the embedding so callers no longer need the module global.
    return tsne_Y
def write_tsne(label_dict, embedding, extension="png", tsne_model=None):
    """
    base:https://medium.com/analytics-vidhya/super-fast-tsne-cuda-on-kaggle-b66dcdc4a5a4
    """
    # Fall back to a default t-SNE model when the caller supplies none.
    model = TSNE() if tsne_model is None else tsne_model
    x_embedding = model.fit_transform(embedding)

    # One figure per entry: join the 2-D embedding with its label column,
    # render a hue-coloured scatter, and save it as "<key>.<extension>".
    for key, label in label_dict.items():
        frame = pd.concat(
            [pd.DataFrame(x_embedding), pd.DataFrame(data=label, columns=["label"])],
            axis=1,
        )
        sns.FacetGrid(frame, hue="label", height=6).map(plt.scatter, 0, 1).add_legend()
        plt.savefig("{}.{}".format(key, extension))
        plt.clf()
        plt.close('all')
def reduce_dimensions(model, perplexity):
    """
    Reduce a gensim word-vector model to 2-D coordinates via t-SNE.

    Parameters
    ----------
    model : gensim model exposing `model.wv` (old `.vocab` API assumed).
    perplexity : float
        t-SNE perplexity.

    Returns
    -------
    (x_vals, y_vals, labels)
        Lists of x and y coordinates, plus an ndarray of the words.
    """
    num_dimensions = 2  # final num dimensions (2D, 3D, etc)

    vectors = []  # positions in vector space
    labels = []  # keep track of words to label our data again later
    for word in model.wv.vocab:
        vectors.append(model.wv[word])
        labels.append(word)

    # convert both lists into numpy vectors for reduction
    # (the original converted `vectors` twice; once is enough)
    vectors = np.asarray(vectors)
    labels = np.asarray(labels)

    # reduce using t-SNE
    tsne = TSNE(n_components=num_dimensions, perplexity=perplexity)
    vectors = tsne.fit_transform(vectors)

    x_vals = [v[0] for v in vectors]
    y_vals = [v[1] for v in vectors]
    print(x_vals[:5])  # debug peek at the first few x coordinates
    return x_vals, y_vals, labels
def apply_tsne(j):
    """Embed one work item with t-SNE (configured by the module-level `kwargs`).

    `j` is an (idx, md5, matrix) triple; the matrix is replaced by its
    t-SNE embedding in the returned triple.
    """
    idx, md5, x = j
    embedding = TSNE(**kwargs).fit_transform(x)
    return (idx, md5, embedding)
# the predictions would have a hazy circle, since the best guess would be the # mean of all the rotated digits. Since we don't rotate our view2 images, we # instead get something that's only a bit hazy around the edges -- corresonding # to the mean of all the non-rotated digits. # Next let's visualize our 20d test embeddings with T-SNE and see if they # represent our original underlying representation -- the digits from 0-9 -- of # which we made two views of. In the perfect scenario, each of the 10,000 # vectors of our test embedding would be one of ten vectors, representing the # digits from 0-9. (Our network wouldn't do this, as it tries to reconstruct # each unique view1 image exactly). In lieu of this we can hope for embedding # vectors corresponding to the same digits to be closer together. tsne = TSNE() tsneEmbeddings = tsne.fit_transform(testEmbed) def plot2DEmbeddings(embeddings, labels): pointColors = [] origColors = [ [55, 55, 55], [255, 34, 34], [38, 255, 38], [10, 10, 255], [255, 12, 255], [250, 200, 160], [120, 210, 180], [150, 180, 205], [210, 160, 210], [190, 190, 110] ] origColors = (np.array(origColors)) / 255 for l in labels.cpu().numpy(): pointColors.append(tuple(origColors[l].tolist())) fig, ax = plt.subplots()
# NOTE(review): fragment — this chunk opens inside a function whose `def`
# line is not visible (it ends at the `return` below), followed by
# top-level t-SNE plotting code.
    data = []
    target = []
    # Map each subdirectory name under bboxForTsne_path to a class index.
    subdirectories = {
        dir_: idx
        for (idx, dir_) in enumerate(os.listdir(bboxForTsne_path))
        if os.path.isdir(os.path.join(bboxForTsne_path, dir_))
    }
    for sub_ in subdirectories.keys():
        for img_ in imread_collection(
                os.path.join(bboxForTsne_path, sub_) + '/*.jpg'):
            target.append(sub_)
            # Flatten each 300x300 resized image into one feature vector.
            data.append(resize(img_, (300, 300)).ravel())
    return np.array(data), target, subdirectories


data, target, target_num = load_images_from_subdirectories()
tsne = TSNE()
data_tsne = tsne.fit_transform(data)
#np.save('tsne.npy', tsne)
# One scatter call per point, coloured by its class index.
for x, y, tg in zip(data_tsne[:, 0], data_tsne[:, 1], target):
    plt.scatter(x, y, c=color[target_num[tg]])
plt.xlim(data_tsne[:, 0].min(), data_tsne[:, 0].max())  # min, max
plt.ylim(data_tsne[:, 1].min(), data_tsne[:, 1].max())  # min, max
plt.xlabel('t-SNE 특성0')  # x-axis label ("t-SNE feature 0")
plt.ylabel('t-SNE 특성1')  # y-axis label ("t-SNE feature 1")
plt.savefig('./result.png')
# NOTE(review): fragment of a domain-adaptation training loop — starts
# mid-iteration; `x_s`, `y_s`, `x_t`, `model`, `optimizer`, `writer`, and
# `niter` are defined in surrounding code not visible in this chunk.
y_s = y_s.cuda()
x_t = x_t.cuda()
optimizer.zero_grad()
# optimizer_ad.zero_grad()
########### Networks Forward Propagation
f_s, p_s = model(x_s)
f_t, p_t = model(x_t)
features = torch.cat((f_s, f_t), dim=0)
outputs = torch.cat((p_s, p_t), dim=0)
# Supervised cross-entropy on the source-domain slice of the batch only.
loss = nn.CrossEntropyLoss()(outputs.narrow(0, 0, x_s.size(0)), y_s)
### TSNE
tsne_model = TSNE(learning_rate=100)
transformed = tsne_model.fit_transform(f_s.detach())
# transformed = tsne_model.fit_transform(f_s.detach().cpu())
# NOTE(review): X_embedded is computed (a second, differently-configured
# t-SNE of the same features) but never used afterwards in this chunk.
X_embedded = TSNE(n_components=2, perplexity=15, learning_rate=10).fit_transform(f_s.detach())
xs = transformed[:, 0]
ys = transformed[:, 1]
fig = plt.figure()
plt.scatter(xs, ys, c=y_s.cpu())
fig.savefig('f_s_tsne.png')
pdb.set_trace()  # NOTE(review): interactive breakpoint left in — remove for production
writer.add_figure('f_s_tsne', fig, niter)
# Compute, standardise, visualise and score embeddings for the test set,
# then persist them to stored_data/embeddings.
embed_set = "embedding"
embed_dir = "stored_data/embeddings"
embed_dir = os.path.join(embed_dir, embed_name, "embeddings")
create_folder(embed_dir)
fig_dir = os.path.join(embed_dir, "figures")
create_folder(fig_dir)
df_emb, embeddings = calculate_embedding(test_dataset, emb_model,
                                         savedir=os.path.join(embed_dir, embed_set),
                                         concatenate="append")
print(embeddings.mean())
print(embeddings.var())
# Standardise the flattened embeddings (zero mean, unit variance per feature).
embeddings = sklearn.preprocessing.StandardScaler().fit_transform(embeddings.reshape(embeddings.shape[0], -1))
print("normalized")
print(embeddings.mean())
print(embeddings.var())
df_emb = df_emb.fillna("")
tsne = TSNE()
tsne_emb = tsne.fit_transform(X=embeddings.reshape(embeddings.shape[0], -1))
tsne_plots(tsne_emb, df_emb, savefig=os.path.join(fig_dir, embed_set))
scater_valid_rat = scatter_ratio(embeddings.reshape(embeddings.shape[0], -1),
                                 df_emb.reset_index())
# Clustering quality of the raw (high-dimensional) embeddings w.r.t. labels.
silhouette_valid_score = sklearn.metrics.silhouette_score(
    embeddings.reshape(embeddings.shape[0], -1), df_emb.event_labels, metric='euclidean')
# NOTE(review): assumes df_emb carries "X"/"Y" 2-D t-SNE columns at this
# point — presumably filled in by tsne_plots; confirm.
LOG.info("Valid silhouette for all classes in 2D (tsne) : {}".format(
    sklearn.metrics.silhouette_score(df_emb[["X", "Y"]], df_emb.event_labels, metric='euclidean')))
embed_dir = "stored_data/embeddings"
embed_dir = os.path.join(embed_dir, embed_name)
create_folder(embed_dir)
np.save(os.path.join(embed_dir, "embed" + str(epoch_model)), embeddings)
test_fr.to_csv(os.path.join(embed_dir, "df" + str(epoch_model)), sep="\t", index=False)
def tsne_grid(val_loader, model):
    """Generate t-SNE-based image mosaics from model features.

    Runs `model` over `val_loader`, embeds the per-image features with
    t-SNE, then (a) assigns each image to a cell of a size x size grid via
    the Jonker-Volgenant linear assignment (lapjv) and saves the mosaic as
    'UFL_TSNE_GRID.jpeg', and (b) saves a second mosaic with images placed
    at their raw (normalised) t-SNE coordinates as 'UFL_TSNE_CLOUD.jpeg'.

    Parameters
    ----------
    val_loader : iterable yielding (input, _, index, names) batches.
    model : feature-extraction network, called as model(input_var).
    """
    features = []
    images = []
    for i, (input, _, index, names) in enumerate(val_loader):
        # FIX: `index.cuda(async=True)` is a SyntaxError on Python 3.7+
        # (`async` became a keyword); the PyTorch keyword is `non_blocking`.
        index = index.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        index_var = torch.autograd.Variable(index)
        # compute output
        feature = model(input_var)
        feature = feature.cpu()
        # FIX: renamed the inner index so it no longer shadows the outer `i`.
        for j in range(feature.data.numpy().shape[0]):
            images.append(input.numpy()[j, ...])
            features.append(feature.data.numpy()[j, :])
    print(len(features))
    print(np.array(images).shape)
    print(np.array(features).shape)
    # Channels-first -> channels-last for the image utilities below.
    img_collection = np.moveaxis(np.array(images), 1, -1)
    print(img_collection.shape)
    size = 45
    perplexity = 20
    tsne_iter = 5000
    print("Running tsne...")
    tsne = TSNE(perplexity=perplexity,
                n_components=2,
                init='random',
                n_iter=tsne_iter)
    X_2d = tsne.fit_transform(np.array(features)[0:size * size, :])
    print("tsne complete. Normalizing...")
    # Normalise embedding coordinates into [0, 1] per axis.
    X_2d -= X_2d.min(axis=0)
    X_2d /= X_2d.max(axis=0)
    print("Normalization complete. Creating plot...")
    grid = np.dstack(
        np.meshgrid(np.linspace(0, 1, size), np.linspace(0, 1, size))).reshape(-1, 2)
    # Solve the grid-cell <-> point assignment minimising total squared
    # distance (Jonker-Volgenant algorithm).
    cost_matrix = cdist(grid, X_2d, "sqeuclidean").astype(np.float32)
    cost_matrix = cost_matrix * (100000 / cost_matrix.max())
    _, row_asses, col_asses = lapjv(cost_matrix)
    grid_jv = grid[col_asses]
    out = np.ones((size * 224, size * 224, 3))
    for pos, img in zip(grid_jv, img_collection[0:size * size]):
        h_range = int(np.floor(pos[0] * (size - 1) * 224))
        w_range = int(np.floor(pos[1] * (size - 1) * 224))
        out[h_range:h_range + 224, w_range:w_range + 224] = image.img_to_array(img)
    print("plot complete. Saving gridded plot...")
    im = image.array_to_img(out)
    im.save('UFL_TSNE_GRID.jpeg', quality=100)
    print("Gridded plot saved!")
    # Second mosaic: images at their raw normalised t-SNE positions.
    out = np.zeros((size * 224, size * 224, 3))
    for pos, img in zip(X_2d, img_collection[0:size * size]):
        h_range = int(pos[0] * (size - 1) * 224)
        w_range = int(pos[1] * (size - 1) * 224)
        out[h_range:h_range + 224, w_range:w_range + 224] = image.img_to_array(img)
    print("plot complete. Saving cloud plot...")
    im = image.array_to_img(out)
    im.save('UFL_TSNE_CLOUD.jpeg', quality=100)
    print("Cloud plot saved!")
# verbose=2) verbose=rsfk_verbose) # t = time.time() - init_t t = rsfk._last_search_time # Ignore data initialization time nne_rate = quality_function(real_indices, indices, real_sqd_dist, dist, max_k=K) time_list.append(t) print("RSFK Time: {}".format(t), flush=True) print("RSFK NNP: {}".format(nne_rate), flush=True) p = tsne.fit_transform(dataX, pre_knn=(indices, dist)) tsne_nne = get_nne_rate_tsne(dataX, p, max_k=K, pre_knn=(real_sqd_dist, real_indices)) quality_list.append([nne_rate, tsne_nne]) print("TSNE NNE: {}".format(tsne_nne)) # plot_emb(p, # fig_name="knn_experiment2/{}_K{}_{}trees".format(dataset_name, K, n_trees), # fig_title="t-SNE result with KNN error = {}\n".format(tsne_nne)+ # r"$R_{\mathrm{NX}}(K)$ = "+str(nne_rate)) plot_emb(p, fig_name="knn_experiment2/{}_K{}_{}trees".format(
from keras.datasets import mnist
from tsnecuda import TSNE
import matplotlib.pyplot as plt

# Load MNIST; only the training split is embedded below.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(y_train.shape)
print(x_train.shape)

# GPU t-SNE over the 60000 flattened 28x28 training images.
tsne = TSNE(n_iter=1000, verbose=1, num_neighbors=64)
tsne_results = tsne.fit_transform(x_train.reshape(60000, -1))
print(tsne_results.shape)

# Create the figure
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1, title='TSNE')

# Create the scatter, coloured by digit label.
# FIX: plt.cm.get_cmap() is deprecated (removed in Matplotlib 3.9);
# passing the colormap name string directly is equivalent.
ax.scatter(x=tsne_results[:, 0],
           y=tsne_results[:, 1],
           c=y_train,
           cmap='Paired',
           alpha=0.4,
           s=0.5)
plt.show()
def figTwoManifold(fileList, bc_1, bc_2, output_file_name="default", labels=None, gen_random=True):
    """Build a t-SNE manifold figure from barcoded FASTA sequence files.

    For each file in `fileList`, extracts the sub-sequences found between
    the `bc_1` and `bc_2` barcode patterns, one-hot/encodes the unique ones
    (via the module-level `encode`), optionally appends random RNA and a
    set of "special" reference sequences, embeds everything with t-SNE,
    and saves a grid of per-group KDE plots to `<output_file_name>.pdf`.

    NOTE(review): `bc_2[:-3]` trims the last 3 characters of the end
    barcode before compiling it — presumably to tolerate truncated reads;
    confirm against the data.
    """
    X = []          # encoded sequences, in insertion order
    X_indices = []  # [start, end) index ranges into X, one per group
    j = 0           # running index into X
    # labels = []
    for i, file in enumerate(fileList):
        unique_sequences = {}
        print(file)
        # labels.append(i)
        # Compile the start/end barcode patterns as capture groups.
        pat_start = r"(" + bc_1 + ")"
        pat_start = re.compile(pat_start)
        pat_end = r"(" + bc_2[:-3] + ")"
        pat_end = re.compile(pat_end)
        tmp_indices = [j, j]
        c = 0
        for h, seq in readFasta(file):
            # Skip reads containing ambiguous bases.
            if 'N' in seq:
                continue
            search_start = pat_start.search(seq)
            search_end = pat_end.search(seq)
            if search_start and search_end:
                # Keep only the region strictly between the two barcodes.
                seq_trimmed = seq[search_start.span()[1]:search_end.span()[0]]
                # seq_full = bc_1 + seq_trimmed + bc_2
                seq_full = seq_trimmed
                if len(seq_full) > 45:
                    continue
                # enc = encode(seq_trimmed)
                # x_gp.append(enc)
                # y_gp.append(i)
                # print(seq_trimmed)
                # Deduplicate while counting occurrences.
                if seq_full in unique_sequences:
                    unique_sequences[seq_full] += 1
                else:
                    unique_sequences[seq_full] = 1
                c += 1
                # if c > 100000:
                #     break
        # Encode each unique sequence for this file's group.
        for seq in unique_sequences:
            enc = encode(seq)
            X.append(enc)
            j += 1
        tmp_indices[1] = j
        X_indices.append(tmp_indices)
        print(len(unique_sequences))
    if gen_random:
        # Background group: 2M random RNA sequences of length 32-42.
        print("Random")
        tmp_indices = [j, j]
        for k in range(2000000):
            seq = generate_random_rna(random.randint(32, 42))
            enc = encode(seq)
            X.append(enc)
            j += 1
        tmp_indices[1] = j
        X_indices.append(tmp_indices)
        print(k)
    ## Add specials.
    # Reference sequences highlighted on the plots: a wild-type poly-A, a
    # selected sequence, and hits from a fixed FASTA file.
    print("Specials")
    tmp_indices = [j, j]
    wt_seq = encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
    X.append(wt_seq)
    j += 1
    selected_seq = encode("GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG")
    X.append(selected_seq)
    j += 1
    # NOTE(review): reuses pat_start/pat_end from the LAST file in the
    # fileList loop above — fails with NameError if fileList is empty.
    for h, seq in readFasta("T5_R8_hits_strict.fasta"):
        search_start = pat_start.search(seq)
        search_end = pat_end.search(seq)
        if search_start and search_end:
            seq_trimmed = seq[search_start.span()[1]:search_end.span()[0]]
            X.append(encode(seq_trimmed))
            j += 1
            print(seq_trimmed)
    tmp_indices[1] = j
    X_indices.append(tmp_indices)
    # print(X_indices[-1][0]+1)
    # exit()
    X = np.array(X)
    print(X.shape)
    print(X_indices)
    # exit()
    plt.figure(figsize=(12, 6))
    ## Look at manifolds.
    # pca = PCA(n_components=2).fit(X)
    # pca_all = TSNE(n_components=2).fit_transform(X)
    # pca = TruncatedSVD(n_components=2, n_iter=7, random_state=42).fit(X)
    # pca_all = pca.transform(X)
    # tsne = TSNE(
    #     perplexity=50,
    #     metric="cosine",
    #     initialization="pca",
    #     n_jobs=16,
    #     random_state=42,
    #     verbose=True
    # )
    # pca_fit = tsne.fit(X)
    # pca = pca_fit.transform(X)
    # Embed all groups together so their coordinates are comparable.
    tsne = TSNE(
        perplexity=50,
        metric="euclidean",
        random_seed=42,
        verbose=True
    )
    pca = tsne.fit_transform(X)
    # pca = umap.UMAP(n_neighbors=30,
    #                 n_components=2,
    #                 random_state=42)
    # pca.fit(X)
    n_col = 5
    n_row = 2
    cmap = sns.color_palette("tab10", 8)
    # One KDE subplot per group (the last group — the specials — is skipped
    # here and only used for the highlighted reference points).
    for i in range(len(X_indices) - 1):
        idx_grp = X_indices[i]
        plt.subplot(n_row, n_col, i + 1)
        # pca_all = tsne_fit.transform(x_gp)
        # pca_all = tsne.fit_transform(x_gp, pca)
        print("Plotting indices: %d:%d" % (idx_grp[0], idx_grp[1]))
        # NOTE(review): `labels == None` should be `labels is None`.
        if labels == None:
            plt.title("R%d" % (i))
        else:
            plt.title(labels[i])
        # plt.scatter(pca_all[:, 0], pca_all[:, 1], s=3, c="slategrey")
        sns.kdeplot(x=pca[idx_grp[0]:idx_grp[1], 0],
                    y=pca[idx_grp[0]:idx_grp[1], 1],
                    cmap="viridis", fill=True)
        # print(encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"))
        # wt = pca.transform(encode(bc_1 + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + bc_2).reshape(1,-1))
        # selected = pca.transform(encode(bc_1 + "GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG" + bc_2).reshape(1,-1))
        # wt = tsne.fit_transform(encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA").reshape(1,-1))
        # selected = tsne.fit_transform(encode("GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG").reshape(1,-1))
        # Mark the wild-type reference point on every subplot.
        wt_index = X_indices[-1][0]
        selected_index = X_indices[-1][0] + 1
        plt.scatter(pca[wt_index, 0], pca[wt_index, 1], s=10, c='red')
        # plt.scatter(pca[selected_index, 0], pca[selected_index, 1], s=10, c='red', marker='D')
        # motif_matches_indices = [X_indices[-1][0]+2, X_indices[-1][1]-1]
        # print(motif_matches_indices)
        # plt.scatter(pca[motif_matches_indices[0]:motif_matches_indices[1], 0], pca[motif_matches_indices[0]:motif_matches_indices[1], 1], s=10, c='pink', marker='P')
        # NOTE(review): ylim uses column 0 and xlim uses column 1 — possibly
        # a deliberate axis swap, possibly a bug; confirm before changing.
        plt.ylim(np.min(pca[:, 0]) - 5, np.max(pca[:, 0]) + 5)
        plt.xlim(np.min(pca[:, 1]) - 5, np.max(pca[:, 1]) + 5)
    plt.tight_layout()
    # plt.show(block=True)
    plt.savefig(output_file_name + '.pdf', dpi=300, bbox_inches='tight')
# NOTE(review): fragment — this chunk opens inside a save call (presumably
# torch.save({...}, path)) whose beginning is outside this view.
    'vectors': vectors,
    'labels': labels,
    'image_paths': image_paths
}, './results/{}_ep_{}.pth'.format(task_name, load_result_ep))

# Reload the saved result bundle and export a t-SNE coordinate CSV.
result_dict = torch.load('./results/{}_ep_{}.pth'.format(
    task_name, load_result_ep))
vectors = result_dict['vectors']
labels = pd.DataFrame(result_dict['labels'])
image_paths = result_dict['image_paths']
tsne_vectors = vectors
tsne_labels = labels
tsne_model = TSNE()
tsne_Y = tsne_model.fit_transform(tsne_vectors)
# Rewrite absolute image paths as ./<last three path components>.
image_paths_modified = [
    './' + os.path.join(*i.split('/')[-3:]) for i in image_paths
]
image_paths_modified[0]
vis_df = pd.DataFrame(tsne_labels)
# NOTE(review): t-SNE axes are swapped on purpose here (column 1 -> x,
# column 0 -> y) — confirm downstream consumers expect this.
vis_df['tsne_x'] = tsne_Y[:, 1]
vis_df['tsne_y'] = tsne_Y[:, 0]
vis_df['plot_id'] = vis_df['plot']
vis_df['scan_date'] = vis_df['scan_date'].astype(int)
vis_df['image_path'] = image_paths_modified
vis_df.to_csv(f'./{task_name}_ep{load_result_ep}.csv')