def t_sen_new(feature_name, layer, isNorm, n_legitimate=1680):
    """Embed the features of ``layer`` in 2-D with t-SNE and plot both classes.

    The first ``n_legitimate`` rows of the embedding are drawn as
    "legitimate" and the remaining rows as "malicious"; this assumes
    ``get_data_new`` returns the legitimate samples first (TODO confirm
    against ``get_data_new``).  The figure is written to
    ``layer + "1600_ALL"`` in JPEG format and then shown.

    Args:
        feature_name: Forwarded unchanged to ``get_data_new``.
        layer: Forwarded to ``get_data_new``; also the prefix of the saved
            file name, so it must support ``+`` with a string.
        isNorm: When truthy, min-max scale each embedding axis to [0, 1].
        n_legitimate: Number of leading rows belonging to the legitimate
            class.  Defaults to 1680, the value that used to be hard-coded.
    """
    samples = get_data_new(feature_name, layer)
    print("cal....")
    fea = TSNE(n_components=2).fit_transform(samples)
    if isNorm:
        low = fea.min(0)
        span = fea.max(0) - low
        # A constant axis would otherwise produce 0/0 = NaN coordinates.
        span[span == 0] = 1
        fea = (fea - low) / span
    print("printing....")
    plt.scatter(fea[:n_legitimate, 0], fea[:n_legitimate, 1],
                label="legitimate", c="seagreen", s=19.5)
    plt.scatter(fea[n_legitimate:, 0], fea[n_legitimate:, 1],
                label="malicious", c="orangered", s=19.5)
    plt.legend(loc='upper right', fontsize='large')
    plt.tight_layout()
    plt.savefig(layer + "1600_ALL", format='jpeg', dpi=600)
    plt.show()
def plot_quantized_embeddings(self):
    """Save a t-SNE scatter plot of the VQ-VAE codebook as ``vqvae.png``.

    The image is written below
    ``<output_image_path>/<val_set>/<load_iteration>/`` (all taken from
    ``self.args``); the directory is created when missing.
    """
    output_image_path = os.path.join(self.args.output_image_path,
                                     self.args.val_set,
                                     self.args.load_iteration)
    os.makedirs(output_image_path, exist_ok=True)

    embs = self.model.get_vqvae_embeddings()

    # Freshly initialised 64x128 codebook; only its shape is reported below.
    _embedding = nn.Embedding(64, 128)
    _embedding.weight.data.uniform_(-1 / 64, 1 / 64)
    embs_init = _embedding.weight.data.cpu()

    print('t-SNE...final {} | init {}'.format(embs.shape, embs_init.shape))
    # NOTE(review): assumes `embs` is something TSNE accepts directly
    # (array-like on the CPU) - confirm against get_vqvae_embeddings().
    proj = TSNE(n_components=2,
                init='pca',
                perplexity=50,
                random_state=self.args.seed).fit_transform(embs)
    x_min, x_max = proj.min(0), proj.max(0)
    proj = (proj - x_min) / (x_max - x_min)

    plt.scatter(proj[:, 0], proj[:, 1], alpha=0.3)
    plt.title('tSNE-{}x{}'.format(embs.shape[0], embs.shape[1]))
    plt.savefig(os.path.join(output_image_path, 'vqvae.png'))
    # Release the implicit pyplot figure (as the other plot_* methods do) so
    # that a later plot does not get drawn on top of this one.
    plt.clf()
    plt.cla()
    plt.close()
def run(self):
    """Reduce paper embeddings to 3-D with t-SNE and publish them as JSON.

    Each paper is exported once, together with its highest-scoring category
    (on equal scores the membership seen last wins).  The JSON goes to a
    local asset file when ``settings.DEBUG`` is set and to the configured S3
    bucket otherwise; afterwards every ``Paper`` is marked ``visualized``.
    """
    self.log("Starting ReduceEmbeddingDimensionality")

    paper_matrix = get_vectorizer(self._vectorizer_name).paper_matrix
    X = 0.5 * paper_matrix['abstract'] + 0.5 * paper_matrix['title']
    self.log(X.shape)

    points = scale(TSNE(n_components=3, verbose=True).fit_transform(X))
    id_map = paper_matrix['id_map']

    memberships = CategoryMembership.objects.filter(
        paper__in=paper_matrix['index_arr'])

    papers = {}
    for membership in self.progress(memberships):
        doi = membership.paper.pk
        row = id_map[doi]
        category_pk = membership.category.pk
        score = membership.score

        entry = papers.get(doi)
        if entry is None:
            papers[doi] = {
                'doi': doi,
                'title': membership.paper.title,
                'point': points[row].tolist(),
                'top_category': category_pk,
                'published_at': json.dumps(membership.paper.published_at,
                                           cls=DjangoJSONEncoder),
                'top_category_score': score
            }
        elif entry['top_category_score'] <= score:
            entry['top_category'] = category_pk
            entry['top_category_score'] = score

    output = {
        'papers': list(papers.values()),
        'means': points.mean(axis=0).tolist(),
        'max': points.max(axis=0).tolist(),
        'min': points.min(axis=0).tolist()
    }

    if not settings.DEBUG:
        client = S3BucketClient(
            aws_access_key=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
            endpoint_url=settings.AWS_S3_ENDPOINT_URL,
            bucket=settings.AWS_STORAGE_BUCKET_NAME)
        client.upload_as_json(settings.AWS_EMBEDDINGS_FILE_PATH, output)
    else:
        with open('../web/assets/embeddings_3d.json', 'w+') as f:
            json.dump(output, f)

    Paper.objects.all().update(visualized=True)
    self.log("ReduceEmbeddingDimensionality finished")
def t_sne(X, y):
    """Show a 2-D t-SNE map of ``X`` with every point drawn as its label.

    Args:
        X: Array-like of samples, converted with ``np.asarray``.
        y: Array-like of labels, one per row of ``X``.  Each label is
            rendered as text and also passed to the ``Set1`` colormap.

    Prints the wall-clock seconds spent in the t-SNE fit.
    """
    import time

    import matplotlib.pyplot as plt
    from sklearn.manifold import TSNE

    X = np.asarray(X)
    y = np.asarray(y)

    started = time.time()
    embedded = TSNE(n_components=2).fit_transform(X)
    finished = time.time()
    print(finished - started)

    # Min-max normalise each axis to [0, 1].
    lower = embedded.min(0)
    upper = embedded.max(0)
    scaled = (embedded - lower) / (upper - lower)

    text_style = {'weight': 'bold', 'size': 9}
    plt.figure(figsize=(8, 8))
    for row, (px, py) in enumerate(scaled):
        label = y[row]
        plt.text(px, py, str(label), color=plt.cm.Set1(label),
                 fontdict=text_style)
    plt.xticks([])
    plt.yticks([])
    plt.show()
def plot_static_embeddings(self, output_path):
    """Save a t-SNE scatter plot of static embeddings to ``output_path``.

    Up to ``self.args.max_samples`` randomly chosen segments of the speakers
    in ``self.sampled_speakers`` are embedded with
    ``self.model.get_static_embeddings``.  Female speakers are drawn with
    ``x`` markers and male speakers with ``o``; the colour encodes the
    speaker via ``self.speaker_index``.

    Args:
        output_path: Destination passed to ``plt.savefig``.
    """
    # The speaker id is the first len('p000') characters of the utterance key.
    speaker_len = len('p000')

    # Keep only segments of the sampled speakers, then take a random subset.
    small_indexes = [
        index for index in self.indexes
        if index[0][:speaker_len] in self.sampled_speakers
    ]
    random.shuffle(small_indexes)
    small_indexes = small_indexes[:self.args.max_samples]

    tensor = [
        self.pkl_data[key][t:t + self.config.segment_size]
        for key, t, _, _, _ in small_indexes
    ]
    speakers = [key[:speaker_len] for key, _, _, _, _ in small_indexes]

    # Insert a channel dimension at position 1.
    tensor = torch.from_numpy(np.array(tensor)).unsqueeze(dim=1)
    dataloader = DataLoader(TensorDataset(tensor),
                            batch_size=20,
                            shuffle=False,
                            num_workers=0)

    all_embs = []
    for data in dataloader:
        embs = self.model.get_static_embeddings(cc(data[0]))
        # extend() appends in place; `all_embs = all_embs + ...` re-copied
        # the whole list on every batch.
        all_embs.extend(embs.detach().cpu().numpy().tolist())
    all_embs = np.array(all_embs)
    print(all_embs.shape)

    embs_2d = TSNE(n_components=2, init='pca',
                   perplexity=50).fit_transform(all_embs)
    x_min, x_max = embs_2d.min(0), embs_2d.max(0)
    embs_norm = (embs_2d - x_min) / (x_max - x_min)

    female_cluster = [
        i for i, speaker in enumerate(speakers)
        if self.speaker2gender[speaker] == 'F'
    ]
    male_cluster = [
        i for i, speaker in enumerate(speakers)
        if self.speaker2gender[speaker] == 'M'
    ]
    colors = np.array([self.speaker_index[speaker] for speaker in speakers])

    plt.scatter(embs_norm[female_cluster, 0],
                embs_norm[female_cluster, 1],
                c=colors[female_cluster],
                marker='x')
    plt.scatter(embs_norm[male_cluster, 0],
                embs_norm[male_cluster, 1],
                c=colors[male_cluster],
                marker='o')
    plt.savefig(output_path)
    # Release the implicit pyplot figure so the next plot starts clean.
    plt.clf()
    plt.cla()
    plt.close()
    return
def coords_func(lib, opts, args):
    """Compute 2-D t-SNE coordinates per track and store them in the database.

    ``opts.type`` selects how a fixed-size vector is derived from each
    track's pickled sequence features:

    * ``"mean"`` - mean over the middle 80% of the sequence (20 values);
    * ``"lstm"`` - output of the configured ``LSTMSeq2Seq`` model on the
      sequence padded to 150 steps.

    The ``coords`` table is emptied and refilled with one
    ``(key, x, y)`` row per track.  Library items without features are then
    handed to ``self.fill`` together with the coordinate bounds.

    NOTE(review): ``self`` is not a parameter; this function presumably
    lives inside a method and closes over ``self`` - confirm.

    Args:
        lib: Library object; only ``lib.items()`` is used.
        opts: Parsed options; only ``opts.type`` is used.
        args: Unused.
    """
    if opts.type not in ("mean", "lstm"):
        print("Provide a valid --type [mean, lstm]")
        if opts.type:
            # An explicit but unknown type is an error; a missing one is not.
            sys.exit(1)
        return

    seq_features_file = config["blackbird"]["seq_features"].get("unicode")
    # NOTE(review): unpickling can execute arbitrary code - the configured
    # file must come from a trusted source.
    with open(seq_features_file, "rb") as handle:
        seq_features = cPickle.load(handle)
    # Materialised in iteration order so keys[i] matches features[i].
    keys = list(seq_features)

    if opts.type == "mean":
        features = np.empty((len(seq_features), 20))
        for idx, key in enumerate(seq_features):
            length = seq_features[key].shape[1]
            # Ignore the first and last 10% of the sequence.
            features[idx, :] = seq_features[key][
                :, int(0.1 * length):int(0.9 * length)].mean(axis=1)
    else:
        print("Loading network...")
        model = LSTMSeq2Seq(
            config["blackbird"]["lstm"]["arch"].get("unicode"),
            config["blackbird"]["lstm"]["weights"].get("unicode"),
            config["blackbird"]["lstm"]["output"].get())
        # Pad sequences
        maxlen = 150
        padded_seq_features = np.empty((len(seq_features), maxlen, 20))
        for idx, key in enumerate(seq_features):
            padded_seq_features[idx, :, :] = sequence.pad_sequences(
                seq_features[key], maxlen=maxlen, dtype="float32").T
        print("Getting vectors...")
        features = model.predict(padded_seq_features)

    print("Reducing dimensions...")
    features_2d = TSNE(n_components=2).fit_transform(features)

    print("Writing to db...")
    to_insert = [(key, point[0], point[1])
                 for key, point in zip(keys, features_2d)]
    conn = sqlite3.connect(config["blackbird"]["db"].get("unicode"))
    try:
        cur = conn.cursor()
        cur.execute("DELETE FROM coords")
        cur.executemany("INSERT INTO coords VALUES (?, ?, ?)", to_insert)
        conn.commit()
    finally:
        # Close the connection even when a statement fails.
        conn.close()

    # Fill leftovers
    known = set(keys)
    ids_to_fill = [item.id for item in lib.items() if item.id not in known]
    self.fill(ids_to_fill, features_2d.min(axis=0), features_2d.max(axis=0))
def calculate_projection(self):
    """Project ``self.data_vectors`` to 2-D and store the min-max scaled result.

    ``self._perform_svd()`` runs first.  ``self.method`` then picks the
    reducer: ``SKLEARN`` and ``MAATEN`` select the two t-SNE
    implementations, and any other value falls back to UMAP.  The result is
    shifted so each column starts at 0, divided by its column maximum, and
    saved as ``self.projection_vectors``.
    """
    self._perform_svd()

    method = self.method
    if method == SKLEARN:
        reducer = SKLEARN_TSNE(n_components=2, perplexity=40, verbose=2)
        projected = reducer.fit_transform(self.data_vectors)
    elif method == MAATEN:
        projected = MATTENS_TSNE(self.data_vectors,
                                 no_dims=2,
                                 initial_dims=self.data_vectors.shape[1],
                                 perplexity=40.0)
    else:
        reducer = UMAP_PROJECTION(n_neighbors=5, min_dist=0.3)
        projected = reducer.fit_transform(self.data_vectors)

    # Scale in place, exactly as before: shift to zero, then divide by max.
    projected -= projected.min(axis=0)
    projected /= projected.max(axis=0)
    self.projection_vectors = projected
def plot_speaker_embeddings(self):
    """Save a t-SNE scatter plot of utterance-level speaker embeddings.

    Every utterance of every speaker in ``self.samples`` is embedded with
    ``self.model.get_speaker_embeddings``, L2-normalised and reduced to 2-D.
    Female speakers are drawn with ``x`` and male speakers with ``o``; the
    colour encodes the speaker via ``self.samples_index``.  The image is
    saved as ``speaker.png`` under
    ``<output_image_path>/<val_set>/<load_iteration>/``.
    """
    out_dir = os.path.join(self.args.output_image_path, self.args.val_set,
                           self.args.load_iteration)
    os.makedirs(out_dir, exist_ok=True)

    # One speaker entry per utterance, aligned with `utts`.
    speakers = []
    utts = []
    for speaker in self.samples:
        speaker_utts = self.indexes[speaker]
        speakers.extend([speaker] * len(speaker_utts))
        utts.extend(speaker_utts)

    if self.args.model_type == 'AdaVAEd':
        use_spec = 'dmel'
    else:
        use_spec = 'mel'
    dataset = EvaluateDateset(os.path.join(self.args.data_dir,
                                           self.args.dataset),
                              speakers,
                              utts,
                              segment_size=None,
                              load_spectrogram=use_spec)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)

    embs = []
    total = len(dataloader)
    for batch in dataloader:
        emb = self.model.get_speaker_embeddings(cc(batch['spectrogram']))
        embs.extend(emb.detach().cpu().numpy().tolist())
        print('Evaluate: {}/{}'.format(len(embs), total), end='\r')

    # Scale every embedding to unit length.
    embs = np.array(embs)
    norms = np.sqrt(np.sum(embs ** 2, axis=1, keepdims=True))
    embs = embs / norms

    print('\nt-SNE...')
    embs_2d = TSNE(n_components=2, init='pca',
                   perplexity=50).fit_transform(embs)
    lower, upper = embs_2d.min(0), embs_2d.max(0)
    embs_2d = (embs_2d - lower) / (upper - lower)

    genders = [self.speaker_infos[speaker][1] for speaker in speakers]
    colors = np.array([self.samples_index[speaker] for speaker in speakers])
    for gender, marker in (('F', 'x'), ('M', 'o')):
        rows = [i for i, value in enumerate(genders) if value == gender]
        plt.scatter(embs_2d[rows, 0],
                    embs_2d[rows, 1],
                    c=colors[rows],
                    marker=marker)

    plt.savefig(os.path.join(out_dir, 'speaker.png'))
    plt.clf()
    plt.cla()
    plt.close()
    return
def show_result(self, labels):
    """Show a t-SNE map of ``self.train_set`` with points drawn as labels.

    Args:
        labels: Indexable with one cluster label per row of
            ``self.train_set``.  Each label is rendered as text and also
            passed to the ``Set1`` colormap.
    """
    print("Visualizing clustering result...")
    embedded = TSNE().fit_transform(self.train_set)

    # Min-max normalise each axis to [0, 1].
    lower = embedded.min(0)
    upper = embedded.max(0)
    scaled = (embedded - lower) / (upper - lower)

    plt.figure(figsize=(8, 8))
    for row, (px, py) in enumerate(scaled):
        label = labels[row]
        plt.text(px, py, str(label), color=plt.cm.Set1(label))
    plt.xticks([])
    plt.yticks([])
    plt.show()
def tsne_show(data, labels, title=''):
    """Show a t-SNE scatter plot of the larger classes in ``data``.

    Features are grouped by label (labels in sorted order); only the first
    3000 grouped rows are embedded.  Points are drawn only for classes that
    have at least 20 features, and drawing stops once the running class
    colour counter reaches 50.

    Args:
        data: Features, indexed with the boolean mask ``labels == lb`` -
            assumes both ``data`` and ``labels`` are NumPy arrays (TODO
            confirm).
        labels: One label per row of ``data``.
        title: Prefix for the figure title.
    """
    print("start tsne analysis...")
    sorted_ftrs = []
    uniq_labels = np.array(sorted(list(set(labels))))
    # Number of features per class, in the order of uniq_labels.
    all_cls_ft_num = np.zeros(len(uniq_labels))
    # color_list = list(colors._colors_full_map.values())
    # Named matplotlib colours, ordered by their HSV value.
    colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
    by_hsv = sorted(
        (tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3])), name)
        for name, color in colors.items())
    color_list = [name for hsv, name in by_hsv]
    # Regroup the features so that rows of the same class are contiguous.
    for i, lb in enumerate(uniq_labels):
        corr_ftrs = data[labels == lb]
        all_cls_ft_num[i] = len(corr_ftrs)
        sorted_ftrs.extend(corr_ftrs)
    sorted_ftrs = np.array(sorted_ftrs)
    # Only the first 3000 grouped rows are embedded.
    ft_tsne = TSNE(init='pca').fit_transform(sorted_ftrs[:3000])
    # Min-max normalise each axis to [0, 1].
    ft_min, ft_max = ft_tsne.min(0), ft_tsne.max(0)
    ft_norm = (ft_tsne - ft_min) / (ft_max - ft_min)
    j = 0  # index of the class the current row belongs to
    gd_cls = 0  # running counter used to pick the colour
    lst_gd_cls = 0
    cls_ft_num = all_cls_ft_num[j]  # exclusive end row of class j
    plt.figure(figsize=(10, 10))
    for i in range(ft_norm.shape[0]):
        # Advance to the next class once row i is past the current class.
        if i >= cls_ft_num:
            j += 1
            cls_ft_num += all_cls_ft_num[j]
        # Classes with fewer than 20 features are not drawn.
        if all_cls_ft_num[j] >= 20:
            if j > lst_gd_cls:
                gd_cls += 1
            plt.scatter(ft_norm[i, 0],
                        ft_norm[i, 1],
                        color=color_list[3 * gd_cls])
            # NOTE(review): this stores the colour counter, not j, while the
            # test above compares it with j.  Once a small class has been
            # skipped, j stays ahead of gd_cls and the colour would advance
            # on every point - `lst_gd_cls = j` may have been intended.
            # Confirm before changing.
            lst_gd_cls = gd_cls
            if gd_cls >= 50:
                break
        # plt.scatter(ft_norm[i, 0], ft_norm[i, 1], color=color_list[j])
        # if j >= 20:
        #     break
    print(j)
    plt.title(title + 'resnet >=20 * 50')
    plt.show()
def distMat2colors(dm, lab=False):
    """Map the items of a distance matrix to colours via a 3-D t-SNE embedding.

    The matrix is embedded in three dimensions, the embedding is rescaled
    with one global minimum/maximum so every coordinate lies in [0, 100],
    and those values are interpreted as CIELAB components.

    Args:
        dm: Precomputed pairwise distance matrix (passed to TSNE with
            ``metric="precomputed"``).
        lab: When true, return the rescaled embedding itself; otherwise
            convert it with ``lab2rgb`` and return RGB.

    Returns:
        An ``(n, 3)`` array: the [0, 100] embedding if ``lab`` is true, else
        the ``lab2rgb`` output multiplied by 255.

    The intermediate arrays are printed for inspection.
    """
    from skimage.color import lab2rgb
    from sklearn.manifold import TSNE

    # init="random" is spelled out because a precomputed metric cannot be
    # combined with PCA initialisation, which is the default in newer
    # scikit-learn releases.
    dm_3dim = TSNE(n_components=3, metric="precomputed",
                   init="random").fit_transform(dm)
    n = dm_3dim.shape[0]

    # One global min/max keeps the relative scale of the three axes.
    b, t = dm_3dim.min(), dm_3dim.max()
    span = t - b
    if span == 0:
        # Degenerate embedding (all values equal): avoid 0/0.
        span = 1.0
    valid_lab = (dm_3dim - b) * np.array([100.0, 100, 100]) / span

    if lab:
        print("lab[0-1]")
        print(valid_lab)
        print("3dim")
        print(dm_3dim)
        return valid_lab

    rgb2 = lab2rgb(valid_lab.reshape(n, 1, 3)).reshape(n, 3)
    print("rgb2")
    print(rgb2)
    print("rgb2[0-255]")
    # Scale by 255, not 256: a full channel of 1.0 must map to 255.
    rgb_255 = rgb2 * 255
    print(rgb_255)
    return rgb_255
def _tsne_visualize(self, embeds, labels):
    """Plot a 2-D t-SNE map of ``embeds`` coloured by label.

    Args:
        embeds: Samples passed to ``TSNE.fit_transform``.
        labels: One label per sample; each must be convertible with
            ``int``.

    Returns:
        The matplotlib ``Figure`` holding the plot.
    """
    # plt.subplots() returns (figure, axes), in that order.
    fig, ax = plt.subplots()

    tsne_vecs = TSNE(n_components=2).fit_transform(embeds)
    # Divide by the largest magnitude so that every point lies in [-1, 1];
    # dividing by the signed maximum left negative outliers off-canvas.
    peak = np.abs(tsne_vecs).max()
    if peak > 0:
        tsne_vecs = tsne_vecs / peak

    ax.set_xlim([-1.1, 1.1])
    ax.set_ylim([-1.1, 1.1])

    # A private, fixed-seed generator gives the same colours on every call
    # without resetting the global `random` state.
    rng = random.Random(0)
    colors = {
        int(label):
        "#" + ''.join([rng.choice('0123456789ABCDEF') for _ in range(6)])
        for label in np.unique(labels)
    }

    # add dots in plot
    for (x, y), label in zip(tsne_vecs, labels):
        ax.plot(x,
                y,
                color=colors[int(label)],
                marker='o',
                linestyle='dashed',
                linewidth=2,
                markersize=4)

    # add colorized legend
    handles = [
        mpatches.Patch(color=color, label=f"id_{label}")
        for label, color in colors.items()
    ]
    legend = ax.legend(handles=handles, loc='upper right')
    for legend_text, color in zip(legend.get_texts(), colors.values()):
        plt.setp(legend_text, color=color)

    return fig
#%%
# Show the first 11 channels of the 'Conv4_3' output for test sample 5; the
# input image itself goes into the last panel of the 3x4 grid.
dense1_layer_model = Model(inputs=model.input,
                           outputs=model.get_layer('Conv4_3').output)  # Conv4_3 Conv1_2
dense1_output = dense1_layer_model.predict(test_data[5:6, :, :, :])

f, ax = plt.subplots(3, 4, figsize=(30, 10))
f.subplots_adjust(wspace=0.1, hspace=0.1)
for i in range(11):
    row, col = divmod(i, 4)
    panel = ax[row, col]
    panel.imshow(dense1_output[0, :, :, i], cmap='gray')
    panel.axis('off')
panel = ax[2, 3]
panel.imshow(np.squeeze(test_data[5:6, :, :, :]), cmap='gray')
panel.axis('off')

#%%
# Embed the 'fc3' activations of the whole test set with 3-component t-SNE.
fc_layer_model = Model(inputs=model.input,
                       outputs=model.get_layer('fc3').output)  # fc1 fc2
fc_output = fc_layer_model.predict(test_data)

from sklearn.manifold import TSNE
X_tsne = TSNE(n_components=3).fit_transform(fc_output)

#%%
# Min-max normalise each axis, then draw every sample as its true label
# using the first two embedding axes.
x_min, x_max = X_tsne.min(0), X_tsne.max(0)
X_norm = (X_tsne - x_min) / (x_max - x_min)

plt.figure(figsize=(8, 8))
for i, (px, py) in enumerate(X_norm[:, :2]):
    plt.text(px,
             py,
             str(true_label[i]),
             color=plt.cm.Set1(true_label[i]),
             fontdict={'weight': 'bold', 'size': 9})
plt.xticks([])
plt.yticks([])
plt.show()
image_names = [image_names[i] for i in indx] data_vectors = data_vectors[indx, :] ## load subset of images, resize imdata = [] for im in image_names: temp = Image.open(im) temp.thumbnail([100, 100]) imdata.append(np.array(temp)) imdata = np.array(imdata) ## run tsne on fasttext pca vectors embeddings = TSNE(init='pca', verbose=2, random_state=200).fit_transform(data_vectors) embeddings -= embeddings.min(axis=0) embeddings /= embeddings.max(axis=0) ## plot scatter t-sne plt.figure(figsize=(17, 9)) plt.scatter(embeddings[:, 0], embeddings[:, 1], c=indx) cb = plt.colorbar(fraction=0.05, pad=0.0125) plt.xticks([]) plt.yticks([]) # plot images as scatter t-sne plt.figure(figsize=(24, 12)) plt.gca().set_facecolor("black") for pos, img in zip(embeddings, imdata): ab = AnnotationBbox(OffsetImage(img), 0.03 + pos * 0.94, xycoords="axes fraction",
def plot_transfer_embeddings(self):
    """Save a t-SNE scatter plot of speaker embeddings of converted speech.

    For every utterance of every speaker in ``self.samples`` the model
    converts a source spectrogram towards the target speaker
    (``self.model.patch``), and the converted spectrogram is embedded again
    with ``self.model.get_speaker_embeddings``.  Target and converted
    embeddings are L2-normalised and projected together, but only the
    converted ones are drawn.  The image is saved as ``transfer.png`` under
    ``<output_image_path>/<val_set>/<load_iteration>/``.
    """
    output_image_path = os.path.join(self.args.output_image_path,
                                     self.args.val_set,
                                     self.args.load_iteration)
    os.makedirs(output_image_path, exist_ok=True)

    # One speaker entry per utterance, aligned with `utts`.
    speakers = []
    utts = []
    for speaker in self.samples:
        speakers += [speaker] * len(self.indexes[speaker])
        utts += self.indexes[speaker]

    dataset = TransferDateset(os.path.join(self.args.data_dir,
                                           self.args.dataset),
                              speakers,
                              utts,
                              self.indexes,
                              segment_size=None)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)

    embs = []
    embs_tran = []
    for data in dataloader:
        spec_tar = cc(data['tar'])
        spec_src = cc(data['src'])
        # The target embedding stored below is the one returned by patch().
        # The former extra get_speaker_embeddings(spec_tar) call (made with
        # gradients enabled) was overwritten before use, and data['tar_dmel']
        # was transferred but never read; both were dropped.
        with torch.no_grad():
            mu, emb, spec_tran = self.model.patch(spec_src, spec_tar)
            if self.args.model_type == 'AdaVAEGAN':
                spec_tran = spec_tran + self.patch_model(mu, emb)
            emb_tran = self.model.get_speaker_embeddings(spec_tran)
        embs += emb.detach().cpu().numpy().tolist()
        embs_tran += emb_tran.detach().cpu().numpy().tolist()
        print('Evaluate: {}/{}'.format(len(embs), len(dataloader)), end='\r')

    # Stack target embeddings first, converted embeddings second, and scale
    # every row to unit length.
    embs_all = np.array(embs + embs_tran)
    norms = np.sqrt(np.sum(embs_all**2, axis=1, keepdims=True))
    embs_all = embs_all / norms

    # t-SNE
    print('\nt-SNE...')
    embs_2d = TSNE(n_components=2, init='pca',
                   perplexity=50).fit_transform(embs_all)
    x_min, x_max = embs_2d.min(0), embs_2d.max(0)
    embs_2d = (embs_2d - x_min) / (x_max - x_min)
    # Rows after the first len(embs) belong to the converted utterances.
    embs_2d_tran = embs_2d[len(embs):]

    # NOTE(review): the sibling plot_* methods read the gender from
    # speaker_infos[speaker][1]; this one reads index 0 - confirm which
    # field holds 'F'/'M' here.
    female_cluster = [
        i for i, speaker in enumerate(speakers)
        if self.speaker_infos[speaker][0] == 'F'
    ]
    male_cluster = [
        i for i, speaker in enumerate(speakers)
        if self.speaker_infos[speaker][0] == 'M'
    ]
    colors = np.array([self.samples_index[speaker] for speaker in speakers])

    plt.scatter(embs_2d_tran[female_cluster, 0],
                embs_2d_tran[female_cluster, 1],
                c=colors[female_cluster],
                marker='x')
    plt.scatter(embs_2d_tran[male_cluster, 0],
                embs_2d_tran[male_cluster, 1],
                c=colors[male_cluster],
                marker='o')
    plt.savefig(os.path.join(output_image_path, 'transfer.png'))
    plt.clf()
    plt.cla()
    plt.close()
    return
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # initialize network net.init(sess) # preprare tsne with open('mukai_dataset.pickle', mode='rb') as f: data_set = pickle.load(f) # 100で割ってあるやつ X_reduced = TSNE(n_components=2, random_state=0, perplexity=perp).fit_transform(data_set) X = torch.tensor(data_set, requires_grad=True, dtype=torch.double) Y = torch.tensor(X_reduced, requires_grad=True, dtype=torch.double) ini_set = { 'area_min': X_reduced.min(axis=0), 'area_max': X_reduced.max(axis=0) } with open(mb + '/initial_setting.pickle', mode='wb') as f: pickle.dump(ini_set, f) # server mode s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(("127.0.0.1", port)) while True: print("waitnig... port:{}".format(port)) s.listen(1) cli, addr = s.accept() print('connected from ', str(addr)) data = np.array([]) for _ in range(113): new = cli.recv(8 * 2048)
from numpy import array, dot, diag, nan_to_num
from numpy.random import randn
import sys

# Export a 2-D view of the samples for one feature combination.
#
# sys.argv[1] is a string of '0'/'1' flags, one per entry of `features`,
# selecting the columns to use.  The selected columns are min-max scaled;
# with more than two of them they are reduced to 2-D with t-SNE and scaled
# again.  The result is written to exports/p_file_<combination>.csv as
# "id,x,y" rows.
features = 'CADD1,CADD2,RecA,EssA,CADD3,CADD4,RecB,EssB,Path'.split(',')
df_data = pd.read_csv("dida_posey_to_predict.csv")

combination = sys.argv[1]
X = array(df_data[features])
X = X[:, [c == '1' for c in combination]]
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
# A constant column becomes 0/0 = NaN above; clear it before t-SNE, which
# does not accept NaN input.
X = nan_to_num(X)
if len(X.T) > 2:
    X = TSNE(n_components=2, init="pca").fit_transform(X)
    X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
X = nan_to_num(X)

df_data_vs = df_data.copy(False)
df_data_vs['x'] = X[:, 0]
# With a single selected feature there is no second axis; use 0.
df_data_vs['y'] = X[:, 1] if len(X.T) > 1 else 0
# Keyword form: the positional `axis` argument is gone in pandas 2.x.
df_data_vs = df_data_vs.drop(columns='Pair')

with open("exports/p_file_" + combination + ".csv", "w") as out:
    out.write('id,x,y\n')
    for line in array(df_data_vs):
        # First column is the id; the last two are the x and y added above.
        out.write(','.join(map(str, line[[0, -2, -1]])) + '\n')
alpha = 0.5 hit = np.append(train.rowmeta['Tissue'] == tissue, valid.rowmeta['Tissue'] == tissue) ax.plot(T[hit, 0], T[hit, 1], linestyle='None', linewidth=0, marker='o', markerfacecolor=color, markeredgecolor=color, markersize=2, markeredgewidth=0, alpha=alpha, zorder=zorder, label=tissue) ax.set_xlim(T.min(0)[0], 1.5 * (T.max(0)[0] - T.min(0)[0]) - T.max(0)[0]) #ax.set_ylim(T.min(0)[1], T.max(0)[1]+1*(T.max(0)[1]-T.min(0)[1])) ax.legend(loc='best', ncol=2, numpoints=1, markerscale=2, fontsize=8, labelspacing=0.1) ax.tick_params(axis='both', which='major', bottom='off', top='off', labelbottom='off', labeltop='off', left='off', right='off',
def plot_segment_embeddings(self):
    """Save a t-SNE scatter plot of segment-level speaker embeddings.

    Batches of fixed-size segments are drawn from an endless iterator over
    the utterances of ``self.samples`` until at least
    ``self.args.n_segments`` embeddings are collected.  The embeddings are
    L2-normalised, reduced to 2-D and drawn with ``x`` for female and ``o``
    for male speakers, coloured by ``self.samples_index``.  The image is
    saved as ``segment.png`` under
    ``<output_image_path>/<val_set>/<load_iteration>/``.
    """
    out_dir = os.path.join(self.args.output_image_path, self.args.val_set,
                           self.args.load_iteration)
    os.makedirs(out_dir, exist_ok=True)

    # One speaker entry per utterance, aligned with `utts`.
    speakers = []
    utts = []
    for speaker in self.samples:
        speaker_utts = self.indexes[speaker]
        speakers.extend([speaker] * len(speaker_utts))
        utts.extend(speaker_utts)

    dataset = EvaluateDateset(
        os.path.join(self.args.data_dir, self.args.dataset),
        speakers,
        utts,
        segment_size=self.config['data_loader']['segment_size'],
        load_spectrogram='dmel')
    dataloader = DataLoader(dataset,
                            batch_size=128,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=True)
    batchiter = infinite_iter(dataloader)

    # From here on `speakers` lists the speaker of each collected segment.
    embs = []
    speakers = []
    wanted = self.args.n_segments
    while len(embs) < wanted:
        batch = next(batchiter)
        speakers.extend(batch['speaker'])
        spec = cc(batch['spectrogram'].permute(0, 2, 1))
        emb = self.model.get_speaker_embeddings(spec)
        embs.extend(emb.detach().cpu().numpy().tolist())
        print('Evaluate: {}/{}'.format(len(embs), self.args.n_segments),
              end='\r')

    # Scale every embedding to unit length.
    embs = np.array(embs)
    norms = np.sqrt(np.sum(embs**2, axis=1, keepdims=True))
    embs = embs / norms

    print('\nt-SNE...')
    embs_2d = TSNE(n_components=2, init='pca',
                   perplexity=50).fit_transform(embs)
    lower, upper = embs_2d.min(0), embs_2d.max(0)
    embs_2d = (embs_2d - lower) / (upper - lower)

    genders = [self.speaker_infos[speaker][1] for speaker in speakers]
    colors = np.array([self.samples_index[speaker] for speaker in speakers])
    for gender, marker in (('F', 'x'), ('M', 'o')):
        rows = [i for i, value in enumerate(genders) if value == gender]
        plt.scatter(embs_2d[rows, 0],
                    embs_2d[rows, 1],
                    c=colors[rows],
                    marker=marker)

    plt.savefig(os.path.join(out_dir, 'segment.png'))
    plt.clf()
    plt.cla()
    plt.close()
    return
from sklearn.manifold import TSNE
import numpy as np
import json

# Embed the papers' view matrices with t-SNE and write the coordinates,
# rescaled to [-1, 1] per axis, to tsne.json.

with open('mv_data.json') as f:
    j = json.load(f)


def distance(a, b):
    """Return the norm of the difference of two flattened 40x40 matrices."""
    delta = a.reshape((40, 40)) - b.reshape((40, 40))
    return np.linalg.norm(delta)


# One flattened view matrix per paper.
data = np.array(
    [np.array(paper['viewMatrix']).flatten() for paper in j['papers']])

embed = TSNE(metric=distance).fit_transform(data)

# Min-max scale each axis to [0, 1], then stretch to [-1, 1].
lowest = embed.min(axis=0)
embed = (embed - lowest) / (embed.max(axis=0) - lowest)
embed = embed * 2 - 1

with open('tsne.json', 'w') as f:
    f.write(json.dumps(embed.tolist()))
def convert_to_dict(clusters_to_filter, ru_idfs, fi_idfs, start_time):
    """Turn the interesting clusters into a time-sorted list of update events.

    The clusters kept by ``filter_interesting_clusters`` are placed in a 2-D
    t-SNE space (cosine metric on ``center / norm``) rescaled to [-1, 1] per
    axis.  For every hourly step of a cluster one event dict is emitted:
    the first step carries a full description under ``'n'`` (including the
    t-SNE position), later steps an update under ``'u'``.

    Args:
        clusters_to_filter: Clusters, or a dict whose values are clusters.
        ru_idfs: Selected for clusters whose ``lang`` is ``'ru'``.
        fi_idfs: Selected for all other clusters.
        start_time: Reference time; presumably milliseconds, like
            ``created_at``, given the ``/ 3600 / 1000`` below - TODO confirm.

    Returns:
        A list of event dicts sorted by their ``'t'`` timestamp; empty when
        fewer than five clusters are kept.
    """
    print(start_time)
    if isinstance(clusters_to_filter, dict):
        clusters_to_filter = clusters_to_filter.values()
    clusters_to_save = filter_interesting_clusters(clusters_to_filter)
    json_formatted = []
    cdata = [c.center / c.norm for c in clusters_to_save]
    # With fewer than five clusters no embedding is computed.
    if len(cdata) < 5:
        return json_formatted
    t_sne_space = TSNE(n_components=2, metric='cosine').fit_transform(cdata)
    # normalize T-SNE space to -1 to 1
    minimums = t_sne_space.min(axis=0)
    maximums = t_sne_space.max(axis=0)
    for v in t_sne_space:
        v[0] = 2 * (v[0] - minimums[0]) / (maximums[0] - minimums[0]) - 1
        v[1] = 2 * (v[1] - minimums[1]) / (maximums[1] - minimums[1]) - 1
    for cluster_index in range(len(clusters_to_save)):
        c = clusters_to_save[cluster_index]
        # NOTE(review): `idfs` is only referenced by the commented-out
        # get_keywords call further down.
        idfs = ru_idfs if c.lang == 'ru' else fi_idfs
        # TODO remove temporary filtering
        # if (c.created_at < (start_time - len(c.hourly_growth_rate) * 3600 * 1000) ): #1405555200000): # 17/07/2014 00:00:00 continue
        #if (c.created_at < 1503014400000): # 18/08/2017 00:00:00
        #continue
        if len(c.hourly_growth_rate) < 1:
            continue
        # Hour offset of the cluster's creation relative to start_time,
        # but never before hour 1.
        start_idx = max(int((c.created_at - start_time) / 3600 / 1000), 1)
        for i in range(start_idx, len(c.hourly_growth_rate)):
            update = {}
            # timestamp
            update['t'] = int(c.first_growth_time / 1000) + i * 60 * 60
            # start with a new cluster event
            if i == start_idx:
                # Last entries of the accumulated sentiment and tag series.
                total_sentiment = c.hourly_accum_sentiment[
                    len(c.hourly_accum_sentiment) - 1]
                tags = c.hourly_tags[len(c.hourly_tags) - 1]
                if tags is not None:
                    # Use the override label when one exists, otherwise the
                    # title-cased tag.
                    tags = [
                        tag_label_overrides.get(t, t.title()) for t in tags
                    ]
                #get_keywords(c, idfs)[:4],
                update['n'] = {c.id: \
                    { \
                        's': round(c.hourly_growth_rate[i]), \
                        'k': c.hourly_keywords[i], \
                        'lang': c.lang, \
                        'sentiment': round(c.hourly_sentiment[i], 3), \
                        'sentiment_total': round(total_sentiment, 3), \
                        'tags': tags if tags is not None else [], \
                        't_sne': [float(t_sne_space[cluster_index][0]), \
                                  float(t_sne_space[cluster_index][1])] \
                    } \
                }
            # NOTE(review): range() stops at len(c.hourly_growth_rate) - 1,
            # so this condition never holds and the end marker is never
            # emitted.
            elif i == len(c.hourly_growth_rate):
                # insert a negative number at the end to mark the end of the cluster
                update['u'] = {c.id: {'s': -1}}
            else:
                update['u'] = {
                    c.id: {
                        's': int(round(c.hourly_growth_rate[i])),
                        'sentiment': round(c.hourly_sentiment[i], 3),
                        'sentiment_accum': round(c.hourly_accum_sentiment[i],
                                                 3),
                        'k': c.hourly_keywords[i - 1]
                    }
                }
            json_formatted.append(update)
    json_formatted.sort(key=lambda update: update['t'])
    return json_formatted
x = conv_net(x) x = x.contiguous().view(x.shape[0], -1) x = fc_net[0](x) x = fc_net[1](x) x = fc_net[2](x) if opt.layer_idx >= 1: x = fc_net[3](x) x = fc_net[4](x) x = torch.nn.functional.softmax(x, dim=-1) features.append(copy.deepcopy(x.detach())) labels.append(copy.deepcopy(y)) features = torch.cat(features, dim=0) labels = torch.cat(labels, dim=0) Y = TSNE(init='pca').fit_transform(features[:800].numpy()) labels = labels[:800].numpy() #for i in range(26): # plt.scatter(Y[labels==i, 0], Y[labels==i, 1], 20, color=(float(i)/26, 0, 0)) letters = list(string.ascii_letters[-26:]) Y = (Y - Y.min(0)) / (Y.max(0) - Y.min(0)) #plt.legend(string.ascii_letters[-26:]) #plt.scatter(Y[:, 0], Y[:, 1], 5, c=labels, cmap='Spectral') #plt.colorbar(boundaries=np.arange(27)-0.5).set_ticks(np.arange(26)) for i in range(len(labels)): c = plt.cm.rainbow(float(labels[i]) / 26) plt.text(Y[i, 0], Y[i, 1], s=letters[labels[i]], color=c) plt.savefig(os.path.join(tsne_dir, 'tsne_%d.jpg' % layer_idx), dpi=300) plt.show() print('Results are saved as {}'.format( os.path.join(tsne_dir, 'tsne_%d.jpg' % opt.layer_idx)))
ax_hsv.set_title("HSV Channels") ax_hsv.set_xlabel("H Channel") ax_hsv.set_ylabel("S Channel") ax_hsv.set_zlabel("V Channel") ax_hsv.legend(custom_markers, ["No Skin", "Skin"]) # Using dimensionality reduction to transform a 6D dataset to a 3D dataset. tsne = TSNE(n_components=3).fit_transform(color_data) # Decision boundary of RGB subset. logreg.fit(tsne, labels) intercept = logreg.intercept_[0] coeff = logreg.coef_[0] tmp = np.linspace(tsne.min(), tsne.max(), 50) x, y = np.meshgrid(tmp, tmp) # Plot the RGB+HSV dataset in 3D. fig = plt.figure() ax_tsne = fig.add_subplot(111, projection="3d") ax_tsne.plot_surface(x, y, z(x, y), alpha=0.2) for i, c, m in zip(range(2), ("r", "b"), ("o", "^")): xs = tsne[:, 0][labels == i] ys = tsne[:, 1][labels == i] zs = tsne[:, 2][labels == i] ax_tsne.scatter(xs, ys, zs, c=c, marker=m) ax_tsne.set_title("3D t-SNE") ax_tsne.set_xlabel("X")
def main_train(self):
    """Arrange the dataset's images on a square grid ordered by t-SNE.

    Steps, all inside one TensorFlow session:
      1. load the images under ``self.datadir`` and the model in
         ``self.modeldir``;
      2. compute embeddings for the first batch of images;
      3. project the embeddings to 2-D with t-SNE and scale them to [0, 1];
      4. assign the points to the cells of a regular grid with ``lapjv`` on
         the squared-distance cost matrix;
      5. paste the images at their grid cells and save the mosaic as
         ``obrazekV2.jpg``.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            img_data = facenet.get_dataset(self.datadir)
            path, label = facenet.get_image_paths_and_labels(img_data)
            print("label")
            print(label)
            print('Classes: %d' % len(img_data))
            print('Images: %d' % len(path))
            facenet.load_model(self.modeldir)
            # Look up the model's tensors by name in the default graph.
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]
            print('Extracting features of images for model')
            batch_size = 10000
            image_size = 160
            nrof_images = len(path)
            nrof_batches_per_epoch = int(
                math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            #print(nrof_batches_per_epoch)
            #for i in range(nrof_batches_per_epoch):
            # NOTE(review): with the batch loop commented out only batch 0 is
            # embedded; for more than `batch_size` images the remaining rows
            # of emb_array stay zero.
            start_index = 0 * batch_size
            end_index = min((0 + 1) * batch_size, nrof_images)
            paths_batch = path[start_index:end_index]
            images = facenet.load_data(paths_batch, False, False, image_size)
            feed_dict = {
                images_placeholder: images,
                phase_train_placeholder: False
            }
            emb_array[start_index:end_index, :] = sess.run(
                embeddings, feed_dict=feed_dict)
            print("emb_array[0]")
            print(emb_array[0])
            # NOTE(review): class_names and classifier_file_name are not used
            # in the rest of this method.
            class_names = [cls.name.replace('_', ' ') for cls in img_data]
            classifier_file_name = os.path.expanduser(
                self.classifier_filename)
            print('emb_array')
            print(emb_array)
            X_embedded = TSNE(n_components=2).fit_transform(emb_array)
            # Min-max scale each axis to [0, 1] in place.
            X_embedded -= X_embedded.min(axis=0)
            X_embedded /= X_embedded.max(axis=0)
            print("X_embedded")
            print(X_embedded)
            #for i in range(0, nrof_images-1):
            #    plt.plot(X_embedded[i, 0], X_embedded[i, 1],'bo')
            # NOTE(review): nothing is plotted before these calls now that
            # the loop above is commented out.
            plt.legend(bbox_to_anchor=(1, 1))
            plt.show()
            # Side length of the grid, in cells.
            out_dim = round(math.sqrt(nrof_images))
            # Edge length of one grid cell, in pixels.
            out_res = 160
            to_plot = np.square(out_dim)
            # Regular out_dim x out_dim grid of points in the unit square.
            grid = np.dstack(
                np.meshgrid(np.linspace(0, 1, out_dim),
                            np.linspace(0, 1, out_dim))).reshape(-1, 2)
            # NOTE(review): the matrix has out_dim**2 rows and nrof_images
            # columns, so it is square only when the image count is a
            # perfect square; presumably lapjv needs a square matrix -
            # confirm.
            cost_matrix = cdist(grid, X_embedded,
                                "sqeuclidean").astype(np.float32)
            # Rescale so that the largest cost is 100000.
            cost_matrix = cost_matrix * (100000 / cost_matrix.max())
            print(cost_matrix)
            #rids, cids = solve_dense(costs)
            #print(rids)
            # Progress message (Polish: "I have started doing this").
            print("zaczalem to robic")
            #row_ind, col_ind = linear_sum_assignment(cost_matrix)
            row_asses, col_asses, _ = lapjv(cost_matrix)
            #print("To cos")
            #print (col_asses)
            # Progress message (Polish: "now this!").
            print("teraz to!")
            #print (row_ind)
            #print (col_ind)
            #for r,c in zip(row_ind, col_asses):
            #    print(r,c) # Row/column pairings
            # Grid position assigned to each image.
            grid_jv = grid[col_asses]
            # Mosaic canvas, initialised to ones.
            out = np.ones((out_dim * out_res, out_dim * out_res, 3))
            print(grid_jv)
            for pos, img in zip(grid_jv, images[0:to_plot]):
                # Top-left pixel of the cell this image is pasted into.
                h_range = int(np.floor(pos[0] * (out_dim - 1) * out_res))
                w_range = int(np.floor(pos[1] * (out_dim - 1) * out_res))
                out[h_range:h_range + out_res, w_range:w_range +
                    out_res] = image.img_to_array(img)
            print(out)
            im = image.array_to_img(out)
            im.save("obrazekV2.jpg", quality=100)
emb_array = np.zeros((nrof_images, embedding_size)) start_index = 0 * batch_size end_index = min((0 + 1) * batch_size, nrof_images) paths_batch = path[start_index:end_index] images = facenet.load_data(paths_batch, False, False, image_size) feed_dict = {images_placeholder: images, phase_train_placeholder: False} emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict) print("emb_array[0]") print(emb_array[0]) class_names = [cls.name.replace('_', ' ') for cls in img_data] print('emb_array') print(emb_array) X_embedded = TSNE(n_components=2).fit_transform(emb_array) X_embedded -= X_embedded.min(axis=0) X_embedded /= X_embedded.max(axis=0) print("X_embedded") print(X_embedded) for i in range(0, nrof_images - 1): plt.plot(X_embedded[i, 0], X_embedded[i, 1], 'bo') plt.legend(bbox_to_anchor=(1, 1)) plt.show() out_dim = round(math.sqrt(nrof_images)) out_res = 160 to_plot = np.square(out_dim) grid = np.dstack( np.meshgrid(np.linspace(0, 1, out_dim), np.linspace(0, 1, out_dim))).reshape(-1, 2) cost_matrix = cdist(grid, X_embedded, "sqeuclidean").astype(np.float32) cost_matrix = cost_matrix * (100000 / cost_matrix.max()) print(cost_matrix)
embed2 = TSNE(n_components=2).fit_transform(feas_normalized) height = 128 width = 64 embed2 = embed2 - embed2.min(axis=0) # np.median(np.abs(np.diff(embed2, axis=0)), axis=0) space = 64 + 16 embed2 *= space embed2 = embed2.astype(int) extend = np.array([ height, width, ]) shape = tuple((embed2.max(axis=0).astype(int) + extend).tolist()) + (3, ) print('res shape', shape) res = np.ones(shape).astype(np.uint8) * 255 for ind in range(feas.shape[0]): img_name = feask[ind] img = cv2.imread(img_name) img2 = cvb.resize_keep_ar(img, height, width) if not (img2.shape[0] <= height and img2.shape[1] <= width): img2 = cvb.resize_keep_ar(img, width, width) assert (img2.shape[0] <= height and img2.shape[1] <= width) # if img2.shape[0] < height: # img2 = np.concatenate((img2, np.ones((height - img2.shape[0], img2.shape[1], 3)) * 255), axis=0) # if img2.shape[1] < width: # img2 = np.concatenate((img2, np.ones((img2.shape[0], width - img2.shape[1], 3)) * 255), axis=1) # assert img2.shape[0] == height and img2.shape[1] == width