def evaluate(self, timestamp, batch_idx, run_kmeans=False):
    """Cluster the full dataset with the model, log metrics, and optionally
    compare against a raw-data K-means baseline.

    Saves the inferred data/one-hot assignments to an ``.npz`` snapshot and
    appends purity/NMI/ARI to ``<save_dir>/log.txt``.

    Args:
        timestamp: run identifier (unused here; kept for interface parity).
        batch_idx: current training batch index, used in filenames/log rows.
        run_kmeans: when True, also run K-means on the raw data as a baseline.
    """
    data_y, label_y, _ = self.y_sampler.load_all()
    N = data_y.shape[0]
    data_x_, data_x_onehot_ = self.predict_x(data_y)
    np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
             data_x_, data_x_onehot_, label_y)
    # Hard cluster assignment = argmax over the one-hot (softmax) head.
    label_infer = np.argmax(data_x_onehot_, axis=1)
    purity = metric.compute_purity(label_infer, label_y)
    nmi = normalized_mutual_info_score(label_y, label_infer)
    ari = adjusted_rand_score(label_y, label_infer)
    print('RTM: Purity = {}, NMI = {}, ARI = {}'.format(purity, nmi, ari))
    # Context manager replaces manual open/close so the handle is released
    # even if the write raises.
    with open('%s/log.txt' % self.save_dir, 'a+') as f:
        f.write('%.4f\t%.4f\t%.4f\t%d\n' % (purity, nmi, ari, batch_idx))
    if run_kmeans:
        # Baseline: K-means directly on the raw data with the same K.
        # NOTE(review): `nb_classes` is resolved from enclosing scope — confirm
        # it is defined at module level.
        km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y)
        label_kmeans = km.labels_
        purity = metric.compute_purity(label_kmeans, label_y)
        nmi = normalized_mutual_info_score(label_y, label_kmeans)
        ari = adjusted_rand_score(label_y, label_kmeans)
        print('K-means: Purity = {}, NMI = {}, ARI = {}'.format(
            purity, nmi, ari))
        with open('%s/log.txt' % self.save_dir, 'a+') as f:
            f.write('%.4f\t%.4f\t%.4f\n' % (purity, nmi, ari))
def evaluate(self, timestamp, epoch, run_kmeans=False):
    """Evaluate coupled clustering on both scRNA-seq and scATAC-seq modalities.

    Snapshots inferred assignments for both modalities to an ``.npz`` file,
    draws per-modality cluster heatmaps, logs NMI/ARI/purity to
    ``<save_dir>/log.txt``, and optionally runs raw-data K-means baselines.

    Args:
        timestamp: run identifier (unused here; kept for interface parity).
        epoch: current epoch, used in filenames and log rows.
        run_kmeans: when True, also run K-means baselines on the raw data.
    """
    data_y1, label_y1, data_y2, label_y2 = self.y_sampler.load_all()
    data_x1_, data_x_onehot1_ = self.predict_x1(data_y1)
    data_x2_, data_x_onehot2_ = self.predict_x2(data_y2)
    np.savez('{}/data_at_{}.npz'.format(self.save_dir, epoch + 1),
             data_x1_, data_x_onehot1_, label_y1,
             data_x2_, data_x_onehot2_, label_y2)
    # scRNA-seq modality.
    label_infer1 = np.argmax(data_x_onehot1_, axis=1)
    purity1 = metric.compute_purity(label_infer1, label_y1)
    nmi1 = normalized_mutual_info_score(label_y1, label_infer1)
    ari1 = adjusted_rand_score(label_y1, label_infer1)
    self.cluster_heatmap(epoch, label_infer1, label_y1, 'scRNA')
    print('CoupleRTM scRNA-seq: NMI = {}, ARI = {}, Purity = {}'.format(
        nmi1, ari1, purity1))
    # scATAC-seq modality.
    label_infer2 = np.argmax(data_x_onehot2_, axis=1)
    purity2 = metric.compute_purity(label_infer2, label_y2)
    nmi2 = normalized_mutual_info_score(label_y2, label_infer2)
    ari2 = adjusted_rand_score(label_y2, label_infer2)
    self.cluster_heatmap(epoch, label_infer2, label_y2, 'scATAC')
    print('CoupleRTM scATAC-seq: NMI = {}, ARI = {}, Purity = {}'.format(
        nmi2, ari2, purity2))
    with open('%s/log.txt' % self.save_dir, 'a+') as f:
        f.write('%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%d\n' %
                (nmi1, ari1, purity1, nmi2, ari2, purity2, epoch))
    if run_kmeans:
        # K-means baseline on raw scRNA-seq data.
        km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y1)
        label_kmeans = km.labels_
        purity = metric.compute_purity(label_kmeans, label_y1)
        nmi = normalized_mutual_info_score(label_y1, label_kmeans)
        ari = adjusted_rand_score(label_y1, label_kmeans)
        print('K-means scRNA-seq: NMI = {}, ARI = {}, Purity = {}'.format(
            nmi, ari, purity))
        # BUG FIX: the original rebound `f` to a second open() without closing
        # the first handle (leak) and wrote log lines without a trailing
        # newline; each handle is now scoped with `with` and lines terminated.
        with open('%s/log.txt' % self.save_dir, 'a+') as f:
            f.write('K-means scRNA-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
                nmi, ari, purity))
        # K-means baseline on raw scATAC-seq data.
        km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y2)
        label_kmeans = km.labels_
        purity = metric.compute_purity(label_kmeans, label_y2)
        nmi = normalized_mutual_info_score(label_y2, label_kmeans)
        ari = adjusted_rand_score(label_y2, label_kmeans)
        print('K-means scATAC-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
            nmi, ari, purity))
        with open('%s/log.txt' % self.save_dir, 'a+') as f:
            f.write('K-means scATAC-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
                nmi, ari, purity))
def evaluate(self, timestamp, epoch, run_kmeans=False):
    """Evaluate RTM clustering and compare with latent-space / PCA / raw K-means.

    Appends one tab-separated row per method to ``<save_dir>/log.txt``.

    Args:
        timestamp: run identifier (unused here; kept for interface parity).
        epoch: current epoch, used in filenames and log rows.
        run_kmeans: when True, also run PCA+K-means and raw K-means baselines.
    """
    data_y, label_y = self.y_sampler.load_all()
    N = data_y.shape[0]
    data_x_, data_x_onehot_ = self.predict_x(data_y)
    np.savez('{}/data_at_{}.npz'.format(self.save_dir, epoch + 1),
             data_x_, data_x_onehot_, label_y)
    label_infer = np.argmax(data_x_onehot_, axis=1)
    purity = metric.compute_purity(label_infer, label_y)
    nmi = normalized_mutual_info_score(label_y, label_infer)
    ari = adjusted_rand_score(label_y, label_infer)
    self.cluster_heatmap(epoch, label_infer, label_y)
    print('RTM: NMI = {}, ARI = {}, Purity = {}'.format(nmi, ari, purity))
    # BUG FIX: the original only reached f.close() inside the run_kmeans
    # branch, leaking the handle otherwise; one `with` now scopes all writes.
    with open('%s/log.txt' % self.save_dir, 'a+') as f:
        f.write('RTM\t%.4f\t%.4f\t%.4f\t%d\n' % (nmi, ari, purity, epoch))
        # K-means on the learned latent representation.
        km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_x_)
        label_kmeans = km.labels_
        purity = metric.compute_purity(label_kmeans, label_y)
        nmi = normalized_mutual_info_score(label_y, label_kmeans)
        ari = adjusted_rand_score(label_y, label_kmeans)
        print('Latent-kmeans: NMI = {}, ARI = {}, Purity = {}'.format(
            nmi, ari, purity))
        f.write('Latent-kmeans\t%.4f\t%.4f\t%.4f\t%d\n' %
                (nmi, ari, purity, epoch))
        if run_kmeans:
            from sklearn.decomposition import PCA
            # PCA(10) + K-means baseline on the raw data.
            # BUG FIX: removed the redundant pca.fit(data_y) call before
            # fit_transform, which fit the model twice.
            pca = PCA(n_components=10)
            data_pca_y = pca.fit_transform(data_y)
            km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_pca_y)
            label_kmeans = km.labels_
            purity = metric.compute_purity(label_kmeans, label_y)
            nmi = normalized_mutual_info_score(label_y, label_kmeans)
            ari = adjusted_rand_score(label_y, label_kmeans)
            print('PCA + K-means: NMI = {}, ARI = {}, Purity = {}'.format(
                nmi, ari, purity))
            # BUG FIX: added the missing tab between the method tag and the
            # first number, matching the other log rows.
            f.write('PCA+Kmeans\t%.4f\t%.4f\t%.4f\n' % (nmi, ari, purity))
            # Plain K-means baseline on the raw data.
            km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y)
            label_kmeans = km.labels_
            purity = metric.compute_purity(label_kmeans, label_y)
            nmi = normalized_mutual_info_score(label_y, label_kmeans)
            ari = adjusted_rand_score(label_y, label_kmeans)
            print('K-means: NMI = {}, ARI = {}, Purity = {}'.format(
                nmi, ari, purity))
            f.write('Kmeans\t%.4f\t%.4f\t%.4f\n' % (nmi, ari, purity))
def correlation(self, X, Y, heatmap=False):
    """K-means X into as many clusters as Y has labels and report agreement.

    Optionally draws a label-sorted cosine-similarity heatmap of X and saves
    it as a PNG.

    Args:
        X: feature matrix, one row per sample.
        Y: ground-truth labels; assumed to be integers 0..K-1 when
           heatmap=True (tick placement indexes classes by value) —
           TODO confirm with callers.
        heatmap: when True, render and save the similarity heatmap.
    """
    nb_classes = len(set(Y))
    # BUG FIX: `print nb_classes` was Python 2 statement syntax — a
    # SyntaxError under Python 3, which the rest of this file targets.
    print(nb_classes)
    km = KMeans(n_clusters=nb_classes, random_state=0).fit(X)
    label_kmeans = km.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    homogeneity = homogeneity_score(Y, label_kmeans)
    ami = adjusted_mutual_info_score(Y, label_kmeans)
    print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.
          format(nmi, ari, purity, ami, homogeneity))
    if heatmap:
        x_ticks = [''] * len(Y)
        y_ticks = [''] * len(Y)
        idx = []
        for i in range(nb_classes):
            # Group samples of class i so heatmap rows/cols are label-sorted.
            sub_idx = [j for j, item in enumerate(Y) if item == i]
            # Reuse sub_idx instead of recomputing the same comprehension.
            idx += sub_idx
            # Place the class label tick at the end of its block.
            x_ticks[len(idx) - 1] = str(i)
        assert len(idx) == len(Y)
        X = X[idx, :]
        Y = Y[idx]
        similarity_mat = cosine_similarity(X)
        fig, ax = plt.subplots()
        ax.set_yticklabels(y_ticks)
        ax.set_xticks(range(len(x_ticks)))
        ax.set_xticklabels(x_ticks)
        im = ax.imshow(similarity_mat, cmap='Blues')
        plt.colorbar(im)
        plt.savefig('heatmap_%s_dim%d.png' % (self.name, X.shape[1]), dpi=600)
def _eval_cluster(self, latent_rep, labels_true, timestamp, val):
    """Run K-means on the latent representation and log clustering scores.

    Remaps the sparse ground-truth label alphabet onto a dense 0..7 range,
    clusters the latent features, prints purity/NMI/ARI, and appends a
    result row to ``logs/Res_<data>_<model>.txt``.
    """
    # Dense remapping of the original label ids (0,1,2,4,6,7,8,9 -> 0..7).
    remap = {0: 0, 1: 1, 2: 2, 4: 3, 6: 4, 7: 5, 8: 6, 9: 7}
    labels_true = np.array([remap[lbl] for lbl in labels_true])
    n_clusters = max(self.num_classes, len(np.unique(labels_true)))
    fitted = KMeans(n_clusters=n_clusters, random_state=0).fit(latent_rep)
    labels_pred = fitted.labels_
    purity = metric.compute_purity(labels_pred, labels_true)
    ari = adjusted_rand_score(labels_true, labels_pred)
    nmi = normalized_mutual_info_score(labels_true, labels_pred)
    data_split = 'Validation' if val else 'Test'
    print('Data = {}, Model = {}, sampler = {}, z_dim = {}, beta_reg = {}'.
          format(self.data, self.model, self.sampler, self.z_dim,
                 self.beta_reg))
    print(' #Points = {}, K = {}, Purity = {}, NMI = {}, ARI = {}, '.
          format(latent_rep.shape[0], self.num_classes, purity, nmi, ari))
    result_path = 'logs/Res_{}_{}.txt'.format(self.data, self.model)
    with open(result_path, 'a+') as f:
        f.write(
            '{}, {} : K = {}, z_dim = {}, beta_reg = {}, sampler = {}, Purity = {}, NMI = {}, ARI = {}\n'
            .format(timestamp, data_split, self.num_classes, self.z_dim,
                    self.beta_reg, self.sampler, purity, nmi, ari))
        f.flush()
def evaluate(self, timestamp, batch_idx):
    """Infer cluster assignments and, when labels exist, log clustering scores.

    Saves the inferred data/one-hot assignments to an ``.npz`` file whose
    location depends on the module-level ``is_train`` flag; when the
    module-level ``has_label`` flag is set, also computes and logs
    NMI/ARI/homogeneity against the ground truth.
    """
    if has_label:
        data_y, label_y = self.y_sampler.load_all()
    else:
        data_y = self.y_sampler.load_all()
    data_x_, data_x_onehot_ = self.predict_x(data_y)
    label_infer = np.argmax(data_x_onehot_, axis=1)
    # Per-cluster size diagnostics.
    print([list(label_infer).count(item) for item in np.unique(label_infer)])
    if has_label:
        # BUG FIX: this diagnostic referenced label_y unconditionally, which
        # is unbound (NameError) when has_label is false; now guarded.
        print([list(label_y).count(item) for item in np.unique(label_y)])
        purity = metric.compute_purity(label_infer, label_y)
        nmi = normalized_mutual_info_score(label_y, label_infer)
        ari = adjusted_rand_score(label_y, label_infer)
        # BUG FIX: the original variable name here was mangled ('h**o'),
        # which is not a valid Python identifier.
        homogeneity = homogeneity_score(label_y, label_infer)
        print('scDEC: NMI = {}, ARI = {}, Homogeneity = {}'.format(
            nmi, ari, homogeneity))
        if is_train:
            with open('%s/log.txt' % self.save_dir, 'a+') as f:
                f.write('NMI = {}\tARI = {}\tHomogeneity = {}\t batch_idx = {}\n'.format(
                    nmi, ari, homogeneity, batch_idx))
            np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
                     data_x_, data_x_onehot_, label_y)
        else:
            np.savez('results/{}/data_pre.npz'.format(self.data),
                     data_x_, data_x_onehot_, label_y)
    else:
        if is_train:
            np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
                     data_x_, data_x_onehot_)
        else:
            np.savez('results/{}/data_pre.npz'.format(self.data),
                     data_x_, data_x_onehot_)
def analyze_cistopic():
    """Cluster cisTopic topic proportions with K-means and print NMI/ARI/purity.

    Reads the cisTopic result matrix for the module-level ``path``, normalizes
    each cell's topic distribution, clusters with one cluster per unique
    label, and prints (NMI, ARI, purity).
    """
    ratio = 0.7
    df = pd.read_csv('cisTopic/results/%s' % path,
                     sep=' ', header=0, index_col=[0])
    topic_mat = df.values
    # Normalize each column (cell) to sum to 1, then put cells on rows.
    topic_mat = topic_mat * 1.0 / np.sum(topic_mat, axis=0)
    topic_mat = topic_mat.T
    # Mouse-atlas labels, one per line (lines kept verbatim, newline included).
    label_file = 'datasets/scATAC/Mouse_atlas/label_%s.txt' % str(ratio)
    labels = list(open(label_file).readlines())
    uniq_labels = list(np.unique(labels))
    Y = np.array([uniq_labels.index(item) for item in labels])
    print(topic_mat.shape)
    assert len(Y) == topic_mat.shape[0]
    fitted = KMeans(n_clusters=len(uniq_labels), random_state=0).fit(topic_mat)
    label_kmeans = fitted.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    print(nmi, ari, purity)
def cluster_eval(labels_true, labels_infer):
    """Score an inferred clustering against ground truth.

    Computes purity and AMI as side diagnostics and returns the tuple
    (NMI, ARI, homogeneity).
    """
    purity = metric.compute_purity(labels_infer, labels_true)
    ami = adjusted_mutual_info_score(labels_true, labels_infer)
    nmi = normalized_mutual_info_score(labels_true, labels_infer)
    ari = adjusted_rand_score(labels_true, labels_infer)
    homogeneity = homogeneity_score(labels_true, labels_infer)
    return nmi, ari, homogeneity
def correlation(self, X, Y, heatmap=False):
    """K-means X into ``len(set(Y))`` clusters and print agreement metrics.

    Prints the cluster count followed by NMI/ARI/purity/AMI/homogeneity
    against the ground-truth labels Y. The ``heatmap`` flag is accepted for
    interface parity but unused in this variant.
    """
    n_clusters = len(set(Y))
    print(n_clusters)
    fitted = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    pred = fitted.labels_
    purity = metric.compute_purity(pred, Y)
    nmi = normalized_mutual_info_score(Y, pred)
    ari = adjusted_rand_score(Y, pred)
    homogeneity = homogeneity_score(Y, pred)
    ami = adjusted_mutual_info_score(Y, pred)
    print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.
          format(nmi, ari, purity, ami, homogeneity))
def eval_cluster(pathname1, labels_pred, labels_true, no_of_spk, timestamp,
                 z_dim, sampler, beta_cycle_label, beta_cycle_gen):
    """Print clustering scores and append a result row to <pathname1>/Result.txt."""
    purity = metric.compute_purity(labels_pred, labels_true)
    nmi = normalized_mutual_info_score(labels_true, labels_pred)
    ari = adjusted_rand_score(labels_true, labels_pred)
    print(' #Points = {}, K = {}, Purity = {}, NMI = {}, ARI = {},'.format(
        labels_pred.shape[0], no_of_spk, purity, nmi, ari))
    row = ('{}, K = {}, z_dim = {}, beta_label = {}, beta_gen = {}, '
           'sampler = {}, Purity = {}, NMI = {}, ARI = {}\n').format(
               timestamp, no_of_spk, z_dim, beta_cycle_label, beta_cycle_gen,
               sampler, purity, nmi, ari)
    with open(pathname1 + '/Result.txt', 'a+') as f:
        f.write(row)
        f.flush()
def cluster_eval(labels_true, latent_feat):
    """K-means the latent features into the true class count and print metrics.

    Also emits a confusion matrix via the module-level ``confusion_mat`` helper.
    """
    assert len(labels_true) == latent_feat.shape[0]
    k = len(np.unique(labels_true))
    fitted = KMeans(n_clusters=k, n_init=20, random_state=0).fit(latent_feat)
    label_kmeans = fitted.labels_
    confusion_mat(labels_true, label_kmeans)
    purity = metric.compute_purity(label_kmeans, labels_true)
    nmi = normalized_mutual_info_score(labels_true, label_kmeans)
    ari = adjusted_rand_score(labels_true, label_kmeans)
    homogeneity = homogeneity_score(labels_true, label_kmeans)
    ami = adjusted_mutual_info_score(labels_true, label_kmeans)
    print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.format(
        nmi, ari, purity, ami, homogeneity))
def evaluate(self, timestamp, batch_idx, run_kmeans=False):
    """Snapshot inferred assignments and append NMI/ARI/purity to the log.

    Args:
        timestamp: run identifier (unused here; kept for interface parity).
        batch_idx: current batch index, used in filenames and log rows.
        run_kmeans: accepted for interface parity with the other evaluate
            variants but unused in this one.
    """
    data_y, label_y = self.y_sampler.load_all()
    N = data_y.shape[0]
    data_x_, data_x_onehot_ = self.predict_x(data_y)
    np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
             data_x_, data_x_onehot_, label_y)
    label_infer = np.argmax(data_x_onehot_, axis=1)
    purity = metric.compute_purity(label_infer, label_y)
    nmi = normalized_mutual_info_score(label_y, label_infer)
    ari = adjusted_rand_score(label_y, label_infer)
    # Context manager replaces manual open/close so the handle is released
    # even if the write raises.
    with open('%s/log.txt' % self.save_dir, 'a+') as f:
        f.write('NMI = {}\tARI = {}\tPurity = {}\t batch_idx = {}\n'.format(
            nmi, ari, purity, batch_idx))
def analyze_SCALE():
    """Cluster SCALE latent features with K-means, print metrics, and plot.

    Loads the 10-dimensional SCALE feature matrix for the module-level
    ``path``, clusters with one cluster per unique label, prints
    (NMI, ARI, purity), and saves a 2-D embedding plot.
    """
    ratio = 0.1
    feat = np.loadtxt('SCALE/%s/feature.txt' % path,
                      delimiter='\t', usecols=range(1, 1 + 10))
    label_file = 'datasets/scATAC/Mouse_atlas/label_%s.txt' % str(ratio)
    labels = [line.strip() for line in open(label_file).readlines()]
    uniq_labels = list(np.unique(labels))
    Y = np.array([uniq_labels.index(item) for item in labels])
    print(feat.shape)
    assert len(Y) == feat.shape[0]
    fitted = KMeans(n_clusters=len(uniq_labels), random_state=0).fit(feat)
    label_kmeans = fitted.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    print(nmi, ari, purity)
    plot_embedding(feat, np.array(labels),
                   save='SCALE/embedding_tsne_cont_%s.png' % path)