Example #1
0
 def evaluate(self, timestamp, batch_idx, run_kmeans=False):
     """Evaluate the inferred clustering against the ground-truth labels.

     Saves an .npz snapshot of the inferred data, prints and appends
     Purity/NMI/ARI to the run's log file. If ``run_kmeans`` is True,
     also reports a plain K-means baseline on the raw data.

     Args:
         timestamp: run identifier (unused here, kept for interface parity).
         batch_idx: current batch index; used in filenames and log lines.
         run_kmeans: when True, additionally run the K-means baseline.
     """
     data_y, label_y, _ = self.y_sampler.load_all()
     data_x_, data_x_onehot_ = self.predict_x(data_y)
     np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
              data_x_, data_x_onehot_, label_y)
     # Cluster assignment = argmax over the one-hot (softmax) component.
     label_infer = np.argmax(data_x_onehot_, axis=1)
     purity = metric.compute_purity(label_infer, label_y)
     nmi = normalized_mutual_info_score(label_y, label_infer)
     ari = adjusted_rand_score(label_y, label_infer)
     print('RTM: Purity = {}, NMI = {}, ARI = {}'.format(purity, nmi, ari))
     # Context manager guarantees the log file is closed even on error
     # (original used open/close with no try/finally).
     with open('%s/log.txt' % self.save_dir, 'a+') as f:
         f.write('%.4f\t%.4f\t%.4f\t%d\n' % (purity, nmi, ari, batch_idx))
     if run_kmeans:
         # Baseline: K-means directly on the raw data.
         # NOTE(review): nb_classes is a module-level name defined elsewhere.
         km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y)
         label_kmeans = km.labels_
         purity = metric.compute_purity(label_kmeans, label_y)
         nmi = normalized_mutual_info_score(label_y, label_kmeans)
         ari = adjusted_rand_score(label_y, label_kmeans)
         print('K-means: Purity = {}, NMI = {}, ARI = {}'.format(
             purity, nmi, ari))
         with open('%s/log.txt' % self.save_dir, 'a+') as f:
             f.write('%.4f\t%.4f\t%.4f\n' % (purity, nmi, ari))
Example #2
0
    def evaluate(self, timestamp, epoch, run_kmeans=False):
        """Evaluate clustering on both modalities (scRNA-seq and scATAC-seq).

        Saves an .npz snapshot, draws cluster heatmaps, prints metrics and
        appends them to the run's log file. If ``run_kmeans`` is True, also
        reports K-means baselines on the raw data of each modality.

        Args:
            timestamp: run identifier (unused here, kept for interface parity).
            epoch: current epoch; used in filenames and log lines.
            run_kmeans: when True, additionally run the K-means baselines.
        """
        data_y1, label_y1, data_y2, label_y2 = self.y_sampler.load_all()
        data_x1_, data_x_onehot1_ = self.predict_x1(data_y1)
        data_x2_, data_x_onehot2_ = self.predict_x2(data_y2)
        np.savez('{}/data_at_{}.npz'.format(self.save_dir, epoch + 1),
                 data_x1_, data_x_onehot1_, label_y1, data_x2_,
                 data_x_onehot2_, label_y2)
        # scRNA-seq modality.
        label_infer1 = np.argmax(data_x_onehot1_, axis=1)
        purity1 = metric.compute_purity(label_infer1, label_y1)
        nmi1 = normalized_mutual_info_score(label_y1, label_infer1)
        ari1 = adjusted_rand_score(label_y1, label_infer1)
        self.cluster_heatmap(epoch, label_infer1, label_y1, 'scRNA')
        print('CoupleRTM scRNA-seq: NMI = {}, ARI = {}, Purity = {}'.format(
            nmi1, ari1, purity1))
        # scATAC-seq modality.
        label_infer2 = np.argmax(data_x_onehot2_, axis=1)
        purity2 = metric.compute_purity(label_infer2, label_y2)
        nmi2 = normalized_mutual_info_score(label_y2, label_infer2)
        ari2 = adjusted_rand_score(label_y2, label_infer2)
        self.cluster_heatmap(epoch, label_infer2, label_y2, 'scATAC')
        print('CoupleRTM scATAC-seq: NMI = {}, ARI = {}, Purity = {}'.format(
            nmi2, ari2, purity2))

        # Context manager closes the log file even on error.
        with open('%s/log.txt' % self.save_dir, 'a+') as f:
            f.write('%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%d\n' %
                    (nmi1, ari1, purity1, nmi2, ari2, purity2, epoch))
        # K-means baselines.
        if run_kmeans:
            # NOTE(review): nb_classes is a module-level name defined elsewhere.
            # Fix: the original opened the log twice, leaking the first handle,
            # and wrote both lines without trailing newlines.
            with open('%s/log.txt' % self.save_dir, 'a+') as f:
                # scRNA-seq baseline.
                km = KMeans(n_clusters=nb_classes,
                            random_state=0).fit(data_y1)
                label_kmeans = km.labels_
                purity = metric.compute_purity(label_kmeans, label_y1)
                nmi = normalized_mutual_info_score(label_y1, label_kmeans)
                ari = adjusted_rand_score(label_y1, label_kmeans)
                print('K-means scRNA-seq: NMI = {}, ARI = {}, Purity = {}'.format(
                    nmi, ari, purity))
                f.write(
                    'K-means scRNA-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
                        nmi, ari, purity))
                # scATAC-seq baseline.
                km = KMeans(n_clusters=nb_classes,
                            random_state=0).fit(data_y2)
                label_kmeans = km.labels_
                purity = metric.compute_purity(label_kmeans, label_y2)
                nmi = normalized_mutual_info_score(label_y2, label_kmeans)
                ari = adjusted_rand_score(label_y2, label_kmeans)
                print(
                    'K-means scATAC-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
                        nmi, ari, purity))
                f.write(
                    'K-means scATAC-seq: NMI = {}, ARI = {}, Purity = {}\n'.format(
                        nmi, ari, purity))
Example #3
0
    def evaluate(self, timestamp, epoch, run_kmeans=False):
        """Evaluate the inferred clustering plus K-means baselines.

        Logs RTM metrics, K-means on the learned latent representation,
        and — when ``run_kmeans`` — PCA+K-means and raw K-means baselines.

        Args:
            timestamp: run identifier (unused here, kept for interface parity).
            epoch: current epoch; used in filenames and log lines.
            run_kmeans: when True, additionally run the raw-data baselines.
        """
        data_y, label_y = self.y_sampler.load_all()
        data_x_, data_x_onehot_ = self.predict_x(data_y)
        np.savez('{}/data_at_{}.npz'.format(self.save_dir, epoch + 1), data_x_,
                 data_x_onehot_, label_y)
        label_infer = np.argmax(data_x_onehot_, axis=1)
        purity = metric.compute_purity(label_infer, label_y)
        nmi = normalized_mutual_info_score(label_y, label_infer)
        ari = adjusted_rand_score(label_y, label_infer)
        self.cluster_heatmap(epoch, label_infer, label_y)
        print('RTM: NMI = {}, ARI = {}, Purity = {}'.format(nmi, ari, purity))
        # One with-block for all log writes — the original leaked the handle
        # whenever run_kmeans was False (close() only ran inside the branch).
        with open('%s/log.txt' % self.save_dir, 'a+') as f:
            f.write('RTM\t%.4f\t%.4f\t%.4f\t%d\n' % (nmi, ari, purity, epoch))
            # K-means on the learned latent representation.
            # NOTE(review): nb_classes is a module-level name defined elsewhere.
            km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_x_)
            label_kmeans = km.labels_
            purity = metric.compute_purity(label_kmeans, label_y)
            nmi = normalized_mutual_info_score(label_y, label_kmeans)
            ari = adjusted_rand_score(label_y, label_kmeans)
            print('Latent-kmeans: NMI = {}, ARI = {}, Purity = {}'.format(
                nmi, ari, purity))
            f.write('Latent-kmeans\t%.4f\t%.4f\t%.4f\t%d\n' %
                    (nmi, ari, purity, epoch))
            # Raw-data baselines.
            if run_kmeans:
                from sklearn.decomposition import PCA
                pca = PCA(n_components=10)
                # fit_transform fits and projects in one pass; the original
                # redundantly called fit() first and then refit.
                data_pca_y = pca.fit_transform(data_y)

                km = KMeans(n_clusters=nb_classes,
                            random_state=0).fit(data_pca_y)
                label_kmeans = km.labels_
                purity = metric.compute_purity(label_kmeans, label_y)
                nmi = normalized_mutual_info_score(label_y, label_kmeans)
                ari = adjusted_rand_score(label_y, label_kmeans)
                print('PCA + K-means: NMI = {}, ARI = {}, Purity = {}'.format(
                    nmi, ari, purity))
                # Fix: tab separator after the method name was missing.
                f.write('PCA+Kmeans\t%.4f\t%.4f\t%.4f\n' % (nmi, ari, purity))

                km = KMeans(n_clusters=nb_classes, random_state=0).fit(data_y)
                label_kmeans = km.labels_
                purity = metric.compute_purity(label_kmeans, label_y)
                nmi = normalized_mutual_info_score(label_y, label_kmeans)
                ari = adjusted_rand_score(label_y, label_kmeans)
                print('K-means: NMI = {}, ARI = {}, Purity = {}'.format(
                    nmi, ari, purity))
                f.write('Kmeans\t%.4f\t%.4f\t%.4f\n' % (nmi, ari, purity))
Example #4
0
 def correlation(self, X, Y, heatmap=False):
     """Cluster X with K-means and report agreement with labels Y.

     Optionally renders a class-sorted cosine-similarity heatmap of X.

     Args:
         X: feature matrix, one row per sample.
         Y: integer class labels, one per row of X.
         heatmap: when True, save a similarity heatmap to a .png file.
     """
     nb_classes = len(set(Y))
     print(nb_classes)  # fix: was a Python 2 `print` statement (SyntaxError on py3)
     km = KMeans(n_clusters=nb_classes, random_state=0).fit(X)
     label_kmeans = km.labels_
     purity = metric.compute_purity(label_kmeans, Y)
     nmi = normalized_mutual_info_score(Y, label_kmeans)
     ari = adjusted_rand_score(Y, label_kmeans)
     homogeneity = homogeneity_score(Y, label_kmeans)
     ami = adjusted_mutual_info_score(Y, label_kmeans)
     print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.
           format(nmi, ari, purity, ami, homogeneity))
     if heatmap:
         x_ticks = [''] * len(Y)
         y_ticks = [''] * len(Y)  # all-blank labels hide the y-axis ticks
         idx = []
         for i in range(nb_classes):
             # Indices of samples in class i (original duplicated this
             # comprehension and discarded the first result).
             sub_idx = [j for j, item in enumerate(Y) if item == i]
             idx += sub_idx
             x_ticks[len(idx) - 1] = str(i)  # mark the class boundary on x
         assert len(idx) == len(Y)
         # Reorder samples so same-class rows are contiguous.
         X = X[idx, :]
         Y = Y[idx]
         similarity_mat = cosine_similarity(X)
         fig, ax = plt.subplots()
         ax.set_yticklabels(y_ticks)
         ax.set_xticks(range(len(x_ticks)))
         ax.set_xticklabels(x_ticks)
         im = ax.imshow(similarity_mat, cmap='Blues')
         plt.colorbar(im)
         plt.savefig('heatmap_%s_dim%d.png' % (self.name, X.shape[1]),
                     dpi=600)
Example #5
0
    def _eval_cluster(self, latent_rep, labels_true, timestamp, val):
        """Run K-means on the latent representation and log Purity/NMI/ARI.

        Args:
            latent_rep: latent feature matrix, one row per sample.
            labels_true: ground-truth labels (sparse ids, remapped below).
            timestamp: run identifier written into the results file.
            val: True for the validation split, False for the test split.
        """
        # Remap the sparse original label ids onto a dense 0..7 range.
        map_labels = {0: 0, 1: 1, 2: 2, 4: 3, 6: 4, 7: 5, 8: 6, 9: 7}
        labels_true = np.array([map_labels[i] for i in labels_true])

        n_clusters = max(self.num_classes, len(np.unique(labels_true)))
        labels_pred = KMeans(n_clusters=n_clusters,
                             random_state=0).fit(latent_rep).labels_

        purity = metric.compute_purity(labels_pred, labels_true)
        ari = adjusted_rand_score(labels_true, labels_pred)
        nmi = normalized_mutual_info_score(labels_true, labels_pred)

        data_split = 'Validation' if val else 'Test'

        print('Data = {}, Model = {}, sampler = {}, z_dim = {}, beta_reg = {}'.
              format(self.data, self.model, self.sampler, self.z_dim,
                     self.beta_reg))
        print(' #Points = {}, K = {}, Purity = {},  NMI = {}, ARI = {},  '.
              format(latent_rep.shape[0], self.num_classes, purity, nmi, ari))

        with open('logs/Res_{}_{}.txt'.format(self.data, self.model),
                  'a+') as f:
            f.write(
                '{}, {} : K = {}, z_dim = {}, beta_reg = {}, sampler = {}, Purity = {}, NMI = {}, ARI = {}\n'
                .format(timestamp, data_split, self.num_classes, self.z_dim,
                        self.beta_reg, self.sampler, purity, nmi, ari))
            f.flush()
Example #6
0
    def evaluate(self, timestamp, batch_idx):
        """Evaluate scDEC cluster inference and save prediction snapshots.

        When labels exist, computes NMI/ARI/homogeneity and (in training)
        appends them to the log file; always saves the inferred data.

        Args:
            timestamp: run identifier (unused here, kept for interface parity).
            batch_idx: current batch index; used in filenames and log lines.
        """
        # NOTE(review): has_label and is_train are module-level names
        # defined elsewhere in the file.
        if has_label:
            data_y, label_y = self.y_sampler.load_all()
        else:
            data_y = self.y_sampler.load_all()
        data_x_, data_x_onehot_ = self.predict_x(data_y)
        label_infer = np.argmax(data_x_onehot_, axis=1)
        # Per-cluster sizes of the inferred assignment.
        print([list(label_infer).count(item) for item in np.unique(label_infer)])

        if has_label:
            # Per-class sizes of the ground truth; fix: the original printed
            # label_y unconditionally and raised NameError when has_label
            # was False.
            print([list(label_y).count(item) for item in np.unique(label_y)])
            purity = metric.compute_purity(label_infer, label_y)
            nmi = normalized_mutual_info_score(label_y, label_infer)
            ari = adjusted_rand_score(label_y, label_infer)
            # Fix: the scraped original had the invalid identifier 'h**o'.
            homogeneity = homogeneity_score(label_y, label_infer)
            print('scDEC: NMI = {}, ARI = {}, Homogeneity = {}'.format(
                nmi, ari, homogeneity))
            if is_train:
                with open('%s/log.txt' % self.save_dir, 'a+') as f:
                    f.write('NMI = {}\tARI = {}\tHomogeneity = {}\t batch_idx = {}\n'.format(
                        nmi, ari, homogeneity, batch_idx))
                np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
                         data_x_, data_x_onehot_, label_y)
            else:
                np.savez('results/{}/data_pre.npz'.format(self.data),
                         data_x_, data_x_onehot_, label_y)
        else:
            if is_train:
                np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
                         data_x_, data_x_onehot_)
            else:
                np.savez('results/{}/data_pre.npz'.format(self.data),
                         data_x_, data_x_onehot_)
Example #7
0
def analyze_cistopic():
    """Cluster the cisTopic topic-cell matrix with K-means and print metrics.

    Loads mouse-atlas labels, normalizes the topic distribution per cell,
    and reports NMI/ARI/purity of a K-means clustering.
    """
    ratio = 0.7
    # NOTE(review): `path` is a module-level name defined elsewhere in the file.
    df = pd.read_csv('cisTopic/results/%s' % path,
                     sep=' ',
                     header=0,
                     index_col=[0])
    data = df.values
    # Normalize each cell's topic distribution to sum to 1, then cells-as-rows.
    data = data * 1.0 / np.sum(data, axis=0)
    data = data.T
    # Mouse atlas labels. Fixes vs. original: close the file deterministically
    # and strip trailing newlines (consistent with analyze_SCALE below).
    with open('datasets/scATAC/Mouse_atlas/label_%s.txt' % str(ratio)) as fh:
        labels = [line.strip() for line in fh]
    uniq_labels = list(np.unique(labels))
    Y = np.array([uniq_labels.index(item) for item in labels])
    print(data.shape)
    assert len(Y) == data.shape[0]
    km = KMeans(n_clusters=len(uniq_labels), random_state=0).fit(data)
    label_kmeans = km.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    print(nmi, ari, purity)
Example #8
0
def cluster_eval(labels_true, labels_infer):
    """Return (NMI, ARI, homogeneity) for an inferred clustering.

    Args:
        labels_true: ground-truth labels.
        labels_infer: inferred cluster assignments.

    Returns:
        Tuple of (nmi, ari, homogeneity) scores.
    """
    # Fix: the original also computed purity and AMI but never used or
    # returned them — dead work removed.
    nmi = normalized_mutual_info_score(labels_true, labels_infer)
    ari = adjusted_rand_score(labels_true, labels_infer)
    homogeneity = homogeneity_score(labels_true, labels_infer)
    return nmi, ari, homogeneity
Example #9
0
 def correlation(self, X, Y, heatmap=False):
     """K-means-cluster X into len(set(Y)) groups and print agreement with Y.

     Args:
         X: feature matrix, one row per sample.
         Y: integer class labels, one per row of X.
         heatmap: accepted for interface parity; not used in this variant.
     """
     nb_classes = len(set(Y))
     print(nb_classes)
     label_kmeans = KMeans(n_clusters=nb_classes,
                           random_state=0).fit(X).labels_
     nmi = normalized_mutual_info_score(Y, label_kmeans)
     ari = adjusted_rand_score(Y, label_kmeans)
     purity = metric.compute_purity(label_kmeans, Y)
     homogeneity = homogeneity_score(Y, label_kmeans)
     ami = adjusted_mutual_info_score(Y, label_kmeans)
     print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.
           format(nmi, ari, purity, ami, homogeneity))
Example #10
0
def eval_cluster(pathname1, labels_pred, labels_true, no_of_spk, timestamp, z_dim, sampler, beta_cycle_label, beta_cycle_gen):
    """Print clustering quality metrics and append them to Result.txt.

    Args:
        pathname1: directory receiving the Result.txt log.
        labels_pred: predicted cluster assignments.
        labels_true: ground-truth labels.
        no_of_spk: number of clusters K, logged verbatim.
        timestamp, z_dim, sampler, beta_cycle_label, beta_cycle_gen:
            run configuration, logged verbatim.
    """
    nmi = normalized_mutual_info_score(labels_true, labels_pred)
    ari = adjusted_rand_score(labels_true, labels_pred)
    purity = metric.compute_purity(labels_pred, labels_true)

    print(' #Points = {}, K = {}, Purity = {},  NMI = {}, ARI = {},'.format(
        labels_pred.shape[0], no_of_spk, purity, nmi, ari))

    log_line = ('{}, K = {}, z_dim = {}, beta_label = {}, beta_gen = {}, '
                'sampler = {}, Purity = {}, NMI = {}, ARI = {}\n').format(
                    timestamp, no_of_spk, z_dim, beta_cycle_label,
                    beta_cycle_gen, sampler, purity, nmi, ari)
    with open(pathname1 + '/Result.txt', 'a+') as f:
        f.write(log_line)
        f.flush()
Example #11
0
def cluster_eval(labels_true, latent_feat):
    """K-means-cluster latent features and print agreement with true labels.

    Args:
        labels_true: ground-truth labels, one per row of latent_feat.
        latent_feat: latent feature matrix, one row per sample.
    """
    assert len(labels_true) == latent_feat.shape[0]
    n_clusters = len(np.unique(labels_true))
    label_kmeans = KMeans(n_clusters=n_clusters, n_init=20,
                          random_state=0).fit(latent_feat).labels_
    confusion_mat(labels_true, label_kmeans)
    nmi = normalized_mutual_info_score(labels_true, label_kmeans)
    ari = adjusted_rand_score(labels_true, label_kmeans)
    purity = metric.compute_purity(label_kmeans, labels_true)
    homogeneity = homogeneity_score(labels_true, label_kmeans)
    ami = adjusted_mutual_info_score(labels_true, label_kmeans)
    print('NMI = {}, ARI = {}, Purity = {},AMI = {}, Homogeneity = {}'.format(
        nmi, ari, purity, ami, homogeneity))
Example #12
0
 def evaluate(self, timestamp, batch_idx, run_kmeans=False):
     """Evaluate the inferred clustering and append metrics to the log file.

     Saves an .npz snapshot of the inferred data and logs NMI/ARI/purity.

     Args:
         timestamp: run identifier (unused here, kept for interface parity).
         batch_idx: current batch index; used in filenames and log lines.
         run_kmeans: accepted for interface parity; not used in this variant.
     """
     data_y, label_y = self.y_sampler.load_all()
     data_x_, data_x_onehot_ = self.predict_x(data_y)
     np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1),
              data_x_, data_x_onehot_, label_y)
     # Cluster assignment = argmax over the one-hot (softmax) component.
     label_infer = np.argmax(data_x_onehot_, axis=1)
     purity = metric.compute_purity(label_infer, label_y)
     nmi = normalized_mutual_info_score(label_y, label_infer)
     ari = adjusted_rand_score(label_y, label_infer)
     # Context manager guarantees the log file is closed even on error
     # (original used open/close with no try/finally).
     with open('%s/log.txt' % self.save_dir, 'a+') as f:
         f.write('NMI = {}\tARI = {}\tPurity = {}\t batch_idx = {}\n'.format(
             nmi, ari, purity, batch_idx))
Example #13
0
def analyze_SCALE():
    """Cluster SCALE latent features with K-means, print metrics, plot t-SNE.

    Loads the 10-dimensional SCALE feature matrix and mouse-atlas labels,
    reports NMI/ARI/purity of a K-means clustering, then saves an embedding
    plot.
    """
    ratio = 0.1
    # NOTE(review): `path` and `plot_embedding` are module-level names
    # defined elsewhere in the file.
    data = np.loadtxt('SCALE/%s/feature.txt' % path,
                      delimiter='\t',
                      usecols=range(1, 1 + 10))
    # Fix: close the label file deterministically (original leaked the handle).
    with open('datasets/scATAC/Mouse_atlas/label_%s.txt' % str(ratio)) as fh:
        labels = [line.strip() for line in fh]
    uniq_labels = list(np.unique(labels))
    Y = np.array([uniq_labels.index(item) for item in labels])
    print(data.shape)
    assert len(Y) == data.shape[0]
    km = KMeans(n_clusters=len(uniq_labels), random_state=0).fit(data)
    label_kmeans = km.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    print(nmi, ari, purity)
    plot_embedding(data,
                   np.array(labels),
                   save='SCALE/embedding_tsne_cont_%s.png' % path)