Example #1
    def train(self):
        for i in range(self.epochs):
            print(f"epoch:{i}")
            pred_top, pred_att = self.train_loop()

            # measure accuracy on the train edges
            top_acc_metric = tf.keras.metrics.BinaryAccuracy()
            top_acc_metric.update_state(self.y_actual[0], pred_top)
            top_train_accuracy = top_acc_metric.result().numpy()

            # measure the accuracy on the attributes
            att_acc_metric = tf.keras.metrics.BinaryAccuracy()
            att_acc_metric.update_state(self.y_actual[1].flatten(),
                                        tf.reshape(pred_att, [-1]))
            att_train_accuracy = att_acc_metric.result().numpy()

            print(f"train top acc: {top_acc_function}")
            print(f"train att acc: {att_train_accuracy}")

            # get the labels from the embedding layer
            pred_labels_z = self.Z_np.argmax(1)
            pred_labels_x = self.X2_np.argmax(1)

            # get the accuracy of the predicted labels
            cm = clustering_metrics(self.clustering_labels, pred_labels_z)
            res = cm.clusteringAcc()
            print("acc_z:{}, f1_z:{}".format(res[0], res[1]))

            cm = clustering_metrics(self.clustering_labels, pred_labels_x)
            res = cm.clusteringAcc()
            print("acc_x:{}, f1_x:{}".format(res[0], res[1]))
Example #2
    def clustering_metrics(self, n_runs=10, compare_node_types=True):
        loader = self.trainvalidtest_dataloader()
        X_all, y_all, _ = next(iter(loader))
        self.cpu().forward(preprocess_input(X_all, device="cpu"))

        if not isinstance(self._embeddings, dict):
            self._embeddings = {
                list(self._node_ids.keys())[0]: self._embeddings
            }

        embeddings_all, types_all, y_true = self.dataset.get_embeddings_labels(
            self._embeddings, self._node_ids)

        # Record metrics for each run in a list of dicts (one fresh dict per
        # run; `[{}] * n_runs` would alias a single shared dict)
        res = [{} for _ in range(n_runs)]
        for i in range(n_runs):
            y_pred = self.dataset.predict_cluster(
                n_clusters=len(y_true.unique()), seed=i)

            if compare_node_types and len(self.dataset.node_types) > 1:
                res[i].update(
                    clustering_metrics(
                        y_true=types_all,
                        # Match y_pred to types_all's index
                        y_pred=types_all.index.map(
                            lambda idx: y_pred.get(idx, "")),
                        metrics=[
                            "homogeneity_ntype", "completeness_ntype",
                            "nmi_ntype"
                        ]))

            if y_pred.shape[0] != y_true.shape[0]:
                y_pred = y_pred.loc[y_true.index]
            res[i].update(
                clustering_metrics(
                    y_true,
                    y_pred,
                    metrics=["homogeneity", "completeness", "nmi"]))

        res_df = pd.DataFrame(res)
        metrics = res_df.mean(0).to_dict()
        return metrics
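Example #2 calls a functional clustering_metrics(y_true, y_pred, metrics=[...]) rather than the class used elsewhere. A minimal sketch of what it is assumed to compute — sklearn's permutation-invariant scores, with the "_ntype" variants reusing the same scores against node-type labels:

    from sklearn.metrics import (homogeneity_score, completeness_score,
                                 normalized_mutual_info_score)

    def clustering_metrics(y_true, y_pred,
                           metrics=("homogeneity", "completeness", "nmi")):
        fns = {
            "homogeneity": homogeneity_score,
            "completeness": completeness_score,
            "nmi": normalized_mutual_info_score,
        }
        # "homogeneity_ntype" -> "homogeneity", etc.
        return {name: fns[name.split("_")[0]](y_true, y_pred)
                for name in metrics}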
Example #3
def evaluate(emb, labels, K=2):
    # cluster the embeddings with k-means and score against ground truth
    kmeans = KMeans(K, random_state=0).fit(emb)
    predict_labels = kmeans.predict(emb)
    cm = clustering_metrics(labels, predict_labels)
    cm.evaluationClusterModelFromLabel()
    with open(
            '/home/zmm/advGraph/nettack-master/ourDefense/clusterLabel/dw_labels_polblogs',
            'wb') as f:
        pkl.dump(predict_labels, f)
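evaluationClusterModelFromLabel appears from here on with varying return arities across the different repos. A minimal sketch of the common core, under the assumption that ACC/F1 reuse the Hungarian alignment from the clustering_acc sketch after example #1, while NMI and adjusted Rand are already permutation-invariant:

    from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

    def evaluation_from_label(y_true, y_pred):
        acc, f1 = clustering_acc(y_true, y_pred)  # sketch from example #1
        nmi = normalized_mutual_info_score(y_true, y_pred)
        ari = adjusted_rand_score(y_true, y_pred)
        print('ACC=%.4f, F1=%.4f, NMI=%.4f, ARI=%.4f' % (acc, f1, nmi, ari))
        return acc, nmi, f1, ari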
Example #4
    def erun(self):
        model_str = self.model

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str, placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            emb, _ = update(ae_model, opt, sess, feas['adj_norm'],
                            feas['adj_label'], feas['features'], placeholders,
                            feas['adj'])

            if (epoch + 1) % 2 == 0:
                kmeans = KMeans(n_clusters=self.n_clusters,
                                random_state=0).fit(emb)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                cm.evaluationClusterModelFromLabel()

        kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(emb)
        predict_labels = kmeans.predict(emb)

        # save the final embedding and cluster assignments
        import numpy as np  # normally at module top
        np.save('results/emb.npy', emb, allow_pickle=False)
        np.save('results/labels.npy', predict_labels, allow_pickle=False)

        sess.close()
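The two .npy files written by erun can be read back directly (paths taken from the example above):

    import numpy as np

    emb = np.load('results/emb.npy')
    predict_labels = np.load('results/labels.npy')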
Example #5
    def erun(self):
        model_str = self.model

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str, placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'],
                                                    num_classes=self.n_clusters,
                                                    cat=self.cat)

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session
        sess = tf.Session()
        # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(self.iteration):
            if model_str in ['arga', 'arvga', 'vgcg']:
                emb, avg_loss = update_with_gan(ae_model, opt, sess,
                                                feas['adj_norm'],
                                                feas['adj_label'],
                                                feas['features'],
                                                placeholders, feas['adj'])
            elif model_str in ['gae', 'vgae', 'vgc']:
                emb, avg_loss = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'])
            else:
                raise ValueError('model {} is not included!'.format(model_str))

            if (epoch+1) % 1 == 0:
                kmeans = KMeans(n_clusters=self.n_clusters, random_state=10).fit(emb)
                print("Epoch: {:04d} Loss: {:.4f}".format(epoch + 1, avg_loss))
                predict_labels = kmeans.predict(emb)
                # predict_labels = classes
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                # cm.evaluationClusterModelFromLabel()
                acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore = cm.evaluationClusterModelFromLabel()
                print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
        self._vis(emb, feas['true_labels'], self.data_name, self.n_clusters, label_flag=True)
        self._vis(emb, predict_labels, self.data_name, self.n_clusters, label_flag=False)
Example #6
def main(X, av, gnd, m, a, k, ind):

    N = X.shape[0]
    begin_time_filter = time()
    types = len(np.unique(gnd))
    S, begin_time = FGC_cora_modified(X, av, gnd, a, k, ind)
    D = np.sum(S, axis=1)
    D = np.power(D, -0.5)
    D[np.isinf(D)] = 0
    D[np.isnan(D)] = 0
    D = np.diagflat(D)  # (m,m)

    S_hat = D.dot(S)  # (m,n)

    S_hat_tmp = S_hat.dot(S_hat.T)  # (m,m)
    S_hat_tmp[np.isinf(S_hat_tmp)] = 0
    S_hat_tmp[np.isnan(S_hat_tmp)] = 0
    # sigma, E = scipy.linalg.eig(S_hat_tmp)
    # top-`types` singular triplets of the (m,m) Gram matrix
    E, sigma, v = sp.linalg.svds(S_hat_tmp, k=types, which='LM')
    sigma = np.power(sigma, -0.5)  # sigma is a 1-D array of singular values
    sigma[np.isinf(sigma)] = 0
    sigma[np.isnan(sigma)] = 0
    sigma = np.diagflat(sigma)
    C_hat = (sigma.dot(E.T)).dot(S_hat)
    C_hat[np.isinf(C_hat)] = 0
    C_hat[np.isnan(C_hat)] = 0
    C_hat = C_hat.astype(float)
    kmeans = KMeans(n_clusters=types, random_state=37).fit(C_hat.T)

    predict_labels = kmeans.predict(C_hat.T)

    cm = clustering_metrics(gnd, predict_labels)
    ac, nm, f1, adj = cm.evaluationClusterModelFromLabel(m, a, k)
    end_time = time()
    tot_time = end_time - begin_time
    tot_time_filter = end_time - begin_time_filter
    return ac, nm, f1, adj, tot_time, tot_time_filter
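The pipeline above whitens the anchor graph (D^-1/2 S), takes the top-k spectrum of its (m, m) Gram matrix, and runs k-means on the spectral embedding C_hat. A self-contained toy run of the same steps, with a random dense S standing in for FGC's output (an assumption, purely to show the shapes):

    import numpy as np
    from scipy.sparse.linalg import svds
    from sklearn.cluster import KMeans

    m, n, k = 8, 100, 3
    S = np.abs(np.random.rand(m, n))                # anchor-to-node similarity
    D = np.diagflat(np.power(S.sum(axis=1), -0.5))  # degree normalization
    S_hat = D.dot(S)                                # (m, n)
    E, sigma, _ = svds(S_hat.dot(S_hat.T), k=k, which='LM')
    C_hat = np.diagflat(np.power(sigma, -0.5)).dot(E.T).dot(S_hat)  # (k, n)
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(C_hat.T)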
Example #7
    # v -> right singular vectors
    u, s, v = sp.linalg.svds(feature, k=k, which='LM')

    predict_labels = None
    for i in range(rep):
        # run kmeans on the current features
        kmeans = KMeans(n_clusters=k).fit(u)
        # get the predicted labels
        predict_labels = kmeans.predict(u)

        # measure the intra-cluster distance
        intraD[i] = square_dist(predict_labels, feature)
        # intraD[i] = dist(predict_labels, feature)

        # measure the accuracy, NMI and F1-score
        cm = clustering_metrics(gnd, predict_labels)
        ac[i], nm[i], f1[i] = cm.evaluationClusterModelFromLabel()

    # save the scores
    intra_list.append(np.mean(intraD))
    acc_list.append(np.mean(ac))
    stdacc_list.append(np.std(ac))
    nmi_list.append(np.mean(nm))
    stdnmi_list.append(np.std(nm))
    f1_list.append(np.mean(f1))
    stdf1_list.append(np.std(f1))

    print(f'power: {power}',
          f'intra_dist: {intra_list[-1]}',
          f'acc_mean: {acc_list[-1]}',
          f'acc_std: {stdacc_list[-1]}',
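square_dist is not shown in the snippet; a plausible stand-in (an assumption: mean squared distance of samples to their cluster centroid, used above as an intra-cluster compactness score):

    import numpy as np

    def square_dist(pred_labels, feature):
        feature = np.asarray(feature)
        total = 0.0
        for c in np.unique(pred_labels):
            pts = feature[pred_labels == c]
            total += ((pts - pts.mean(axis=0)) ** 2).sum()
        return total / feature.shape[0]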
Example #8
    def erun(self):
        tf.reset_default_graph()
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        placeholders = get_placeholder(feas['adjs'], feas['numView'])
        # construct model
        ae_model = get_model(model_str, placeholders, feas['numView'],
                             feas['num_features'], feas['num_nodes'],
                             self.n_clusters)
        # Optimizer
        opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                            feas['num_nodes'])
        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # allocate GPU memory on demand
        sess = tf.Session(config=config)

        sess.run(tf.global_variables_initializer())
        # Train model

        pos_weights = feas['pos_weights']
        fea_pos_weights = feas['fea_pos_weights']

        for epoch in range(self.iterations):
            reconstruct_loss = update(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights,
                                      fea_pos_weights,
                                      feas['norms'],
                                      attn_drop=0.,
                                      ffd_drop=0.)

            print('reconstruct_loss', reconstruct_loss)

            if (epoch + 1) % 10 == 0:
                emb_ind = update_test(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights=pos_weights,
                                      fea_pos_weights=fea_pos_weights,
                                      norm=feas['norms'],
                                      attn_drop=0,
                                      ffd_drop=0)
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
                print("PAP Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb_ind)
                #print('emb1', emb_ind[1])
                label_num = count_num(predict_labels)
                print('view1 label_num:', label_num)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                NMIs.append(nmi)
                loss.append(reconstruct_loss)
        kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
        y_pred_last = kmeans.labels_
        cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
        acc, f1_macro, precision_macro, nmi, adjscore, idx = cm.evaluationClusterModelFromLabel()
        init_cluster = tf.constant(kmeans.cluster_centers_)
        sess.run(
            tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
        q = compute_q(ae_model,
                      opt,
                      sess,
                      feas['adjs'],
                      feas['adjs_label'],
                      feas['features'],
                      placeholders,
                      pos_weights,
                      fea_pos_weights,
                      feas['norms'],
                      attn_drop=0.,
                      ffd_drop=0.)
        p = target_distribution(q)
        for epoch in range(self.kl_iterations):
            emb, kl_loss = update_kl(ae_model,
                                     opt,
                                     sess,
                                     feas['adjs'],
                                     feas['adjs_label'],
                                     feas['features'],
                                     p,
                                     placeholders,
                                     pos_weights,
                                     fea_pos_weights,
                                     feas['norms'],
                                     attn_drop=0.,
                                     ffd_drop=0.,
                                     idx=idx,
                                     label=label_mask(feas['true_labels']))
            if epoch % 10 == 0:
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                NMIs.append(nmi)
                loss.append(kl_loss)
            if epoch % 5 == 0:
                q = compute_q(ae_model,
                              opt,
                              sess,
                              feas['adjs'],
                              feas['adjs_label'],
                              feas['features'],
                              placeholders,
                              pos_weights,
                              fea_pos_weights,
                              feas['norms'],
                              attn_drop=0.,
                              ffd_drop=0.)
                p = target_distribution(q)
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                print('delta_label', delta_label)
                print("Epoch:", '%04d' % (epoch + 1))
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                if epoch > 0 and delta_label < self.tol:
                    print("early_stop")
                    break
        print('NMI', NMIs)
        print('loss', loss)
        return acc, f1_macro, precision_macro, nmi, adjscore
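Examples #8 and #10 refine clusters DEC-style: compute_q produces soft assignments q, and target_distribution sharpens them into the self-training target p. Assuming the standard formulation from Xie et al. (2016), a minimal sketch:

    import numpy as np

    def target_distribution(q):
        # p_ij ∝ q_ij^2 / f_j with f_j = Σ_i q_ij, renormalized per row
        weight = q ** 2 / q.sum(axis=0)
        return (weight.T / weight.sum(axis=1)).T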
Example #9
    def erun(self):
        model_str = self.model

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'], feas['num_features'])
        
        # a Gaussian mixture estimated from the DPP samples and kernel density estimation
        DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
        # DPP.sample_exact_k_dpp(size=4)
        pca = PCA(n_components=FLAGS.hidden2)

        # index = DPP.list_of_samples[0]
        
        if self.data_name == 'cora':
            DPP.sample_exact_k_dpp(size=24)
            index = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            # hand-picked anchor sets tried in turn; the last assignment wins
            index = np.array([481, 1763, 1701, 171, 1425, 842])     # peaks at 0.571 around epoch 36
            index = np.array([3165, 589, 1283, 1756, 2221, 2409])   # reaches 0.545 around epoch 50
            index = np.array([2300, 2725, 3313, 1216, 2821, 2432])  # epoch 50
            # 62-node set, reported as the best-performing in the original:
            # index = np.array([1718, 3241, 787, 2727, 624, 3110, 1503, 1867, 2410, 1594, 1203,
            #                   2711, 171, 1790, 1778, 294, 685, 39, 1700, 2650, 2028, 2573,
            #                   375, 2744, 2302, 1876, 784, 2233, 2546, 1793, 1677, 3278, 2587,
            #                   2623, 1018, 1160, 3166, 668, 1663, 3007, 864, 2893, 743, 3129,
            #                   3104, 3277, 1643, 3047, 322, 298, 2894, 35, 2578, 2031, 3316,
            #                   1815, 361, 1868, 1546, 1895, 1514, 636])
        elif self.data_name == 'pubmed':
            index = np.array([842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466,
                              2230, 14052])
        feature_sample = feas['features_dense']
        feature_sample = pca.fit_transform(feature_sample)
        
        featuresCompress = np.array([feature_sample[i] for i in index])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
            model_str, placeholders, feas['num_features'], feas['num_nodes'],
            feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                            discriminator, placeholders, feas['pos_weight'],
                            feas['norm'], d_real, feas['num_nodes'], GD_real)

        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        
        # record lists
        record = []
        record_emb = []
        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                   feas['adj_label'], feas['features'],
                                   placeholders, feas['adj'], kde,
                                   feas['features_dense'])

            if (epoch+1) % 2 == 0:
                record_emb.append(emb)
                kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(emb)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                a, b, c = cm.evaluationClusterModelFromLabel()
                record.append([a, b, c])
        rec = np.array(record)
        best = int(np.argmax(rec[:, 0]))  # evaluation index with the peak ACC
        ana = record[best]
        print('------------------------------------', best)
        emb = record_emb[best]
        scio.savemat('result/{}.mat'.format(self.data_name),
                     {'embedded': emb, 'labels': feas['true_labels']})
        print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (ana[0], ana[1], ana[2]))
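The saved .mat can be inspected later with scipy.io (the file name follows the format string above; 'cora' is just an illustrative data_name):

    import scipy.io as scio

    mat = scio.loadmat('result/cora.mat')
    emb, labels = mat['embedded'], mat['labels']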
Example #10
    def erun(self):
        tf.reset_default_graph()
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        placeholders = get_placeholder(feas['adjs'], feas['numView'])
        # construct model
        ae_model = get_model(model_str, placeholders, feas['numView'],
                             feas['num_features'], feas['num_nodes'],
                             self.n_clusters)
        # Optimizer
        opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                            feas['num_nodes'])
        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # allocate GPU memory on demand
        sess = tf.Session(config=config)

        sess.run(tf.global_variables_initializer())
        # Train model

        pos_weights = feas['pos_weights']
        fea_pos_weights = feas['fea_pos_weights']
        for epoch in range(self.warm_iteration):
            reconstruct_loss = warm_update(ae_model,
                                           opt,
                                           sess,
                                           feas['numView'],
                                           feas['adjs'],
                                           feas['adjs_label'],
                                           feas['features'],
                                           placeholders,
                                           pos_weights,
                                           fea_pos_weights,
                                           feas['norms'],
                                           attn_drop=0.,
                                           ffd_drop=0.)
            print('reconstruct_loss', reconstruct_loss)
            if (epoch + 1) == 50:
                emb = warm_update_test(ae_model,
                                       opt,
                                       sess,
                                       feas['adjs'],
                                       feas['adjs_label'],
                                       feas['features'],
                                       placeholders,
                                       pos_weights=feas['pos_weights'],
                                       fea_pos_weights=fea_pos_weights,
                                       norm=feas['norms'],
                                       attn_drop=0,
                                       ffd_drop=0)
                avg_emb = (emb[0] + emb[1]) / 2

                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[0])
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels0 = kmeans.predict(emb[0])
                label_num = count_num(predict_labels0)

                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels0)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                #enc = preprocessing.OneHotEncoder()
                #onehot_predict0 = enc.fit_transform(predict_labels0.reshape(-1, 1))
                #Q = eng.modul(adjs0, onehot_predict)
                #print('view0 Q', Q)

                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb[1])
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels1 = kmeans.predict(emb[1])
                label_num = count_num(predict_labels1)

                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels1)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                #onehot_predict1 = enc.fit_transform(predict_labels1.reshape(-1, 1))

                #Q = eng.modul(adjs1, onehot_predict)
                #print('view1 Q', Q)

                #scio.savemat('acm_modurity.mat', {'adj0':feas['adjs_label'][0],'onehot_predict0':onehot_predict0, 'adj1':feas['adjs_label'][1],'onehot_predict1':onehot_predict1})
                NMIs.append(nmi)
        print('NMIs', NMIs)
        print('warm up done!')

        for epoch in range(self.iterations):
            reconstruct_loss = update(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights,
                                      fea_pos_weights,
                                      feas['norms'],
                                      attn_drop=0.,
                                      ffd_drop=0.)

            print('reconstruct_loss', reconstruct_loss)

            if (epoch + 1) % 10 == 0:
                emb_ind = update_test(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights=pos_weights,
                                      fea_pos_weights=fea_pos_weights,
                                      norm=feas['norms'],
                                      attn_drop=0,
                                      ffd_drop=0)
                kmeans = KMeans(n_clusters=self.n_clusters).fit(
                    emb_ind[FLAGS.input_view])
                print("PAP Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb_ind[FLAGS.input_view])
                #print('emb1', emb_ind[1])
                label_num = count_num(predict_labels)
                print('view1 label_num:', label_num)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                NMIs.append(nmi)
                loss.append(reconstruct_loss)
        kmeans = KMeans(n_clusters=self.n_clusters).fit(
            emb_ind[FLAGS.input_view])
        y_pred_last = kmeans.labels_
        cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
        acc, f1_macro, precision_macro, nmi, adjscore, idx = cm.evaluationClusterModelFromLabel()
        init_cluster = tf.constant(kmeans.cluster_centers_)
        sess.run(
            tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
        q = compute_q(ae_model,
                      opt,
                      sess,
                      feas['adjs'],
                      feas['adjs_label'],
                      feas['features'],
                      placeholders,
                      pos_weights,
                      fea_pos_weights,
                      feas['norms'],
                      attn_drop=0.,
                      ffd_drop=0.)
        p = target_distribution(q)
        for epoch in range(self.kl_iterations):
            emb, kl_loss = update_kl(ae_model,
                                     opt,
                                     sess,
                                     feas['adjs'],
                                     feas['adjs_label'],
                                     feas['features'],
                                     p,
                                     placeholders,
                                     pos_weights,
                                     fea_pos_weights,
                                     feas['norms'],
                                     attn_drop=0.,
                                     ffd_drop=0.,
                                     idx=idx,
                                     label=label_mask(feas['true_labels']))
            if epoch % 10 == 0:
                kmeans = KMeans(n_clusters=self.n_clusters).fit(
                    emb[FLAGS.input_view])
                predict_labels = kmeans.predict(emb[FLAGS.input_view])
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                NMIs.append(nmi)
                loss.append(kl_loss)
            if epoch % 5 == 0:
                q = compute_q(ae_model,
                              opt,
                              sess,
                              feas['adjs'],
                              feas['adjs_label'],
                              feas['features'],
                              placeholders,
                              pos_weights,
                              fea_pos_weights,
                              feas['norms'],
                              attn_drop=0.,
                              ffd_drop=0.)
                p = target_distribution(q)
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                print('delta_label', delta_label)
                print("Epoch:", '%04d' % (epoch + 1))
                kmeans = KMeans(n_clusters=self.n_clusters).fit(
                    emb[FLAGS.input_view])
                predict_labels = kmeans.predict(emb[FLAGS.input_view])
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel()
                if epoch > 0 and delta_label < self.tol:
                    print("early_stop")
                    break
        print('NMI', NMIs)
        print('loss', loss)
        save_embed(emb[FLAGS.input_view], 'emb_10.txt')
        return acc, f1_macro, precision_macro, nmi, adjscore
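count_num and label_mask are external helpers in examples #8 and #10. A plausible count_num (an assumption: per-cluster frequency counts, used above to eyeball cluster balance):

    import numpy as np

    def count_num(labels):
        ids, counts = np.unique(labels, return_counts=True)
        return dict(zip(ids.tolist(), counts.tolist()))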
Example #11
mat = loadmat(matfile)
A = mat['network']
graph = A.A  # dense adjacency, shape (1490, 1490)

# 1. Load embeddings and labels
model = KeyedVectors.load_word2vec_format(embeddings_file, binary=False)
vocab = [int(i) for i in list(model.wv.vocab.keys())]
vocab.sort()
emb = numpy.asarray([model[str(node)] for node in vocab])  # (1224, 64)
empty = [i for i in range(graph.shape[0]) if i not in vocab]  # 266 nodes without embeddings
labels_all = mat['group'].nonzero()[1]  # shape (1490,)
labels = labels_all[vocab]  # shape (1224,)

kmeans = KMeans(2, random_state=0).fit(emb)
predict_labels = kmeans.predict(emb)
cm = clustering_metrics(labels, predict_labels)
cm.evaluationClusterModelFromLabel()
# nodes absent from the embedding vocabulary keep the placeholder label -1
predict_labels_all = numpy.ones(graph.shape[0]) * (-1)
print(predict_labels_all)
print(predict_labels_all.shape)
print(predict_labels.shape)
predict_labels_all[vocab] = predict_labels
predict_labels_all = predict_labels_all.astype(int)
print(predict_labels_all)
with open(
        '/home/zmm/advGraph/nettack-master/ourDefense/clusterLabel/dw_labels_polblogs',
        'wb') as f:
    pkl.dump(predict_labels_all, f)

# python example_graphs/scoring.py --emb example_graphs/polblogs.embeddings --network example_graphs/polblogs.mat --num-shuffle 10 --all
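Reading the pickled labels back (same path as the dump above):

    import pickle as pkl

    with open('/home/zmm/advGraph/nettack-master/ourDefense/clusterLabel/dw_labels_polblogs', 'rb') as f:
        predict_labels_all = pkl.load(f)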
Example #12
def train(features, adj_train, adj_train_norm, train_edges, train_false_edges, clustering_labels, K):
    print("training")

    # initialize the Adam optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=LR)

    max_acc = 0
    max_f1 = 0
    max_top_acc = 0

    n_nodes = adj_train.shape[0]

    # convert the normalized adj and the features to tensors
    adj_train_norm_tensor = convert_sparse_matrix_to_sparse_tensor(adj_train_norm)
    feature_tensor = convert_sparse_matrix_to_sparse_tensor(features)
    
    # define the model
    model = MyModel(K, adj_train_norm_tensor)

    for i in range(epochs):
        
        with tf.GradientTape() as tape:
            # forward pass
            pred = model(feature_tensor)

            # look up predictions for the positive and negative training edges
            # in the flattened n*n output
            train_edges_p_pred = [pred[x[0]*adj_train.shape[0]+x[1]] for x in train_edges]
            train_edges_n_pred = [pred[x[0]*adj_train.shape[0]+x[1]] for x in train_false_edges]

            train_edges_p_l = [1]*len(train_edges_p_pred)
            train_edges_n_l = [0]*len(train_edges_n_pred)

            pred = train_edges_p_pred + train_edges_n_pred

            y_actual = train_edges_p_l+train_edges_n_l
            
            # if you want to train on the entire original adj, use the line below
            #y_actual = adj_train.toarray().flatten()

            # get the embeddings
            embeddings_np = model.get_encode().numpy()

            mu = model.get_mu()
            logvar = model.get_logvar()

            # get loss
            loss = total_loss(y_actual, pred, mu, logvar, n_nodes)

        # get the gradients from the loss (outside the tape context)
        grad = tape.gradient(loss, model.trainable_variables)

        # optimize the weights
        optimizer.apply_gradients(zip(grad, model.trainable_variables))
        

        print("#"*30)
        print("epoch:{}, train loss: {}".format(i, loss))

        # get adj accuracy
        top_acc_function = tf.keras.metrics.BinaryAccuracy()
        top_acc_function.update_state(y_actual, pred)
        top_train_accuracy = top_acc_function.result().numpy()
        
        if max_top_acc < top_train_accuracy:
            max_top_acc = top_train_accuracy

        print("train top acc: {}".format(top_train_accuracy))

        # get labels accuracy from the embeddings
        pred_labels_x = embeddings_np.argmax(1)

        cm = clustering_metrics(clustering_labels, pred_labels_x)
        res = cm.clusteringAcc()
        print("acc:{}, f1:{}".format(res[0], res[2]))

        if res[0] > max_acc:
            max_acc = res[0]
        if res[2] > max_f1:
            max_f1 = res[2]
    
    print("max_acc:{}, max_f1:{}, max_top_acc: {}".format(max_acc, max_f1, max_top_acc))