def erun(self):
        """Train the model, printing link- and node-level scores every epoch.

        Every epoch runs one ``update`` step, prints the validation ROC/AP
        obtained from ``linkpred_metrics`` and then fits a fresh
        ``LogisticRegression`` on the ``train_mask`` rows of the embedding to
        print the accuracy on the ``test_mask`` rows together with the best
        accuracy seen so far.

        Reads ``self.model``, ``self.data_name`` and ``self.iteration``.
        Nothing is returned; all results are printed.
        """
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str, placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session; it is closed in the ``finally`` clause so its
        # resources are released even when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            max_acc = 0
            for epoch in range(self.iteration):

                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'])

                lm_train = linkpred_metrics(feas['val_edges'],
                                            feas['val_edges_false'])
                roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)

                print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                      "{:.5f}".format(avg_cost), "val_roc=",
                      "{:.5f}".format(roc_curr), "val_ap=",
                      "{:.5f}".format(ap_curr))

                # Node-classification probe, evaluated on every epoch.
                lin_model = LogisticRegression().fit(
                    emb[feas['train_mask']],
                    np.argmax(feas['y_train'], axis=1))
                ac_score = accuracy_score(
                    np.argmax(feas['y_test'], axis=1),
                    lin_model.predict(emb[feas['test_mask']]))
                max_acc = max(max_acc, ac_score)
                print('Accuracy: ' + str(ac_score))
                print('Max Accuracy: ' + str(max_acc))
        finally:
            sess.close()
示例#2
0
    def erun(self):
        """Train the model and log link-prediction scores to a text file.

        Every epoch runs one ``update`` step, prints the validation ROC/AP
        and appends the same numbers to ``recoder_link_prediction.txt``.
        Every tenth epoch the test ROC/AP is printed and appended as well.

        Reads ``self.model``, ``self.data_name``, ``self.cat`` and
        ``self.iteration``. Nothing is returned.
        """
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str,
                                                    placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'],
                                                    cat=self.cat)

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session; closed in ``finally`` so it is released even
        # when training or evaluation raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):

                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'])

                lm_train = linkpred_metrics(feas['val_edges'],
                                            feas['val_edges_false'])
                roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)

                print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                      "{:.5f}".format(avg_cost), "val_roc=",
                      "{:.5f}".format(roc_curr), "val_ap=",
                      "{:.5f}".format(ap_curr))

                # The log is reopened in append mode every epoch so each
                # record is on disk as soon as the epoch ends; the ``with``
                # block guarantees the handle is closed if evaluation fails.
                with open('recoder_link_prediction.txt', 'a') as fh:
                    fh.write(
                        'Epoch=%04d, train_loss=%.5f, val_roc=%.5f, val_ap=%.5f'
                        % (epoch + 1, avg_cost, roc_curr, ap_curr))
                    fh.write('\r\n')
                    if (epoch + 1) % 10 == 0:
                        lm_test = linkpred_metrics(feas['test_edges'],
                                                   feas['test_edges_false'])
                        roc_score, ap_score, _ = lm_test.get_roc_score(
                            emb, feas)
                        print('Test ROC score: ' + str(roc_score))
                        print('Test AP score: ' + str(ap_score))
                        fh.write('Test ROC score=%f, Test AP score=%f' %
                                 (roc_score, ap_score))
                        fh.write('\r\n')
        finally:
            sess.close()
示例#3
0
    def erun(self):
        """Train the model and save the final embeddings as a text file.

        Runs ``self.iteration`` epochs (only batch ``0`` is used in each),
        asks ``predict`` for the embeddings, keeps the last step along the
        second axis, reshapes to ``(num_node, FLAGS.hidden3[-1])`` and writes
        the result with ``np.savetxt`` to
        ``<embedding root>/<hidden3>-<time_decay>-<alpha>/<train_length>/
        <data_name>.txt``.

        Reads ``self.data_name``, ``self.train_length``, ``self.iteration``
        and the global ``FLAGS``. Nothing is returned.
        """
        # formatted data
        feas = format_data(self.data_name, self.train_length, FLAGS.time_decay)

        # Define placeholders
        placeholders = get_placeholder(feas['struct_adj_norms'][0])

        # construct model
        ae_model = get_model(placeholders, feas['feature_dim'],
                             feas['struct_features_nonzeros'][0],
                             feas['num_node'], self.train_length)

        # Optimizer
        opt = get_optimizer(ae_model, placeholders, self.train_length)

        # Initialize session; closed in ``finally`` so GPU/graph resources
        # are released even when training raises.
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=True))
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):
                for i in range(1):  # only the first batch is used
                    avg_cost, _, struct_loss, temporal_loss = update(
                        opt, sess, feas, i, placeholders)
                    print('dataname ', self.data_name, ' epoch ', epoch,
                          'batch ', i, 'total ', avg_cost, 'struct ',
                          struct_loss, 'temporal ', temporal_loss)

            embeddings = predict(ae_model, sess, feas, placeholders)
        finally:
            sess.close()

        embeddings = np.reshape(
            np.array(embeddings)[:, -1, :],
            [feas['num_node'], FLAGS.hidden3[-1]])
        print(embeddings)

        subdir = '-'.join([
            '-'.join(map(str, FLAGS.hidden3)),
            str(FLAGS.time_decay),
            str(FLAGS.alpha),
        ])
        write_path = '/home/huawei/risehuang/paper2/gcn_tcn/embedding/{}/{}/'.format(
            subdir, self.train_length)
        # makedirs creates every missing level in one call and does not race
        # with another process creating the same directory.
        os.makedirs(write_path, exist_ok=True)

        np.savetxt(write_path + '{}.txt'.format(self.data_name), embeddings)
示例#4
0
    def erun(self):
        """Train the model and print validation/test link-prediction scores.

        One ``update`` step is executed per epoch, followed by a validation
        ROC/AP printout; on every tenth epoch the test ROC/AP is printed too.
        The TensorFlow session is closed once training has finished.
        """
        model_name = self.model
        data = format_data(self.data_name)
        ph = get_placeholder(data['adj'])

        # Build the model and its optimizer.
        d_real, discriminator, ae_model = get_model(
            model_name, ph, data['num_features'], data['num_nodes'],
            data['features_nonzero'])
        optimizer = get_optimizer(
            model_name, ae_model, discriminator, ph, data['pos_weight'],
            data['norm'], d_real, data['num_nodes'])

        # Session that grows its GPU memory on demand.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session = tf.Session(config=session_config)
        session.run(tf.global_variables_initializer())

        roc_history = []

        for epoch_no in range(1, self.iteration + 1):
            emb, loss_value = update(
                ae_model, optimizer, session, data['adj_norm'],
                data['adj_label'], data['features'], ph, data['adj'])

            val_metrics = linkpred_metrics(data['val_edges'],
                                           data['val_edges_false'])
            val_roc, val_ap, _ = val_metrics.get_roc_score(emb, data)
            roc_history.append(val_roc)

            print("Epoch:", '%04d' % epoch_no,
                  "train_loss=", format(loss_value, '.5f'),
                  "val_roc=", format(roc_history[-1], '.5f'),
                  "val_ap=", format(val_ap, '.5f'))

            # Test metrics are only reported on every tenth epoch.
            if epoch_no % 10 != 0:
                continue
            test_metrics = linkpred_metrics(data['test_edges'],
                                            data['test_edges_false'])
            roc_score, ap_score, _ = test_metrics.get_roc_score(emb, data)
            print('Test ROC score: ' + str(roc_score))
            print('Test AP score: ' + str(ap_score))

        session.close()
示例#5
0
    def erun(self):
        """Train the model and dump the final embedding to a CSV file.

        Runs ``self.iteration`` ``update`` steps, printing the training loss
        every tenth epoch, then writes the embedding returned by the last
        step to ``output/<data_name>-embedding.csv`` (with the index column).

        Raises:
            ValueError: if ``self.iteration`` is smaller than 1, because no
                embedding would exist to save.
        """
        if self.iteration < 1:
            raise ValueError(
                'iteration must be >= 1 to produce an embedding, got %r' %
                (self.iteration,))

        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        print("node num:" + str(feas['num_nodes']))
        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str, placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session; closed in ``finally`` so it is released even
        # when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):
                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'])

                if (epoch + 1) % 10 == 0:
                    print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                          "{:.5f}".format(avg_cost))
        finally:
            sess.close()

        df = pd.DataFrame(emb)
        df.to_csv('output/{}-embedding.csv'.format(self.data_name),
                  index=True,
                  sep=',')
示例#6
0
    def erun(self):
        """Train the model, cluster the embedding and save the final result.

        On every second epoch the current embedding is clustered with
        ``KMeans`` and scored through ``clustering_metrics``. After training,
        the last embedding is clustered once more and both the embedding and
        its labels are stored under ``results/`` as ``.npy`` files.
        """
        model_name = self.model
        data = format_data(self.data_name)
        ph = get_placeholder(data['adj'])

        # Model and optimizer.
        d_real, discriminator, ae_model = get_model(
            model_name, ph, data['num_features'], data['num_nodes'],
            data['features_nonzero'])
        optimizer = get_optimizer(
            model_name, ae_model, discriminator, ph, data['pos_weight'],
            data['norm'], d_real, data['num_nodes'])

        # Session that grows its GPU memory on demand.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session = tf.Session(config=session_config)
        session.run(tf.global_variables_initializer())

        for epoch_no in range(1, self.iteration + 1):
            emb, _ = update(ae_model, optimizer, session, data['adj_norm'],
                            data['adj_label'], data['features'], ph,
                            data['adj'])

            # Only even-numbered epochs are evaluated.
            if epoch_no % 2:
                continue
            clusterer = KMeans(n_clusters=self.n_clusters, random_state=0)
            clusterer.fit(emb)
            print("Epoch:", '%04d' % epoch_no)
            assignments = clusterer.predict(emb)
            scorer = clustering_metrics(data['true_labels'], assignments)
            scorer.evaluationClusterModelFromLabel()

        # Final clustering of the last embedding.
        final_clusterer = KMeans(n_clusters=self.n_clusters, random_state=0)
        final_clusterer.fit(emb)
        final_labels = final_clusterer.predict(emb)

        import numpy as np
        np.save('results/emb.npy', emb, allow_pickle=False)
        np.save('results/labels.npy', final_labels, allow_pickle=False)

        session.close()
示例#7
0
    def erun(self):
        """Train the model and export per-node reconstruction errors.

        Prints the training loss every 10 epochs and the ROC-AUC of the
        reconstruction errors against ``feas['labels']`` every 100 epochs.
        After training, the labels are written in order of decreasing error
        to ``output/<data_name>-ranking.txt`` and the raw errors to
        ``output/<data_name>-scores.csv`` (column ``AD-GCA``).

        Raises:
            ValueError: if ``self.iteration`` is smaller than 1, because no
                reconstruction errors would exist to export.
        """
        if self.iteration < 1:
            raise ValueError(
                'iteration must be >= 1 to produce scores, got %r' %
                (self.iteration,))

        model_str = self.model
        # load data
        feas = format_data(self.data_name)
        print("feature number: {}".format(feas['num_features']))

        # Define placeholders
        placeholders = get_placeholder()

        # construct model
        gcn_model = get_model(model_str, placeholders, feas['num_features'],
                              feas['num_nodes'], feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, gcn_model, placeholders,
                            feas['num_nodes'], FLAGS.alpha)

        # The labels are not passed to ``update``, so the ground-truth
        # vector is built once instead of at every evaluation.
        y_true = [label[0] for label in feas['labels']]

        # Initialize session; closed in ``finally`` so it is released even
        # when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(1, self.iteration + 1):

                reconstruction_errors, reconstruction_loss = update(
                    gcn_model, opt, sess, feas['adj_norm'], feas['adj_label'],
                    feas['features'], placeholders, feas['adj'])
                if epoch % 10 == 0:
                    print("Epoch:", '%04d' % (epoch), "train_loss=",
                          "{:.5f}".format(reconstruction_loss))

                if epoch % 100 == 0:
                    auc = roc_auc_score(y_true, reconstruction_errors)
                    print(auc)
        finally:
            sess.close()

        # Largest reconstruction error first.
        sorted_errors = np.argsort(-reconstruction_errors, axis=0)
        with open('output/{}-ranking.txt'.format(self.data_name), 'w') as f:
            for index in sorted_errors:
                f.write("%s\n" % feas['labels'][index][0])

        df = pd.DataFrame({'AD-GCA': reconstruction_errors})
        df.to_csv('output/{}-scores.csv'.format(self.data_name),
                  index=False,
                  sep=',')
    def erun(self):
        """Train the model and return the final ROC-AUC.

        Runs ``self.iteration`` ``update`` steps and scores the
        reconstruction errors of the last step against the first entry of
        every row in ``feas['labels']``.

        Returns:
            list: ``[self.data_name, self.alpha, auc]``.

        Raises:
            ValueError: if ``self.iteration`` is smaller than 1, because no
                reconstruction errors would exist to score.
        """
        if self.iteration < 1:
            raise ValueError(
                'iteration must be >= 1 to compute the AUC, got %r' %
                (self.iteration,))

        model_str = self.model
        # load data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder()

        # construct model
        gcn_model = get_model(model_str, placeholders, feas['num_features'],
                              feas['num_nodes'], feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, gcn_model, placeholders,
                            feas['num_nodes'], self.alpha)

        # Initialize session; closed in ``finally`` so it is released even
        # when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(1, self.iteration + 1):
                reconstruction_errors, reconstruction_loss = update(
                    gcn_model, opt, sess, feas['adj_norm'], feas['adj_label'],
                    feas['features'], placeholders, feas['adj'])
        finally:
            sess.close()

        y_true = [label[0] for label in feas['labels']]
        auc = roc_auc_score(y_true, reconstruction_errors)
        return [self.data_name, self.alpha, auc]
示例#9
0
    def erun(self):
        """Train the model, cluster the embedding each epoch and plot it.

        ``'arga'``, ``'arvga'`` and ``'vgcg'`` are trained with
        ``update_with_gan``; ``'gae'``, ``'vgae'`` and ``'vgc'`` with
        ``update``. After every epoch the embedding is clustered with
        ``KMeans`` and the clustering metrics are printed. When training is
        done ``self._vis`` is called twice: once with the true labels and
        once with the labels predicted in the last epoch.

        Raises:
            ValueError: if ``self.model`` is not one of the supported names
                or ``self.iteration`` is smaller than 1 (there would be no
                embedding to visualise).
        """
        model_str = self.model

        # The training step only depends on the model name, so it is chosen
        # once; an unknown name fails here instead of surfacing later as an
        # unbound ``emb``.
        if model_str in ('arga', 'arvga', 'vgcg'):
            train_step = update_with_gan
        elif model_str in ('gae', 'vgae', 'vgc'):
            train_step = update
        else:
            raise ValueError(
                "model %r is not supported; expected one of 'arga', 'arvga', "
                "'vgcg', 'gae', 'vgae', 'vgc'" % (model_str,))
        if self.iteration < 1:
            raise ValueError(
                'iteration must be >= 1 to produce an embedding, got %r' %
                (self.iteration,))

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(
            model_str, placeholders, feas['num_features'], feas['num_nodes'],
            feas['features_nonzero'], num_classes=self.n_clusters,
            cat=self.cat)

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session; closed in ``finally`` so it is released even
        # when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):
                emb, avg_loss = train_step(ae_model, opt, sess,
                                           feas['adj_norm'],
                                           feas['adj_label'],
                                           feas['features'], placeholders,
                                           feas['adj'])

                kmeans = KMeans(n_clusters=self.n_clusters,
                                random_state=10).fit(emb)
                print("Epoch: {:04d} Loss: {:.4f}".format(epoch + 1, avg_loss))
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(feas['true_labels'], predict_labels)
                acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore = cm.evaluationClusterModelFromLabel()
                print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
        finally:
            sess.close()

        self._vis(emb, feas['true_labels'], self.data_name, self.n_clusters, label_flag= True)
        self._vis(emb, predict_labels, self.data_name, self.n_clusters, label_flag= False)
示例#10
0
文件: encoder.py 项目: basiralab/HADA
    def erun(self, adj, features, max_epochs=5):
        """Train on the given graph and return the learned embedding.

        The default graph is reset first, so the method can be called
        repeatedly. Training runs for ``min(self.iteration, max_epochs)``
        epochs and prints the loss of each.

        Args:
            adj: adjacency data handed to ``format_data_new``.
            features: node features; wrapped in ``coo_matrix`` for
                ``format_data_new`` and also passed unchanged to ``update``.
            max_epochs: upper bound on the number of training epochs. The
                default of 5 matches the previously hard-coded stop after
                the fifth epoch.

        Returns:
            The embedding returned by the last ``update`` call.

        Raises:
            ValueError: if no epoch would be run, because there would be no
                embedding to return.
        """
        n_epochs = min(self.iteration, max_epochs)
        if n_epochs < 1:
            raise ValueError(
                'at least one training epoch is required, got %r' %
                (n_epochs,))

        tf.reset_default_graph()
        model_str = self.model
        # formatted data
        feas = format_data_new(adj, coo_matrix(features))

        # Define placeholders
        placeholders = get_placeholder(feas['adj'])

        # construct model
        d_real, discriminator, ae_model = get_model(model_str, placeholders,
                                                    feas['num_features'],
                                                    feas['num_nodes'],
                                                    feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, discriminator, placeholders,
                            feas['pos_weight'], feas['norm'], d_real,
                            feas['num_nodes'])

        # Initialize session; closed in ``finally`` so repeated calls do not
        # accumulate open sessions.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(n_epochs):
                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'], features)
                print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                      "{:.5f}".format(avg_cost))
        finally:
            sess.close()

        return emb
示例#11
0
# Script fragment: derives the model dimensions from `features_lil` / `adj`,
# builds the reconstruction target, then creates the placeholders, model,
# optimizer and session. `features_lil`, `adj`, `adj_train`,
# `attr_labels_list` and `dim_attr` must already be defined above this point.
features_tuple = sparse_to_tuple(features_lil.tocoo())
num_nodes = adj.shape[0]
# NOTE(review): same expression as `features_tuple` above; one of the two
# conversions looks redundant unless both names are used later.
features_sp = sparse_to_tuple(features_lil.tocoo())
# presumably sparse_to_tuple returns (coords, values, shape) - TODO confirm
num_features = features_sp[2][1]
features_nonzero = features_sp[1].shape[0]

# (total entries of adj - sum of adj) / sum of adj
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = 1
# Training adjacency plus the identity matrix, converted to tuple form.
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

# In[2]:

# Define placeholders, model and optimizer

placeholders = get_placeholder(adj)
d_real, discriminator, ae_model = get_model(placeholders, num_features,
                                            num_nodes, features_nonzero,
                                            attr_labels_list[-1], dim_attr)
opt = get_optimizer(ae_model, discriminator, placeholders, pos_weight, norm,
                    d_real, num_nodes, attr_labels_list)

# In[3]:

# Train model
preds_all = None
labels_all = None
# NOTE(review): the session is not closed anywhere in the visible part of
# the script.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(FLAGS.epochs):
    emb, emb_long, avg_cost, attr_loss, pri_loss, link_loss = update(
示例#12
0
    def erun(self):
        """Train the model and save the embedding with the best test ROC.

        A set of anchor nodes is chosen per dataset (sampled with a k-DPP
        for ``'cora'``, fixed index lists for ``'citeseer'`` and
        ``'pubmed'``). The dense features are reduced with PCA to
        ``FLAGS.hidden2`` components and a ``KernelDensity`` estimator is
        fitted on the anchor rows; it is handed to every ``update`` call.

        Validation ROC/AP is printed every epoch. Every tenth epoch the test
        ROC/AP is printed and recorded together with the embedding. The
        embedding with the highest recorded test ROC is written to
        ``result/<data_name>_link_64_64_new.mat``.

        Raises:
            ValueError: if ``self.data_name`` is not one of the supported
                datasets, or if training finished without a single test
                evaluation (fewer than 10 epochs), because there would be
                nothing to save.
        """
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'], feas['num_features'])

        # Gaussian mixture estimated from the DPP sample and a density
        # estimate.
        pca = PCA(n_components=FLAGS.hidden2)

        if self.data_name == 'cora':
            # The DPP kernel is a dense copy of the adjacency matrix, so it
            # is only built for the dataset that actually samples from it.
            DPP = FiniteDPP('correlation', K=feas['adj'].toarray())
            DPP.sample_exact_k_dpp(size=21)
            anchor_idx = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            anchor_idx = np.array([
                1782, 741, 3258, 3189, 3112, 2524, 2895, 1780, 1100, 2735,
                1318, 2944, 1825, 18, 987, 2564, 463, 6, 3173, 701, 1901, 2349,
                2786, 2412, 646, 2626, 2648, 1793, 432, 538, 1729, 1217, 1397,
                1932, 2850, 458, 2129, 702, 2934, 2030, 2882, 1393, 308, 1271,
                1106, 2688, 629, 1145, 3251, 1903, 1004, 1149, 1385, 285, 858,
                2977, 844, 335, 532, 404, 3174, 528
            ])
        elif self.data_name == 'pubmed':
            anchor_idx = np.array(
                [842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466, 2230])
        else:
            raise ValueError(
                "no anchor nodes defined for dataset %r; expected 'cora', "
                "'citeseer' or 'pubmed'" % (self.data_name,))

        feature_sample = pca.fit_transform(feas['features_dense'])
        featuresCompress = np.array([feature_sample[i] for i in anchor_idx])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(
            model_str, placeholders, feas['num_features'], feas['num_nodes'],
            feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph,
                            discriminator, placeholders, feas['pos_weight'],
                            feas['norm'], d_real, feas['num_nodes'], GD_real)

        record = []
        record_emb = []

        # Initialize session; closed in ``finally`` so it is released even
        # when training raises.
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):

                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'],
                                       feas['adj_label'], feas['features'],
                                       placeholders, feas['adj'], kde,
                                       feas['features_dense'])

                lm_train = linkpred_metrics(feas['val_edges'],
                                            feas['val_edges_false'])
                roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
                print(
                    "Epoch:", '%04d' % (epoch + 1),
                    "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}"
                    .format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
                            avg_cost[4]), "val_roc=",
                    "{:.5f}".format(roc_curr), "val_ap=",
                    "{:.5f}".format(ap_curr))

                if (epoch + 1) % 10 == 0:
                    lm_test = linkpred_metrics(feas['test_edges'],
                                               feas['test_edges_false'])
                    roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                    print('Test ROC score: ' + str(roc_score))
                    print('Test AP score: ' + str(ap_score))
                    record.append([roc_score, ap_score])
                    record_emb.append(emb)
        finally:
            sess.close()

        if not record:
            raise ValueError(
                'no test evaluation was recorded; iteration must be >= 10, '
                'got %r' % (self.iteration,))

        # First recorded evaluation with the highest test ROC.
        best = max(range(len(record)), key=lambda k: record[k][0])
        emb = record_emb[best]
        ana = record[best]
        scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {
            'embedded': emb,
            'labels': feas['true_labels']
        })
        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
示例#13
0
    def erun(self):
        """Run reconstruction training followed by KL-based refinement.

        Phase 1 calls ``update`` for ``self.iterations`` epochs and, every
        tenth epoch, clusters the embedding from ``update_test`` with KMeans
        and evaluates it. The cluster layer of the model is then initialised
        with the KMeans centres of the last evaluated embedding.

        Phase 2 calls ``update_kl`` for up to ``self.kl_iterations`` epochs.
        The target distribution is recomputed every fifth epoch, and the loop
        stops early once the fraction of nodes whose ``q.argmax(1)`` label
        changed drops below ``self.tol``.

        Returns:
            tuple: ``(acc, f1_macro, precision_macro, nmi, adjscore)`` from
            the most recent ``evaluationClusterModelFromLabel`` call.

        NOTE(review): ``NMIs`` and ``loss`` are appended to and printed here
        but are not defined inside this method - presumably module-level
        lists; confirm. ``emb_ind`` is only assigned on every tenth epoch of
        phase 1, so ``self.iterations < 10`` leaves it unbound. The session
        is never closed.
        """
        tf.reset_default_graph()
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        placeholders = get_placeholder(feas['adjs'], feas['numView'])
        # construct model
        ae_model = get_model(model_str, placeholders, feas['numView'],
                             feas['num_features'], feas['num_nodes'],
                             self.n_clusters)
        # Optimizer
        opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                            feas['num_nodes'])
        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # let TF grow GPU memory on demand
        sess = tf.Session(config=config)

        sess.run(tf.global_variables_initializer())
        # Train model

        pos_weights = feas['pos_weights']
        fea_pos_weights = feas['fea_pos_weights']

        # Phase 1: reconstruction training.
        for epoch in range(self.iterations):
            reconstruct_loss = update(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights,
                                      fea_pos_weights,
                                      feas['norms'],
                                      attn_drop=0.,
                                      ffd_drop=0.)

            print('reconstruct_loss', reconstruct_loss)

            # Every tenth epoch: cluster the current embedding and evaluate.
            if (epoch + 1) % 10 == 0:
                emb_ind = update_test(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights=pos_weights,
                                      fea_pos_weights=fea_pos_weights,
                                      norm=feas['norms'],
                                      attn_drop=0,
                                      ffd_drop=0)
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
                print("PAP Epoch:", '%04d' % (epoch + 1))
                predict_labels = kmeans.predict(emb_ind)
                #print('emb1', emb_ind[1])
                label_num = count_num(predict_labels)
                print('view1 label_num:', label_num)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
                )
                NMIs.append(nmi)
                loss.append(reconstruct_loss)
        # Initialise the cluster layer from a KMeans fit on the last
        # evaluated embedding; ``idx`` from this evaluation is later passed
        # to ``update_kl``.
        kmeans = KMeans(n_clusters=self.n_clusters).fit(emb_ind)
        y_pred_last = kmeans.labels_
        cm = clustering_metrics(label_mask(feas['true_labels']), y_pred_last)
        acc, f1_macro, precision_macro, nmi, adjscore, idx = cm.evaluationClusterModelFromLabel(
        )
        init_cluster = tf.constant(kmeans.cluster_centers_)
        sess.run(
            tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
        # Initial soft assignment ``q`` and the target ``p`` derived from it.
        q = compute_q(ae_model,
                      opt,
                      sess,
                      feas['adjs'],
                      feas['adjs_label'],
                      feas['features'],
                      placeholders,
                      pos_weights,
                      fea_pos_weights,
                      feas['norms'],
                      attn_drop=0.,
                      ffd_drop=0.)
        p = target_distribution(q)
        # Phase 2: KL refinement against the target distribution ``p``.
        for epoch in range(self.kl_iterations):
            emb, kl_loss = update_kl(ae_model,
                                     opt,
                                     sess,
                                     feas['adjs'],
                                     feas['adjs_label'],
                                     feas['features'],
                                     p,
                                     placeholders,
                                     pos_weights,
                                     fea_pos_weights,
                                     feas['norms'],
                                     attn_drop=0.,
                                     ffd_drop=0.,
                                     idx=idx,
                                     label=label_mask(feas['true_labels']))
            # Every tenth epoch (starting at 0): record NMI and KL loss.
            if epoch % 10 == 0:
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
                )
                NMIs.append(nmi)
                loss.append(kl_loss)
            # Every fifth epoch: refresh ``q``/``p`` and test for convergence.
            if epoch % 5 == 0:
                q = compute_q(ae_model,
                              opt,
                              sess,
                              feas['adjs'],
                              feas['adjs_label'],
                              feas['features'],
                              placeholders,
                              pos_weights,
                              fea_pos_weights,
                              feas['norms'],
                              attn_drop=0.,
                              ffd_drop=0.)
                p = target_distribution(q)
                y_pred = q.argmax(1)
                # Fraction of nodes whose hard label changed since the
                # previous check.
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                print('delta_label', delta_label)
                print("Epoch:", '%04d' % (epoch + 1))
                kmeans = KMeans(n_clusters=self.n_clusters).fit(emb)
                predict_labels = kmeans.predict(emb)
                cm = clustering_metrics(label_mask(feas['true_labels']),
                                        predict_labels)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = cm.evaluationClusterModelFromLabel(
                )
                # Epoch 0 is excluded from the early-stop test.
                if epoch > 0 and delta_label < self.tol:
                    print("early_stop")
                    break
        print('NMI', NMIs)
        print('loss', loss)
        return acc, f1_macro, precision_macro, nmi, adjscore
示例#14
0
    def erun(self, writer):
        """Train the selected anomaly detector and log per-epoch metrics.

        Builds the ``Dominant`` or ``AnomalyDAE`` model (chosen by
        ``self.model``), runs ``self.iteration`` training epochs and, after
        each epoch, min-max normalises the reconstruction errors returned by
        ``update``, computes the ROC-AUC against ``feas['labels']`` and prints
        the indices with the highest scores.

        Args:
            writer: Object exposing ``add_scalar(tag, value, step)``; receives
                the total / structure / attribute losses and the AUC.

        Raises:
            ValueError: If ``self.model`` is not ``'Dominant'`` or
                ``'AnomalyDAE'``.
        """
        model_str = self.model
        if model_str not in ('Dominant', 'AnomalyDAE'):
            # An unknown name used to surface much later as a NameError on the
            # never-assigned ``model``; fail early with a readable message.
            raise ValueError("unsupported model: {!r}".format(model_str))

        # load data
        feas = format_data(self.data_name)

        print("feature number: {}".format(feas['num_features']))
        # Define placeholders
        placeholders = get_placeholder()

        num_features = feas['num_features']
        features_nonzero = feas['features_nonzero']
        num_nodes = feas['num_nodes']

        if model_str == 'Dominant':
            model = GCNModelAE(placeholders, num_features, features_nonzero)
            opt = OptimizerAE(preds_attribute=model.attribute_reconstructions,
                              labels_attribute=tf.sparse_tensor_to_dense(placeholders['features']),
                              preds_structure=model.structure_reconstructions,
                              labels_structure=tf.sparse_tensor_to_dense(placeholders['adj_orig']), alpha=FLAGS.alpha)

        elif model_str == 'AnomalyDAE':
            print(placeholders, num_features, num_nodes, features_nonzero, self.decoder_act)
            model = AnomalyDAE(placeholders, num_features, num_nodes, features_nonzero, self.decoder_act, self.k)
            opt = OptimizerDAE(preds_attribute=model.attribute_reconstructions,
                               labels_attribute=tf.sparse_tensor_to_dense(placeholders['features']),
                               preds_structure=model.structure_reconstructions,
                               labels_structure=tf.sparse_tensor_to_dense(placeholders['adj_orig']), alpha=FLAGS.alpha,
                               eta=FLAGS.eta, theta=FLAGS.theta)

        # The labels do not change between epochs, so read them once.
        y_true = [label[0] for label in feas['labels']]
        anomaly_count = int(sum(y_true))

        # Initialize session
        sess = tf.Session()
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(1, self.iteration + 1):

                train_loss, loss_struc, loss_attr, rec_error = update(model, opt, sess,
                                                                    feas['adj_norm'],
                                                                    feas['adj_label'],
                                                                    feas['features'],
                                                                    placeholders, feas['adj'])

                scores = np.array(rec_error)
                auc = 0
                try:
                    lowest = np.min(scores)
                    span = np.max(scores) - lowest
                    scores = scores - lowest
                    # Constant scores would divide 0/0; skipping the division
                    # leaves all-zero scores. NaN spans still propagate and are
                    # reported by the handler below, as before.
                    if span != 0:
                        scores = scores / span

                    auc = roc_auc_score(y_true, scores)

                except Exception as err:
                    # Best effort: keep training even if the metric cannot be
                    # computed (e.g. a single class in y_true); AUC stays 0.
                    print("[ERROR] for auc calculation!!!", err)

                print("Epoch:", '%04d' % (epoch),
                      "AUC={:.5f}".format(round(auc,4)),
                      "train_loss={:.5f}".format(train_loss))

                writer.add_scalar('loss_total', train_loss, epoch)
                writer.add_scalar('loss_struc', loss_struc, epoch)
                writer.add_scalar('loss_attr', loss_attr, epoch)
                writer.add_scalar('auc', auc, epoch)

                # ``ranking[-k:]`` would return *everything* for k == 0, so
                # slice from an explicit non-negative start instead.
                ranking = scores.argsort()
                top = ranking[max(len(ranking) - anomaly_count, 0):]
                print("There are", anomaly_count,  "Anomalous nodes:", np.sort(top))
        finally:
            # Release the session first and only then reset the default graph:
            # resetting while a session is active is undefined behaviour in TF.
            sess.close()
            tf.reset_default_graph()
示例#15
0
    def erun(self):
        """Train the model and save the embedding with the best clustering score.

        A handful of anchor nodes is chosen (sampled with a k-DPP for cora,
        fixed indices for citeseer and pubmed); a kernel density estimate is
        fitted on their PCA-compressed features and passed to ``update``.
        Every second epoch the embedding is clustered with k-means and scored;
        the embedding whose first metric is highest is written to
        ``result/<data_name>.mat`` and its three metrics are printed.

        Raises:
            ValueError: If ``self.data_name`` has no anchor-node configuration.
        """
        import os  # local import: only needed to make sure the output dir exists

        model_str = self.model

        # formatted data
        feas = format_data(self.data_name)

        # Define placeholders
        placeholders = get_placeholder(feas['adj'], feas['num_features'])

        # Define the Gaussian mixture estimated from DPP samples and a density
        # estimate.
        pca = PCA(n_components = FLAGS.hidden2)

        if self.data_name == 'cora':
            # Only cora samples its anchors, so the dense kernel (a full
            # N x N copy of the adjacency matrix) is built only here.
            DPP = FiniteDPP('correlation',**{'K': feas['adj'].toarray()})
            DPP.sample_exact_k_dpp(size=24)
            anchor_index = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            # Alternative anchor sets that were tried (kept for reference):
            #   [481, 1763, 1701, 171, 1425, 842]      # peaks at epoch 36: 0.571
            #   [3165, 589, 1283, 1756, 2221, 2409]    # reaches 0.545 at epoch 50
            #   a 62-node set noted as giving the best performance:
            #   [1718, 3241,  787, 2727,  624, 3110, 1503, 1867, 2410, 1594, 1203,
            #    2711,  171, 1790, 1778,  294,  685,   39, 1700, 2650, 2028, 2573,
            #     375, 2744, 2302, 1876,  784, 2233, 2546, 1793, 1677, 3278, 2587,
            #    2623, 1018, 1160, 3166,  668, 1663, 3007,  864, 2893,  743, 3129,
            #    3104, 3277, 1643, 3047,  322,  298, 2894,   35, 2578, 2031, 3316,
            #    1815,  361, 1868, 1546, 1895, 1514,  636]
            # The set below is the one that was actually in effect (the last
            # of three consecutive assignments in the previous code).
            anchor_index = np.array([2300, 2725, 3313, 1216, 2821, 2432])#50
        elif self.data_name == 'pubmed':
            anchor_index = np.array([  842,  3338,  5712, 17511, 10801,  2714,  6970, 13296,  5466,
         2230, 14052])
        else:
            # Previously this fell through and crashed with a NameError below.
            raise ValueError(
                "no anchor nodes configured for dataset {!r}".format(self.data_name))

        feature_sample = feas['features_dense']
        feature_sample = pca.fit_transform(feature_sample)

        featuresCompress = np.array([feature_sample[i] for i in anchor_index])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'], feas['features_nonzero'])

        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph, discriminator, placeholders, feas['pos_weight'], feas['norm'], d_real, feas['num_nodes'], GD_real)

        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config = config)

        # Only the best evaluation is needed afterwards, so track it directly
        # instead of keeping every embedding in memory.
        evaluations = 0
        best_pos = None      # position of the best evaluation (0-based)
        best_scores = None   # [a, b, c] of the best evaluation
        best_emb = None
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(self.iteration):
                emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'], feas['adj_label'], feas['features'], placeholders, feas['adj'],kde, feas['features_dense'])

                if (epoch+1) % 2 == 0:
                    kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(emb)
                    print("Epoch:", '%04d' % (epoch + 1))
                    predict_labels = kmeans.predict(emb)
                    cm = clustering_metrics(feas['true_labels'], predict_labels)
                    [a,b,c] = cm.evaluationClusterModelFromLabel()
                    # Strict ">" keeps the first occurrence of the maximum,
                    # matching list.index(max(...)).
                    if best_scores is None or a > best_scores[0]:
                        best_pos = evaluations
                        best_scores = [a,b,c]
                        best_emb = emb
                    evaluations += 1
        finally:
            sess.close()

        if best_scores is None:
            # Fewer than two epochs: nothing was evaluated, nothing to save.
            print('No evaluation was performed; nothing to save.')
            return

        print('------------------------------------',best_pos)
        # savemat does not create missing directories; make sure a finished
        # training run is not lost because "result/" is absent.
        if not os.path.isdir('result'):
            os.makedirs('result')
        scio.savemat('result/{}.mat'.format(self.data_name),{'embedded':best_emb,
                                     'labels':feas['true_labels']})
        print('The peak ACC=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (best_scores[0], best_scores[1], best_scores[2]))
示例#16
0
    def erun(self, writer):
        """Train the community-aware anomaly detector and log per-epoch metrics.

        Loads the BlogCatalog edge list, optionally runs community detection
        (``self.detection_method`` of ``'infomap'`` or ``'lpa'``), derives the
        matrix ``B`` passed to ``update``, builds the model selected by
        ``self.model`` and trains it for ``self.iteration`` epochs. After each
        epoch the min-max normalised reconstruction errors are scored with
        ROC-AUC against ``feas['labels']``; the mean AUC is printed at the end.

        Args:
            writer: Object exposing ``add_scalar(tag, value, step)``; receives
                the KL / structure / attribute losses and the AUC.

        Raises:
            ValueError: If ``self.model`` is not ``'Dominant'`` or
                ``'AnomalyDAE'``.
        """
        model_str = self.model
        if model_str not in ('Dominant', 'AnomalyDAE'):
            # An unknown name used to surface later as a NameError on ``model``.
            raise ValueError("unsupported model: {!r}".format(model_str))

        # load data
        feas = format_data(self.data_name)

        print("feature number: {}".format(feas['num_features']))
        # Define placeholders
        placeholders = get_placeholder()

        num_features = feas['num_features']
        features_nonzero = feas['features_nonzero']
        num_nodes = feas['num_nodes']
        adj = feas['adj']
        # NOTE(review): the edge count and the edge-list path below are
        # hard-coded for BlogCatalog and ignore self.data_name -- confirm this
        # is intended before running on another dataset.
        m = 171743  #239738 flikr #71980 acm#171743 blog
        my_graph = Graph()
        my_graph.load_graph_from_weighted_edgelist(
            './data/BlogCatalog/BlogCatalog.edgelist')
        if self.detection_method == 'infomap':
            my_graph.infomap()
        elif self.detection_method == 'lpa':
            my_graph.lpa()
        x_1, y_1 = my_graph.output_data()
        print(x_1.shape, '!!!!@@@@@@@@@@@@@@@@@@@@')
        # B = x_1 - k k^T / (2m), with k the row sums of x_1. Presumably the
        # modularity matrix, assuming x_1 is a dense adjacency matrix -- TODO
        # confirm.
        k1 = np.sum(x_1, axis=1)
        k2 = k1.reshape(k1.shape[0], 1)
        k1k2 = k1 * k2
        Eij = k1k2 / (2 * m)
        B = np.array(x_1 - Eij)

        if model_str == 'Dominant':
            model = GCNModelAE(placeholders, num_features, num_nodes, adj,
                               features_nonzero, 2000, 500, 128, 256, 128,
                               self.at)
            opt = OptimizerAE(preds_community=model.community_reconstructions,
                              labels_community=model.B,
                              z_mean=model.z,
                              z_arg=model.z_a,
                              preds_attribute=model.attribute_reconstructions,
                              labels_attribute=tf.sparse_tensor_to_dense(
                                  placeholders['features']),
                              preds_structure=model.structure_reconstructions,
                              labels_structure=tf.sparse_tensor_to_dense(
                                  placeholders['adj_orig']),
                              alpha=FLAGS.alpha,
                              eta=FLAGS.eta,
                              theta=FLAGS.theta,
                              num_nodes=num_nodes)

        elif model_str == 'AnomalyDAE':
            model = AnomalyDAE(placeholders, num_features, num_nodes,
                               features_nonzero, self.decoder_act)
            opt = OptimizerDAE(preds_attribute=model.attribute_reconstructions,
                               labels_attribute=tf.sparse_tensor_to_dense(
                                   placeholders['features']),
                               preds_structure=model.structure_reconstructions,
                               labels_structure=tf.sparse_tensor_to_dense(
                                   placeholders['adj_orig']),
                               alpha=FLAGS.alpha,
                               eta=FLAGS.eta,
                               theta=FLAGS.theta)

        # The labels do not change between epochs, so read them once.
        y_true = [label[0] for label in feas['labels']]

        # Initialize session
        sess = tf.Session()
        AVER_auc = 0
        try:
            sess.run(tf.global_variables_initializer())

            # Train model
            for epoch in range(1, self.iteration + 1):

                train_loss, re_loss, kl_loss, loss_stru, loss_attr, rec_error = update(
                    model, opt, sess, feas['adj_norm'], feas['adj_label'],
                    feas['features'], placeholders, feas['adj'], B)

                auc = 0
                try:
                    scores = np.array(rec_error)
                    scores = (scores - np.min(scores)) / (np.max(scores) -
                                                          np.min(scores))
                    auc = roc_auc_score(y_true, scores)
                    AVER_auc = AVER_auc + auc
                except Exception as err:
                    # Best effort: keep training if the metric cannot be
                    # computed; this epoch then contributes 0 to the average.
                    print("[ERROR] for auc calculation!!!", err)

                print(
                    "Epoch:",
                    '%04d' % (epoch),
                    "AUC={:.5f}".format(round(auc, 4)),
                    "kl_loss={:.5f}".format(kl_loss),
                    "loss_struc={:.5f}".format(loss_stru),
                    "loss_attr={:.5f}".format(loss_attr))

                writer.add_scalar('loss_kl', kl_loss, epoch)
                writer.add_scalar('loss_struc', loss_stru, epoch)
                writer.add_scalar('loss_attr', loss_attr, epoch)
                writer.add_scalar('auc', auc, epoch)
        finally:
            # Release the session first and only then reset the default graph:
            # resetting while a session is active is undefined behaviour in TF.
            sess.close()
            tf.reset_default_graph()

        # Guard against a zero-epoch run, which used to raise ZeroDivisionError.
        Aver = AVER_auc / self.iteration if self.iteration else 0
        print(Aver, 'XXXXXXXXXXXXXXXXXXXXXXX')
示例#17
0
    def erun(self):
        """Run warm-up, pre-training and KL-based refinement; return the metrics.

        Stages, in order:

        1. ``self.warm_iteration`` calls to ``warm_update``; at epoch 50 the
           embeddings of views 0 and 1 are clustered and scored.
        2. ``self.iterations`` calls to ``update``; every 10th epoch the
           embedding of view ``FLAGS.input_view`` is clustered and scored.
        3. The cluster layer is initialised from k-means centres, then
           ``self.kl_iterations`` calls to ``update_kl`` follow. Every 5th
           epoch the target distribution is refreshed and training stops early
           once the fraction of changed assignments drops below ``self.tol``.

        The final embedding of view ``FLAGS.input_view`` is written with
        ``save_embed`` to ``emb_10.txt``.

        NOTE(review): ``NMIs`` and ``loss`` are appended to but not defined in
        this method; presumably they are module-level lists -- confirm.

        Returns:
            tuple: ``(acc, f1_macro, precision_macro, nmi, adjscore)`` from the
            most recent k-means evaluation.
        """
        tf.reset_default_graph()
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        placeholders = get_placeholder(feas['adjs'], feas['numView'])
        # construct model
        ae_model = get_model(model_str, placeholders, feas['numView'],
                             feas['num_features'], feas['num_nodes'],
                             self.n_clusters)
        # Optimizer
        opt = get_optimizer(model_str, ae_model, feas['numView'], placeholders,
                            feas['num_nodes'])
        # Initialize session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # allocate GPU memory on demand
        sess = tf.Session(config=config)

        sess.run(tf.global_variables_initializer())

        pos_weights = feas['pos_weights']
        fea_pos_weights = feas['fea_pos_weights']

        def fit_predict(embedding):
            """Fit a fresh k-means on ``embedding`` and return its labels."""
            km = KMeans(n_clusters=self.n_clusters).fit(embedding)
            return km.predict(embedding)

        def score(predicted):
            """Score ``predicted`` labels against the masked true labels."""
            cm = clustering_metrics(label_mask(feas['true_labels']), predicted)
            return cm.evaluationClusterModelFromLabel()

        def soft_assignments():
            """Return ``compute_q`` for the current model state."""
            return compute_q(ae_model,
                             opt,
                             sess,
                             feas['adjs'],
                             feas['adjs_label'],
                             feas['features'],
                             placeholders,
                             pos_weights,
                             fea_pos_weights,
                             feas['norms'],
                             attn_drop=0.,
                             ffd_drop=0.)

        def pretrain_embeddings():
            """Return ``update_test`` embeddings for the current model state."""
            return update_test(ae_model,
                               opt,
                               sess,
                               feas['adjs'],
                               feas['adjs_label'],
                               feas['features'],
                               placeholders,
                               pos_weights=pos_weights,
                               fea_pos_weights=fea_pos_weights,
                               norm=feas['norms'],
                               attn_drop=0,
                               ffd_drop=0)

        # ---- Stage 1: warm-up ------------------------------------------------
        for epoch in range(self.warm_iteration):
            reconstruct_loss = warm_update(ae_model,
                                           opt,
                                           sess,
                                           feas['numView'],
                                           feas['adjs'],
                                           feas['adjs_label'],
                                           feas['features'],
                                           placeholders,
                                           pos_weights,
                                           fea_pos_weights,
                                           feas['norms'],
                                           attn_drop=0.,
                                           ffd_drop=0.)
            print('reconstruct_loss', reconstruct_loss)
            if (epoch + 1) == 50:
                warm_emb = warm_update_test(ae_model,
                                            opt,
                                            sess,
                                            feas['adjs'],
                                            feas['adjs_label'],
                                            feas['features'],
                                            placeholders,
                                            pos_weights=pos_weights,
                                            fea_pos_weights=fea_pos_weights,
                                            norm=feas['norms'],
                                            attn_drop=0,
                                            ffd_drop=0)
                # Cluster and score view 0, then view 1; only the NMI of the
                # last view is recorded.
                for view in (0, 1):
                    view_labels = fit_predict(warm_emb[view])
                    print("Epoch:", '%04d' % (epoch + 1))
                    # Result unused; the call is kept in case count_num has
                    # side effects.
                    count_num(view_labels)
                    acc, f1_macro, precision_macro, nmi, adjscore, _ = score(
                        view_labels)
                NMIs.append(nmi)
        print('NMIs', NMIs)
        print('warm up done!')

        # ---- Stage 2: pre-training -------------------------------------------
        emb_ind = None
        for epoch in range(self.iterations):
            reconstruct_loss = update(ae_model,
                                      opt,
                                      sess,
                                      feas['adjs'],
                                      feas['adjs_label'],
                                      feas['features'],
                                      placeholders,
                                      pos_weights,
                                      fea_pos_weights,
                                      feas['norms'],
                                      attn_drop=0.,
                                      ffd_drop=0.)

            print('reconstruct_loss', reconstruct_loss)

            if (epoch + 1) % 10 == 0:
                emb_ind = pretrain_embeddings()
                predict_labels = fit_predict(emb_ind[FLAGS.input_view])
                print("PAP Epoch:", '%04d' % (epoch + 1))
                label_num = count_num(predict_labels)
                print('view1 label_num:', label_num)
                acc, f1_macro, precision_macro, nmi, adjscore, _ = score(
                    predict_labels)
                NMIs.append(nmi)
                loss.append(reconstruct_loss)
        if emb_ind is None:
            # Fewer than 10 pre-training epochs: no embedding was extracted in
            # the loop (this used to crash with a NameError below).
            emb_ind = pretrain_embeddings()
        # NOTE(review): when self.iterations is not a multiple of 10, emb_ind
        # comes from the last evaluated epoch, not the final one.

        # ---- Stage 3: initialise clusters and refine with the KL objective ---
        kmeans = KMeans(n_clusters=self.n_clusters).fit(
            emb_ind[FLAGS.input_view])
        y_pred_last = kmeans.labels_
        acc, f1_macro, precision_macro, nmi, adjscore, idx = score(y_pred_last)
        init_cluster = tf.constant(kmeans.cluster_centers_)
        sess.run(
            tf.assign(ae_model.cluster_layer.vars['clusters'], init_cluster))
        q = soft_assignments()
        p = target_distribution(q)

        # Fall back to the pre-trained embedding if the KL loop never runs, so
        # save_embed below neither crashes nor stores the warm-up embedding.
        emb = emb_ind
        for epoch in range(self.kl_iterations):
            emb, kl_loss = update_kl(ae_model,
                                     opt,
                                     sess,
                                     feas['adjs'],
                                     feas['adjs_label'],
                                     feas['features'],
                                     p,
                                     placeholders,
                                     pos_weights,
                                     fea_pos_weights,
                                     feas['norms'],
                                     attn_drop=0.,
                                     ffd_drop=0.,
                                     idx=idx,
                                     label=label_mask(feas['true_labels']))
            if epoch % 10 == 0:
                predict_labels = fit_predict(emb[FLAGS.input_view])
                acc, f1_macro, precision_macro, nmi, adjscore, _ = score(
                    predict_labels)
                NMIs.append(nmi)
                loss.append(kl_loss)
            if epoch % 5 == 0:
                # Refresh the target distribution and measure how many
                # assignments changed since the previous refresh.
                q = soft_assignments()
                p = target_distribution(q)
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = y_pred
                print('delta_label', delta_label)
                print("Epoch:", '%04d' % (epoch + 1))
                predict_labels = fit_predict(emb[FLAGS.input_view])
                acc, f1_macro, precision_macro, nmi, adjscore, _ = score(
                    predict_labels)
                if epoch > 0 and delta_label < self.tol:
                    print("early_stop")
                    break
        print('NMI', NMIs)
        print('loss', loss)
        save_embed(emb[FLAGS.input_view], 'emb_10.txt')
        return acc, f1_macro, precision_macro, nmi, adjscore