Пример #1
0
# Build the auto-encoder selected by `model_str`.  Any value other than
# 'gcn_ae' / 'gcn_vae' leaves `model` as None and creates no optimizer below.
model = None
if model_str == 'gcn_ae':
    # NOTE(review): the literals 300 and 0 are passed in the positions where
    # the VAE branch passes num_features / features_nonzero -- presumably a
    # fixed input width; confirm against the GCNModelAE signature.
    model = GCNModelAE(placeholders, 300, 0)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero)





# Optimizer
with tf.name_scope('optimizer'):
    if model_str == 'gcn_ae':
        # The AE optimizer receives its labels as a 3-tuple of placeholders and
        # takes pos_weight / norm / ROC / AP from placeholders as well, so
        # these values are fed at run time rather than fixed at build time.
        labels_placeholders = placeholders['adj_orig_values'] , placeholders['adj_orig_l2_splits'] , placeholders['adj_orig_l1_splits']
        opt = OptimizerAE(preds=model.reconstructions,batch_size=batchsize,
                          labels=labels_placeholders
                          ,pos_weight=placeholders['pos_weight'],
                          norm=placeholders['norm'] , roc=placeholders['ROC_Score'] , ap=placeholders['AP'])
    elif model_str == 'gcn_vae':
        # The VAE optimizer gets the densified 'adj_orig_values' placeholder
        # flattened to 1-D.  `pos_weight` and `norm` are plain names here and
        # must already be defined before this fragment runs.
        opt = OptimizerVAE(preds=model.reconstructions,
                           labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig_values'],
                                                                       validate_indices=False), [-1]),
                           model=model, num_nodes=num_nodes,
                           pos_weight=pos_weight,
                           norm=norm)

# Initialize session with GPU memory growth enabled (allow_growth=True).
sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))   # plain alternative: tf.Session()
# Optional CLI debugger wrapper, disabled:
#sess = tf_debug.LocalCLIDebugWrapperSession(sess)
sess.run(tf.global_variables_initializer())

# NOTE(review): absolute, machine-specific Windows path; this only works on the
# original author's machine and should presumably come from configuration.
files = os.listdir("C:\\Users\\USER\\Documents\\Projects\\MastersEnv\\GraphAutoEncoder\\gae_batch_update\\myData")
Пример #2
0
# Pick the auto-encoder variant named by `model_str`; any other value leaves
# `model` set to None and no optimizer is built.
if model_str == 'gcn_ae':
    model = GCNModelAE(placeholders, num_features, features_nonzero)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero)
else:
    model = None

# Loss re-weighting terms derived from the adjacency matrix:
#   pos_weight = (cells - sum) / sum
#   norm       = cells / (2 * (cells - sum))
_num_cells = adj.shape[0] * adj.shape[0]
_adj_total = adj.sum()
pos_weight = float(_num_cells - _adj_total) / _adj_total
norm = _num_cells / float((_num_cells - _adj_total) * 2)

# Optimizer
with tf.name_scope('optimizer'):
    if model_str in ('gcn_ae', 'gcn_vae'):
        # Arguments shared by both optimizers; the labels are the dense
        # 'adj_orig' placeholder flattened to one dimension.
        _opt_kwargs = dict(
            preds=model.reconstructions,
            labels=tf.reshape(
                tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                          validate_indices=False),
                [-1]),
            pos_weight=pos_weight,
            norm=norm)
        if model_str == 'gcn_ae':
            opt = OptimizerAE(**_opt_kwargs)
        else:
            # The variational optimizer additionally needs the model and the
            # node count.
            opt = OptimizerVAE(model=model, num_nodes=num_nodes, **_opt_kwargs)



def load_checkpoints(sess):
  saver = tf.train.Saver()
  checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
Пример #3
0
def gae(filename, output_dir):
    """Train a graph auto-encoder on one network and save its reconstruction.

    The network is read with ``load_network_data(filename)``, the model chosen
    by the ``model`` flag is trained for ``epochs`` full-batch steps, and the
    inner-product reconstruction of the learned embeddings is thresholded into
    a binary network.  Two files are written:

    * ``<output_dir>/GAE_network.npy`` -- the thresholded reconstruction with
      its first row and column dropped;
    * ``<output_dir>/gae_network_statistics.pickle`` -- the value returned by
      ``compute_graph_statistics`` for the symmetrised reconstruction.

    The working directory is changed to its parent (``os.chdir('../')``)
    before writing, so a relative ``output_dir`` is resolved from there.

    NOTE(review): the flags are defined on every call, so calling this function
    twice in one process presumably fails with a duplicate-flag error --
    confirm and, if so, move the definitions to module level.

    Args:
        filename: Network file handed to ``load_network_data``.
        output_dir: Directory receiving the two output files.

    Raises:
        ValueError: If the ``features`` flag is non-zero (the loader used here
            provides no node features) or the ``model`` flag names an
            unsupported model.
    """

    def binarize(scores, threshold):
        # Entry [j, i] is 0 where scores[i, j] < threshold and 1 otherwise;
        # vectorised equivalent of the former nested Python loops.
        return np.where(scores < threshold, 0, 1).T

    # Settings
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
    flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.')
    flags.DEFINE_float('weight_decay', 0.,
                       'Weight for L2 loss on embedding matrix.')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_string('filename', 'email-Eu-core.mat', 'dataset')
    flags.DEFINE_string('model', 'gcn_vae', 'Model string.')
    flags.DEFINE_string('dataset', 'cora', 'Dataset string.')
    flags.DEFINE_integer('features', 0,
                         'Whether to use features (1) or not (0).')

    model_str = FLAGS.model
    if model_str not in ('gcn_ae', 'gcn_vae'):
        raise ValueError('Unknown model: {!r}'.format(model_str))
    if FLAGS.features != 0:
        # load_network_data() returns no feature matrix, so only the
        # featureless (identity) setting can work here.
        raise ValueError('features flag must be 0: no node features are '
                         'available for this dataset')

    # Load data
    adj, R, edges = load_network_data(filename)

    num_edges = np.sum(adj)
    length = adj.shape[0]
    adj = sp.csr_matrix(adj)

    # Train on the masked adjacency matrix only.
    adj_train, _ = mask_test_edges(adj)
    adj = adj_train

    features = sp.identity(adj.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    else:
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    num_cells = adj.shape[0] * adj.shape[0]
    adj_total = adj.sum()
    pos_weight = float(num_cells - adj_total) / adj_total
    norm = num_cells / float((num_cells - adj_total) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=pos_weight,
                              norm=norm)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # The inputs never change between steps, so the feed dictionary is built
    # once; this also keeps it defined when the epoch count is zero.
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)

    # Initialize session; closed in `finally` so its resources are released
    # even when training or saving fails.
    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        # Train model
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        for _ in range(FLAGS.epochs):
            # Run single weight update
            sess.run([opt.opt_op, opt.cost, opt.accuracy],
                     feed_dict=feed_dict)

        print("GAE Optimization Finished!")

        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

        # Inner-product reconstruction, cut at the num_edges-th largest score.
        adj_rec = np.array(np.dot(emb, emb.T))
        DD = np.sort(adj_rec.flatten())
        threshold = DD[int(-1 * num_edges)]
        network_C = binarize(adj_rec, threshold).astype(np.int8)
        os.chdir('../')
        np.save('{}/GAE_network.npy'.format(output_dir),
                network_C[1:length, :][:, 1:length])

        # Project the reconstruction through R[0..3]: A <- R * A * R^T.
        # Only level 0 is consumed below; the coarser levels are kept for the
        # (currently disabled) draw_graph call.
        A_copy = adj_rec
        final_network = [A_copy]
        for i in range(1, 5):
            adjacent_matrix = tf.placeholder(tf.float32, shape=A_copy.shape)
            R_matrix = tf.placeholder(tf.float32, shape=R[i - 1, 0].shape)
            A_copy = sess.run(tf.matmul(tf.matmul(R_matrix, adjacent_matrix),
                                        tf.transpose(R_matrix)),
                              feed_dict={
                                  R_matrix: R[i - 1, 0].todense(),
                                  adjacent_matrix: A_copy
                              })
            final_network.append(np.array(A_copy))
    finally:
        sess.close()

    # draw_graph(final_network, edges, output_dir)
    network_B = final_network[0]
    print('Generating graph by GAE algorithm.')
    DD = np.sort(network_B.flatten())[::-1]
    threshold = DD[edges[0, 0]]
    network_C = binarize(network_B, threshold)
    # Symmetrise and clip back to {0, 1}.
    _A_obs = network_C + network_C.T
    _A_obs[_A_obs > 1] = 1
    print('Computing metrics for graph generated by GAE')
    c = compute_graph_statistics(_A_obs)
    with open('{}/gae_network_statistics.pickle'.format(output_dir),
              'wb') as handle:
        pickle.dump(c, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(c)
Пример #4
0
    def __init__(self, graph_edgelist, num_actions, dimension, learning_rate=0.01, epochs=300, hidden1=32, hidden2=16,
                 dropout=0., model_str='gcn_vae', use_features=0):
        """Learn node embeddings for a graph with a GCN auto-encoder.

        The graph is read with ``self.read_graph``, the model selected by
        ``model_str`` ('gcn_ae' or 'gcn_vae') is trained for ``epochs`` steps
        and the resulting ``z_mean`` matrix is stored in ``self.embeddings``.

        Args:
            graph_edgelist: Graph description handed to ``self.read_graph``.
            num_actions: Checked by ``BasisFunction._validate_num_actions``.
            dimension: Embedding size passed to the model; must be >= 1.
            learning_rate: Forwarded to the optimizer.
            epochs: Number of training steps.
            hidden1: Forwarded to the model constructor.
            hidden2: Forwarded to ``GCNModelAE`` only.
            dropout: Dropout rate fed during training.
            model_str: Which model to build.
            use_features: When 0, an identity matrix replaces the features.

        Raises:
            ValueError: If ``graph_edgelist`` is None or ``dimension`` < 1.
        """
        if graph_edgelist is None:
            raise ValueError('graph cannot be None')
        if dimension < 1:
            raise ValueError('dimension must be >= 1')

        self.__num_actions = BasisFunction._validate_num_actions(num_actions)
        self._dimension = dimension

        adj, features = self.read_graph(graph_edgelist)

        # Adjacency matrix with the diagonal removed (not used further below).
        diagonal = sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
        adj_orig = adj - diagonal
        adj_orig.eliminate_zeros()

        # Only the training adjacency is needed from the split; the model
        # itself is still normalised on the full `adj`.
        (adj_train, _train_pos, _val_pos, _val_neg,
         _test_pos, _test_neg) = mask_test_edges(adj)

        if use_features == 0:
            features = sp.identity(features.shape[0])  # featureless

        # Some preprocessing
        adj_norm = preprocess_graph(adj)

        # Define placeholders (created in this order: features, adj, adj_orig,
        # dropout).
        placeholders = {}
        placeholders['features'] = tf.sparse_placeholder(tf.float32)
        placeholders['adj'] = tf.sparse_placeholder(tf.float32)
        placeholders['adj_orig'] = tf.sparse_placeholder(tf.float32)
        placeholders['dropout'] = tf.placeholder_with_default(0., shape=())

        num_nodes = adj.shape[0]

        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        # Create model
        if model_str == 'gcn_ae':
            model = GCNModelAE(placeholders, num_features, features_nonzero, hidden1, hidden2, dimension)
        elif model_str == 'gcn_vae':
            model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, hidden1, dimension)
        else:
            model = None

        # Loss re-weighting terms derived from the adjacency matrix.
        num_cells = adj.shape[0] * adj.shape[0]
        adj_total = adj.sum()
        pos_weight = float(num_cells - adj_total) / adj_total
        norm = num_cells / float((num_cells - adj_total) * 2)

        # Optimizer
        with tf.name_scope('optimizer'):
            if model_str in ('gcn_ae', 'gcn_vae'):
                shared = dict(
                    preds=model.reconstructions,
                    labels=tf.reshape(
                        tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                  validate_indices=False),
                        [-1]),
                    pos_weight=pos_weight,
                    norm=norm,
                    learning_rate=learning_rate)
                if model_str == 'gcn_ae':
                    opt = OptimizerAE(**shared)
                else:
                    opt = OptimizerVAE(model=model, num_nodes=num_nodes, **shared)

        # Initialize session
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Training target: masked adjacency plus self-loops, in tuple form.
        adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))

        # Train model
        for epoch in range(epochs):
            epoch_start = time.time()
            # Construct feed dictionary
            feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
            feed_dict[placeholders['dropout']] = dropout
            # Run single weight update
            step_out = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)

        print("GCN Optimization Finished!")

        # Embeddings are read with dropout switched off.
        feed_dict[placeholders['dropout']] = 0
        self.embeddings = sess.run(model.z_mean, feed_dict=feed_dict)
Пример #5
0
    def gcn_multilayer(self):
        """Neural embedding of a multilayer network.

        For every ``(name, net)`` pair in ``self.nets`` a GCN auto-encoder
        (``self.model_str``: 'gcn_ae' or 'gcn_vae') is trained for
        ``self.n_iter`` steps on identity features.  Validation and test
        ROC-AUC / average precision are printed, and the ``model.embeddings``
        matrix is written with ``np.savetxt`` to
        ``self.out_dir + name + 'vectors.txt'``.

        Each network gets its own ``tf.Session``, which is closed before the
        next network is processed.

        Raises:
            ValueError: If ``self.model_str`` is not a supported model name.
        """
        # The return value is not needed here; the call is kept because its
        # side effects (if any) are not visible from this method.
        self.get_all_nodes()

        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        for net_name, net in self.nets.items():
            self.log.info('Run GCN For Net: %s' % net_name)
            # Round-trip through a dense matrix, as before, to obtain a CSR
            # adjacency matrix.
            adj = sp.csr_matrix(nx.adjacency_matrix(net).todense())
            features = sp.identity(adj.shape[0])

            adj_train, _, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
            adj = adj_train

            # Some pre processing
            adj_norm = preprocess_graph(adj)
            # Define placeholders
            placeholders = {
                'features': tf.sparse_placeholder(tf.float32),
                'adj': tf.sparse_placeholder(tf.float32),
                'adj_orig': tf.sparse_placeholder(tf.float32),
                'dropout': tf.placeholder_with_default(0., shape=())
            }
            num_nodes = adj.shape[0]
            features = sparse_to_tuple(features.tocoo())
            num_features = features[2][1]
            features_nonzero = features[1].shape[0]

            # Create model
            if self.model_str == 'gcn_ae':
                model = GCNModelAE(placeholders, num_features, features_nonzero, self.hidden1, self.hidden2)
            elif self.model_str == 'gcn_vae':
                model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero, self.hidden1, self.hidden2)
            else:
                raise ValueError('Unknown model: {!r}'.format(self.model_str))

            num_cells = adj.shape[0] * adj.shape[0]
            adj_total = adj.sum()
            pos_weight = float(num_cells - adj_total) / adj_total
            norm = num_cells / float((num_cells - adj_total) * 2)

            # Optimizer
            with tf.name_scope('optimizer'):
                labels = tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                              validate_indices=False), [-1])
                if self.model_str == 'gcn_ae':
                    opt = OptimizerAE(preds=model.reconstructions,
                                      labels=labels,
                                      pos_weight=pos_weight,
                                      norm=norm)
                else:
                    opt = OptimizerVAE(preds=model.reconstructions,
                                       labels=labels,
                                       model=model, num_nodes=num_nodes,
                                       pos_weight=pos_weight,
                                       norm=norm)

            adj_label = adj_train + sp.eye(adj_train.shape[0])
            adj_label = sparse_to_tuple(adj_label)

            # Built once per network: the inputs do not change between steps,
            # and get_roc_score() below relies on it being defined even when
            # n_iter is zero.
            feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
            dropout = 0

            # Initialize session; closed in `finally` so that sessions do not
            # pile up across networks.
            sess = tf.Session()
            try:
                sess.run(tf.global_variables_initializer())

                def get_roc_score(edges_pos, edges_neg, emb=None):
                    """Return (ROC-AUC, average precision) for the given edges.

                    Positive and negative edges are scored with the sigmoid
                    of the embedding inner product.  When ``emb`` is None the
                    current ``z_mean`` is evaluated with dropout set to 0
                    (this updates ``feed_dict`` in place).
                    """
                    if emb is None:
                        feed_dict.update({placeholders['dropout']: 0})
                        emb = sess.run(model.z_mean, feed_dict=feed_dict)

                    # Predict on test set of edges
                    adj_rec = np.dot(emb, emb.T)
                    preds = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_pos]
                    preds_neg = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_neg]

                    preds_all = np.hstack([preds, preds_neg])
                    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
                    roc_score = roc_auc_score(labels_all, preds_all)
                    ap_score = average_precision_score(labels_all, preds_all)

                    return roc_score, ap_score

                val_roc_score = []
                # Train model
                for epoch in range(self.n_iter):
                    self.log.info('Iteration: %d' % epoch)
                    t = time.time()
                    # get_roc_score() zeroes the dropout entry, so it is
                    # restored at the start of every step.
                    feed_dict.update({placeholders['dropout']: dropout})
                    # Run single weight update
                    outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)

                    # Compute average loss
                    avg_cost = outs[1]
                    avg_accuracy = outs[2]

                    roc_curr, ap_curr = get_roc_score(val_edges, val_edges_false)
                    val_roc_score.append(roc_curr)

                    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost),
                          "train_acc=", "{:.5f}".format(avg_accuracy), "val_roc=", "{:.5f}".format(val_roc_score[-1]),
                          "val_ap=", "{:.5f}".format(ap_curr),
                          "time=", "{:.5f}".format(time.time() - t))

                print("Optimization Finished!")
                roc_score, ap_score = get_roc_score(test_edges, test_edges_false)
                print('Test ROC score: ' + str(roc_score))
                print('Test AP score: ' + str(ap_score))

                # ------vector generation -----------------------------
                # Evaluated after the test scoring, i.e. with dropout at 0.
                vectors = sess.run(model.embeddings, feed_dict=feed_dict)
            finally:
                sess.close()

            fname = self.out_dir + net_name +'vectors.txt'
            np.savetxt(fname, np.array(vectors), fmt="%s", delimiter='  ')
            self.log.info('Saving vectors: %s' % fname)
            self.log.info('after exec gcn : %s' % net_name)

        self.log.info('Done!')
Пример #6
0
    def run(self):
        """Load the graph, train the GCN auto-encoder and collect supernodes.

        Data comes from ``graph_generator.load_combo`` when ``self.file_expr``
        is empty and from ``graph_generator.load_data`` otherwise.  The model
        is trained for ``self.epochs`` steps, printing training and validation
        metrics per step and test metrics at the end.  Results are stored on
        the instance: ``idx_supernodes``, ``adj_orig``, ``model``, ``sess``,
        ``feed_dict`` and ``supernodes``.

        NOTE(review): ``model_str`` is read as a free (non-``self``) name here;
        confirm it is defined at module level.
        """
        if self.file_expr == '':
            # text-image-code combination
            loaded = graph_generator.load_combo(self.labels_dict)
        else:
            loaded = graph_generator.load_data(
                self.labels_dict,
                self.file_expr,
                min_valid_triples=self.min_valid_triples,
                sep=self.file_sep,
                select_rels=self.select_rels)
        (n_by_n, x_train, y_train, _train_mask, _val_mask, _test_mask,
         idx_supernodes, label_encoder) = loaded
        self.idx_supernodes = idx_supernodes

        graph = nx.from_scipy_sparse_matrix(n_by_n)
        adj = nx.adjacency_matrix(graph)
        features = scipy.sparse.csr.csr_matrix(x_train)

        # Keep a diagonal-free copy of the adjacency matrix on the instance.
        diagonal = sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]),
                                 shape=adj.shape)
        adj_orig = adj - diagonal
        adj_orig.eliminate_zeros()
        self.adj_orig = adj_orig

        (adj_train, _train_edges, val_edges, val_edges_false, test_edges,
         test_edges_false) = mask_test_edges2(adj)
        adj = adj_train
        # Some preprocessing
        adj_norm = preprocess_graph(adj)
        num_nodes = adj.shape[0]

        if not self.use_features:
            features = sp.identity(features.shape[0])  # featureless
        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        # Create model
        if model_str == 'gcn_ae':
            self.model = GCNModelAE(self.placeholders, num_features,
                                    features_nonzero)
        elif model_str == 'gcn_vae':
            self.model = GCNModelVAE(self.placeholders, num_features,
                                     num_nodes, features_nonzero)

        # Loss re-weighting terms derived from the training adjacency matrix.
        num_cells = adj.shape[0] * adj.shape[0]
        adj_total = adj.sum()
        pos_weight = float(num_cells - adj_total) / adj_total
        norm = num_cells / float((num_cells - adj_total) * 2)

        # Optimizer
        with tf.name_scope('optimizer'):
            if model_str in ('gcn_ae', 'gcn_vae'):
                shared = dict(
                    preds=self.model.reconstructions,
                    labels=tf.reshape(
                        tf.sparse_tensor_to_dense(
                            self.placeholders['adj_orig'],
                            validate_indices=False), [-1]),
                    pos_weight=pos_weight,
                    norm=norm)
                if model_str == 'gcn_ae':
                    opt = OptimizerAE(**shared)
                else:
                    opt = OptimizerVAE(model=self.model,
                                       num_nodes=num_nodes,
                                       **shared)

        # Initialize session (kept on the instance for later use).
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        val_roc_score = []
        adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))

        # Train model
        for epoch in range(self.epochs):
            tick = time.time()
            # Construct feed dictionary
            self.feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                                 self.placeholders)
            self.feed_dict[self.placeholders['dropout']] = self.dropout_rate
            # Run single weight update
            _, avg_cost, avg_accuracy = self.sess.run(
                [opt.opt_op, opt.cost, opt.accuracy],
                feed_dict=self.feed_dict)

            roc_curr, ap_curr = self.get_roc_score(val_edges, val_edges_false)
            val_roc_score.append(roc_curr)

            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(avg_cost),
                  "train_acc=", "{:.5f}".format(avg_accuracy),
                  "val_roc=", "{:.5f}".format(val_roc_score[-1]),
                  "val_ap=", "{:.5f}".format(ap_curr),
                  "time=", "{:.5f}".format(time.time() - tick))

        print("Optimization Finished!")

        roc_score, ap_score = self.get_roc_score(test_edges, test_edges_false)
        print('Test ROC score: ' + str(roc_score))
        print('Test AP score: ' + str(ap_score))

        supernodes, supernodes_embeddings, supernodes_labels = \
            self.get_embeddings(y_train, label_encoder)
        self.supernodes = [
            supernodes, supernodes_embeddings, supernodes_labels
        ]
Пример #7
0
def main(args):
    """Compute embeddings or link-prediction scores using GAE/VGAE.

    The training graph is read from ``args.inputgraph`` as an unweighted,
    undirected edge list (first two columns are node ids; ids starting at 1
    are shifted to start at 0).  Rows of the adjacency matrix, and therefore
    of the embedding matrix, are ordered by ascending node id, so for
    contiguous ids ``0..n-1`` row ``i`` belongs to node ``i``.

    If ``args.tr_e`` is given, sigmoid inner-product scores for those edges
    are written to ``args.tr_pred`` and, when ``args.te_e`` is also given,
    scores for the test edges go to ``args.te_pred``.  Otherwise the
    embeddings are written to ``args.output``.

    Args:
        args: Parsed options; reads ``inputgraph``, ``delimiter``, ``model``,
            ``tr_e``, ``tr_pred``, ``te_e``, ``te_pred`` and ``output``.
            ``epochs`` and ``dropout`` are read from the module-level
            ``FLAGS`` object, not from ``args``.

    Raises:
        ValueError: If ``args.model`` is neither 'gcn_ae' nor 'gcn_vae'.
    """
    if args.model not in ('gcn_ae', 'gcn_vae'):
        raise ValueError('Unknown model: {!r}'.format(args.model))

    # Load edgelist; ndmin=2 keeps a one-edge file two-dimensional so the
    # column slicing below still works.
    E = np.loadtxt(args.inputgraph, delimiter=args.delimiter, dtype=int,
                   ndmin=2)
    # Only the two id columns decide whether ids are 1-based; a weight column
    # must not influence the check.
    oneIndx = bool(np.min(E[:, :2]) == 1)
    if oneIndx:
        E[:, :2] -= 1

    # Create an unweighted graph
    G = nx.Graph()
    G.add_edges_from(E[:, :2])

    # Get adj matrix of the graph.  The node order is pinned to ascending id:
    # the default (insertion) order would not match the id-based indexing of
    # `adj_rec` used for the predictions below.
    tr_A = nx.adjacency_matrix(G, nodelist=sorted(G.nodes()), weight=None)
    num_nodes = tr_A.shape[0]

    # Set main diag to 1s and normalize (algorithm requirement)
    adj_norm = preprocess_graph(tr_A)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # Create empty feature matrix
    features = sp.identity(num_nodes)  # featureless
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if args.model == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    else:
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    num_cells = tr_A.shape[0] * tr_A.shape[0]
    adj_total = tr_A.sum()
    pos_weight = float(num_cells - adj_total) / adj_total
    norm = num_cells / float((num_cells - adj_total) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if args.model == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=pos_weight,
                              norm=norm)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    adj_label = tr_A + sp.eye(tr_A.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # The inputs are identical for every step, so the feed dictionary is
    # built once (and stays defined even when FLAGS.epochs is 0).
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)

    # Initialize session; closed as soon as the embeddings are extracted.
    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        # Train model
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        for epoch in range(FLAGS.epochs):
            # Run single weight update
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy],
                            feed_dict=feed_dict)
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(outs[1]), "train_acc=",
                  "{:.5f}".format(outs[2]))

        # Compute predictions
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)
    finally:
        sess.close()

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Node similarities
    adj_rec = np.dot(emb, emb.T)

    def edge_scores(edge_file):
        """Return the sigmoid similarity of every edge listed in edge_file."""
        pairs = np.loadtxt(edge_file, delimiter=args.delimiter, dtype=int,
                           ndmin=2)
        if pairs.size == 0:
            return np.empty(0)
        if oneIndx:
            pairs -= 1
        # One fancy-indexing lookup instead of a Python loop over edges.
        return sigmoid(adj_rec[pairs[:, 0], pairs[:, 1]])

    start = time.time()
    # Read the train edges and compute similarity
    if args.tr_e is not None:
        np.savetxt(args.tr_pred, edge_scores(args.tr_e),
                   delimiter=args.delimiter)

        # Read the test edges and run predictions
        if args.te_e is not None:
            np.savetxt(args.te_pred, edge_scores(args.te_e),
                       delimiter=args.delimiter)

    # If no edge lists provided to predict links, then just store the embeddings
    else:
        np.savetxt(args.output, emb, delimiter=args.delimiter)

    print('Prediction time: {}'.format(time.time() - start))
Пример #8
0
    def fit(self, adj, features, labels):
        """Build the model selected by ``self.model_type`` and train it.

        Creates the placeholders, model, optimizer and session (stored on
        ``self`` as ``placeholders``, ``model`` and ``sess``), then runs
        ``FLAGS.epochs`` weight updates, keeping the most recent feed
        dictionary in ``self.feed_dict``.

        Args:
            adj: Adjacency matrix. NOTE(review): assumed to be a square
                scipy sparse matrix (it is used with ``.diagonal()``,
                ``.shape``, ``.sum()`` and ``sp.dia_matrix``) -- confirm
                against callers.
            features: Node feature matrix; ``features.shape[1]`` is taken
                as the input feature dimension and the matrix is passed
                through ``normalize_vectors`` before training.
            labels: Not used by this method.

        Raises:
            ValueError: If ``self.model_type`` is neither ``'gcn_ae'`` nor
                ``'gcn_vae'``.
        """
        # Fail fast on an unsupported model type: otherwise neither the
        # model nor the optimizer would be built and the training loop
        # would break on an undefined ``opt``.
        if self.model_type not in ('gcn_ae', 'gcn_vae'):
            raise ValueError(
                "Unsupported model_type {!r}; expected 'gcn_ae' or "
                "'gcn_vae'.".format(self.model_type))

        # Input adjacency with its diagonal entries subtracted out.
        # NOTE(review): adj_orig is not used further below -- confirm
        # whether it is still needed.
        adj_orig = adj
        adj_orig = adj_orig - sp.dia_matrix(
            (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
        adj_orig.eliminate_zeros()
        adj_train = gen_train_edges(adj)

        # From here on ``adj`` refers to the training graph only.
        adj = adj_train

        # Some preprocessing
        adj_norm = preprocess_graph(adj)
        num_nodes = adj.shape[0]
        input_feature_dim = features.shape[1]
        features = normalize_vectors(features)

        # Define placeholders
        self.placeholders = {
            'features':
            tf.compat.v1.placeholder(tf.float32,
                                     shape=(None, input_feature_dim)),
            'adj':
            tf.compat.v1.sparse_placeholder(tf.float32),
            'adj_orig':
            tf.compat.v1.sparse_placeholder(tf.float32),
            'dropout':
            tf.compat.v1.placeholder_with_default(0., shape=())
        }

        if self.model_type == 'gcn_ae':
            self.model = GCNModelAE(self.placeholders, input_feature_dim)
        elif self.model_type == 'gcn_vae':
            self.model = GCNModelVAE(self.placeholders, input_feature_dim,
                                     num_nodes)

        # (N^2 - sum(adj)) / sum(adj), i.e. negative edges / positive edges.
        pos_weight = float(adj.shape[0] * adj.shape[0] -
                           adj.sum()) / adj.sum()
        # NOTE(review): this uses adj.nnz whereas pos_weight uses adj.sum();
        # the two agree only for a 0/1 matrix without stored zeros -- confirm.
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

        # Optimizer
        with tf.compat.v1.name_scope('optimizer'):
            # Flattened dense form of the 'adj_orig' placeholder; both
            # optimizer variants take the same label tensor.
            dense_labels = tf.reshape(
                tf.sparse.to_dense(self.placeholders['adj_orig'],
                                   validate_indices=False), [-1])
            if self.model_type == 'gcn_ae':
                opt = OptimizerAE(preds=self.model.reconstructions,
                                  labels=dense_labels,
                                  pos_weight=pos_weight,
                                  norm=norm)
            elif self.model_type == 'gcn_vae':
                opt = OptimizerVAE(preds=self.model.reconstructions,
                                   labels=dense_labels,
                                   model=self.model,
                                   num_nodes=num_nodes,
                                   pos_weight=pos_weight,
                                   norm=norm)

        # Initialize session
        self.sess = tf.compat.v1.Session()
        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Training target: the training adjacency plus the identity matrix.
        adj_label = adj_train + sp.eye(adj_train.shape[0])
        adj_label = sparse_to_tuple(adj_label)

        # Train model
        for epoch in range(FLAGS.epochs):

            t = time.time()
            # Construct feed dictionary
            self.feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                                 self.placeholders)
            self.feed_dict.update(
                {self.placeholders['dropout']: FLAGS.dropout})
            # Run single weight update
            outs = self.sess.run([opt.opt_op, opt.cost, opt.accuracy],
                                 feed_dict=self.feed_dict)

            # Cost and accuracy reported by this update.
            avg_cost = outs[1]
            avg_accuracy = outs[2]
Пример #9
0
# Feature metadata.
# NOTE(review): presumably `features` is a (coords, values, shape) sparse
# tuple -- confirm against the loader.
features_nonzero = features[1].shape[0]
num_features = features[2][1]

# Two auto-encoders built over the same placeholders.
GCN1 = GCNModelAE(placeholders, num_features, features_nonzero)
GCN2 = GCNModelAE(placeholders, num_features, features_nonzero)

# Weighting terms derived from the adjacency matrix:
#   pos_weight = (N^2 - sum(adj)) / sum(adj)
#   norm       = N^2 / (2 * (N^2 - sum(adj)))
_adj_cells = adj.shape[0] * adj.shape[0]
_adj_total = adj.sum()
pos_weight = float(_adj_cells - _adj_total) / _adj_total
norm = _adj_cells / float((_adj_cells - _adj_total) * 2)

# One optimizer per model, each reading its own label placeholder.
with tf.name_scope('optimizer'):
    _shared_opt_kwargs = dict(pos_weight=pos_weight, norm=norm)
    opt1 = OptimizerAE(preds=GCN1.reconstructions,
                       labels=placeholders['adj_gnd1'],
                       **_shared_opt_kwargs)
    opt2 = OptimizerAE(preds=GCN2.reconstructions,
                       labels=placeholders['adj_gnd2'],
                       **_shared_opt_kwargs)

# Start a session and initialize all graph variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

cost_val, acc_val = [], []


def get_roc_score(edges_pos, edges_neg, emb=None):