def format_data(data_source):
    adj, features, labels = load_data(data_source)

    # Store original adjacency matrix (without diagonal entries) for later
    # adj_orig = adj
    # adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    # adj_orig.eliminate_zeros()
    # adj = adj_orig

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().iteritems() if v == item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item
    return feas
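# The format_data helpers above index into the tuple returned by sparse_to_tuple
# (features[2][1] for the feature dimension, features[1].shape[0] for the number
# of stored values). A minimal, self-contained sketch of the (coords, values,
# shape) convention this assumes -- the repo's own helper may differ in details.
import numpy as np
import scipy.sparse as sp


def sparse_to_tuple_sketch(sparse_mx):
    """Assumed behaviour of sparse_to_tuple: return (coords, values, shape)."""
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()  # (nnz, 2)
    values = sparse_mx.data                                         # (nnz,)
    shape = sparse_mx.shape                                         # (rows, cols)
    return coords, values, shape


if __name__ == '__main__':
    feats = sp.identity(4, format='csr')      # toy featureless input
    t = sparse_to_tuple_sketch(feats)
    print(t[2][1], t[1].shape[0])             # feature dim 4, non-zero entries 4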
def format_data(data_source): adj, features, labels = load_data2(data_source) if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [ adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label, features, labels ] feas = {} for item in items: # item_name = [ k for k,v in locals().iteritems() if v == item][0]] item_name = retrieve_name(item) feas[item_name] = item return feas
def format_data_new(adj, features): # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) values = [ adj, num_features, num_nodes, features_nonzero, pos_weight, norm, adj_norm, adj_label, features, adj_orig ] keys = [ 'adj', 'num_features', 'num_nodes', 'features_nonzero', 'pos_weight', 'norm', 'adj_norm', 'adj_label', 'features', 'adj_orig' ] feas = {} feas = dict(zip(keys, values)) return feas
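# format_data_new weights the reconstruction loss with pos_weight (the ratio of
# absent to present edges) and rescales it with norm. A small self-contained
# check of those two formulas on a toy adjacency matrix (the 4-node path graph
# below is purely illustrative).
import numpy as np
import scipy.sparse as sp

adj_toy = sp.csr_matrix(np.array([[0, 1, 0, 0],
                                  [1, 0, 1, 0],
                                  [0, 1, 0, 1],
                                  [0, 0, 1, 0]], dtype=float))
n = adj_toy.shape[0]
pos_weight_toy = float(n * n - adj_toy.sum()) / adj_toy.sum()   # (16 - 6) / 6
norm_toy = n * n / float((n * n - adj_toy.sum()) * 2)           # 16 / 20
print(pos_weight_toy, norm_toy)                                 # 1.666..., 0.8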
def format_data(data_name):
    # Load data
    adj, features, y_test, tx, ty, test_mask, true_labels = load_data(data_name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    # Remove the diagonal elements
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train
    adj_dense = adj.toarray()

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features_dense = features.tocoo().toarray()
    features = sparse_to_tuple(features.tocoo())
    # num_features is the dimensionality of the node features
    num_features = features[2][1]
    # features_nonzero is the number of non-zero feature entries
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    items = [
        adj, num_features, num_nodes, features_nonzero, pos_weight, norm,
        adj_norm, adj_label, features, true_labels, train_edges, val_edges,
        val_edges_false, test_edges, test_edges_false, adj_orig,
        features_dense, adj_dense
    ]
    feas = {}

    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)

    for item in items:
        # item_name = [k for k, v in locals().iteritems() if v == item][0]
        feas[retrieve_name(item)] = item

    return feas
def format_data(data_name): # Load data adj, features, true_labels = load_data(data_name) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj_train + 2 * sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) feas = {} feas['adj'] = adj feas['num_features'] = num_features feas['num_nodes'] = num_nodes feas['features_nonzero'] = features_nonzero feas['pos_weight'] = pos_weight feas['norm'] = norm feas['adj_norm'] = adj_norm feas['adj_label'] = adj_label feas['features'] = features feas['true_labels'] = true_labels feas['train_edges'] = train_edges feas['val_edges'] = val_edges feas['val_edges_false'] = val_edges_false feas['test_edges'] = test_edges feas['test_edges_false'] = test_edges_false feas['adj_orig'] = adj_orig return feas
def mask_edges_prd(adjs_list): pos_edges_l, false_edges_l = [], [] edges_list = [] for i in range(0, len(adjs_list)): # Function to build test set with 10% positive links # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper. adj = adjs_list[i] # Remove diagonal elements adj = adj - sp.dia_matrix( (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) adj.eliminate_zeros() # Check that diag is zero: assert np.diag(adj.todense()).sum() == 0 adj_triu = sp.triu(adj) adj_tuple = sparse_to_tuple(adj_triu) edges = adj_tuple[0] edges_all = sparse_to_tuple(adj)[0] num_false = int(edges.shape[0]) pos_edges_l.append(edges) def ismember(a, b, tol=5): rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1) return np.any(rows_close) edges_false = [] while len(edges_false) < num_false: idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue if ismember([idx_i, idx_j], edges_all): continue if edges_false: if ismember([idx_j, idx_i], np.array(edges_false)): continue if ismember([idx_i, idx_j], np.array(edges_false)): continue edges_false.append([idx_i, idx_j]) assert ~ismember(edges_false, edges_all) false_edges_l.append(edges_false) # NOTE: these edge lists only contain single direction of edge! return pos_edges_l, false_edges_l
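# mask_edges_prd rejects candidate false edges that hit the diagonal, an existing
# edge, or an already sampled pair. A compact, self-contained version of that
# rejection-sampling loop for one small graph (illustrative only; it mirrors the
# logic above but uses a plain set instead of the ismember helper).
import numpy as np
import scipy.sparse as sp


def sample_false_edges(adj, num_false, seed=0):
    """Sample num_false node pairs that are not edges of adj (no self-loops)."""
    rng = np.random.RandomState(seed)
    existing = set(zip(*adj.nonzero()))
    false_edges = set()
    n = adj.shape[0]
    while len(false_edges) < num_false:
        i, j = rng.randint(0, n), rng.randint(0, n)
        if i == j:
            continue
        if (i, j) in existing or (j, i) in existing:
            continue
        if (i, j) in false_edges or (j, i) in false_edges:
            continue
        false_edges.add((i, j))
    return list(false_edges)


adj_toy = sp.csr_matrix(np.array([[0, 1, 0],
                                  [1, 0, 1],
                                  [0, 1, 0]]))
print(sample_false_edges(adj_toy, num_false=1))   # e.g. [(0, 2)] or [(2, 0)]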
def test_one_graph(adj, adj_orig, features_csr, num_node, k_num, model, placeholders, sess, feed_dict): adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # delete self loop adj_orig.eliminate_zeros() adj_new = adj features = sparse_to_tuple(features_csr.tocoo()) adj_label = adj_new + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) adj_clean = adj_orig.tocsr() k_num = int(k_num * size / noise_ratio) # match the budget size if k_num != 0: adj_norm, adj_norm_sparse = preprocess_graph(adj_new) feed_dict.update({placeholders["adj"]: adj_norm}) feed_dict.update({placeholders["adj_orig"]: adj_label}) feed_dict.update({placeholders["features"]: features}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) model.k = k_num x_tilde = sess.run(model.realD_tilde, feed_dict=feed_dict, options=run_options) noised_indexes, clean_indexes = get_noised_indexes( x_tilde, adj_new, num_node) feed_dict.update({placeholders["noised_mask"]: noised_indexes}) feed_dict.update({placeholders["clean_mask"]: clean_indexes}) feed_dict.update({placeholders["noised_num"]: len(noised_indexes) / 2}) test1 = model.test_new_indexes.eval(session=sess, feed_dict=feed_dict) test0 = model.test_noised_index.eval(session=sess, feed_dict=feed_dict) new_adj = get_new_adj(feed_dict, sess, model, noised_indexes, adj_new, k_num, num_node) else: # new_adj = adj new_adj = adj.copy() new_adj_sparse = sp.csr_matrix(new_adj) psnr = PSNR(adj_clean[:num_node, :num_node], new_adj_sparse[:num_node, :num_node]) wls = WL_no_label(adj_clean[:num_node, :num_node], new_adj_sparse[:num_node, :num_node]) return psnr, wls
def test(saver, adj, features, meta_dir, checkpoints_dir):
    adj_norm, adj_norm_sparse = preprocess_graph(adj)

    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Build the label adjacency (A + I) that construct_feed_dict expects
    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})

    # Create model
    saver = tf.train.Saver(max_to_keep=10)
    model = None
    if model_str == "gae_gan":
        model = gaegan(placeholders, num_features, num_nodes, features_nonzero)

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # tf.get_variable takes the variable name as its first argument
    global_steps = tf.get_variable("global_step", trainable=False, initializer=0)
    opt = 0

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gae_gan':
            opt = Optimizergaegan(preds=model.x_tilde,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(
                                          placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
                                  model=model,
                                  num_nodes=num_nodes,
                                  pos_weight=pos_weight,
                                  norm=norm,
                                  global_step=global_steps)

    cost_val = []
    acc_val = []

    # Restore the trained network and rebuild the adjacency matrix
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.import_meta_graph(meta_dir)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
        new_adj = get_new_adj(feed_dict)
    return new_adj
def format_data(data_source):
    adj, features, labels = load_data2(data_source)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().iteritems() if v == item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item
    return feas
def get_data(all_coor_dict, feat_shape, adj_all_dict):
    '''
    Parameters
    ----------
    all_coor_dict : dict
        {"1.jpg": [coordinates], "2.jpg": [coordinates], ...}
    feat_shape : tuple
        feat_shape = (args.n_point, args.num_node_features).
    adj_all_dict : dict
        {"1.jpg": [reorder_adj (csr_matrix)], "2.jpg": [coordinates], ...}

    Returns
    -------
    data_list : list
        [Data(adj=[10, 10], adj_label=[10, 10], edge_index=[2, 21],
              img_name=00001241.jpg, norm=0.6329113924050633,
              weight_tensor=[100], x=[10, 2], y=[1]),
         Data(adj=[10, 10], ...), ...]
    data_images : dict
        {"1.jpg": Data(adj=[10, 10], adj_label=[10, 10], edge_index=[2, 21],
                       img_name=00001241.jpg, norm=0.6329113924050633,
                       weight_tensor=[100], x=[10, 2], y=[1]),
         "2.jpg": ..., ...}
    '''
    data_list = []
    data_images = {}
    for key, value in all_coor_dict.items():
        features = rd.get_features(feat_shape, key, all_coor_dict)
        features = sparse_to_tuple(features.tocoo())
        features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T),
                                            torch.FloatTensor(features[1]),
                                            torch.Size(features[2])).to(dev)
        edge_index = torch.tensor(rd.adj2connection(adj_all_dict[key]),
                                  dtype=torch.long)

        adj = adj_all_dict[key]
        adj_norm, adj_label, norm, weight_tensor = rd.data_process(adj)

        # NOTE: `label` is not defined in this function; it is assumed to come
        # from the enclosing scope (e.g. a per-image label lookup).
        data = Data(x=features,
                    edge_index=edge_index.t().contiguous(),
                    norm=norm,
                    y=label,
                    adj=adj_norm,
                    img_name=key,
                    weight_tensor=weight_tensor,
                    adj_label=adj_label)
        data_list.append(data)
        data_images[key] = data
    return data_list, data_images
def format_data(data_source): # adj = load_adj('../data/facebook/0') # features = load_attr('../data/facebook/0') # labels = np.ones(adj.shape[0]) # adj, features, labels = load_data2(data_source) adj, features, labels = load_data('twitter') # print(adj) print(type(adj), type(features)) print(adj.shape, features.shape) features = normalize(features, norm='l1', axis=1) print(features[:5]) if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless adj_norm = preprocess_graph(adj) num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_label = adj + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) items = [ adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label, features, labels ] feas = {} for item in items: # item_name = [ k for k,v in locals().iteritems() if v == item][0]] item_name = retrieve_name(item) feas[item_name] = item return feas
def preprocess_graph(adj):
    '''normalize adj'''
    adj = sp.coo_matrix(adj)  # convert adj to COO format
    # Without self-loops a node's own features are ignored when aggregating,
    # which is a serious flaw, so add 1 to every diagonal entry: A_ = A + I
    adj_ = adj + sp.eye(adj.shape[0])
    rowsum = np.array(adj_.sum(1))  # row-wise degree sums
    # np.power(rowsum, -0.5).flatten(): take each degree to the power -1/2 and flatten
    # sp.diags (see https://www.cnblogs.com/SupremeBoy/p/12952735.html) places these
    # values on the main diagonal of a square matrix and fills the rest with zeros
    degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())
    # .tocoo(): convert the matrix to COOrdinate format
    # Normalization: with plain additive aggregation, features of high-degree nodes
    # keep growing while those of low-degree nodes shrink, which can cause exploding
    # or vanishing gradients during training. The symmetric normalization
    # D^(-1/2) (A + I) D^(-1/2) keeps the scale comparable across nodes.
    adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(
        degree_mat_inv_sqrt).tocoo()
    return sparse_to_tuple(adj_normalized)
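# The comments above describe the symmetric normalization D^(-1/2) (A + I) D^(-1/2).
# A short self-contained check that the sparse expression used in preprocess_graph
# matches the dense formula on a toy graph.
import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0, 1, 1],
                            [1, 0, 0],
                            [1, 0, 0]], dtype=float))
A_tilde = A + sp.eye(A.shape[0])                         # add self-loops
deg = np.array(A_tilde.sum(1)).flatten()                 # degrees of A + I
D_inv_sqrt = sp.diags(np.power(deg, -0.5))

# Sparse form, as in preprocess_graph
A_norm_sparse = A_tilde.dot(D_inv_sqrt).transpose().dot(D_inv_sqrt).toarray()

# Dense reference: D^(-1/2) (A + I) D^(-1/2)
D_dense = np.diag(np.power(deg, -0.5))
A_norm_dense = D_dense @ A_tilde.toarray() @ D_dense

assert np.allclose(A_norm_sparse, A_norm_dense)
print(np.round(A_norm_dense, 3))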
def preprocess_graph(self, adj): adj = sp.coo_matrix(adj) if adj.shape[0] == adj.shape[1]: adj_ = adj + sp.eye(adj.shape[0]) rowsum = np.array(adj_.sum(1)) degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten()) adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot( degree_mat_inv_sqrt).tocoo() else: rowsum = np.array(adj.sum(1)) + 0.0001 colsum = np.array(adj.sum(0)) + 0.0001 rowdegree_mat_inv = sp.diags( np.nan_to_num(np.power(rowsum, -0.5)).flatten()) coldegree_mat_inv = sp.diags( np.nan_to_num(np.power(colsum, -0.5)).flatten()) adj_normalized = rowdegree_mat_inv.dot(adj).dot( coldegree_mat_inv).tocoo() return preprocessing.sparse_to_tuple(adj_normalized)
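# For a non-square (bipartite) adjacency matrix the method above falls back to
# row/column normalization D_r^(-1/2) A D_c^(-1/2), with a small epsilon added to
# the degree sums to avoid division by zero. A minimal sketch of that branch on a
# toy 2 x 3 matrix (illustrative only).
import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[1, 0, 1],
                            [0, 1, 0]], dtype=float))    # 2 x 3, non-square

rowsum = np.array(A.sum(1)) + 0.0001                     # guard against zero degrees
colsum = np.array(A.sum(0)) + 0.0001
D_row = sp.diags(np.nan_to_num(np.power(rowsum, -0.5)).flatten())
D_col = sp.diags(np.nan_to_num(np.power(colsum, -0.5)).flatten())

A_norm = D_row.dot(A).dot(D_col).tocoo()                 # D_r^(-1/2) A D_c^(-1/2)
print(np.round(A_norm.toarray(), 3))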
def load_model(placeholders, model, opt, adj_train, test_edges, test_edges_false, features, sess, name="single_fold"): adj = adj_train # This will be calculated for every fold # pos_weight and norm should be tensors print ('----------------') pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() # N/P norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # (N+P) x (N+P) / (N) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2) adj_norm = preprocess_graph(adj) # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) feed_dict.update({placeholders['is_training']: True}) feed_dict.update({placeholders['norm']: norm}) feed_dict.update({placeholders['pos_weight']: pos_weight}) # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2) adj_norm = preprocess_graph(adj) saver = tf.train.Saver() saver.restore(sess=sess, save_path=(save_dir+name)) print ('Model restored') # Decrease MC samples for pubmed if (dataset_str == 'pubmed'): S = 5 else: S = 15 adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict, model, S=S, save_qual=True) return adj_score, z_activated
def data_process(adj): adj_norm = preprocess_graph(adj) adj_train = adj # Create Model pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T), torch.FloatTensor(adj_norm[1]), torch.Size(adj_norm[2])).to(dev) adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T), torch.FloatTensor(adj_label[1]), torch.Size(adj_label[2])).to(dev) weight_mask = adj_label.to_dense().view(-1) == 1 weight_tensor = torch.ones(weight_mask.size(0)).to(dev) weight_tensor[weight_mask] = pos_weight return adj_norm, adj_label, norm, weight_tensor
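# data_process up-weights the positive entries of the reconstruction target:
# every position of adj_label that equals 1 gets weight pos_weight, everything
# else weight 1. A small self-contained torch sketch of just that masking step
# (toy tensors, CPU only; pos_weight is a made-up value here).
import torch

adj_label_toy = torch.tensor([[1., 1., 0.],
                              [1., 1., 1.],
                              [0., 1., 1.]])
pos_weight_toy = 2.5   # in data_process this is float(N*N - E) / E

weight_mask = adj_label_toy.view(-1) == 1        # positions of positive entries
weight_tensor = torch.ones(weight_mask.size(0))
weight_tensor[weight_mask] = pos_weight_toy      # up-weight positives

# Typical use: weighted binary cross-entropy on the flattened reconstruction
logits = torch.zeros(9)                          # stand-in for decoder output
loss = torch.nn.functional.binary_cross_entropy_with_logits(
    logits, adj_label_toy.view(-1), weight=weight_tensor)
print(weight_tensor, loss.item())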
def train(): ## add noise label train_adj_list, train_adj_orig_list, train_k_list = add_noises_on_adjs( train_structure_input, train_num_nodes_all) test_adj_list, test_adj_orig_list, test_k_list = add_noises_on_adjs( test_structure_input, test_num_nodes_all) adj = train_adj_list[0] features_csr = train_feature_input[0] features = sparse_to_tuple(features_csr.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] adj_orig = train_adj_orig_list[0] adj_label = train_adj_list[0] + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) num_nodes = adj.shape[0] adj_norm, adj_norm_sparse = preprocess_graph(adj) ############ global_steps = tf.get_variable('global_step', trainable=False, initializer=0) new_learning_rate_dis = tf.train.exponential_decay( FLAGS.learn_rate_init, global_step=global_steps, decay_steps=100, decay_rate=0.95) new_learning_rate_gen = tf.train.exponential_decay( FLAGS.learn_rate_init_gen, global_step=global_steps, decay_steps=100, decay_rate=0.95) new_learn_rate_value = FLAGS.learn_rate_init # set the placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32, name="ph_features"), 'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"), 'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"), 'dropout': tf.placeholder_with_default(0.3, shape=(), name="ph_dropout"), 'clean_mask': tf.placeholder(tf.int32), 'noised_mask': tf.placeholder(tf.int32), 'noised_num': tf.placeholder(tf.int32), 'node_mask': tf.placeholder(tf.float32) } # build models model = None adj_clean = adj_orig.tocoo() adj_clean_tensor = tf.SparseTensor(indices=np.stack( [adj_clean.row, adj_clean.col], axis=-1), values=adj_clean.data, dense_shape=adj_clean.shape) if model_str == "mask_gvae": model = mask_gvae(placeholders, num_features, num_nodes, features_nonzero, new_learning_rate_dis, new_learning_rate_gen, adj_clean=adj_clean_tensor, k=int(adj.sum() * noise_ratio)) model.build_model() pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) opt = 0 # Optimizer with tf.name_scope('optimizer'): if model_str == 'mask_gvae': opt = Optimizer(preds=tf.reshape(model.x_tilde, [-1]), labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, global_step=global_steps, new_learning_rate=new_learning_rate_dis, new_learning_rate_gen=new_learning_rate_gen, placeholders=placeholders) # init the session sess = tf.Session() # sess.run(tf.global_variables_initializer()) # initial test # initial clean and noised_mask clean_mask = np.array([1, 2, 3, 4, 5]) noised_mask = np.array([6, 7, 8, 9, 10]) noised_num = noised_mask.shape[0] / 2 # ################################## feed_dict = construct_feed_dict(adj_norm, adj_label, features, clean_mask, noised_mask, noised_num, placeholders) node_mask = np.ones([num_nodes, n_class]) node_mask[train_num_nodes_all[0]:, :] = 0 feed_dict.update({placeholders['node_mask']: node_mask}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # ################################## if if_train: sess.run(tf.global_variables_initializer()) # initial test for epoch in range(FLAGS.epochs): for i in tqdm(range(len(train_feature_input))): train_one_graph(train_adj_list[i], train_adj_orig_list[i], train_feature_input[i], train_num_nodes_all[i], train_k_list[i], model, opt, placeholders, sess, new_learning_rate_gen, feed_dict, epoch, i) saver = tf.train.Saver() # define 
saver in the loop saver.save(sess, "./checkpoints/{}.ckpt".format(dataset_str)) print("Optimization Finished!") psnr_list = [] wls_list = [] for i in range(len(test_feature_input)): psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i], test_feature_input[i], test_num_nodes_all[i], test_k_list[i], model, placeholders, sess, feed_dict) psnr_list.append(psnr) wls_list.append(wls) print(psnr_list) else: saver = tf.train.Saver() # define saver in the loop saver.restore(sess, "./checkpoints/{}.ckpt".format(dataset_str)) psnr_list = [] wls_list = [] for i in range(len(test_feature_input)): psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i], test_feature_input[i], test_num_nodes_all[i], test_k_list[i], model, placeholders, sess, feed_dict) psnr_list.append(psnr) wls_list.append(wls) print(psnr_list) ################################## ################## the PSRN and WL ######################### print("#" * 15) print("The PSNR is:") print(np.mean(psnr_list)) print("The WL is :") print(np.mean(wls_list)) return np.mean(psnr_list), np.mean(wls_list)
import os
import pickle

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import scipy.sparse as sp

EGO_USER = 100466178325794757407  # which ego network to look at

# Load pickled (adj, feat) tuple
network_dir = './g-processed/{0}-adj-feat.pkl'.format(EGO_USER)
with open(network_dir, 'rb') as f:
    adj, features = pickle.load(f, encoding='iso-8859-1')

g = nx.Graph(adj)
nx.draw_networkx(g, with_labels=False, node_size=50, node_color='r')
plt.show()

# Train on CPU (hide GPU) due to memory constraints
os.environ['CUDA_VISIBLE_DEVICES'] = ""

x = sp.lil_matrix(features)
features_tuple = sparse_to_tuple(x)
features_shape = features_tuple[2]

# Get graph attributes (to feed into model)
num_nodes = adj.shape[0]  # number of nodes in adjacency matrix
num_features = features_shape[1]  # number of features (columns of features matrix)
features_nonzero = features_tuple[1].shape[0]  # number of non-zero entries in features matrix (length of the values list)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

np.random.seed(0)  # IMPORTANT: guarantees consistent train/test splits
adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj, test_frac=.3, val_frac=.1)
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj, test_percent=10., val_percent=5.) adj = adj_train # This is the adj matrix that masked out all validation and testing entries. #print(adj_train.shape) #import pdb;pdb.set_trace() if FLAGS.features == 0: features = sp.identity( features.shape[0]) # featureless. sparse coo_matrix. # Some preprocessing #adj_norm = preprocess_graph(adj) attn_adj_norm = adj + sp.eye(adj.shape[0]) attn_adj_norm = sparse_to_tuple(attn_adj_norm) # a tuple adj_norm = preprocess_graph( adj) # a tuple. Normalization. Identical matrix is added here. #print(type(adj + sp.eye(adj.shape[0]))) #import pdb;pdb.set_trace() # Define placeholders placeholders = { # this is passed directly to the model to build the graph. 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'in_drop': tf.placeholder_with_default(0., shape=()), 'attn_drop': tf.placeholder_with_default(0., shape=()), 'feat_drop': tf.placeholder_with_default(0., shape=())
def run(seed, gamma, beta, hidden, lr, NB_EPOCH=300): """ Main function. Run the architecture for the initialization defined by seed and by the hyperparameters gamma, beta, hidden, lr Inputs: seed : seed to defined the initialization of the training/testing/validation split, gamma, beta, hidden, lr: hyperparameters of the architecture NB_EPOCH: number of runs to do of the same architecture with different weight initializations. Default: 1000 Outputs: auc_test, auc_train, auc_val: AUC on the test, train and validation sets """ tf.reset_default_graph() training_set_mask, testing_set_mask, idx_training, idx_testing = preprocessing_dataset.split_train_test( 0.8, M_str, seed, labels) #create a training and test mask on the data Otraining = preprocessing_dataset.load_mask(training_set_mask, M_str, nrRows, nrCols) Otest = preprocessing_dataset.load_mask(testing_set_mask, M_str, nrRows, nrCols) new_labels_train = np.copy(labels) new_labels_train[idx_testing] = -1 #split train set into 4 parts to create a validation set training_set_mask, validation_set_mask, idx_training, idx_validation = preprocessing_dataset.split_train_validation_4( 3, M_str, seed, new_labels_train) Otraining = preprocessing_dataset.load_mask(training_set_mask, M_str, nrRows, nrCols) Ovalidation = preprocessing_dataset.load_mask(validation_set_mask, M_str, nrRows, nrCols) Otraining = np.concatenate((Otraining, training_set_mask), axis=1) Ocol = np.zeros((Otest.shape[0], 1)) Otest_support = np.concatenate((Otest, Ocol), axis=1) Ovalidation_support = np.concatenate((Ovalidation, Ocol), axis=1) Osupport_t = Otraining + Otest_support + Ovalidation_support Ovalidation = np.concatenate((Ovalidation, validation_set_mask), axis=1) Otest = np.concatenate((Otest, testing_set_mask), axis=1) u_features, v_features, train_labels, train_u_indices, train_v_indices, val_labels, val_u_indices, val_v_indices, test_labels, test_u_indices, test_v_indices = load_data_monti_tadpole( M, Otraining, Otest, Ovalidation) m, n = M.shape # global normalization support = [] support_t = [] path_support_women = "women_synth_noteasy.csv" women_support, _, _ = read_tadpole.load_csv_no_header(path_support_women) women_support = preprocessing_dataset.str_to_float(women_support) women_support = women_support * M_sup women_support = sp.csr_matrix(women_support, dtype=np.float32) support.append(women_support) support_t.append(women_support.T) path_support_men = "men_synth_noteasy.csv" men_support, _, _ = read_tadpole.load_csv_no_header(path_support_men) men_support = preprocessing_dataset.str_to_float(men_support) men_support = men_support * M_sup men_support = sp.csr_matrix(men_support, dtype=np.float32) support.append(men_support) support_t.append(men_support.T) path_support_women_84 = "age_84_92_women_synth_noteasy.csv" women_84_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_84) women_84_support = preprocessing_dataset.str_to_float(women_84_support) women_84_support = women_84_support * M_sup women_84_support = sp.csr_matrix(women_84_support, dtype=np.float32) support.append(women_84_support) support_t.append(women_84_support.T) path_support_men_84 = "age_84_92_men_synth_noteasy.csv" men_84_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_84) men_84_support = preprocessing_dataset.str_to_float(men_84_support) men_84_support = men_84_support * M_sup men_84_support = sp.csr_matrix(men_84_support, dtype=np.float32) support.append(men_84_support) support_t.append(men_84_support.T) path_support_84 = "age_84_92_synth_noteasy.csv" 
age84_support, _, _ = read_tadpole.load_csv_no_header(path_support_84) age84_support = preprocessing_dataset.str_to_float(age84_support) age84_support = age84_support * M_sup age84_support = sp.csr_matrix(age84_support, dtype=np.float32) support.append(age84_support) support_t.append(age84_support.T) path_support_women_79 = "age_79_84_women_synth_noteasy.csv" women_79_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_79) women_79_support = preprocessing_dataset.str_to_float(women_79_support) women_79_support = women_79_support * M_sup women_79_support = sp.csr_matrix(women_79_support, dtype=np.float32) support.append(women_79_support) support_t.append(women_79_support.T) path_support_men_79 = "age_79_84_men_synth_noteasy.csv" men_79_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_79) men_79_support = preprocessing_dataset.str_to_float(men_79_support) men_79_support = men_79_support * M_sup men_79_support = sp.csr_matrix(men_79_support, dtype=np.float32) support.append(men_79_support) support_t.append(men_79_support.T) path_support_79 = "age_79_84_synth_noteasy.csv" age79_support, _, _ = read_tadpole.load_csv_no_header(path_support_79) age79_support = preprocessing_dataset.str_to_float(age79_support) age79_support = age79_support * M_sup age79_support = sp.csr_matrix(age79_support, dtype=np.float32) support.append(age79_support) support_t.append(age79_support.T) path_support_women_74 = "age_74_79_women_synth_noteasy.csv" women_74_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_74) women_74_support = preprocessing_dataset.str_to_float(women_74_support) women_74_support = women_74_support * M_sup women_74_support = sp.csr_matrix(women_74_support, dtype=np.float32) support.append(women_74_support) support_t.append(women_74_support.T) path_support_men_74 = "age_74_79_men_synth_noteasy.csv" men_74_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_74) men_74_support = preprocessing_dataset.str_to_float(men_74_support) men_74_support = men_74_support * M_sup men_74_support = sp.csr_matrix(men_74_support, dtype=np.float32) support.append(men_74_support) support_t.append(men_74_support.T) path_support_74 = "age_74_79_synth_noteasy.csv" age74_support, _, _ = read_tadpole.load_csv_no_header(path_support_74) age74_support = preprocessing_dataset.str_to_float(age74_support) age74_support = age74_support * M_sup age74_support = sp.csr_matrix(age74_support, dtype=np.float32) support.append(age74_support) support_t.append(age74_support.T) path_support_women_69 = "age_69_74_women_synth_noteasy.csv" women_69_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_69) women_69_support = preprocessing_dataset.str_to_float(women_69_support) women_69_support = women_69_support * M_sup women_69_support = sp.csr_matrix(women_69_support, dtype=np.float32) support.append(women_69_support) support_t.append(women_69_support.T) path_support_men_69 = "age_69_74_men_synth_noteasy.csv" men_69_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_69) men_69_support = preprocessing_dataset.str_to_float(men_69_support) men_69_support = men_69_support * M_sup men_69_support = sp.csr_matrix(men_69_support, dtype=np.float32) support.append(men_69_support) support_t.append(men_69_support.T) path_support_69 = "age_69_74_synth_noteasy.csv" age69_support, _, _ = read_tadpole.load_csv_no_header(path_support_69) age69_support = preprocessing_dataset.str_to_float(age69_support) age69_support = age69_support * M_sup age69_support = 
sp.csr_matrix(age69_support, dtype=np.float32) support.append(age69_support) support_t.append(age69_support.T) path_support_women_64 = "age_64_69_women_synth_noteasy.csv" women_64_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_64) women_64_support = preprocessing_dataset.str_to_float(women_64_support) women_64_support = women_64_support * M_sup women_64_support = sp.csr_matrix(women_64_support, dtype=np.float32) support.append(women_64_support) support_t.append(women_64_support.T) path_support_men_64 = "age_64_69_men_synth_noteasy.csv" men_64_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_64) men_64_support = preprocessing_dataset.str_to_float(men_64_support) men_64_support = men_64_support * M_sup men_64_support = sp.csr_matrix(men_64_support, dtype=np.float32) support.append(men_64_support) support_t.append(men_64_support.T) path_support_64 = "age_64_69_synth_noteasy.csv" age64_support, _, _ = read_tadpole.load_csv_no_header(path_support_64) age64_support = preprocessing_dataset.str_to_float(age64_support) age64_support = age64_support * M_sup age64_support = sp.csr_matrix(age64_support, dtype=np.float32) support.append(age64_support) support_t.append(age64_support.T) path_support_women_59 = "age_59_64_women_synth_noteasy.csv" women_59_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_59) women_59_support = preprocessing_dataset.str_to_float(women_59_support) women_59_support = women_59_support * M_sup women_59_support = sp.csr_matrix(women_59_support, dtype=np.float32) support.append(women_59_support) support_t.append(women_59_support.T) path_support_men_59 = "age_59_64_men_synth_noteasy.csv" men_59_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_59) men_59_support = preprocessing_dataset.str_to_float(men_59_support) men_59_support = men_59_support * M_sup men_59_support = sp.csr_matrix(men_59_support, dtype=np.float32) support.append(men_59_support) support_t.append(men_59_support.T) path_support_59 = "age_59_64_synth_noteasy.csv" age59_support, _, _ = read_tadpole.load_csv_no_header(path_support_59) age59_support = preprocessing_dataset.str_to_float(age59_support) age59_support = age59_support * M_sup age59_support = sp.csr_matrix(age59_support, dtype=np.float32) support.append(age59_support) support_t.append(age59_support.T) path_support_women_54 = "age_54_59_women_synth_noteasy.csv" women_54_support, _, _ = read_tadpole.load_csv_no_header( path_support_women_54) women_54_support = preprocessing_dataset.str_to_float(women_54_support) women_54_support = women_54_support * M_sup women_54_support = sp.csr_matrix(women_54_support, dtype=np.float32) support.append(women_54_support) support_t.append(women_54_support.T) path_support_men_54 = "age_54_59_men_synth_noteasy.csv" men_54_support, _, _ = read_tadpole.load_csv_no_header(path_support_men_54) men_54_support = preprocessing_dataset.str_to_float(men_54_support) men_54_support = men_54_support * M_sup men_54_support = sp.csr_matrix(men_54_support, dtype=np.float32) support.append(men_54_support) support_t.append(men_54_support.T) path_support_54 = "age_54_59_synth_noteasy.csv" age54_support, _, _ = read_tadpole.load_csv_no_header(path_support_54) age54_support = preprocessing_dataset.str_to_float(age54_support) age54_support = age54_support * M_sup age54_support = sp.csr_matrix(age54_support, dtype=np.float32) support.append(age54_support) support_t.append(age54_support.T) num_support = len(support) mask_support_t = [] Osupport_t = sp.csr_matrix(Osupport_t, 
dtype=np.int) for i in range(num_support): mask_support_t.append(Osupport_t.T) mask_support_t = sp.hstack(mask_support_t, format='csr') support = sp.hstack(support, format='csr') support_t = sp.hstack(support_t, format='csr') # Collect all user and item nodes for test set test_u = list(set(test_u_indices)) test_v = list(set(test_v_indices)) test_u_dict = {n: i for i, n in enumerate(test_u)} test_v_dict = {n: i for i, n in enumerate(test_v)} test_u_indices = np.array([test_u_dict[o] for o in test_u_indices]) test_v_indices = np.array([test_v_dict[o] for o in test_v_indices]) test_support = support[np.array(test_u)] for i in range(test_support.shape[0]): for j in range(563, test_support.shape[1], 564): test_support[i, j] = 0.0 test_support_t = sp.csr_matrix.multiply(support_t, mask_support_t) # Collect all user and item nodes for validation set val_u = list(set(val_u_indices)) val_v = list(set(val_v_indices)) val_u_dict = {n: i for i, n in enumerate(val_u)} val_v_dict = {n: i for i, n in enumerate(val_v)} val_u_indices = np.array([val_u_dict[o] for o in val_u_indices]) val_v_indices = np.array([val_v_dict[o] for o in val_v_indices]) val_support = support[np.array(val_u)] for i in range(val_support.shape[0]): for j in range(563, val_support.shape[1], 564): val_support[i, j] = 0.0 val_support_t = sp.csr_matrix.multiply(support_t, mask_support_t) # Collect all user and item nodes for train set train_u = list(set(train_u_indices)) train_v = list(set(train_v_indices)) train_u_dict = {n: i for i, n in enumerate(train_u)} train_v_dict = {n: i for i, n in enumerate(train_v)} train_u_indices = np.array([train_u_dict[o] for o in train_u_indices]) train_v_indices = np.array([train_v_dict[o] for o in train_v_indices]) train_support = support[np.array(train_u)] train_support_t = sp.csr_matrix.multiply(support_t, mask_support_t) placeholders = { 'u_features': tf.sparse_placeholder(tf.float32, shape=np.array(u_features.shape, dtype=np.int64)), 'v_features': tf.sparse_placeholder(tf.float32, shape=np.array(v_features.shape, dtype=np.int64)), 'u_features_nonzero': tf.placeholder(tf.int32, shape=()), 'v_features_nonzero': tf.placeholder(tf.int32, shape=()), 'labels': tf.placeholder(tf.float32, shape=(None, )), 'indices_labels': tf.placeholder(tf.int32, shape=(None, )), 'user_indices': tf.placeholder(tf.int32, shape=(None, )), 'item_indices': tf.placeholder(tf.int32, shape=(None, )), 'dropout': tf.placeholder_with_default(0., shape=()), 'weight_decay': tf.placeholder_with_default(0., shape=()), 'support': tf.sparse_placeholder(tf.float32, shape=(None, None)), 'support_t': tf.sparse_placeholder(tf.float32, shape=(None, None)), } div = hidden[0] // num_support if hidden[0] % num_support != 0: print( """\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d such that it can be evenly split in %d splits.\n""" % (hidden[0], num_support * div, num_support)) hidden[0] = num_support * div # create model model = MG_GAE(placeholders, input_dim=u_features.shape[1], num_support=num_support, hidden=hidden, num_users=m, num_items=n, learning_rate=lr, gamma=gamma, beta=beta, logging=True) # Convert sparse placeholders to tuples to construct feed_dict test_support = sparse_to_tuple(test_support) test_support_t = sparse_to_tuple(test_support_t) val_support = sparse_to_tuple(val_support) val_support_t = sparse_to_tuple(val_support_t) train_support = sparse_to_tuple(train_support) train_support_t = sparse_to_tuple(train_support_t) u_features = sparse_to_tuple(u_features) v_features = sparse_to_tuple(v_features) assert 
u_features[2][1] == v_features[2][ 1], 'Number of features of users and items must be the same!' num_features = u_features[2][1] u_features_nonzero = u_features[1].shape[0] v_features_nonzero = v_features[1].shape[0] indices_labels = [563] * train_labels.shape[0] indices_labels_val = [563] * val_labels.shape[0] indices_labels_test = [563] * test_labels.shape[0] # Feed_dicts for validation and test set stay constant over different update steps train_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, train_support, train_support_t, train_labels, indices_labels, train_u_indices, train_v_indices, 0.) # No dropout for validation and test runs val_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, val_support, val_support_t, val_labels, indices_labels_val, val_u_indices, val_v_indices, 0.) test_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, test_support, test_support_t, test_labels, indices_labels_test, test_u_indices, test_v_indices, 0.) # Collect all variables to be logged into summary merged_summary = tf.summary.merge_all() sess = tf.Session() sess.run(tf.global_variables_initializer()) auc_train = [] auc_test = [] auc_val = [] test_pred = [] for epoch in range(NB_EPOCH): t = time.time() # Run single weight update outs = sess.run([ model.training_op, model.loss, model.indices, model.labels, model.outputs, model.labels_class, model.classification, model.inputs, model.gcn_u, model.gcn_v, model.loss_frob, model.binary_entropy, model.u_inputs, model.v_inputs, model.weight, model.input_u, model.input_v, model.u_indices, model.v_indices ], feed_dict=train_feed_dict) train_avg_loss = outs[1] label_train = outs[5] output_train = outs[6] fpr_train, tpr_train, thresholds_train = roc_curve( label_train, output_train, pos_label=label_train.max()) roc_auc_train = auc(fpr_train, tpr_train) auc_train.append(roc_auc_train) val_avg_loss, val_classification, val_labels_corres = sess.run( [model.loss, model.classification, model.labels_class], feed_dict=val_feed_dict) #test_feed_dict)# fpr_val, tpr_val, thresholds_train = roc_curve( val_labels_corres, val_classification, pos_label=label_train.max()) roc_auc_val = auc(fpr_val, tpr_val) auc_val.append(roc_auc_val) test_avg_loss, test_classification, test_labels_corres = sess.run( [model.loss, model.classification, model.labels_class], feed_dict=test_feed_dict) fpr_test, tpr_test, thresholds_test = roc_curve( test_labels_corres, test_classification, pos_label=label_train.max()) roc_auc_test = auc(fpr_test, tpr_test) auc_test.append(roc_auc_test) test_pred.append(test_classification) if VERBOSE: print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss), "train_auc=", "{:.5f}".format(roc_auc_train), "val_loss=", "{:.5f}".format(val_avg_loss), "val_auc=", "{:.5f}".format(roc_auc_val), "\t\ttime=", "{:.5f}".format(time.time() - t)) print('test auc = ', roc_auc_test) sess.close() return auc_test, auc_train, auc_val
def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges, test_edges_false): # Settings flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.') flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.') flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.') flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.') flags.DEFINE_float('weight_decay', 0., 'Weight for L2 loss on embedding matrix.') flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).') flags.DEFINE_string('model', 'gcn_vae', 'Model string.') flags.DEFINE_integer('features', 1, 'Whether to use features (1) or not (0).') model_str = FLAGS.model #1-dim index array, used in cost function to only focus on those interactions with high confidence mask_index = construct_optimizer_list(features.shape[0], train_edges, train_edges_false) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj_train adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj = adj_train if FLAGS.features == 0: features = sp.identity(features.shape[0]) # featureless # Some preprocessing adj_norm = preprocess_graph(adj) # Define placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float64), 'adj': tf.sparse_placeholder(tf.float64), 'adj_orig': tf.sparse_placeholder(tf.float64), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) pos_weight = 1 norm = 1 #pos_weight = train_edges_false.shape[0] / float(train_edges.shape[0]) #norm = (train_edges.shape[0]+train_edges_false.shape[0]) / float(train_edges_false.shape[0]*train_edges_false.shape[0]) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=pos_weight, norm=norm, mask=mask_index) elif model_str == 'gcn_vae': opt = OptimizerVAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), model=model, num_nodes=num_nodes, pos_weight=pos_weight, norm=norm, mask=mask_index) # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) adj_label = adj_train + sp.eye(adj_train.shape[0]) adj_label = sparse_to_tuple(adj_label) # Train model for epoch in range(FLAGS.epochs): t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1])) print("Optimization Finished!") #return embedding for each protein emb = sess.run(model.z_mean, feed_dict=feed_dict) return emb
features = sp.identity(features.shape[0]) # featureless logging.info('preprocessing data') # Some preprocessing adj_norm = preprocess_graph(adj) logging.info('done preprocessing data') # Define placeholders placeholders = { 'features': tf.sparse_placeholder(tf.float32), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()) } num_nodes = adj.shape[0] features = sparse_to_tuple(features.tocoo()) num_features = features[2][1] features_nonzero = features[1].shape[0] logging.info('create model') # Create model model = None if model_str == 'gcn_ae': model = GCNModelAE(placeholders, num_features, features_nonzero) elif model_str == 'gcn_vae': model = GCNModelVAE(placeholders, num_features, num_nodes, features_nonzero) pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() norm = adj.shape[0] * adj.shape[0] / float( (adj.shape[0] * adj.shape[0] - adj.sum()) * 2) logging.info('optimizer')
def main(args): dataset = args.dataset emb_output_dir = args.output epochs = args.epochs agg = args.agg p = args.p tr = args.tr lam = args.lam lose_func = args.loss # Preprocess dataset adj, views_features = load_data(dataset, num_views=3) adj_orig = adj adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() # Calculate pairwise simlarity. views_sim_matrix = {} views_feature_matrix = {} for view in list(views_features.keys()): feature_matrix = csc_matrix.todense(views_features[view]) views_feature_matrix.update({view:feature_matrix}) kernal = "rbf" if lose_func == 'all': attr_sim = cal_attr_sim(views_feature_matrix, dataset) else: attr_sim = 0 # split nodes to train, valid and test datasets, # remove test edges from train adjacent matrix. adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(dataset, adj) print("Masking edges Done!") adj = adj_train nx_G = nx.from_numpy_array(adj.toarray()) num_nodes = adj.shape[0] adj_norm = preprocess_graph(adj) views_features_num = {} views_features_nonzero = {} for view in list(views_features.keys()): views_features[view] = sparse_to_tuple(views_features[view].tocoo()) views_features_num.update({view:views_features[view][2][1]}) views_features_nonzero.update({view:views_features[view][1].shape[0]}) # Build model MagCAE = {} for view in list(views_features.keys()): x,y = views_features[view][2][0], views_features[view][2][1] model = GAE(y, views_features_nonzero[view], adj_norm, math.ceil(2*p*y), math.ceil(p*y)) MagCAE.update({view:model}) # Loss function and optimizer. # loss weight taken by each nodes to the total loss. pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) /adj.sum() norm = adj.shape[0] * adj.shape[0] / float(adj.shape[0] * adj.shape[0] - adj.sum())*2 optimizer = tf.keras.optimizers.Adam() adj_targ = adj_train + sp.eye(adj_train.shape[0]) adj_targ = sparse_to_tuple(adj_targ) indices= np.array(adj_targ[0]) values = np.array(adj_targ[1]) dense_shape = np.array(adj_targ[2]) sparse_targ = tf.SparseTensor(indices = indices, values = values, dense_shape = dense_shape) sparse_targ = tf.cast(sparse_targ, dtype=tf.float32) adj_targ = tf.sparse.to_dense(sparse_targ) adj_targ = tf.reshape(adj_targ,[-1]) # Train and Evaluate Model # Training Loop: # In each epoch: views - > view_embedding -> aggregate embedding -> total loss -> update gradients decoder = Decoder(100) for epoch in range(epochs): loss = 0 start = time.time() with tf.GradientTape() as tape: ag_embedding ={} for VAE in list(MagCAE.keys()): v_embedding, a_hat = MagCAE[VAE](views_features[VAE]) ag_embedding.update({VAE:v_embedding}) # aggregate embeddings embedding, aggregator = aggregate_embeddings(ag_embedding, agg) # reconstruct a_hat a_hat = decoder(embedding) loss += loss_function(a_hat, adj_targ, pos_weight, norm, attr_sim, embedding, num_nodes, lam, lose_func) if agg == "weighted_concat": variables = MagCAE['view1'].trainable_variables + MagCAE['view2'].trainable_variables + MagCAE['view3'].trainable_variables + aggregator.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) # Evaluate on validate set embedding = np.array(embedding) roc_cur, ap_cur, _, _ = evaluate(val_edges, val_edges_false, adj_orig, embedding) print("Epoch {}: Val_Roc {:.4f}, Val_AP {:.4f}, Time Consumed {:.2f} sec\n".format(epoch+1, roc_cur, ap_cur, time.time()-start)) print("Training Finished!") # Evaluation Result on test 
Edges test_embedding= {} for VAE in list(MagCAE.keys()): v_embedding, a_hat = MagCAE[VAE](views_features[VAE]) test_embedding.update({VAE:v_embedding}) # aggregate embeddings embedding, aggregator = aggregate_embeddings(test_embedding, agg) embedding = np.array(embedding) # embedding is a tensor, convert to np array. # reconstruct a_hat test_roc, test_ap, fpr, tpr = evaluate(test_edges, test_edges_false, adj_orig, embedding) print("MagCAE test result on {}".format(dataset)) print("Test Roc: {}, Test AP: {}, P: {}, Training Ratio: {}, Lambda: {}.".format(test_roc, test_ap, p, tr, lam))
logging.warning(u"运行日志:构建不含边信息的模型") model = RecommenderGAE(placeholders, input_dim=u_features.shape[1], num_classes=NUMCLASSES, num_support=num_support, self_connections=SELFCONNECTIONS, num_basis_functions=BASES, hidden=HIDDEN, num_users=num_users, num_items=num_items, accum=ACCUM, learning_rate=LR, logging=True) # Convert sparse placeholders to tuples to construct feed_dict val_support = sparse_to_tuple(val_support) val_support_t = sparse_to_tuple(val_support_t) train_support = sparse_to_tuple(train_support) train_support_t = sparse_to_tuple(train_support_t) u_features = sparse_to_tuple(u_features) v_features = sparse_to_tuple(v_features) assert u_features[2][1] == v_features[2][1], 'Number of features of users and items must be the same!' num_features = u_features[2][1] u_features_nonzero = u_features[1].shape[0] v_features_nonzero = v_features[1].shape[0] # 使用二部图输入作为训练输出的idx以及损失函数的label # train_labels, train_u_indices, train_v_indices, u_dict, v_dict = get_original_labels()
def run(DATASET='douban', DATASEED=1234, random_seed=123, NB_EPOCH=200, DO=0, HIDDEN=[100, 75], FEATHIDDEN=64, LR=0.01, decay_rate=1.25, consecutive_threshold=5, FEATURES=False, SYM=True, TESTING=False, ACCUM='stackRGGCN', NUM_LAYERS=1, GCMC_INDICES=False): np.random.seed(random_seed) tf.set_random_seed(random_seed) SELFCONNECTIONS = False SPLITFROMFILE = True VERBOSE = False BASES = 2 WRITESUMMARY = False SUMMARIESDIR = 'logs/' if DATASET == 'ml_1m' or DATASET == 'ml_100k' or DATASET == 'douban': NUMCLASSES = 5 elif DATASET == 'ml_10m': NUMCLASSES = 10 print( '\n WARNING: this might run out of RAM, consider using train_minibatch.py for dataset %s' % DATASET) print( 'If you want to proceed with this option anyway, uncomment this.\n' ) sys.exit(1) elif DATASET == 'flixster': NUMCLASSES = 10 elif DATASET == 'yahoo_music': NUMCLASSES = 71 if ACCUM == 'sum': print( '\n WARNING: combining DATASET=%s with ACCUM=%s can cause memory issues due to large number of classes.' ) print( 'Consider using "--accum stack" as an option for this dataset.' ) print( 'If you want to proceed with this option anyway, uncomment this.\n' ) sys.exit(1) # Splitting dataset in training, validation and test set if DATASET == 'ml_1m' or DATASET == 'ml_10m': if FEATURES: datasplit_path = 'data/' + DATASET + '/withfeatures_split_seed' + str( DATASEED) + '.pickle' else: datasplit_path = 'data/' + DATASET + '/split_seed' + str( DATASEED) + '.pickle' elif FEATURES: datasplit_path = 'data/' + DATASET + '/withfeatures.pickle' else: datasplit_path = 'data/' + DATASET + '/nofeatures.pickle' if DATASET == 'flixster' or DATASET == 'douban' or DATASET == 'yahoo_music': u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \ val_labels, val_u_indices, val_v_indices, test_labels, \ test_u_indices, test_v_indices, class_values = load_data_monti(DATASET, TESTING) elif DATASET == 'ml_100k': print( "Using official MovieLens dataset split u1.base/u1.test with 20% validation set size..." ) u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \ val_labels, val_u_indices, val_v_indices, test_labels, \ test_u_indices, test_v_indices, class_values = load_official_trainvaltest_split(DATASET, TESTING) else: print("Using random dataset split ...") u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \ val_labels, val_u_indices, val_v_indices, test_labels, \ test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING, datasplit_path, SPLITFROMFILE, VERBOSE) num_users, num_items = adj_train.shape num_side_features = 0 # feature loading if not FEATURES: u_features = sp.identity( num_users, format='csr') # features is just one-hot vector! 
v_features = sp.identity(num_items, format='csr') u_features, v_features = preprocess_user_item_features( u_features, v_features) elif FEATURES and u_features is not None and v_features is not None: # use features as side information and node_id's as node input features print("Normalizing feature vectors...") u_features_side = normalize_features(u_features) v_features_side = normalize_features(v_features) u_features_side, v_features_side = preprocess_user_item_features( u_features_side, v_features_side) u_features_side = np.array(u_features_side.todense(), dtype=np.float32) v_features_side = np.array(v_features_side.todense(), dtype=np.float32) num_side_features = u_features_side.shape[1] # node id's for node input features id_csr_v = sp.identity(num_items, format='csr') id_csr_u = sp.identity(num_users, format='csr') u_features, v_features = preprocess_user_item_features( id_csr_u, id_csr_v) else: raise ValueError( 'Features flag is set to true but no features are loaded from dataset ' + DATASET) # print("User features shape: " + str(u_features.shape)) # print("Item features shape: " + str(v_features.shape)) # print("adj_train shape: " + str(adj_train.shape)) # global normalization support = [] support_t = [] adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32) for i in range(NUMCLASSES): # build individual binary rating matrices (supports) for each rating support_unnormalized = sp.csr_matrix(adj_train_int == i + 1, dtype=np.float32) if support_unnormalized.nnz == 0 and DATASET != 'yahoo_music': # yahoo music has dataset split with not all ratings types present in training set. # this produces empty adjacency matrices for these ratings. sys.exit( 'ERROR: normalized bipartite adjacency matrix has only zero entries!!!!!' ) support_unnormalized_transpose = support_unnormalized.T support.append(support_unnormalized) support_t.append(support_unnormalized_transpose) support = globally_normalize_bipartite_adjacency(support, symmetric=SYM) support_t = globally_normalize_bipartite_adjacency(support_t, symmetric=SYM) if SELFCONNECTIONS: support.append(sp.identity(u_features.shape[0], format='csr')) support_t.append(sp.identity(v_features.shape[0], format='csr')) num_support = len(support) support = sp.hstack(support, format='csr') support_t = sp.hstack(support_t, format='csr') # support and support_t become 3000x15000 (for douban with 3000 users/items and 5 ratings) # support is n_users x (n_items*n_ratings). support_t is n_items x (n_users*ratings) # NOTE: support is sparse matrix so the shape may not be as large as expected (?) # When is num_support ever not == num_rating_classes? # print('support shape: ' + str(support.shape)) # print('support_t shape: ' + str(support_t.shape)) if ACCUM == 'stack' or ACCUM == 'stackRGGCN': div = HIDDEN[0] // num_support if HIDDEN[0] % num_support != 0: print( """\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d such that it can be evenly split in %d splits.\n""" % (HIDDEN[0], num_support * div, num_support)) HIDDEN[0] = num_support * div ################################################################################################################## """ support contains only training set ratings. index into support using user/item indices to create test set support. 
""" test_support = val_support = train_support = support test_support_t = val_support_t = train_support_t = support_t if GCMC_INDICES: # Collect all user and item nodes for test set test_u = list(set(test_u_indices)) test_v = list(set(test_v_indices)) test_support = support[np.array(test_u)] test_support_t = support_t[np.array(test_v)] # Collect all user and item nodes for validation set val_u = list(set(val_u_indices)) val_v = list(set(val_v_indices)) val_support = support[np.array(val_u)] val_support_t = support_t[np.array(val_v)] # Collect all user and item nodes for train set train_u = list(set(train_u_indices)) train_v = list(set(train_v_indices)) train_support = support[np.array(train_u)] train_support_t = support_t[np.array(train_v)] test_u_dict = {n: i for i, n in enumerate(test_u)} test_v_dict = {n: i for i, n in enumerate(test_v)} test_u_indices = np.array([test_u_dict[o] for o in test_u_indices]) test_v_indices = np.array([test_v_dict[o] for o in test_v_indices]) val_u_dict = {n: i for i, n in enumerate(val_u)} val_v_dict = {n: i for i, n in enumerate(val_v)} val_u_indices = np.array([val_u_dict[o] for o in val_u_indices]) val_v_indices = np.array([val_v_dict[o] for o in val_v_indices]) train_u_dict = {n: i for i, n in enumerate(train_u)} train_v_dict = {n: i for i, n in enumerate(train_v)} print('max train_u_indices: {}'.format(max(train_u_indices))) train_u_indices = np.array( [train_u_dict[o] for o in train_u_indices] ) ### HERE IS WHERE indices get changed to suit the new indexing into smaller set of users train_v_indices = np.array([train_v_dict[o] for o in train_v_indices]) print('max train_u_indices after: {}'.format(max(train_u_indices))) # print('train_support_shape: {}'.format(train_support.shape)) # if GCMC_INDICES, THIS IS NO LONGER (n_users, n_items*n_rating_types). 
but < n_users ################################################################################################################## # features as side info if FEATURES: test_u_features_side = u_features_side[np.array(test_u)] test_v_features_side = v_features_side[np.array(test_v)] val_u_features_side = u_features_side[np.array(val_u)] val_v_features_side = v_features_side[np.array(val_v)] train_u_features_side = u_features_side[np.array(train_u)] train_v_features_side = v_features_side[np.array(train_v)] else: test_u_features_side = None test_v_features_side = None val_u_features_side = None val_v_features_side = None train_u_features_side = None train_v_features_side = None placeholders = { 'u_features': tf.sparse_placeholder(tf.float32, shape=np.array(u_features.shape, dtype=np.int64)), 'v_features': tf.sparse_placeholder(tf.float32, shape=np.array(v_features.shape, dtype=np.int64)), 'u_features_nonzero': tf.placeholder(tf.int32, shape=()), 'v_features_nonzero': tf.placeholder(tf.int32, shape=()), 'labels': tf.placeholder(tf.int32, shape=(None, )), 'u_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)), 'v_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)), 'user_indices': tf.placeholder(tf.int32, shape=(None, )), 'item_indices': tf.placeholder(tf.int32, shape=(None, )), 'class_values': tf.placeholder(tf.float32, shape=class_values.shape), 'dropout': tf.placeholder_with_default(0., shape=()), 'weight_decay': tf.placeholder_with_default(0., shape=()), 'support': tf.sparse_placeholder(tf.float32, shape=(None, None)), 'support_t': tf.sparse_placeholder(tf.float32, shape=(None, None)), } ################################################################################################################## E_start, E_end = get_edges_matrices(adj_train) # E_start = sp.hstack(E_start, format='csr') # confirm if vstack is correct and not hstack # E_end = sp.hstack(E_end, format='csr') # placeholders['E_start'] = tf.sparse_placeholder(tf.float32, shape=(None, None, None)) # placeholders['E_end'] = tf.sparse_placeholder(tf.float32, shape=(None, None, None)) placeholders['E_start_list'] = [] placeholders['E_end_list'] = [] for i in range(num_support): placeholders['E_start_list'].append( tf.sparse_placeholder(tf.float32, shape=(None, None))) placeholders['E_end_list'].append( tf.sparse_placeholder(tf.float32, shape=(None, None))) # print('shape of E_end for first rating type: {}'.format(E_end[0].toarray().shape)) ################################################################################################################## # create model if FEATURES: model = RecommenderSideInfoGAE(placeholders, input_dim=u_features.shape[1], feat_hidden_dim=FEATHIDDEN, num_classes=NUMCLASSES, num_support=num_support, self_connections=SELFCONNECTIONS, num_basis_functions=BASES, hidden=HIDDEN, num_users=num_users, num_items=num_items, accum=ACCUM, learning_rate=LR, num_side_features=num_side_features, logging=True) else: model = RecommenderGAE(placeholders, input_dim=u_features.shape[1], num_classes=NUMCLASSES, num_support=num_support, self_connections=SELFCONNECTIONS, num_basis_functions=BASES, hidden=HIDDEN, num_users=num_users, num_items=num_items, accum=ACCUM, learning_rate=LR, num_layers=NUM_LAYERS, logging=True) # Convert sparse placeholders to tuples to construct feed_dict. 
sparse placeholders expect tuple of (indices, values, shape) test_support = sparse_to_tuple(test_support) test_support_t = sparse_to_tuple(test_support_t) val_support = sparse_to_tuple(val_support) val_support_t = sparse_to_tuple(val_support_t) train_support = sparse_to_tuple(train_support) train_support_t = sparse_to_tuple(train_support_t) u_features = sparse_to_tuple(u_features) v_features = sparse_to_tuple(v_features) assert u_features[2][1] == v_features[2][ 1], 'Number of features of users and items must be the same!' num_features = u_features[2][1] u_features_nonzero = u_features[1].shape[0] v_features_nonzero = v_features[1].shape[0] # setting E_start to be the same for train, val, and test. E_start already only contains train edges (from preprocessing script) train_E_start = [] train_E_end = [] # print('LENGTH OF E_START: {}'.format(len(E_start))) # print('NUM_SUPPORT: {}'.format(num_support)) for i in range(num_support): train_E_start.append(sparse_to_tuple(E_start[i])) train_E_end.append(sparse_to_tuple(E_end[i])) val_E_start = test_E_start = train_E_start val_E_end = test_E_end = train_E_end # Feed_dicts for validation and test set stay constant over different update steps train_feed_dict = construct_feed_dict( placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, train_support, train_support_t, train_labels, train_u_indices, train_v_indices, class_values, DO, train_u_features_side, train_v_features_side, train_E_start, train_E_end) # No dropout for validation and test runs. DO = dropout. input for val and test is same u_features and v_features. val_feed_dict = construct_feed_dict( placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, val_support, val_support_t, val_labels, val_u_indices, val_v_indices, class_values, 0., val_u_features_side, val_v_features_side, val_E_start, val_E_end) test_feed_dict = construct_feed_dict( placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, test_support, test_support_t, test_labels, test_u_indices, test_v_indices, class_values, 0., test_u_features_side, test_v_features_side, test_E_start, test_E_end) # Collect all variables to be logged into summary merged_summary = tf.summary.merge_all() sess = tf.Session() sess.run(tf.global_variables_initializer()) if WRITESUMMARY: train_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/train', sess.graph) val_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/val') else: train_summary_writer = None val_summary_writer = None best_val_score = np.inf best_val_loss = np.inf best_epoch = 0 wait = 0 print('Training...') #### COUTNING PARAMS total_parameters = 0 for variable in tf.trainable_variables(): # shape is an array of tf.Dimension shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value total_parameters += variable_parameters print('Total params: {}'.format(total_parameters)) # FOR A VARIABLE LEARNING RATE assign_placeholder = tf.placeholder(tf.float32) assign_op = model.learning_rate.assign(assign_placeholder) old_loss = float('inf') # print('Original learning rate is {}'.format(sess.run(model.optimizer._lr))) train_rmses, val_rmses, train_losses, val_losses = [], [], [], [] for epoch in tqdm(range(NB_EPOCH)): t = time.time() # Run single weight update # outs = sess.run([model.opt_op, model.loss, model.rmse], feed_dict=train_feed_dict) # with exponential moving averages outs = sess.run([model.training_op, model.loss, model.rmse], feed_dict=train_feed_dict) train_avg_loss = 
outs[1] train_rmse = outs[2] val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) # if train_avg_loss > 0.999*old_loss: # consecutive += 1 # if consecutive >= consecutive_threshold: # LR /= decay_rate # sess.run(assign_op, feed_dict={assign_placeholder: LR}) # print('New learning rate is {}'.format(sess.run(model.optimizer._lr))) # consecutive = 0 # else: # consecutive = 0 # old_loss = train_avg_loss train_rmses.append(train_rmse) val_rmses.append(val_rmse) train_losses.append(train_avg_loss) val_losses.append(val_avg_loss) if VERBOSE: print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss), "train_rmse=", "{:.5f}".format(train_rmse), "val_loss=", "{:.5f}".format(val_avg_loss), "val_rmse=", "{:.5f}".format(val_rmse), "\t\ttime=", "{:.5f}".format(time.time() - t)) if val_rmse < best_val_score: best_val_score = val_rmse best_epoch = epoch if epoch % 20 == 0 and WRITESUMMARY: # Train set summary summary = sess.run(merged_summary, feed_dict=train_feed_dict) train_summary_writer.add_summary(summary, epoch) train_summary_writer.flush() # Validation set summary summary = sess.run(merged_summary, feed_dict=val_feed_dict) val_summary_writer.add_summary(summary, epoch) val_summary_writer.flush() if epoch % 100 == 0 and epoch > 1000 and not TESTING and False: saver = tf.train.Saver() save_path = saver.save(sess, "tmp/%s_seed%d.ckpt" % (model.name, DATASEED), global_step=model.global_step) # load polyak averages variables_to_restore = model.variable_averages.variables_to_restore( ) saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) print('polyak val loss = ', val_avg_loss) print('polyak val rmse = ', val_rmse) # Load back normal variables saver = tf.train.Saver() saver.restore(sess, save_path) # store model including exponential moving averages saver = tf.train.Saver() save_path = saver.save(sess, "tmp/%s.ckpt" % model.name, global_step=model.global_step) if VERBOSE: print("\nOptimization Finished!") print('best validation score =', best_val_score, 'at iteration', best_epoch) if TESTING: test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict) print('test loss = ', test_avg_loss) print('test rmse = ', test_rmse) # restore with polyak averages of parameters variables_to_restore = model.variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict) print('polyak test loss = ', test_avg_loss) print('polyak test rmse = ', test_rmse) sess.close() tf.reset_default_graph() return train_rmses, val_rmses, train_losses, val_losses, test_rmse else: # restore with polyak averages of parameters variables_to_restore = model.variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) print('polyak val loss = ', val_avg_loss) print('polyak val rmse = ', val_rmse) sess.close() tf.reset_default_graph() return train_rmses, val_rmses, train_losses, val_losses, val_rmse
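# --- Illustrative sketch (not part of the original pipeline) -----------------
# `get_edges_matrices(adj_train)` is used above but not defined in this file.
# Under the assumption that it returns, per rating class r, a pair of sparse
# edge->node incidence matrices (E_start[r][e, u] = 1 if edge e of rating r
# starts at user u, E_end[r][e, v] = 1 if it ends at item v), a minimal
# version could look like the hypothetical helper below.
import numpy as np
import scipy.sparse as sp


def get_edges_matrices_sketch(adj_train):
    """Build per-rating edge incidence matrices from a user x item rating matrix."""
    adj = sp.coo_matrix(adj_train)
    num_users, num_items = adj.shape
    E_start, E_end = [], []
    for r in np.unique(adj.data):
        mask = adj.data == r
        rows, cols = adj.row[mask], adj.col[mask]
        num_edges = rows.shape[0]
        ones = np.ones(num_edges, dtype=np.float32)
        # edge e of rating r starts at user rows[e] and ends at item cols[e]
        E_start.append(sp.csr_matrix((ones, (np.arange(num_edges), rows)),
                                     shape=(num_edges, num_users)))
        E_end.append(sp.csr_matrix((ones, (np.arange(num_edges), cols)),
                                   shape=(num_edges, num_items)))
    return E_start, E_end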
def run(user_features, movie_features, learning_rate=0.01, epochs=500, hidden=[500, 75], feat_hidden=64, accumulation='sum', dropout=0.7, num_basis_functions=2, features=False, symmetric=True, testing=True): """accumulation can be sum or stack""" # Set random seed # seed = 123 # use only for unit testing seed = int(time.time()) np.random.seed(seed) tf.set_random_seed(seed) tf.reset_default_graph() # Settings # ap = argparse.ArgumentParser() # # ap.add_argument("-d", "--dataset", type=str, default="ml_100k", # # choices=['ml_100k', 'ml_1m', 'ml_10m', 'douban', 'yahoo_music', 'flixster'], # # help="Dataset string.") # ap.add_argument("-lr", "--learning_rate", type=float, default=0.01, # help="Learning rate") # ap.add_argument("-e", "--epochs", type=int, default=2500, # help="Number training epochs") # ap.add_argument("-hi", "--hidden", type=int, nargs=2, default=[500, 75], # help="Number hidden units in 1st and 2nd layer") # ap.add_argument("-fhi", "--feat_hidden", type=int, default=64, # help="Number hidden units in the dense layer for features") # ap.add_argument("-ac", "--accumulation", type=str, default="sum", choices=['sum', 'stack'], # help="Accumulation function: sum or stack.") # ap.add_argument("-do", "--dropout", type=float, default=0.7, # help="Dropout fraction") # ap.add_argument("-nb", "--num_basis_functions", type=int, default=2, # help="Number of basis functions for Mixture Model GCN.") # ap.add_argument("-ds", "--data_seed", type=int, default=1234, # help="""Seed used to shuffle data in data_utils, taken from cf-nade (1234, 2341, 3412, 4123, 1324). # Only used for ml_1m and ml_10m datasets. """) # ap.add_argument("-sdir", "--summaries_dir", type=str, default='logs/' + str(datetime.datetime.now()).replace(' ', '_'), # help="Directory for saving tensorflow summaries.") # # Boolean flags # fp = ap.add_mutually_exclusive_group(required=False) # fp.add_argument('-nsym', '--norm_symmetric', dest='norm_symmetric', # help="Option to turn on symmetric global normalization", action='store_true') # fp.add_argument('-nleft', '--norm_left', dest='norm_symmetric', # help="Option to turn on left global normalization", action='store_false') # ap.set_defaults(norm_symmetric=True) # fp = ap.add_mutually_exclusive_group(required=False) # fp.add_argument('-f', '--features', dest='features', # help="Whether to use features (1) or not (0)", action='store_true') # fp.add_argument('-no_f', '--no_features', dest='features', # help="Whether to use features (1) or not (0)", action='store_false') # ap.set_defaults(features=False) # fp = ap.add_mutually_exclusive_group(required=False) # fp.add_argument('-ws', '--write_summary', dest='write_summary', # help="Option to turn on summary writing", action='store_true') # fp.add_argument('-no_ws', '--no_write_summary', dest='write_summary', # help="Option to turn off summary writing", action='store_false') # ap.set_defaults(write_summary=False) # fp = ap.add_mutually_exclusive_group(required=False) # fp.add_argument('-t', '--testing', dest='testing', # help="Option to turn on test set evaluation", action='store_true') # fp.add_argument('-v', '--validation', dest='testing', # help="Option to only use validation set evaluation", action='store_false') # ap.set_defaults(testing=False) # args = vars(ap.parse_args()) # print('Settings:') # print(args, '\n') # Define parameters DATASET = 'ml_100k' DATASEED = 1234 NB_EPOCH = epochs DO = dropout HIDDEN = hidden FEATHIDDEN = feat_hidden BASES = num_basis_functions LR = learning_rate WRITESUMMARY = False SUMMARIESDIR = 
'logs/' + str(datetime.datetime.now()).replace(' ', '_') FEATURES = features SYM = symmetric TESTING = testing ACCUM = accumulation SELFCONNECTIONS = False SPLITFROMFILE = True VERBOSE = True NUMCLASSES = 5 # Splitting dataset in training, validation and test set print("Using official MovieLens dataset split u1.base/u1.test with 20% validation set size...") u_features = user_features v_features = movie_features _, _, adj_train, train_labels, train_u_indices, train_v_indices, \ val_labels, val_u_indices, val_v_indices, test_labels, \ test_u_indices, test_v_indices, class_values = load_official_trainvaltest_split('ml_100k', TESTING) num_users, num_items = adj_train.shape num_side_features = 0 # feature loading if not FEATURES: u_features = sp.identity(num_users, format='csr') v_features = sp.identity(num_items, format='csr') u_features, v_features = preprocess_user_item_features(u_features, v_features) elif FEATURES and u_features is not None and v_features is not None: # use features as side information and node_id's as node input features print("Normalizing feature vectors...") u_features_side = normalize_features(u_features) v_features_side = normalize_features(v_features) u_features_side, v_features_side = preprocess_user_item_features(u_features_side, v_features_side) u_features_side = np.array(u_features_side.todense(), dtype=np.float32) v_features_side = np.array(v_features_side.todense(), dtype=np.float32) num_side_features = u_features_side.shape[1] # node id's for node input features id_csr_v = sp.identity(num_items, format='csr') id_csr_u = sp.identity(num_users, format='csr') u_features, v_features = preprocess_user_item_features(id_csr_u, id_csr_v) else: raise ValueError('Features flag is set to true but no features are loaded from dataset ' + DATASET) # global normalization support = [] support_t = [] adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32) for i in range(NUMCLASSES): # build individual binary rating matrices (supports) for each rating support_unnormalized = sp.csr_matrix(adj_train_int == i + 1, dtype=np.float32) if support_unnormalized.nnz == 0 and DATASET != 'yahoo_music': # yahoo music has dataset split with not all ratings types present in training set. # this produces empty adjacency matrices for these ratings. 
sys.exit('ERROR: normalized bipartite adjacency matrix has only zero entries!!!!!') support_unnormalized_transpose = support_unnormalized.T support.append(support_unnormalized) support_t.append(support_unnormalized_transpose) support = globally_normalize_bipartite_adjacency(support, symmetric=SYM) support_t = globally_normalize_bipartite_adjacency(support_t, symmetric=SYM) if SELFCONNECTIONS: support.append(sp.identity(u_features.shape[0], format='csr')) support_t.append(sp.identity(v_features.shape[0], format='csr')) num_support = len(support) support = sp.hstack(support, format='csr') support_t = sp.hstack(support_t, format='csr') if ACCUM == 'stack': div = HIDDEN[0] // num_support if HIDDEN[0] % num_support != 0: print("""\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d such that it can be evenly split in %d splits.\n""" % (HIDDEN[0], num_support * div, num_support)) HIDDEN[0] = num_support * div # Collect all user and item nodes for test set test_u = list(set(test_u_indices)) test_v = list(set(test_v_indices)) test_u_dict = {n: i for i, n in enumerate(test_u)} test_v_dict = {n: i for i, n in enumerate(test_v)} test_u_indices = np.array([test_u_dict[o] for o in test_u_indices]) test_v_indices = np.array([test_v_dict[o] for o in test_v_indices]) test_support = support[np.array(test_u)] test_support_t = support_t[np.array(test_v)] # Collect all user and item nodes for validation set val_u = list(set(val_u_indices)) val_v = list(set(val_v_indices)) val_u_dict = {n: i for i, n in enumerate(val_u)} val_v_dict = {n: i for i, n in enumerate(val_v)} val_u_indices = np.array([val_u_dict[o] for o in val_u_indices]) val_v_indices = np.array([val_v_dict[o] for o in val_v_indices]) val_support = support[np.array(val_u)] val_support_t = support_t[np.array(val_v)] # Collect all user and item nodes for train set train_u = list(set(train_u_indices)) train_v = list(set(train_v_indices)) train_u_dict = {n: i for i, n in enumerate(train_u)} train_v_dict = {n: i for i, n in enumerate(train_v)} train_u_indices = np.array([train_u_dict[o] for o in train_u_indices]) train_v_indices = np.array([train_v_dict[o] for o in train_v_indices]) train_support = support[np.array(train_u)] train_support_t = support_t[np.array(train_v)] # features as side info if FEATURES: test_u_features_side = u_features_side[np.array(test_u)] test_v_features_side = v_features_side[np.array(test_v)] val_u_features_side = u_features_side[np.array(val_u)] val_v_features_side = v_features_side[np.array(val_v)] train_u_features_side = u_features_side[np.array(train_u)] train_v_features_side = v_features_side[np.array(train_v)] else: test_u_features_side = None test_v_features_side = None val_u_features_side = None val_v_features_side = None train_u_features_side = None train_v_features_side = None placeholders = { 'u_features': tf.sparse_placeholder(tf.float32, shape=np.array(u_features.shape, dtype=np.int64)), 'v_features': tf.sparse_placeholder(tf.float32, shape=np.array(v_features.shape, dtype=np.int64)), 'u_features_nonzero': tf.placeholder(tf.int32, shape=()), 'v_features_nonzero': tf.placeholder(tf.int32, shape=()), 'labels': tf.placeholder(tf.int32, shape=(None,)), 'u_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)), 'v_features_side': tf.placeholder(tf.float32, shape=(None, num_side_features)), 'user_indices': tf.placeholder(tf.int32, shape=(None,)), 'item_indices': tf.placeholder(tf.int32, shape=(None,)), 'class_values': tf.placeholder(tf.float32, shape=class_values.shape), 'dropout': 
tf.placeholder_with_default(0., shape=()), 'weight_decay': tf.placeholder_with_default(0., shape=()), 'support': tf.sparse_placeholder(tf.float32, shape=(None, None)), 'support_t': tf.sparse_placeholder(tf.float32, shape=(None, None)), } # create model if FEATURES: model = RecommenderSideInfoGAE(placeholders, input_dim=u_features.shape[1], feat_hidden_dim=FEATHIDDEN, num_classes=NUMCLASSES, num_support=num_support, self_connections=SELFCONNECTIONS, num_basis_functions=BASES, hidden=HIDDEN, num_users=num_users, num_items=num_items, accum=ACCUM, learning_rate=LR, num_side_features=num_side_features, logging=True) else: model = RecommenderGAE(placeholders, input_dim=u_features.shape[1], num_classes=NUMCLASSES, num_support=num_support, self_connections=SELFCONNECTIONS, num_basis_functions=BASES, hidden=HIDDEN, num_users=num_users, num_items=num_items, accum=ACCUM, learning_rate=LR, logging=True) # Convert sparse placeholders to tuples to construct feed_dict test_support = sparse_to_tuple(test_support) test_support_t = sparse_to_tuple(test_support_t) val_support = sparse_to_tuple(val_support) val_support_t = sparse_to_tuple(val_support_t) train_support = sparse_to_tuple(train_support) train_support_t = sparse_to_tuple(train_support_t) u_features = sparse_to_tuple(u_features) v_features = sparse_to_tuple(v_features) assert u_features[2][1] == v_features[2][1], 'Number of features of users and items must be the same!' num_features = u_features[2][1] u_features_nonzero = u_features[1].shape[0] v_features_nonzero = v_features[1].shape[0] # Feed_dicts for validation and test set stay constant over different update steps train_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, train_support, train_support_t, train_labels, train_u_indices, train_v_indices, class_values, DO, train_u_features_side, train_v_features_side) # No dropout for validation and test runs val_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, val_support, val_support_t, val_labels, val_u_indices, val_v_indices, class_values, 0., val_u_features_side, val_v_features_side) test_feed_dict = construct_feed_dict(placeholders, u_features, v_features, u_features_nonzero, v_features_nonzero, test_support, test_support_t, test_labels, test_u_indices, test_v_indices, class_values, 0., test_u_features_side, test_v_features_side) # Collect all variables to be logged into summary merged_summary = tf.summary.merge_all() #sess = tf.Session() sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) if WRITESUMMARY: train_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/train', sess.graph) val_summary_writer = tf.summary.FileWriter(SUMMARIESDIR + '/val') else: train_summary_writer = None val_summary_writer = None best_val_score = np.inf best_val_loss = np.inf best_epoch = 0 wait = 0 print('Training...') train_loss_values = [] train_rmse_values = [] val_loss_values = [] val_rmse_values = [] list_embeddings = [] for epoch in range(NB_EPOCH): t = time.time() # Run single weight update # outs = sess.run([model.opt_op, model.loss, model.rmse], feed_dict=train_feed_dict) # with exponential moving averages outs = sess.run([model.training_op, model.loss, model.rmse], feed_dict=train_feed_dict) #print(len(model.embeddings)) train_avg_loss = outs[1] train_rmse = outs[2] val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) train_loss_values.append(train_avg_loss) 
train_rmse_values.append(train_rmse) val_loss_values.append(val_avg_loss) val_rmse_values.append(val_rmse) if VERBOSE: print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss), "train_rmse=", "{:.5f}".format(train_rmse), "val_loss=", "{:.5f}".format(val_avg_loss), "val_rmse=", "{:.5f}".format(val_rmse), "\t\ttime=", "{:.5f}".format(time.time() - t)) if epoch==NB_EPOCH - 1: embedding_users = model.embeddings[0].eval(feed_dict=train_feed_dict) embedding_movies = model.embeddings[1].eval(feed_dict=train_feed_dict) if val_rmse < best_val_score: best_val_score = val_rmse best_epoch = epoch if epoch % 20 == 0 and WRITESUMMARY: # Train set summary summary = sess.run(merged_summary, feed_dict=train_feed_dict) train_summary_writer.add_summary(summary, epoch) train_summary_writer.flush() # Validation set summary summary = sess.run(merged_summary, feed_dict=val_feed_dict) val_summary_writer.add_summary(summary, epoch) val_summary_writer.flush() if epoch % 100 == 0 and epoch > 1000 and not TESTING and False: saver = tf.train.Saver() save_path = saver.save(sess, "tmp/%s_seed%d.ckpt" % (model.name, DATASEED), global_step=model.global_step) # load polyak averages variables_to_restore = model.variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) print('polyak val loss = ', val_avg_loss) print('polyak val rmse = ', val_rmse) # Load back normal variables saver = tf.train.Saver() saver.restore(sess, save_path) # store model including exponential moving averages saver = tf.train.Saver() save_path = saver.save(sess, "tmp/%s.ckpt" % model.name, global_step=model.global_step) if VERBOSE: print("\nOptimization Finished!") print('best validation score =', best_val_score, 'at iteration', best_epoch+1) if TESTING: test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict) print('test loss = ', test_avg_loss) print('test rmse = ', test_rmse) # restore with polyak averages of parameters variables_to_restore = model.variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) test_avg_loss, test_rmse = sess.run([model.loss, model.rmse], feed_dict=test_feed_dict) print('polyak test loss = ', test_avg_loss) print('polyak test rmse = ', test_rmse) else: # restore with polyak averages of parameters variables_to_restore = model.variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) saver.restore(sess, save_path) val_avg_loss, val_rmse = sess.run([model.loss, model.rmse], feed_dict=val_feed_dict) print('polyak val loss = ', val_avg_loss) print('polyak val rmse = ', val_rmse) print('global seed = ', seed) sess.close() return embedding_users, embedding_movies, train_loss_values, train_rmse_values, val_loss_values, val_rmse_values
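# --- Example usage (sketch) ---------------------------------------------------
# Hypothetical driver for the run() defined immediately above. With
# features=False the user/movie feature arguments are replaced internally by
# identity matrices, so None is acceptable here; the ml_100k split files must
# be available to load_official_trainvaltest_split for this to actually run.
def _demo_run_ml_100k():
    emb_users, emb_movies, train_loss, train_rmse, val_loss, val_rmse = run(
        user_features=None,
        movie_features=None,
        learning_rate=0.01,
        epochs=200,
        hidden=[500, 75],
        feat_hidden=64,
        accumulation='stack',
        dropout=0.7,
        features=False,
        testing=True)
    print('final val rmse: {:.5f}'.format(val_rmse[-1]))
    print('user embeddings: {}, movie embeddings: {}'.format(
        emb_users.shape, emb_movies.shape))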
def train_one_graph(adj, adj_orig, features_csr, num_node, k_num, model, opt, placeholders, sess, new_learning_rate, feed_dict, epoch, graph_index): adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) # delete self loop adj_orig.eliminate_zeros() adj_new = adj features = sparse_to_tuple(features_csr.tocoo()) adj_norm, adj_norm_sparse = preprocess_graph(adj_new) adj_label = adj_new + sp.eye(adj.shape[0]) adj_label = sparse_to_tuple(adj_label) ############ # build models adj_clean = adj_orig.tocoo() adj_clean_tensor = tf.SparseTensor(indices=np.stack( [adj_clean.row, adj_clean.col], axis=-1), values=adj_clean.data, dense_shape=adj_clean.shape) ### initial clean and noised_mask clean_mask = np.array([1, 2, 3, 4, 5]) noised_mask = np.array([6, 7, 8, 9, 10]) noised_num = noised_mask.shape[0] / 2 ################################## # feed_dict.update({placeholders["adj"]: adj_norm}) feed_dict.update({placeholders["adj_orig"]: adj_label}) feed_dict.update({placeholders["features"]: features}) node_mask = np.ones([adj.shape[0], n_class]) node_mask[num_node:, :] = 0 feed_dict.update({placeholders['node_mask']: node_mask}) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) model.k = k_num ##################################################### t = time.time() ######## if epoch > int( FLAGS.epochs / 2): ## here we can control the manner of new model _ = sess.run([opt.G_min_op], feed_dict=feed_dict, options=run_options) else: _, x_tilde = sess.run([opt.D_min_op, model.realD_tilde], feed_dict=feed_dict, options=run_options) if epoch == int(FLAGS.epochs / 2): noised_indexes, clean_indexes = get_noised_indexes( x_tilde, adj_new, num_node) feed_dict.update({placeholders["noised_mask"]: noised_indexes}) feed_dict.update({placeholders["clean_mask"]: clean_indexes}) feed_dict.update( {placeholders["noised_num"]: len(noised_indexes) / 2}) if epoch % 1 == 0 and graph_index == 0: if epoch > int(FLAGS.epochs / 2): print("This is the generation part") else: print("This is the cluster mask part") print("Epoch:", '%04d' % (epoch + 1), "time=", "{:.5f}".format(time.time() - t)) G_loss, D_loss, new_learn_rate_value = sess.run( [opt.G_comm_loss, opt.D_loss, new_learning_rate], feed_dict=feed_dict, options=run_options) print("Step: %d,G: loss=%.7f ,L_u: loss= %.7f, LR=%.7f" % (epoch, G_loss, D_loss + 1, new_learn_rate_value)) ########################################## return
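# --- Illustrative driver loop (sketch) ----------------------------------------
# train_one_graph assumes the caller has already built the model, optimizer,
# placeholders, TF session, learning-rate tensor and a base feed_dict; those
# constructors are not part of this excerpt. A hypothetical outer loop over
# epochs and graphs could look like this:
def train_all_graphs_sketch(graphs, k_num, model, opt, placeholders, sess,
                            new_learning_rate, feed_dict):
    """graphs: list of (adj, adj_orig, features_csr, num_node) tuples."""
    for epoch in range(FLAGS.epochs):
        for graph_index, (adj, adj_orig, features_csr, num_node) in enumerate(graphs):
            train_one_graph(adj, adj_orig, features_csr, num_node, k_num,
                            model, opt, placeholders, sess,
                            new_learning_rate, feed_dict, epoch, graph_index)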
# create model
model = RecommenderGAE(placeholders,
                       input_dim=u_features.shape[1],
                       num_classes=NUMCLASSES,
                       num_support=num_support,
                       self_connections=SELFCONNECTIONS,
                       num_basis_functions=BASES,
                       hidden=HIDDEN,
                       num_users=num_users,
                       num_items=num_items,
                       accum=ACCUM,
                       learning_rate=LR,
                       logging=True)

# Convert sparse placeholders to tuples to construct feed_dict
test_support = sparse_to_tuple(test_support)
test_support_t = sparse_to_tuple(test_support_t)
val_support = sparse_to_tuple(val_support)
val_support_t = sparse_to_tuple(val_support_t)

u_features = sparse_to_tuple(u_features)
v_features = sparse_to_tuple(v_features)
assert u_features[2][1] == v_features[2][1], 'Number of features of users and items must be the same!'

num_features = u_features[2][1]
u_features_nonzero = u_features[1].shape[0]
v_features_nonzero = v_features[1].shape[0]

# Feed_dicts for validation and test set stay constant over different update steps
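# --- What sparse_to_tuple returns (sketch) ------------------------------------
# sparse_to_tuple comes from the project's preprocessing utilities and is not
# shown in this file. In GCN-style codebases it is commonly implemented as
# below, which is why x[2][1] is used above as the feature dimension and
# x[1].shape[0] as the number of non-zero entries.
import numpy as np
import scipy.sparse as sp


def sparse_to_tuple_sketch(sparse_mx):
    """Return (coords, values, shape) as expected by tf.sparse_placeholder."""
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape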
adj_orig.eliminate_zeros()

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
adj = adj_train

adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_norm = preprocess_graph(adj)
adj_norm_dense = scipy.sparse.coo_matrix(
    (adj_norm[1], (adj_norm[0][:, 0], adj_norm[0][:, 1])),
    shape=adj_norm[2]).toarray()

# Some preprocessing
num_nodes = adj.shape[0]
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
features_dense = scipy.sparse.coo_matrix(
    (features[1], (features[0][:, 0], features[0][:, 1])),
    shape=features[2]).toarray()
train_xs = features_dense

# In[3]:

# graph cnn function
def weight_variable_glorot(input_dim, output_dim, name=""):
    """Create a weight variable with Glorot & Bengio (AISTATS 2010)
    initialization.
    """
    # Body assumed: the original cell is cut off after the docstring, so the
    # standard Glorot uniform initialisation is used here.
    init_range = np.sqrt(6.0 / (input_dim + output_dim))
    initial = tf.random_uniform([input_dim, output_dim],
                                minval=-init_range,
                                maxval=init_range,
                                dtype=tf.float32)
    return tf.Variable(initial, name=name)
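# --- Illustrative use of the pieces above (sketch) -----------------------------
# adj_norm_dense and train_xs are the dense arrays prepared in this cell; a
# single dense GCN layer wired up with the Glorot-initialised weight could look
# like the hypothetical helper below (not part of the original model code).
import tensorflow as tf


def gcn_layer_sketch(inputs, adj_norm_dense, input_dim, output_dim, name):
    """One dense graph-convolution layer: relu(A_hat @ X @ W)."""
    w = weight_variable_glorot(input_dim, output_dim, name=name)
    a_hat = tf.constant(adj_norm_dense, dtype=tf.float32)
    return tf.nn.relu(tf.matmul(a_hat, tf.matmul(inputs, w)))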
# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj = adj_train

# Some preprocessing
adj_norm = preprocess_graph(adj)
features_mat = features.toarray()
attr_labels_list, dim_attr, features_rm_privacy = get_attr_list(
    FLAGS.dataset, labels, features_mat)
features_lil = sp.lil_matrix(features_rm_privacy)
features_tuple = sparse_to_tuple(features_lil.tocoo())

num_nodes = adj.shape[0]
features_sp = sparse_to_tuple(features_lil.tocoo())
num_features = features_sp[2][1]
features_nonzero = features_sp[1].shape[0]

pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = 1

adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

# In[2]:

# Define placeholders
placeholders = get_placeholder(adj)
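# --- Where pos_weight and norm typically go (sketch) ---------------------------
# The model and optimizer built from get_placeholder are not shown in this
# excerpt. In GAE-style implementations the two scalars computed above weight
# the adjacency-reconstruction loss roughly as follows (here norm is hard-coded
# to 1, so it only rescales the mean):
import tensorflow as tf


def reconstruction_loss_sketch(logits, labels, pos_weight, norm):
    """Weighted cross-entropy over all node pairs of the adjacency matrix."""
    return norm * tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(targets=labels,
                                                 logits=logits,
                                                 pos_weight=pos_weight))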
def mask_test_edges(self, edge_type, type_idx): edges_all, _, _ = preprocessing.sparse_to_tuple( self.adj_mats[edge_type][type_idx]) num_test = max(50, int(np.floor(edges_all.shape[0] * self.val_test_size))) num_val = max(50, int(np.floor(edges_all.shape[0] * self.val_test_size))) all_edge_idx = list(range(edges_all.shape[0])) np.random.shuffle(all_edge_idx) val_edge_idx = all_edge_idx[:num_val] val_edges = edges_all[val_edge_idx] test_edge_idx = all_edge_idx[num_val:(num_val + num_test)] test_edges = edges_all[test_edge_idx] train_edges = np.delete(edges_all, np.hstack([test_edge_idx, val_edge_idx]), axis=0) test_edges_false = [] while len(test_edges_false) < len(test_edges): if len(test_edges_false) % 1000 == 0: print("Constructing test edges=", "%04d/%04d" % (len(test_edges_false), len(test_edges))) idx_i = np.random.randint( 0, self.adj_mats[edge_type][type_idx].shape[0]) idx_j = np.random.randint( 0, self.adj_mats[edge_type][type_idx].shape[1]) if self._ismember([idx_i, idx_j], edges_all): continue if test_edges_false: if self._ismember([idx_i, idx_j], test_edges_false): continue test_edges_false.append([idx_i, idx_j]) val_edges_false = [] while len(val_edges_false) < len(val_edges): if len(val_edges_false) % 1000 == 0: print("Constructing val edges=", "%04d/%04d" % (len(val_edges_false), len(val_edges))) idx_i = np.random.randint( 0, self.adj_mats[edge_type][type_idx].shape[0]) idx_j = np.random.randint( 0, self.adj_mats[edge_type][type_idx].shape[1]) if self._ismember([idx_i, idx_j], edges_all): continue if val_edges_false: if self._ismember([idx_i, idx_j], val_edges_false): continue val_edges_false.append([idx_i, idx_j]) # Re-build adj matrices data = np.ones(train_edges.shape[0]) adj_train = sp.csr_matrix( (data, (train_edges[:, 0], train_edges[:, 1])), shape=self.adj_mats[edge_type][type_idx].shape) self.adj_train[edge_type][type_idx] = self.preprocess_graph(adj_train) self.train_edges[edge_type][type_idx] = train_edges self.val_edges[edge_type][type_idx] = val_edges self.val_edges_false[edge_type][type_idx] = np.array(val_edges_false) self.test_edges[edge_type][type_idx] = test_edges self.test_edges_false[edge_type][type_idx] = np.array(test_edges_false)
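# --- Helper assumed by mask_test_edges (sketch) --------------------------------
# self._ismember is not defined in this excerpt; in reference GAE code the
# membership test of a candidate edge against a 2-D edge array is usually
# implemented along these lines:
import numpy as np


def ismember_sketch(a, b, tol=5):
    """Return True if the row `a` (e.g. [idx_i, idx_j]) appears as a row of `b`."""
    a = np.asarray(a)
    b = np.asarray(b)
    rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
    return np.any(rows_close)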