def get_data(self, feed_dict):
    input = self.features
    f0 = feed_dict[self.placeholders['fields'][0]]
    dropout = feed_dict.get(self.placeholders['dropout'], 0.0)
    if self.sparse_input:
        # NOTE: 'slice' here is the repo's sparse row-slicing helper, which
        # shadows the Python builtin of the same name.
        input = slice(input, f0)
        if FLAGS.reverse:
            input = sparse_to_tuple(
                np_sparse_dropout(tuple_to_coo(input), 1 - dropout))
    else:
        input = dense_slice(input, f0)
        if FLAGS.reverse:
            input = np_dropout(input, 1 - dropout)
    feed_dict[self.inputs_ph] = input

    # Bookkeeping: count graph ops, adjacency sizes, and data volume per layer.
    for l in range(self.L):
        dim = self.agg0_dim if l == 0 else FLAGS.hidden1
        adj = feed_dict[self.placeholders['adj'][l]][0]
        self.g_ops += adj.shape[0] * dim * 4
        self.adj_sizes[l] += adj.shape[0]
        self.amt_data += adj.shape[0]
    for l in range(self.L + 1):
        self.field_sizes[l] += feed_dict[self.placeholders['fields'][l]].size
    for c, l in self.layer_comp:
        self.nn_ops += c * feed_dict[self.placeholders['fields'][l]].size * 4
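
# The sparse path above leans on NumPy-side helpers (tuple_to_coo,
# np_sparse_dropout, sparse_to_tuple) defined elsewhere in the repo. A minimal
# sketch of what the dropout helper is assumed to do, as an illustration only,
# not the repo's actual implementation:
import numpy as np
import scipy.sparse as sp

def np_sparse_dropout_sketch(mat, keep_prob):
    """Keep each stored value of a scipy COO matrix with probability
    keep_prob and rescale survivors by 1/keep_prob (inverted dropout)."""
    mat = mat.tocoo()
    mask = np.random.uniform(size=mat.nnz) < keep_prob
    return sp.coo_matrix((mat.data[mask] / keep_prob,
                          (mat.row[mask], mat.col[mask])),
                         shape=mat.shape)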
def selection(mat, num_val, num_test, diagonal=False):
    if diagonal:
        mat_triu = sp.triu(mat)
    else:
        mat_triu = mat
    mat_tuple = sparse_to_tuple(mat_triu)
    elements = mat_tuple[0]
    all_edge_idx = list(range(elements.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    val_edges = elements[val_edge_idx]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = elements[test_edge_idx]
    train_edges = np.delete(elements,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)
    return train_edges, val_edges, test_edges
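
# selection() shuffles the stored entries of a sparse matrix once and carves
# them into three disjoint edge sets. A minimal usage sketch, assuming
# sparse_to_tuple returns (coords, values, shape) as in the common GCN utils:
import numpy as np
import scipy.sparse as sp

adj = sp.random(100, 100, density=0.05, format='csr')
adj = adj + adj.T  # symmetric toy adjacency matrix
# diagonal=True keeps only the upper triangle, so each undirected edge is
# assigned to exactly one split.
train_edges, val_edges, test_edges = selection(adj, num_val=20,
                                               num_test=50, diagonal=True)
assert len(set(map(tuple, val_edges)) & set(map(tuple, test_edges))) == 0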
def train(model_config, sess, repeat_state, adj, features, y_train, y_val,
          y_test, train_mask, val_mask, test_mask, k_ratio, weight, method,
          is_gcn):
    # Print model name
    very_beginning = time.time()
    print('',
          'name : {}'.format(model_config['name']),
          'dataset : {}'.format(model_config['dataset']),
          sep='\n')

    # Pure label propagation needs no TF model at all.
    if model_config['Model'] == 'LP':
        train_time = time.time()
        test_acc, test_acc_of_class = Model17(adj, model_config['alpha'],
                                              y_train, y_test)
        train_time = time.time() - train_time
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("Total time={}s".format(time.time() - very_beginning))
        return test_acc, test_acc_of_class, 0, train_time, train_time

    # Preprocess (smooth) features
    if model_config['smooth_config']['type'] is not None:
        if model_config['connection'] == ['f' for i in range(len(model_config['connection']))]:
            # MLP case: only smooth the rows that are actually used.
            fetch = train_mask + val_mask + test_mask
            new_features = np.zeros(features.shape, dtype=features.dtype)
            new_features[fetch], smoothing_time = graphconv(
                features, adj, model_config['smooth_config'], fetch=fetch)
            features = new_features
        else:
            features, smoothing_time = graphconv(features, adj,
                                                 model_config['smooth_config'])
    else:
        smoothing_time = 0

    support = [preprocess_adj(adj)]
    num_supports = 1

    # Speed-up for MLP: feed only the labeled/evaluated rows.
    is_mlp = model_config['connection'] == ['f' for _ in range(len(model_config['connection']))]
    if is_mlp:
        train_features = features[train_mask]
        y_train = y_train[train_mask]
        y_train = y_train.astype(np.int32)
        val_features = features[val_mask]
        test_features = features[test_mask]
        labels_mask = np.ones(train_mask.sum(), dtype=np.int32)
        unlabels_mask = np.ones(test_mask.sum(), dtype=np.int32)
    else:
        train_features = features
        val_features = features
        test_features = features
        labels_mask = train_mask.astype(np.int32)
        unlabels_mask = test_mask.astype(np.int32)
        y_train = y_train.astype(np.int32)
    input_dim = features.shape[1]
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    # Define placeholders
    placeholders = {
        'labels': tf.placeholder_with_default(y_train, name='labels',
                                              shape=(None, y_train.shape[1])),
        'labels_mask': tf.placeholder_with_default(labels_mask, shape=(None),
                                                   name='labels_mask'),
        'unlabels_mask': tf.placeholder_with_default(unlabels_mask,
                                                     shape=(None),
                                                     name='unlabels_mask'),
        'dropout': tf.placeholder_with_default(0., name='dropout', shape=()),
        'adj_nnz': tf.placeholder_with_default(support[0].values.shape,
                                               shape=(1), name='adj_nnz'),
    }
    if not is_mlp:
        placeholders['support'] = [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ]
    if isinstance(train_features, tf.SparseTensorValue):
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            train_features[1].shape, shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.sparse_placeholder(tf.float32,
                                                         name='features')
    else:
        placeholders['num_features_nonzero'] = tf.placeholder_with_default(
            [0], shape=(1), name='num_features_nonzero')
        placeholders['features'] = tf.placeholder_with_default(
            train_features, shape=[None, features.shape[1]], name='features')

    # Create model
    model = IGCN(model_config, placeholders, is_gcn, k_ratio, weight, method,
                 input_dim=input_dim)

    # Random initialization
    sess.run(tf.global_variables_initializer())

    # Initialize FileWriter, saver & variables in graph
    train_writer = None
    valid_writer = None
    saver = tf.train.Saver()

    # Construct feed dictionaries
    if is_mlp:
        if isinstance(features, tf.SparseTensorValue):
            train_feed_dict = {
                placeholders['features']: train_features,
                placeholders['dropout']: model_config['dropout'],
            }
        else:
            train_feed_dict = {
                placeholders['dropout']: model_config['dropout']
            }
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=np.bool), 0, test_mask,
            placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=np.bool), 0, test_mask,
            placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support,
                                              y_train, train_mask,
                                              model_config['dropout'],
                                              test_mask, placeholders)
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, 0, test_mask,
                                              placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, 0, test_mask,
                                             placeholders)

    # Some support variables
    acc_list = []
    max_valid_acc = 0
    min_train_loss = 1000000
    t_test = time.time()
    sess.run(model.assign_data, feed_dict=test_feed_dict)
    test_cost, test_acc, test_mo, test_acc_of_class = sess.run([
        model.cross_entropy_loss, model.accuracy,
        model.mo_accuarcy,  # attribute name as spelled on the model
        model.accuracy_of_class
    ])
    sess.run(model.assign_data, feed_dict=train_feed_dict)
    if model_config['validate']:
        valid_loss, valid_acc, valid_summary = sess.run(
            [model.cross_entropy_loss, model.accuracy, model.summary],
            feed_dict=valid_feed_dict)
    test_duration = time.time() - t_test
    train_time = 0
    step = model_config['epochs']

    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            # Training step
            t = time.time()
            sess.run(model.opt_op)
            t = time.time() - t
            train_time += t
            train_loss, train_acc = sess.run(
                [model.cross_entropy_loss, model.accuracy])

            # Periodically evaluate; keep the test metrics of the best
            # performance so far.
            if step > model_config['epochs'] * 0.9 or step % 20 == 0:
                if model_config['validate']:
                    sess.run(model.assign_data, feed_dict=valid_feed_dict)
                    valid_loss, valid_acc = sess.run(
                        [model.cross_entropy_loss, model.accuracy])
                    acc_list.append(valid_acc)
                    if valid_acc >= max_valid_acc:
                        max_valid_acc = valid_acc
                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_acc_of_class = sess.run([
                            model.cross_entropy_loss, model.accuracy,
                            model.accuracy_of_class
                        ])
                        test_duration = time.time() - t_test
                        if model_config['verbose']:
                            print('*', end='')
                else:
                    acc_list.append(train_acc)
                    if train_loss < min_train_loss:
                        min_train_loss = train_loss
                        t_test = time.time()
                        sess.run(model.assign_data, feed_dict=test_feed_dict)
                        test_cost, test_acc, test_mo, test_acc_of_class = \
                            sess.run([model.cross_entropy_loss,
                                      model.accuracy, model.mo_accuarcy,
                                      model.accuracy_of_class])
                        test_duration = time.time() - t_test
                        if model_config['verbose']:
                            print('*', end='')
                sess.run(model.assign_data, feed_dict=train_feed_dict)

                # Print results
                if model_config['verbose']:
                    print("Epoch: {:04d}".format(step),
                          "train_loss= {:.3f}".format(train_loss),
                          "train_acc= {:.3f}".format(train_acc),
                          end=' ')
                    if model_config['validate']:
                        print("val_loss=", "{:.3f}".format(valid_loss),
                              "val_acc= {:.3f}".format(valid_acc),
                              end=' ')
                    else:
                        print("test_loss=", "{:.3f}".format(test_cost),
                              "test_acc= {:.3f}".format(test_acc),
                              end=' ')
                    print("time=", "{:.5f}".format(t))

    print("Test set results:",
          "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f} {:.5f}".format(test_acc, test_mo),
          "time=", "{:.5f}".format(test_duration))
    print("accuracy of each class=", test_acc_of_class)

    # Saving
    if model_config['logdir']:
        print('Save model to "{:s}"'.format(
            saver.save(sess=sess,
                       save_path=path.join(model_config['logdir'],
                                           'model.ckpt'))))
    if model_config['save_feature']:
        sess.run(model.assign_data, feed_dict=test_feed_dict)
        outs = sess.run(model.outs_for_graph)
        with open(model_config['save_feature'], 'w') as save:
            for line in outs:
                for item in line:
                    save.write('%s ' % item)
                save.write('\n')

    print("Total time={}s".format(time.time() - very_beginning))
    return (test_acc, test_acc_of_class, train_time / step * 1000,
            smoothing_time, train_time + smoothing_time)
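
# construct_feed_dict is defined elsewhere in the repo. A hypothetical sketch
# consistent with the call sites above (argument order: features, support,
# labels, labels_mask, dropout, unlabels_mask, placeholders); the real helper
# may differ:
def construct_feed_dict_sketch(features, support, labels, labels_mask,
                               dropout, unlabels_mask, placeholders):
    """Map each placeholder to its value for one sess.run() call."""
    feed_dict = {
        placeholders['features']: features,
        placeholders['labels']: labels,
        placeholders['labels_mask']: labels_mask,
        placeholders['unlabels_mask']: unlabels_mask,
        placeholders['dropout']: dropout,
    }
    if 'support' in placeholders:
        feed_dict.update({placeholders['support'][i]: support[i]
                          for i in range(len(support))})
    return feed_dict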
def connect_n6_krandom(ref, voxel_node, node_voxel, working_nodes, k_random,
                       weighting, args):
    edges = []
    labels = []
    num_nodes = node_voxel.shape[0]
    tabu_list = {}  # Avoids duplicated elements in the adjacency matrix.
    nodes_complete = {}  # Counts how many neighbors a node already has.
    valid_nodes = np.array(np.where(working_nodes > 0))
    valid_nodes = np.transpose(valid_nodes)
    for node_idx in range(num_nodes):
        y, x, z = node_voxel[node_idx]  # 3D position of the current node
        labels.append(ref[y, x, z])  # Labels come from the CNN prediction

        # Basic n6 connectivity
        for axis in range(3):
            axisy = int(axis == 0)
            axisx = int(axis == 1)
            axisz = int(axis == 2)
            for ne in [-1, 1]:
                neighbor = y + axisy * ne, x + axisx * ne, z + axisz * ne
                if neighbor not in voxel_node:
                    continue
                ne_idx = voxel_node[neighbor]
                if (node_idx, ne_idx) not in tabu_list and \
                        (ne_idx, node_idx) not in tabu_list:
                    tabu_list[(node_idx, ne_idx)] = 1  # Add the edge to the tabu list.
                    # Compute the weight for the current pair, both directions.
                    weighting.weights_for((y, x, z), neighbor, args)
                    weighting.weights_for(neighbor, (y, x, z), args)
                    edges.append([node_idx, ne_idx])
                    # Add the edge in the opposite direction.
                    edges.append([ne_idx, node_idx])

        # Generate random connections to the current node.
        for j in range(k_random):
            valid_neigh = False
            if node_idx not in nodes_complete:
                nodes_complete[node_idx] = 0
            elif nodes_complete[node_idx] == k_random:
                break
            while not valid_neigh:
                lu_idx = np.random.randint(low=0, high=num_nodes)  # random node
                yl, xl, zl = valid_nodes[lu_idx]  # Euclidean coordinates of the voxel
                lu_idx = voxel_node[yl, xl, zl]  # node index
                if lu_idx not in nodes_complete:
                    nodes_complete[lu_idx] = 0
                    valid_neigh = True
                elif nodes_complete[lu_idx] < k_random:
                    valid_neigh = True
            # Check that the edge was not generated already.
            if (node_idx, lu_idx) not in tabu_list and \
                    (lu_idx, node_idx) not in tabu_list and node_idx != lu_idx:
                # Compute the weight for the current pair, both directions.
                weighting.weights_for((y, x, z), (yl, xl, zl), args)
                weighting.weights_for((yl, xl, zl), (y, x, z), args)
                tabu_list[(node_idx, lu_idx)] = 1
                edges.append([node_idx, lu_idx])
                # Add the edge in the opposite direction.
                edges.append([lu_idx, node_idx])
                # Increase the number of neighbors connected to each node.
                nodes_complete[node_idx] += 1
                nodes_complete[lu_idx] += 1

    edges = np.asarray(edges, dtype=int)
    pp_args = {"edges": edges, "num_nodes": num_nodes}
    weighting.post_process(pp_args)  # Apply weight post-processing, e.g. normalization.
    weights = weighting.get_weights()
    edges, weights, _ = gut.sparse_to_tuple(weights)
    return edges, weights, np.asarray(labels, dtype=np.float32), num_nodes
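
# The final conversion relies on gut.sparse_to_tuple. In the common GCN-utils
# version of this helper, a scipy sparse matrix comes back as a
# (coords, values, shape) triple; a sketch under that assumption:
import numpy as np
import scipy.sparse as sp

def sparse_to_tuple_sketch(mat):
    """Typical GCN-utils conversion: scipy sparse -> (coords, values, shape)."""
    mat = mat.tocoo()
    coords = np.vstack((mat.row, mat.col)).transpose()
    return coords, mat.data, mat.shape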
# Given a training set of protein-protein interactions in yeast S. cerevisiae,
# our goal is to take these interactions and train a GCN model that can
# predict new protein-protein interactions. That is, we would like to predict
# new edges in the yeast protein interaction network.
print("Start")

# If regenerate_training_data is set to True: regenerate
# training/validation/test data.
adj, adj_train, val_edges, val_edges_false, test_edges, test_edges_false = load_data()
num_nodes = adj.shape[0]
num_edges = adj.sum()

#
# Simple GCN: no node features (featureless). Substitute the identity matrix
# for the feature matrix: X = I
#
features = sparse_to_tuple(sp.identity(num_nodes))
num_features = features[2][1]
features_nonzero = features[1].shape[0]

#
# Store the original adjacency matrix (without diagonal entries) for later.
#
adj_orig = (adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]),
                                shape=adj.shape))
adj_orig.eliminate_zeros()

adj_norm = sparse_to_tuple(sym_normalize_matrix(adj_train + sp.eye(adj.shape[0])))
# Since the adj_train matrix was not created with diagonal entries, add them now.
adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))

# Define placeholders
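
# sym_normalize_matrix is not defined in this snippet. For a GCN it is
# conventionally the symmetric normalization D^-1/2 * A * D^-1/2; a sketch
# under that assumption (not necessarily this repo's implementation):
import numpy as np
import scipy.sparse as sp

def sym_normalize_matrix_sketch(adj):
    """Assumed GCN-style symmetric normalization D^-1/2 * A * D^-1/2."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0  # isolated nodes get degree 0
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()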
def get_data(self, feed_dict, is_training):
    ids = feed_dict[self.placeholders['fields'][-1]]
    if self.sparse_input:
        feed_dict[self.inputs_ph] = sparse_to_tuple(self.features[ids])
    else:
        feed_dict[self.inputs_ph] = self.features[ids]
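
# This row-slicing works for both dense arrays and scipy CSR matrices, since
# CSR supports fancy row indexing; a quick illustration:
import numpy as np
import scipy.sparse as sp

features = sp.random(100, 16, density=0.1, format='csr')
ids = np.array([3, 7, 42])
batch = features[ids]  # fancy row indexing returns a 3x16 CSR matrix
assert batch.shape == (3, 16)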
def train(model_config, sess, seed, repeat_state, data_split=None):
    # Print model_config
    very_beginning = time.time()
    print('',
          'name : {}'.format(model_config['name']),
          'logdir : {}'.format(model_config['logdir']),
          'dataset : {}'.format(model_config['dataset']),
          'train_size : {}'.format(model_config['train_size']),
          'learning_rate : {}'.format(model_config['learning_rate']),
          'feature : {}'.format(model_config['feature']),
          'logging : {}'.format(model_config['logging']),
          sep='\n')

    if data_split:
        adj = data_split['adj']
        features = data_split['features']
        y_train = data_split['y_train']
        y_val = data_split['y_val']
        y_test = data_split['y_test']
        train_mask = data_split['train_mask']
        val_mask = data_split['val_mask']
        test_mask = data_split['test_mask']
        triplet = data_split['triplet']
        size_of_each_class = None  # not tracked for a pre-split dataset
    else:
        # Load data
        adj, features, y_train, y_val, y_test, train_mask, val_mask, \
            test_mask, size_of_each_class, triplet = load_data(
                model_config['dataset'],
                train_size=model_config['train_size'],
                validation_size=model_config['validation_size'],
                model_config=model_config,
                shuffle=model_config['shuffle'],
                repeat_state=repeat_state)
        stored_A = model_config['dataset']
        if model_config['drop_inter_class_edge']:
            adj = drop_inter_class_edge(adj)
            stored_A = model_config['dataset'] + '_drop'
        # Preprocess (smooth) features
        begin = time.time()
        features = smooth(features, adj, model_config['smoothing'],
                          model_config, stored_A=stored_A + '_A_I')
        print(time.time() - begin, 's')
        data_split = {
            'adj': adj,
            'features': features,
            'y_train': y_train,
            'y_val': y_val,
            'y_test': y_test,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'triplet': triplet,
        }

    laplacian = sparse.diags(adj.sum(1).flat, 0) - adj
    laplacian = laplacian.astype(np.float32).tocoo()

    if type(model_config['t']) == int and model_config['t'] < 0:
        eta = adj.shape[0] / (adj.sum() / adj.shape[0]) ** len(model_config['connection'])
        model_config['t'] = (y_train.sum(axis=0) * 3 * eta /
                             y_train.sum()).astype(np.int64)
        print('t=', model_config['t'])

    if model_config['Model'] == 0:
        pass
    elif model_config['Model'] in [1, 2, 3, 4]:
        # Absorption probability
        print('Calculating Absorption Probability...',
              'alpha :{}'.format(model_config['alpha']),
              'type :{}'.format(model_config['absorption_type']),
              sep='\n')
        if model_config['Model'] == 1:
            adj = Model1(adj, model_config['t'], model_config['alpha'],
                         model_config['absorption_type'])
        elif model_config['Model'] == 2:
            adj = Model2(adj, model_config['s'], model_config['alpha'],
                         y_train)
        elif model_config['Model'] == 3:
            y_train, train_mask = Model3(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
        elif model_config['Model'] == 4:
            y_train, train_mask = Model4(adj, model_config['s'],
                                         model_config['alpha'], y_train,
                                         train_mask)
    elif model_config['Model'] == 5:
        adj = Model5(features, adj, model_config['mu'])
    elif model_config['Model'] == 6:
        adj = Model6(adj)
    elif model_config['Model'] == 7:
        y_train, train_mask = Model7(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask, features)
    elif model_config['Model'] == 8:
        y_train, train_mask = Model8(adj, model_config['s'],
                                     model_config['alpha'], y_train,
                                     train_mask)
    elif model_config['Model'] == 9:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 10:
        y_train, train_mask = Model10(adj, model_config['s'],
                                      model_config['t'],
                                      model_config['alpha'], y_train,
                                      train_mask, features,
                                      stored_A=stored_A + '_A_H')
    elif model_config['Model'] == 11:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 12:
        pass
    elif model_config['Model'] == 13:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 14:
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 15:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        y = np.sum(train_mask)
        label_per_sample, sample2label = Model11(y, y_train, train_mask)
    elif model_config['Model'] == 16:
        # Self-training: run a sub-model first, then add its predictions to
        # the training set.
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])) as sub_sess:
                tf.set_random_seed(seed)
                # train() returns five values; only the first three are used here.
                test_acc, test_acc_of_class, prediction, _, _ = train(
                    model_config['Model_to_add_label'], sub_sess, seed,
                    repeat_state, data_split=data_split)
                y_train, train_mask = Model16(prediction, model_config['t'],
                                              y_train, train_mask)
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 17:
        if model_config['smoothing'] is not None:
            stored_A = None
            adj = construct_knn_graph(features, model_config['k'])
        else:
            stored_A = stored_A + '_A_I'
            if model_config['drop_inter_class_edge']:
                stored_A = None
        test_acc, test_acc_of_class, prediction = Model17(
            adj, model_config['alpha'], y_train, train_mask, y_test,
            stored_A=stored_A)
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_beginning))
        return (test_acc, test_acc_of_class, prediction, size_of_each_class,
                time.time() - very_beginning)
    elif model_config['Model'] == 18:
        y_train, train_mask = Model9(adj, model_config['t'],
                                     model_config['alpha'], y_train,
                                     train_mask, stored_A=stored_A + '_A_I')
        alpha = 1e-6
        test_acc, test_acc_of_class, prediction = Model17(
            adj, alpha, y_train, train_mask, y_test,
            stored_A=stored_A + '_A_I')
        print("Test set results: accuracy= {:.5f}".format(test_acc))
        print("accuracy of each class=", test_acc_of_class)
        return test_acc, test_acc_of_class, prediction
    elif model_config['Model'] == 19:
        with tf.Graph().as_default():
            with tf.Session(config=tf.ConfigProto(
                    intra_op_parallelism_threads=model_config['threads'])) as sub_sess:
                tf.set_random_seed(seed)
                # train() returns five values; only the first three are used here.
                test_acc, test_acc_of_class, prediction, _, _ = train(
                    model_config['Model_to_add_label'], sub_sess, seed,
                    repeat_state, data_split=data_split)
                stored_A = stored_A + '_A_I'
                y_train, train_mask = Model19(prediction, model_config['t'],
                                              y_train, train_mask, adj,
                                              model_config['alpha'], stored_A,
                                              model_config['Model19'])
        model_config = model_config['Model_to_predict']
        print('',
              'name : {}'.format(model_config['name']),
              'logdir : {}'.format(model_config['logdir']),
              'dataset : {}'.format(model_config['dataset']),
              'train_size : {}'.format(model_config['train_size']),
              'learning_rate : {}'.format(model_config['learning_rate']),
              'feature : {}'.format(model_config['feature']),
              'logging : {}'.format(model_config['logging']),
              sep='\n')
    elif model_config['Model'] == 20:
        pass
    elif model_config['Model'] == 21:
        pass
    elif model_config['Model'] == 22:
        alpha = model_config['alpha']
        stored_A = stored_A + '_A_I'
        features = Model22(adj, features, alpha, stored_A)
    elif model_config['Model'] == 23:
        if model_config['classifier'] == 'tree':
            clf = tree.DecisionTreeClassifier(
                max_depth=model_config['tree_depth'])
            t = time.time()
            clf.fit(features[train_mask],
                    np.argmax(y_train[train_mask], axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'svm':
            clf = svm.SVC()
            t = time.time()
            clf.fit(features[train_mask],
                    np.argmax(y_train[train_mask], axis=1))
            t = time.time() - t
            prediction = clf.predict(features[test_mask])
        elif model_config['classifier'] == 'cnn':
            prediction, t = cnn.train(model_config, features, train_mask,
                                      y_train, test_mask, y_test)
        else:
            raise ValueError("model_config['classifier'] should be in "
                             "['svm', 'tree', 'cnn']")
        test_acc = np.sum(prediction == np.argmax(y_test[test_mask],
                                                  axis=1)) / np.sum(test_mask)
        one_hot_prediction = np.zeros(y_test[test_mask].shape)
        one_hot_prediction[np.arange(one_hot_prediction.shape[0]),
                           prediction] = 1
        test_acc_of_class = np.sum(one_hot_prediction * y_test[test_mask],
                                   axis=0) / np.sum(y_test[test_mask], axis=0)
        print("Test set results: cost= {:.5f} accuracy= {:.5f} time= {:.5f}".
              format(0., test_acc, t))
        print("accuracy of each class=", test_acc_of_class)
        print("Total time={}s".format(time.time() - very_beginning))
        return test_acc, test_acc_of_class, prediction, size_of_each_class, t
    elif model_config['Model'] == 26:
        adj = Model26(adj, model_config['t'], model_config['alpha'], y_train,
                      train_mask, stored_A=stored_A + '_A_I')
    elif model_config['Model'] == 28:
        features = Model28(adj, features, stored_A, model_config['k'])
    else:
        raise ValueError("model_config['Model'] must be one of "
                         "[0-23, 26, 28], but is {} now".format(
                             model_config['Model']))

    # Some preprocessing
    if model_config['connection'] == ['f' for i in range(len(model_config['connection']))]:
        train_features = features[train_mask]
        val_features = features[val_mask]
        test_features = features[test_mask]
    else:
        train_features = features
        val_features = features
        test_features = features
    if sparse.issparse(features):
        train_features = sparse_to_tuple(train_features)
        val_features = sparse_to_tuple(val_features)
        test_features = sparse_to_tuple(test_features)
        features = sparse_to_tuple(features)

    if model_config['Model'] == 12:
        if model_config['k'] < 0:
            # Pick k from the label rate eta so that k^depth ~ 1/eta.
            if hasattr(model_config['train_size'], '__getitem__'):
                eta = 0
                for i in model_config['train_size']:
                    eta += i
                eta /= adj.shape[0]
            else:
                eta = model_config['train_size'] / 100
            k = (1 / eta) ** (1 / len(model_config['connection']))
            k = int(k)
        else:
            k = model_config['k']
        model_config['name'] += '_k{}'.format(k)
        support = Model12(adj, k)
        num_supports = len(support)
    elif model_config['conv'] == 'taubin':
        support = [
            sparse_to_tuple(
                taubin_smoothor(adj, model_config['taubin_lambda'],
                                model_config['taubin_mu'],
                                model_config['taubin_repeat']))
        ]
        num_supports = 1
    elif model_config['conv'] == 'test21':
        support = [
            sparse_to_tuple(
                Test21(adj, model_config['alpha'],
                       beta=model_config['beta'],
                       stored_A=stored_A + '_A_I'))
        ]
        num_supports = 1
    elif model_config['conv'] == 'gcn':
        support = [preprocess_adj(adj)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_unnorm':
        support = [sparse_to_tuple(adj.astype(np.float32))]
        num_supports = 1
    elif model_config['conv'] == 'gcn_noloop':
        support = [preprocess_adj(adj, loop=False)]
        num_supports = 1
    elif model_config['conv'] == 'gcn_rw':
        support = [preprocess_adj(adj, type='rw')]
        num_supports = 1
    elif model_config['conv'] in ['cheby', 'chebytheta']:
        support = chebyshev_polynomials(adj, model_config['max_degree'])
        num_supports = 1 + model_config['max_degree']
    else:
        raise ValueError('Invalid argument for model_config["conv"]: ' +
                         str(model_config['conv']))

    # Define placeholders
    placeholders = {
        'support': [
            tf.sparse_placeholder(tf.float32, name='support' + str(i))
            for i in range(num_supports)
        ],
        'features':
            tf.sparse_placeholder(tf.float32, name='features')
            if isinstance(features, tf.SparseTensorValue) else
            tf.placeholder(tf.float32, shape=[None, features.shape[1]],
                           name='features'),
        'labels':
            tf.placeholder(tf.int32, name='labels',
                           shape=(None, y_train.shape[1])),
        'labels_mask':
            tf.placeholder(tf.int32, name='labels_mask'),
        'dropout':
            tf.placeholder_with_default(0., name='dropout', shape=()),
        # Helper variable for sparse dropout:
        'num_features_nonzero':
            tf.placeholder(tf.int32, name='num_features_nonzero'),
        'laplacian':
            tf.SparseTensor(
                indices=np.vstack([laplacian.row,
                                   laplacian.col]).transpose(),
                values=laplacian.data,
                dense_shape=laplacian.shape),
        'triplet':
            tf.placeholder(tf.int32, name='triplet', shape=(None, None)),
        'noise_sigma':
            tf.placeholder(tf.float32, name='noise_sigma'),
        'noise':
            tf.sparse_placeholder(tf.float32, name='noise')
            if isinstance(features, tf.SparseTensorValue) else
            tf.placeholder(tf.float32, shape=[None, features.shape[1]],
                           name='noise'),
    }
    if model_config['Model'] in [11, 13, 14, 15]:
        placeholders['label_per_sample'] = tf.placeholder(
            tf.float32, name='label_per_sample',
            shape=(None, label_per_sample.shape[1]))
        placeholders['sample2label'] = tf.placeholder(
            tf.float32, name='sample2label',
            shape=(label_per_sample.shape[1], y_train.shape[1]))

    # Create model
    model = GCN_MLP(model_config, placeholders,
                    input_dim=train_features[2][1])

    # Random initialization
    sess.run(tf.global_variables_initializer())

    # Initialize FileWriter, saver & variables in graph
    train_writer = None
    valid_writer = None
    saver = None

    # Construct feed dictionaries
    if model_config['connection'] == ['f' for i in range(len(model_config['connection']))]:
        train_feed_dict = construct_feed_dict(
            train_features, support, y_train[train_mask],
            np.ones(train_mask.sum(), dtype=np.bool), triplet,
            model_config['noise_sigma'], placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(
            val_features, support, y_val[val_mask],
            np.ones(val_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
        test_feed_dict = construct_feed_dict(
            test_features, support, y_test[test_mask],
            np.ones(test_mask.sum(), dtype=np.bool), triplet, 0, placeholders)
    else:
        train_feed_dict = construct_feed_dict(train_features, support,
                                              y_train, train_mask, triplet,
                                              model_config['noise_sigma'],
                                              placeholders)
        train_feed_dict.update(
            {placeholders['dropout']: model_config['dropout']})
        valid_feed_dict = construct_feed_dict(val_features, support, y_val,
                                              val_mask, triplet, 0,
                                              placeholders)
        test_feed_dict = construct_feed_dict(test_features, support, y_test,
                                             test_mask, triplet, 0,
                                             placeholders)
    if model_config['Model'] in [11, 13, 14, 15]:
        for fd in (train_feed_dict, valid_feed_dict, test_feed_dict):
            fd.update({placeholders['label_per_sample']: label_per_sample})
            fd.update({placeholders['sample2label']: sample2label})

    # Some support variables
    valid_loss_list = []
    max_valid_acc = 0
    max_train_acc = 0
    t_test = time.time()
    test_cost, test_acc, test_acc_of_class, prediction = sess.run(
        [model.loss, model.accuracy, model.accuracy_of_class,
         model.prediction],
        feed_dict=test_feed_dict)
    test_duration = time.time() - t_test
    timer = 0
    begin = time.time()

    if model_config['train']:
        # Train model
        print('training...')
        for step in range(model_config['epochs']):
            # Models 20/21 re-label the training set halfway through training.
            if model_config['Model'] in [20, 21] and step == model_config['epochs'] / 2:
                stored_A = stored_A + '_A_I'
                y_train, train_mask = Model20(prediction, model_config['t'],
                                              y_train, train_mask, adj,
                                              model_config['alpha'], stored_A)
                if model_config['Model'] == 21:
                    y_train, train_mask = Model16(prediction,
                                                  model_config['t2'],
                                                  y_train, train_mask)
                train_feed_dict = construct_feed_dict(
                    features, support, y_train, train_mask, triplet,
                    model_config['noise_sigma'], placeholders)
                train_feed_dict.update(
                    {placeholders['dropout']: model_config['dropout']})
                max_valid_acc = 0
                max_train_acc = 0

            # Training step
            if model_config['logdir'] and step % 100 == 0:
                # Profile this step and dump a Chrome trace.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                t = time.time()
                sess.run(model.opt_op,
                         feed_dict=train_feed_dict,
                         options=run_options,
                         run_metadata=run_metadata)
                t = time.time() - t
                train_writer.add_run_metadata(run_metadata, 'step%d' % step)
                # Create the Timeline object and write it to a JSON file.
                with open(path.join(model_config['logdir'], 'timeline.json'),
                          'w') as f:
                    f.write(timeline.Timeline(run_metadata.step_stats)
                            .generate_chrome_trace_format())
            else:
                t = time.time()
                # Inject Gaussian noise into the features for this step.
                if isinstance(train_features, tf.SparseTensorValue):
                    train_feed_dict.update({
                        placeholders['features']: tf.SparseTensorValue(
                            train_features.indices,
                            train_features.values + np.random.normal(
                                0, model_config['noise_sigma'],
                                train_features.indices.shape[0]),
                            train_features.dense_shape)
                    })
                else:
                    train_feed_dict.update({
                        placeholders['features']:
                            train_features + np.random.normal(
                                0, model_config['noise_sigma'],
                                train_features.shape)
                    })
                sess.run(model.opt_op, feed_dict=train_feed_dict)
                t = time.time() - t
            timer += t
            train_loss, train_acc, train_summary = sess.run(
                [model.loss, model.accuracy, model.summary],
                feed_dict=train_feed_dict)

            # Logging
            if model_config['logdir']:
                global_step = model.global_step.eval(session=sess)
                train_writer.add_summary(train_summary, global_step)
                valid_writer.add_summary(valid_summary, global_step)

            # If it's the best performance so far, evaluate on the test set.
            if model_config['validate']:
                valid_loss, valid_acc, valid_summary = sess.run(
                    [model.loss, model.accuracy, model.summary],
                    feed_dict=valid_feed_dict)
                valid_loss_list.append(valid_loss)
                if valid_acc >= max_valid_acc:
                    max_valid_acc = valid_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy,
                         model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')
            else:
                if train_acc >= max_train_acc:
                    max_train_acc = train_acc
                    t_test = time.time()
                    test_cost, test_acc, test_acc_of_class = sess.run(
                        [model.loss, model.accuracy,
                         model.accuracy_of_class],
                        feed_dict=test_feed_dict)
                    test_duration = time.time() - t_test
                    prediction = sess.run(model.prediction, train_feed_dict)
                    if args.verbose:
                        print('*', end='')

            # Print results
            if args.verbose:
                print("Epoch: {:04d}".format(step),
                      "train_loss= {:.3f}".format(train_loss),
                      "train_acc= {:.3f}".format(train_acc),
                      end=' ')
                if model_config['validate']:
                    print("val_loss=", "{:.3f}".format(valid_loss),
                          "val_acc= {:.3f}".format(valid_acc),
                          end=' ')
                print("time=", "{:.5f}".format(t))

            # Early stopping: stop when the latest validation loss exceeds
            # the running mean of the recent window.
            if 0 < model_config['early_stopping'] < step \
                    and valid_loss_list[-1] > np.mean(
                        valid_loss_list[-(model_config['early_stopping'] + 1):-1]):
                print("Early stopping...")
                break
        else:
            print("Optimization Finished!")

    # Testing
    print("Test set results:",
          "cost=", "{:.5f}".format(test_cost),
          "accuracy=", "{:.5f}".format(test_acc),
          "time=", "{:.5f}".format(test_duration))
    print("accuracy of each class=", test_acc_of_class)

    # Saving
    if model_config['logdir']:
        print('Save model to "{:s}"'.format(
            saver.save(sess=sess,
                       save_path=path.join(model_config['logdir'],
                                           'model.ckpt'),
                       global_step=global_step)))

    print("Total time={}s".format(time.time() - very_beginning))
    return (test_acc, test_acc_of_class, prediction, size_of_each_class,
            time.time() - begin)
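
# A hypothetical driver showing how train() is typically invoked, mirroring
# the sub-session pattern used by Models 16 and 19 above. `model_config` is
# assumed to come from the repo's configuration module; names here are
# illustrative, not the repo's actual entry point.
import tensorflow as tf

def run_once(model_config, seed=42, repeat_state=None):
    with tf.Graph().as_default():
        tf.set_random_seed(seed)
        config = tf.ConfigProto(
            intra_op_parallelism_threads=model_config.get('threads', 0))
        with tf.Session(config=config) as sess:
            # Returns (test_acc, test_acc_of_class, prediction,
            #          size_of_each_class, duration).
            return train(model_config, sess, seed, repeat_state)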