def main():
    """Train a hyperboloid embedding on struc2vec walks and plot the result.

    Reads a weighted edgelist, generates struc2vec random walks, trains the
    hyperbolic softmax model, then projects the learned embedding to the
    Poincare ball and saves a 2-D visualisation to "Hyper.pdf".
    """
    args = parse_args()

    # Validate arguments up front, before any expensive graph/walk work
    # (matches the validation-first ordering used by the other drivers).
    assert not (args.visualise and args.embedding_dim > 2), \
        "Can only visualise two dimensions"
    assert args.embedding_path is not None, \
        "you must specify a path to save embedding"

    print ("Configured paths")

    # The edgelist is read twice deliberately: Struc2Vec runs on the
    # string-node graph, while the rest of the pipeline uses int node ids.
    graph = nx.read_weighted_edgelist(args.edgelist, delimiter=" ",
                                      nodetype=None, create_using=nx.Graph())
    graph_int = nx.read_weighted_edgelist(args.edgelist, delimiter=" ",
                                          nodetype=int, create_using=nx.Graph())

    model = Struc2Vec(graph.to_directed(), walk_length=10, num_walks=8,
                      workers=8, verbose=40)
    walks = model.return_walk_list()
    # Convert walk node ids from str to int so they match graph_int.
    walks = [[int(node) for node in walk] for walk in walks]
    graph = graph_int

    configure_paths(args)

    # build model
    num_nodes = len(graph)
    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)

    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)
    model.compile(optimizer=optimizer, loss=loss,
                  target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss", patience=args.patience, verbose=True),
        Checkpointer(epoch=initial_epoch,
                     nodes=sorted(graph.nodes()),
                     embedding_directory=args.embedding_path)
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph, walks, args)

    if args.use_generator:
        print ("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples, probs, model, args)
        model.fit_generator(training_generator,
                            workers=args.workers,
                            max_queue_size=10,
                            use_multiprocessing=args.workers > 0,
                            epochs=args.num_epochs,
                            steps_per_epoch=len(training_generator),
                            initial_epoch=initial_epoch,
                            verbose=args.verbose,
                            callbacks=callbacks)
    else:
        print ("Training without data generator")
        # Concatenate positive and negative sample ids along the last axis;
        # targets are all-zero because the first column is the positive class.
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)
        model.fit(train_x, train_y,
                  shuffle=True,
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  initial_epoch=initial_epoch,
                  verbose=args.verbose,
                  callbacks=callbacks)

    print ("Training complete")

    embedding = model.get_weights()[0]
    embedding = hyperboloid_to_poincare_ball(embedding)
    print(embedding)

    # Draw the unit circle (boundary of the Poincare disk) around the points.
    ax = plot(embedding)
    theta = np.linspace(0, 2 * np.pi, 200)
    ax.plot(np.cos(theta), np.sin(theta), color="black", linewidth=2)
    ax.axis("equal")
    ax.figure.savefig("Hyper.pdf", bbox_inches='tight')
def main():
    """Train a hyperbolic embedding from loaded features and optionally visualise it.

    Loads the dataset, seeds all RNGs for reproducibility, trains the
    hyperbolic softmax model (with or without a data generator), and, when
    requested, draws the 2-D Poincare-disk visualisation of the embedding.
    """
    args = parse_args()

    # Validate arguments before any expensive data loading.
    assert not (args.visualise and args.embedding_dim > 2), \
        "Can only visualise two dimensions"
    assert args.embedding_path is not None, \
        "you must specify a path to save embedding"
    if not args.no_walks:
        assert args.walk_path is not None, \
            "you must specify a path to save walks"

    # Seed python, numpy and tensorflow RNGs for reproducible runs.
    random.seed(args.seed)
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    graph, features, node_labels = load_data(args)
    print ("Loaded dataset")

    configure_paths(args)
    print ("Configured paths")

    # build model
    num_nodes = len(graph)
    model = build_model(num_nodes, args)
    model, initial_epoch = load_weights(model, args)

    optimizer = ExponentialMappingOptimizer(lr=args.lr)
    loss = hyperbolic_softmax_loss(sigma=args.sigma)
    model.compile(optimizer=optimizer, loss=loss,
                  target_tensors=[tf.placeholder(dtype=tf.int32)])
    model.summary()

    callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor="loss", patience=args.patience, verbose=True),
        Checkpointer(epoch=initial_epoch,
                     nodes=sorted(graph.nodes()),
                     embedding_directory=args.embedding_path)
    ]

    positive_samples, negative_samples, probs = \
        determine_positive_and_negative_samples(graph, features, args)
    del features  # remove features reference to free up memory

    if args.use_generator:
        print ("Training with data generator with {} worker threads".format(args.workers))
        training_generator = TrainingDataGenerator(positive_samples, probs, model, args)
        model.fit_generator(training_generator,
                            workers=args.workers,
                            max_queue_size=10,
                            use_multiprocessing=args.workers > 0,
                            epochs=args.num_epochs,
                            steps_per_epoch=len(training_generator),
                            initial_epoch=initial_epoch,
                            verbose=args.verbose,
                            callbacks=callbacks)
    else:
        print ("Training without data generator")
        # Concatenate positive and negative sample ids along the last axis;
        # targets are all-zero because the first column is the positive class.
        train_x = np.append(positive_samples, negative_samples, axis=-1)
        train_y = np.zeros([len(train_x), 1, 1], dtype=np.int32)
        model.fit(train_x, train_y,
                  shuffle=True,
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  initial_epoch=initial_epoch,
                  verbose=args.verbose,
                  callbacks=callbacks)

    print ("Training complete")

    if args.visualise:
        embedding = model.get_weights()[0]
        # A 3-column embedding lives on the hyperboloid; project it to the
        # 2-D Poincare disk for drawing.
        if embedding.shape[1] == 3:
            print ("projecting to poincare ball")
            embedding = hyperboloid_to_poincare_ball(embedding)
        draw_graph(graph, embedding, node_labels,
                   path="2d-poincare-disk-visualisation.png")
def main():
    """Evaluate a saved embedding on node classification and write results to CSV.

    Loads the embedding, projects it to the Klein model, filters out
    under-represented classes, runs label-percentage and k-fold
    classification benchmarks, and appends the scores to a shared,
    lock-protected results file (one row per seed).
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True makes a prior existence check redundant (and race-free).
    os.makedirs(test_results_dir, exist_ok=True)
    test_results_filename = os.path.join(test_results_dir, "test_results.csv")

    # Skip the run entirely if this seed has already been evaluated.
    if check_complete(test_results_filename, args.seed):
        return

    test_results_lock_filename = os.path.join(test_results_dir, "test_results.lock")
    touch(test_results_lock_filename)

    _, _, node_labels = load_data(args)
    print ("Loaded dataset")

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    # Drop classes with fewer than min_count labelled nodes so every class
    # has enough examples for the classification splits.
    min_count = 10
    if node_labels.shape[1] == 1:
        # Single-label case: remove any node belonging to an
        # under-represented class.
        label_counts = Counter(node_labels.flatten())
        mask = np.array([label_counts[l] >= min_count
                         for l in node_labels.flatten()])
        embedding = embedding[mask]
        node_labels = node_labels[mask]
    else:
        # Multi-label case: drop rare label columns, then drop nodes left
        # with no labels at all.
        assert node_labels.shape[1] > 1
        idx = node_labels.sum(0) >= min_count
        node_labels = node_labels[:, idx]
        idx = node_labels.any(-1)
        embedding = embedding[idx]
        node_labels = node_labels[idx]

    # Both hyperbolic formats are evaluated in the Klein model.
    if args.dist_fn == "hyperboloid":
        print ("loaded a hyperboloid embedding")
        print ("projecting from hyperboloid to poincare")
        embedding = hyperboloid_to_poincare_ball(embedding)
        print ("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)
    elif args.dist_fn == "poincare":
        print ("loaded a poincare embedding")
        print ("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)

    test_results = {}

    # Micro/macro F1 across increasing labelled-training percentages.
    label_percentages, f1_micros, f1_macros = \
        evaluate_node_classification(embedding, node_labels)
    for label_percentage, f1_micro, f1_macro in zip(label_percentages,
                                                    f1_micros,
                                                    f1_macros):
        print ("{:.2f}".format(label_percentage),
               "micro = {:.2f}".format(f1_micro),
               "macro = {:.2f}".format(f1_macro))
        test_results.update({"{:.2f}_micro".format(label_percentage): f1_micro})
        test_results.update({"{:.2f}_macro".format(label_percentage): f1_macro})

    # k-fold classification metrics.
    k = 10
    k_fold_roc, k_fold_f1, k_fold_precision, k_fold_recall = \
        evaluate_kfold_label_classification(embedding, node_labels, k=k)
    test_results.update({
        "{}-fold-roc".format(k): k_fold_roc,
        "{}-fold-f1".format(k): k_fold_f1,
        "{}-fold-precision".format(k): k_fold_precision,
        "{}-fold-recall".format(k): k_fold_recall,
    })

    print ("saving test results to {}".format(test_results_filename))
    # The lock file serialises concurrent writers appending to the shared CSV.
    threadsafe_save_test_results(test_results_lock_filename,
                                 test_results_filename,
                                 args.seed,
                                 data=test_results)