def main():
    """Evaluate a trained embedding on directed-graph reconstruction.

    Loads the dataset named by the parsed arguments, treats every edge of
    the (directed) graph as a positive example, samples an equal number of
    non-edges as negatives, computes rank / AP / AUROC / mAP / precision@k
    metrics, and pickles the results to ``<test_results_dir>/<seed>.pkl``.
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True already tolerates a pre-existing directory; the former
    # os.path.exists() pre-check was redundant and race-prone.
    os.makedirs(test_results_dir, exist_ok=True)

    test_results_filename = os.path.join(test_results_dir,
                                         "{}.pkl".format(args.seed))

    # Force directed loading for the reconstruction task.
    args.directed = True

    graph, _, _ = load_data(args)
    print("Loaded dataset")
    print()

    # Convert a NetworkX digraph into a boolean sparse adjacency matrix
    # (node order fixed by sorting) so edges can be read off via nonzero().
    if isinstance(graph, nx.DiGraph):
        graph = nx.adjacency_matrix(graph, nodelist=sorted(graph),
                                    weight=None).astype(bool)

    random.seed(args.seed)

    # Every edge of the graph is a positive test example.
    test_edges = list(zip(*graph.nonzero()))
    num_edges = len(test_edges)

    nodes = set(range(graph.shape[0]))
    del graph  # release the adjacency matrix before sampling negatives
    # Sample as many negative examples (non-edges) as there are edges.
    test_non_edges = sample_non_edges(nodes, set(test_edges), num_edges)

    test_edges = np.array(test_edges)
    test_non_edges = np.array(test_non_edges)

    print("number of test edges:", len(test_edges))
    print("number of test non edges:", len(test_non_edges))

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    test_results = dict()

    # Ranking metrics: mean rank, average precision, AUROC.
    (mean_rank_recon, ap_recon,
     roc_recon) = evaluate_rank_AUROC_AP(embedding, test_edges, test_non_edges,
                                         args.dist_fn)

    test_results.update({
        "mean_rank_recon": mean_rank_recon,
        "ap_recon": ap_recon,
        "roc_recon": roc_recon
    })

    # Mean average precision and precision@k for reconstruction.
    map_recon, precisions_at_k = evaluate_mean_average_precision(
        embedding, test_edges, args.dist_fn)
    test_results.update({"map_recon": map_recon})

    for k, pk in precisions_at_k.items():
        print("precision at", k, pk)
    test_results.update(
        {"p@{}".format(k): pk
         for k, pk in precisions_at_k.items()})

    print("saving test results to {}".format(test_results_filename))

    test_results = pd.Series(test_results)
    with open(test_results_filename, "wb") as f:
        pkl.dump(test_results, f, pkl.HIGHEST_PROTOCOL)

    print("done")
# Exemplo n.º 2
# 0
def main():
    """Evaluate a trained embedding on link prediction (undirected graph).

    Skips work if results for this seed already exist, otherwise loads the
    pre-computed held-out test edges / non-edges from TSV files, computes
    rank / AP / AUROC / mAP / precision@k metrics, and appends one row for
    this seed to the shared ``test_results.csv`` under a file lock.
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True already tolerates a pre-existing directory; the former
    # os.path.exists() pre-check was redundant and race-prone.
    os.makedirs(test_results_dir, exist_ok=True)
    test_results_filename = os.path.join(test_results_dir, "test_results.csv")

    # Results for this seed may already have been recorded by a prior run.
    if check_complete(test_results_filename, args.seed):
        return

    test_results_lock_filename = os.path.join(test_results_dir,
                                              "test_results.lock")
    touch(test_results_lock_filename)

    graph, _, _ = load_data(args)
    assert not nx.is_directed(graph)
    print("Loaded dataset")
    print()

    seed = args.seed
    random.seed(seed)

    removed_edges_dir = args.removed_edges_dir

    # Held-out positives / negatives were written by the edge-removal step.
    test_edgelist_fn = os.path.join(removed_edges_dir, "test_edges.tsv")
    test_non_edgelist_fn = os.path.join(removed_edges_dir,
                                        "test_non_edges.tsv")

    print("loading test edges from {}".format(test_edgelist_fn))
    print("loading test non-edges from {}".format(test_non_edgelist_fn))

    test_edges = read_edgelist(test_edgelist_fn)
    test_non_edges = read_edgelist(test_non_edgelist_fn)

    test_edges = np.array(test_edges)
    test_non_edges = np.array(test_non_edges)

    print("number of test edges:", len(test_edges))
    print("number of test non edges:", len(test_non_edges))

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    test_results = dict()

    # Ranking metrics: mean rank, average precision, AUROC.
    (mean_rank_lp, ap_lp,
     roc_lp) = evaluate_rank_AUROC_AP(embedding, test_edges, test_non_edges,
                                      args.dist_fn)

    test_results.update({
        "mean_rank_lp": mean_rank_lp,
        "ap_lp": ap_lp,
        "roc_lp": roc_lp
    })

    # mAP / precision@k; the remaining training edges are passed so they
    # can be excluded from the candidate ranking.
    map_lp, precisions_at_k = evaluate_mean_average_precision(
        embedding, test_edges, args.dist_fn, graph_edges=graph.edges())

    test_results.update({"map_lp": map_lp})

    for k, pk in precisions_at_k.items():
        print("precision at", k, pk)
    test_results.update(
        {"p@{}".format(k): pk
         for k, pk in precisions_at_k.items()})

    print("saving test results to {}".format(test_results_filename))

    # Append under the lock so concurrently running seeds do not corrupt
    # the shared CSV.
    threadsafe_save_test_results(test_results_lock_filename,
                                 test_results_filename,
                                 seed,
                                 data=test_results)

    print("done")
# Exemplo n.º 3
# 0
def main():
    """Evaluate a trained embedding on directed link prediction.

    Loads the (directed) graph, reads the pre-computed held-out test
    edges / non-edges from TSV files, computes rank / AP / AUROC / mAP /
    precision@k metrics, and pickles the results to
    ``<test_results_dir>/<seed>.pkl``.
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True already tolerates a pre-existing directory; the former
    # os.path.exists() pre-check was redundant and race-prone.
    os.makedirs(test_results_dir, exist_ok=True)
    test_results_filename = os.path.join(test_results_dir,
                                         "{}.pkl".format(args.seed))

    # Force directed loading for this task.
    args.directed = True

    graph, _, _ = load_data(args)
    print("Loaded dataset")
    print()

    # Convert a NetworkX digraph into a boolean sparse adjacency matrix
    # (node order fixed by sorting) so edges can be read off via nonzero().
    if isinstance(graph, nx.DiGraph):
        graph = nx.adjacency_matrix(graph, nodelist=sorted(graph),
                                    weight=None).astype(bool)

    N = graph.shape[0]
    print("network has", N, "nodes")

    # Remaining (training) edges, later excluded from the mAP ranking.
    graph_edges = list(zip(*graph.nonzero()))
    del graph  # release the adjacency matrix

    seed = args.seed
    random.seed(seed)

    removed_edges_dir = args.removed_edges_dir

    # Held-out positives / negatives were written by the edge-removal step.
    test_edgelist_fn = os.path.join(removed_edges_dir, "test_edges.tsv")
    test_non_edgelist_fn = os.path.join(removed_edges_dir,
                                        "test_non_edges.tsv")

    print("loading test edges from {}".format(test_edgelist_fn))
    print("loading test non-edges from {}".format(test_non_edgelist_fn))

    test_edges = read_edgelist(test_edgelist_fn)
    test_non_edges = read_edgelist(test_non_edgelist_fn)

    test_edges = np.array(test_edges)
    test_non_edges = np.array(test_non_edges)

    print("number of test edges:", len(test_edges))
    print("number of test non edges:", len(test_non_edges))

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    test_results = dict()

    # Ranking metrics: mean rank, average precision, AUROC.
    (mean_rank_lp, ap_lp,
     roc_lp) = evaluate_rank_AUROC_AP(embedding, test_edges, test_non_edges,
                                      args.dist_fn)

    test_results.update({
        "mean_rank_lp": mean_rank_lp,
        "ap_lp": ap_lp,
        "roc_lp": roc_lp
    })

    # mAP / precision@k, excluding training edges from the ranking.
    map_lp, precisions_at_k = evaluate_mean_average_precision(
        embedding, test_edges, args.dist_fn, graph_edges=graph_edges)

    test_results.update({"map_lp": map_lp})

    for k, pk in precisions_at_k.items():
        print("precision at", k, pk)
    test_results.update(
        {"p@{}".format(k): pk
         for k, pk in precisions_at_k.items()})

    print("saving test results to {}".format(test_results_filename))

    test_results = pd.Series(test_results)

    with open(test_results_filename, "wb") as f:
        pkl.dump(test_results, f, pkl.HIGHEST_PROTOCOL)

    print("done")
# Exemplo n.º 4
# 0
def main():
    """Evaluate a trained embedding on node classification.

    Filters out under-represented classes/labels, projects hyperbolic
    embeddings (hyperboloid or Poincare) into the Klein model, runs
    label-fraction and k-fold classification evaluations, and appends one
    row for this seed to the shared ``test_results.csv`` under a file lock.
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True already tolerates a pre-existing directory; the former
    # os.path.exists() pre-check was redundant and race-prone.
    os.makedirs(test_results_dir, exist_ok=True)
    test_results_filename = os.path.join(test_results_dir,
                                         "test_results.csv")

    # Results for this seed may already have been recorded by a prior run.
    if check_complete(test_results_filename, args.seed):
        return

    test_results_lock_filename = os.path.join(test_results_dir,
                                              "test_results.lock")
    touch(test_results_lock_filename)

    _, _, node_labels = load_data(args)
    print("Loaded dataset")

    embedding = load_embedding(args.dist_fn, args.embedding_directory)

    min_count = 10
    if node_labels.shape[1] == 1:
        # Single-label case: drop every node whose class has fewer than
        # min_count members.
        label_counts = Counter(node_labels.flatten())
        mask = np.array([label_counts[l] >= min_count
                         for l in node_labels.flatten()])
        embedding = embedding[mask]
        node_labels = node_labels[mask]
    else:
        # Multi-label case: drop rare label columns first, then drop any
        # node left with no labels at all.
        assert node_labels.shape[1] > 1
        idx = node_labels.sum(0) >= min_count
        node_labels = node_labels[:, idx]
        idx = node_labels.any(-1)
        embedding = embedding[idx]
        node_labels = node_labels[idx]

    # Project hyperbolic embeddings into the Klein model before
    # classification.
    if args.dist_fn == "hyperboloid":
        print("loaded a hyperboloid embedding")
        print("projecting from hyperboloid to poincare")
        embedding = hyperboloid_to_poincare_ball(embedding)
        print("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)

    elif args.dist_fn == "poincare":
        print("loaded a poincare embedding")
        print("projecting from poincare to klein")
        embedding = poincare_ball_to_klein(embedding)

    test_results = {}

    # F1 scores at a range of labelled-training-set percentages.
    label_percentages, f1_micros, f1_macros = \
        evaluate_node_classification(embedding, node_labels)

    for label_percentage, f1_micro, f1_macro in zip(label_percentages,
                                                    f1_micros, f1_macros):
        print("{:.2f}".format(label_percentage),
              "micro = {:.2f}".format(f1_micro),
              "macro = {:.2f}".format(f1_macro))
        test_results.update({"{:.2f}_micro".format(label_percentage): f1_micro})
        test_results.update({"{:.2f}_macro".format(label_percentage): f1_macro})

    # k-fold cross-validated label-classification metrics.
    k = 10
    k_fold_roc, k_fold_f1, k_fold_precision, k_fold_recall = \
        evaluate_kfold_label_classification(embedding, node_labels, k=k)

    test_results.update({
        "{}-fold-roc".format(k): k_fold_roc,
        "{}-fold-f1".format(k): k_fold_f1,
        "{}-fold-precision".format(k): k_fold_precision,
        "{}-fold-recall".format(k): k_fold_recall,
    })

    print("saving test results to {}".format(test_results_filename))
    # Append under the lock so concurrently running seeds do not corrupt
    # the shared CSV.
    threadsafe_save_test_results(test_results_lock_filename,
                                 test_results_filename,
                                 args.seed,
                                 data=test_results)
# Exemplo n.º 5
# 0
def main():
    """Evaluate a trained embedding on undirected-graph reconstruction.

    Uses every edge (in both directions) as a positive example, samples an
    equal number of non-edges as negatives, computes rank / AP / AUROC /
    mAP / precision@k metrics, and appends one row for this seed to the
    shared ``test_results.csv`` under a file lock.
    """
    args = parse_args()

    test_results_dir = args.test_results_dir
    # exist_ok=True already tolerates a pre-existing directory; the former
    # os.path.exists() pre-check was redundant and race-prone.
    os.makedirs(test_results_dir, exist_ok=True)
    test_results_filename = os.path.join(test_results_dir,
                                         "test_results.csv")

    # Results for this seed may already have been recorded by a prior run.
    if check_complete(test_results_filename, args.seed):
        return

    test_results_lock_filename = os.path.join(test_results_dir,
                                              "test_results.lock")
    touch(test_results_lock_filename)

    graph, _, _ = load_data(args)
    assert not args.directed
    assert not nx.is_directed(graph)
    print("Loaded dataset")
    print()

    random.seed(args.seed)

    # Positives: every edge, plus its reverse, since the graph is
    # undirected but edges are scored as ordered pairs.
    test_edges = list(graph.edges())
    test_edges += [(v, u) for u, v in test_edges]

    num_edges = len(test_edges)

    # Sample as many negative examples (non-edges) as there are positives.
    test_non_edges = sample_non_edges(graph,
                                      set(test_edges),
                                      num_edges)

    test_edges = np.array(test_edges)
    test_non_edges = np.array(test_non_edges)

    print("number of test edges:", len(test_edges))
    print("number of test non edges:", len(test_non_edges))

    embedding = load_embedding(args.dist_fn,
                               args.embedding_directory)

    test_results = dict()

    # Ranking metrics: mean rank, average precision, AUROC.
    (mean_rank_recon, ap_recon,
     roc_recon) = evaluate_rank_AUROC_AP(embedding,
                                         test_edges,
                                         test_non_edges,
                                         args.dist_fn)

    test_results.update({
        "mean_rank_recon": mean_rank_recon,
        "ap_recon": ap_recon,
        "roc_recon": roc_recon
    })

    # Mean average precision and precision@k for reconstruction.
    map_recon, precisions_at_k = evaluate_mean_average_precision(
        embedding,
        test_edges,
        args.dist_fn)
    test_results.update({"map_recon": map_recon})

    for k, pk in precisions_at_k.items():
        print("precision at", k, pk)
    test_results.update({"p@{}".format(k): pk
                         for k, pk in precisions_at_k.items()})

    print("saving test results to {}".format(
        test_results_filename))

    # Append under the lock so concurrently running seeds do not corrupt
    # the shared CSV.
    threadsafe_save_test_results(test_results_lock_filename,
                                 test_results_filename,
                                 args.seed,
                                 data=test_results)

    print("done")