import pytest
import pygrank as pg

# supported_backends (used in the tests below) is expected to be provided by the shared test
# utilities; it iterates over the available computational backends.


def test_unsupervised_vs_auc():
    def loader():
        return pg.load_datasets_multiple_communities(["graph9"])

    algorithms = pg.create_variations(pg.create_many_filters(), pg.create_many_variation_types())
    time_scores = pg.benchmark_scores(pg.benchmark(algorithms, loader(), pg.Time))
    assert sum(time_scores) > 0

    measures = {"AUC": lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
                "NDCG": lambda ground_truth, exclude: pg.MultiSupervised(pg.NDCG, ground_truth, exclude),
                "Density": lambda graph: pg.MultiUnsupervised(pg.Density, graph),
                "Conductance": lambda graph: pg.MultiUnsupervised(pg.Conductance(autofix=True).as_unsupervised_method(), graph),
                "Modularity": lambda graph: pg.MultiUnsupervised(pg.Modularity(max_positive_samples=5).as_unsupervised_method(), graph),
                "CCcos": lambda graph: pg.ClusteringCoefficient(graph, similarity="cos", max_positive_samples=5),
                "CCdot": lambda graph: pg.ClusteringCoefficient(graph, similarity="dot", max_positive_samples=5),
                "LinkAUCcos": lambda graph: pg.LinkAssessment(graph, similarity="cos", max_positive_samples=5),
                "LinkAUCdot": lambda graph: pg.LinkAssessment(graph, similarity="dot", max_positive_samples=5),
                "HopAUCcos": lambda graph: pg.LinkAssessment(graph, similarity="cos", hops=2, max_positive_samples=5),
                "HopAUCdot": lambda graph: pg.LinkAssessment(graph, similarity="dot", hops=2, max_positive_samples=5),
                }

    # Use a for loop instead of a dict comprehension, because pytest becomes a little slow on the comprehension.
    scores = {}
    for measure in measures:
        scores[measure] = pg.benchmark_scores(pg.benchmark(algorithms, loader(), measures[measure]))

    supervised = {"AUC", "NDCG"}
    evaluations = dict()
    for measure in measures:
        evaluations[measure] = abs(pg.SpearmanCorrelation(scores["AUC"])(scores[measure]))
    # Among the unsupervised measures, LinkAUC with dot-product similarity should track AUC most closely.
    assert max([evaluations[measure] for measure in measures if measure not in supervised]) == evaluations["LinkAUCdot"]

def test_correlation_compliance():
    graph = next(pg.load_datasets_graph(["graph5"]))
    # TODO: Make spearman and pearson correlation support tensorflow
    alg1 = pg.PageRank(alpha=0.5)
    alg2 = pg.PageRank(alpha=0.99)
    pearson_ordinals = pg.PearsonCorrelation(pg.Ordinals(alg1)(graph))(pg.Ordinals(alg2)(graph))
    spearman = pg.SpearmanCorrelation(alg1(graph))(alg2(graph))
    assert pearson_ordinals == spearman

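# Illustrative cross-check (an addition for clarity, not one of pygrank's own tests): Spearman
# correlation is by definition Pearson correlation computed on rank-transformed values, which is
# exactly what test_correlation_compliance verifies through pg.Ordinals. The sketch below checks
# the same identity with scipy, assuming numpy and scipy are available; the function name is
# hypothetical.
def test_spearman_is_pearson_on_ranks_sketch():
    import numpy as np
    from scipy import stats

    x = np.array([0.1, 0.5, 0.3, 0.9])
    y = np.array([0.2, 0.35, 0.4, 0.8])
    # Rank-transform both score vectors, then compare Pearson on the ranks against Spearman directly.
    ranks_x = stats.rankdata(x)
    ranks_y = stats.rankdata(y)
    assert abs(stats.spearmanr(x, y)[0] - stats.pearsonr(ranks_x, ranks_y)[0]) < 1.E-10
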
def test_venuerank():
    graph = next(pg.load_datasets_graph(["bigraph"]))
    for _ in supported_backends():
        venuerank = pg.PageRank(alpha=0.85, max_iters=10000, converge_to_eigenvectors=True, tol=1.E-12)
        venuerank_result = venuerank.rank(graph)
        small_restart = pg.PageRank(alpha=0.99, max_iters=10000, tol=1.E-12)
        small_restart_result = small_restart.rank(graph)
        # assert venuerank.convergence.iteration < small_restart.convergence.iteration / 2
        # Converging to eigenvectors should produce node orderings nearly identical to PageRank
        # with a very small restart probability (alpha close to 1).
        corr = pg.SpearmanCorrelation(pg.Ordinals()(venuerank_result))(pg.Ordinals()(small_restart_result))
        assert corr > 0.99

def test_rank_order_convergence():
    graph = next(pg.load_datasets_graph(["graph9"]))
    algorithm1 = pg.Ordinals(pg.PageRank(0.85, tol=1.E-12, max_iters=1000))
    algorithm2 = pg.Ordinals(
        pg.PageRank(0.85, convergence=pg.RankOrderConvergenceManager(0.85)))
    algorithm3 = pg.Ordinals(
        pg.PageRank(0.85, convergence=pg.RankOrderConvergenceManager(0.85, 0.99, "fraction_of_walks")))
    for _ in supported_backends():
        ranks1 = algorithm1.rank(graph, {"A": 1})
        ranks2 = algorithm2.rank(graph, {"A": 1})
        ranks3 = algorithm3.rank(graph, {"A": 1})
        assert pg.SpearmanCorrelation(ranks1)(ranks2) > 0.95
        assert pg.SpearmanCorrelation(ranks1)(ranks3) > 0.95
        assert pg.SpearmanCorrelation(ranks3)(ranks2) > 0.95
        assert "17 iterations" in str(algorithm3.ranker.convergence)
    with pytest.raises(Exception):
        algorithm = pg.Ordinals(
            pg.PageRank(0.85, convergence=pg.RankOrderConvergenceManager(0.85, 0.99, "unknown")))
        algorithm.rank(graph, {"A": 1})

import pygrank as pg

loader = list(pg.load_datasets_multiple_communities(["bigraph", "cora", "citeseer"]))
algorithms = pg.create_variations(pg.create_demo_filters(), pg.create_many_variation_types())
algorithms = pg.create_variations(algorithms, pg.Normalize)  # add normalization to all algorithms
print("Algorithms", len(algorithms))

measures = {"AUC": lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
            "NDCG": lambda ground_truth, exclude: pg.MultiSupervised(pg.NDCG, ground_truth, exclude),
            "Density": lambda graph: pg.MultiUnsupervised(pg.Density, graph),
            "Modularity": lambda graph: pg.MultiUnsupervised(pg.Modularity, graph),
            "LinkCC": lambda graph: pg.ClusteringCoefficient(graph, similarity="dot"),
            "LinkAUCcos": lambda graph: pg.LinkAssessment(graph, similarity="cos"),
            "HopAUCdot": lambda graph: pg.LinkAssessment(graph, similarity="dot", hops=2),
            }

scores = {measure: pg.benchmark_scores(pg.benchmark(algorithms, loader, measures[measure])) for measure in measures}
evaluations_vs_auc = dict()
evaluations_vs_ndcg = dict()
for measure in measures:
    evaluations_vs_auc[measure] = abs(pg.SpearmanCorrelation(scores["AUC"])(scores[measure]))
    evaluations_vs_ndcg[measure] = abs(pg.SpearmanCorrelation(scores["NDCG"])(scores[measure]))

pg.benchmark_print([("Measure", "AUC corr", "NDCG corr")]
                   + [(measure, evaluations_vs_auc[measure], evaluations_vs_ndcg[measure]) for measure in measures])