def test_multigroup_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(algorithms | tuned, loader,
                     lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")

    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(
        pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                     fraction_of_training=.8, seed=list(range(1))),
        decimals=3, delimiter=" & ", end_line="\\\\")
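# A minimal sketch (not one of the tests) of gathering the same kind of benchmark as raw
# scores instead of a LaTeX-style table, via pg.benchmark_scores as used in the
# measure-correlation experiment further below; the single-algorithm dictionary is
# illustrative only.
def _benchmark_scores_sketch():
    loader = pg.load_datasets_multiple_communities(["bigraph"], min_group_size=50)
    algorithms = {"ppr0.85": pg.PageRank(alpha=0.85, max_iters=10000, tol=1.E-9)}
    return pg.benchmark_scores(pg.benchmark(
        algorithms, loader,
        lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude)))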
def test_algorithm_selection():
    for _ in supported_backends():
        _, graph, communities = next(pg.load_datasets_multiple_communities(["bigraph"], max_group_number=3))
        train, test = pg.split(communities, 0.05)  # 5% of community members are known
        algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

        supervised_algorithm = pg.AlgorithmSelection(algorithms.values(), measure=pg.AUC, tuning_backend="numpy")
        print(supervised_algorithm.cite())
        modularity_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                                     fraction_of_training=1,
                                                     measure=pg.Modularity().as_supervised_method(),
                                                     tuning_backend="numpy")

        supervised_aucs = list()
        modularity_aucs = list()
        for seeds, members in zip(train.values(), test.values()):
            measure = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
            modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))

        assert abs(sum(supervised_aucs) / len(supervised_aucs)
                   - sum(modularity_aucs) / len(modularity_aucs)) < 0.05
def test_fair_personalizer():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairPers": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPers-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, pRule_weight=10, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_skewing=True, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, error_skewing=True, pRule_weight=10, max_residual=0)).rank(G, p, sensitive=s),
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        # allow a leeway for generalization capabilities compared to the 80% target
        assert pg.pRule(sensitive)(ranks) > 0.79
def test_hoptuner_autoregression():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.01)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC, autoregression=5).rank(training))
    assert auc3 > auc1 * 0.9
def test_lowpass_tuning():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.1)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    assert auc2 > auc1 * 0.8
def test_hoptuner_explicit_algorithm():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(lambda params: pg.GenericGraphFilter(params, krylov_dims=10),
                    basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", krylov_dims=10, measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "LFPRN": lambda G, p, s: pg.Normalize()(pg.LFPR().rank(G, p, sensitive=s)),
        "LFPRP": lambda G, p, s: pg.Normalize()(pg.LFPR(redistributor="original").rank(G, p, sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    import networkx as nx
    # TODO: networkx needed due to edge weighting by some algorithms
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"], graph_api=nx))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            # TODO: Check why FairWalk fails by that much and increase the limit.
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
    sensitive = 1 - sensitive.np
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
def test_fair_heuristics_basic():  # renamed to avoid shadowing the identically named test above
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        # TODO: Check why FairWalk fails by that much and increase the limit.
        assert pg.pRule(sensitive)(ranks) > 0.6
    sensitive = 1 - sensitive.np
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.6
def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2) * 0.9 <= auc3
def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training))
            for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs) - np.std(aucs) <= auc2
def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]),
                             max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2
def test_hoptuner_arnoldi_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="pytorch").rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(
        pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="tensorflow").rank(training))
    assert auc1 == auc2
    assert auc1 == auc3
def test_autotune_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    for tuner in [pg.HopTuner, pg.AlgorithmSelection, pg.ParameterTuner]:
        auc3 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="pytorch").rank(training))
        auc2 = pg.AUC(evaluation, exclude=training)(
            tuner(measure=pg.KLDivergence, tuning_backend="tensorflow").rank(training))
        auc1 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence).rank(training))
        # TODO: maybe fix KLDivergence implementation to not be affected by backend.epsilon()
        assert abs(auc1 - auc2) < 0.005  # different results due to different backend.epsilon()
        assert abs(auc1 - auc3) < 0.005
def test_hoptuner_arnoldi():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
def test_invalid_fairness_arguments():
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    with pytest.raises(Exception):
        # this tests that a deprecated way of applying fairwalk actually raises an exception
        pg.AdHocFairness(H, method="FairWalk").rank(graph, labels, sensitive=sensitive)
    with pytest.raises(Exception):
        pg.FairPersonalizer(H, parity_type="universal").rank(graph, labels, sensitive=sensitive)
    with pytest.raises(Exception):
        pg.FairWalk(None).transform(H.rank(graph, labels), sensitive=sensitive)
def test_fair_personalizer_mistreatment():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "Base": lambda G, p, s: H.rank(G, p),
        "FairPersMistreat": pg.Normalize(pg.FairPersonalizer(H, parity_type="mistreatment", pRule_weight=10)),
        "FairPersTPR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TPR", pRule_weight=10)),
        # TNR optimization increases mistreatment for this example
        "FairPersTNR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TNR", pRule_weight=-1)),
    }
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))])])
    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    train, test = pg.split(labels)
    # TODO: maybe try to check for greater improvement
    base_mistreatment = mistreatment(test, sensitive, train)(algorithms["Base"](graph, train, sensitive))
    for algorithm in algorithms.values():
        if algorithm != algorithms["Base"]:
            print(algorithm.cite())
            assert base_mistreatment >= mistreatment(test, sensitive, train)(algorithm(graph, train, sensitive))
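# A small self-contained demonstration (illustrative, not part of the test suite) of the
# exclusion masks used above: for binary signals, 1 - (1 - exclude) * sensitive is 0 exactly
# on nodes that are both non-excluded and sensitive, so each TPR/TNR term is evaluated on
# one protected group at a time.
def _exclusion_mask_demo():
    import numpy as np
    exclude = np.array([1, 0, 0, 1])    # e.g., training nodes removed from evaluation
    sensitive = np.array([1, 1, 0, 0])  # group membership indicator
    mask = 1 - (1 - exclude) * sensitive
    assert list(mask) == [1, 0, 1, 1]   # only node 1 (non-excluded and sensitive) remains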
import pygrank as pg

_, graph, communities = next(pg.load_datasets_multiple_communities(["EUCore"], max_group_number=3))
train, test = pg.split(communities, 0.05)  # 5% of community members are known
algorithms = pg.create_variations(pg.create_demo_filters(), pg.Normalize)

supervised_algorithm = pg.AlgorithmSelection(algorithms.values(), measure=pg.AUC)
print(supervised_algorithm.cite())
modularity_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                             fraction_of_training=1,
                                             measure=pg.Modularity().as_supervised_method())

linkauc_algorithm = None
best_evaluation = 0
# LinkAUC, because emails systemically exhibit homophily
linkAUC = pg.LinkAssessment(graph, similarity="cos", hops=1)
for algorithm in algorithms.values():
    evaluation = linkAUC.evaluate({community: algorithm(graph, seeds)
                                   for community, seeds in train.items()})
    if evaluation > best_evaluation:
        best_evaluation = evaluation
        linkauc_algorithm = algorithm

supervised_aucs = list()
modularity_aucs = list()
linkauc_aucs = list()
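# The snippet above ends after initializing its score lists. As a hedged sketch (assumed,
# not taken from the original source), evaluation could proceed like test_algorithm_selection,
# scoring all three selected algorithms under the same AUC-with-exclusion protocol:
for seeds, members in zip(train.values(), test.values()):
    measure = pg.AUC(members, exclude=seeds)
    supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
    modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))
    linkauc_aucs.append(measure(linkauc_algorithm(graph, seeds)))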
import pygrank as pg

loader = list(pg.load_datasets_multiple_communities(["bigraph", "cora", "citeseer"]))
algorithms = pg.create_variations(pg.create_demo_filters(), pg.create_many_variation_types())
algorithms = pg.create_variations(algorithms, pg.Normalize)  # add normalization to all algorithms
print("Algorithms", len(algorithms))

measures = {
    "AUC": lambda ground_truth, exclude: pg.MultiSupervised(pg.AUC, ground_truth, exclude),
    "NDCG": lambda ground_truth, exclude: pg.MultiSupervised(pg.NDCG, ground_truth, exclude),
    "Density": lambda graph: pg.MultiUnsupervised(pg.Density, graph),
    "Modularity": lambda graph: pg.MultiUnsupervised(pg.Modularity, graph),
    "LinkCC": lambda graph: pg.ClusteringCoefficient(graph, similarity="dot"),
    "LinkAUCcos": lambda graph: pg.LinkAssessment(graph, similarity="cos"),
    "HopAUCdot": lambda graph: pg.LinkAssessment(graph, similarity="dot", hops=2),
}

scores = {measure: pg.benchmark_scores(pg.benchmark(algorithms, loader, measures[measure]))
          for measure in measures}
evaluations_vs_auc = dict()
evaluations_vs_ndcg = dict()
for measure in measures:
    evaluations_vs_auc[measure] = abs(pg.SpearmanCorrelation(scores["AUC"])(scores[measure]))
    evaluations_vs_ndcg[measure] = abs(pg.SpearmanCorrelation(scores["NDCG"])(scores[measure]))

pg.benchmark_print([("Measure", "AUC corr", "NDCG corr")]
                   + [(measure, evaluations_vs_auc[measure], evaluations_vs_ndcg[measure])
                      for measure in measures])
        .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs,
        error_skewing=False, parameter_buckets=1, parity_type="impact")
    # "FFfix-C": pg.FairTradeoff(filter, .8, pRule_weight=10, error_type=pg.Mabs)
    # "FairTf": pg.FairnessTf(filter)
}
algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

# import cProfile as profile
# pr = profile.Profile()
# pr.enable()
mistreatment = lambda known_scores, sensitive_signal, exclude: \
    pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                         pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
           pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                         pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
pg.benchmark_print(pg.benchmark(algorithms,
                                pg.load_datasets_multiple_communities(datasets, max_group_number=2),
                                metric=pg.AUC, sensitive=pg.pRule,
                                fraction_of_training=seed_fractions),
                   delimiter=" & ", end_line="\\\\")
# pr.disable()
# pr.dump_stats('profile.pstat')
import pygrank as pg


def community_detection(graph, known_members_set):
    # rank each community from its known members with an automatically tuned filter
    ranks_set = [pg.ParameterTuner()(graph, known_members) for known_members in known_members_set]
    options = list(range(len(ranks_set)))
    found_set = [list() for _ in known_members_set]
    # assign each node to the community whose ranker scores it highest
    for v in graph:
        found_set[max(options, key=lambda i: ranks_set[i][v])].append(v)
    return found_set


_, graph, groups = next(pg.load_datasets_multiple_communities(["citeseer"]))
train_set, test_set = pg.split(groups, 0.5)
train_set = train_set.values()
test_set = test_set.values()

found_set = community_detection(graph, train_set)
precisions = list()
recalls = list()
for found, train, test in zip(found_set, train_set, test_set):
    train, test = set(train), set(test)
    new_nodes = [v for v in found if v not in train]
    TP = len([v for v in new_nodes if v in test])
    precisions.append(TP / len(new_nodes) if new_nodes else 0)
    recalls.append(TP / len(test))
print("Avg. precision", sum(precisions) / len(precisions))
print("Avg. recall", sum(recalls) / len(recalls))
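# Optional follow-up (a sketch, not part of the original example): macro-averaged F1 from
# the per-community precision and recall lists computed above, guarding against zero
# denominators.
f1_scores = [2 * p * r / (p + r) if p + r > 0 else 0 for p, r in zip(precisions, recalls)]
print("Avg. F1", sum(f1_scores) / len(f1_scores))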
def loader():
    return pg.load_datasets_multiple_communities(["graph9"])
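# Hedged usage sketch for the loader above: as in the tests in this suite, iterating the
# returned generator is assumed to yield (dataset_name, graph, communities) tuples.
def _loader_usage_sketch():
    for name, graph, communities in loader():
        print(name, len(communities))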