def test_seed_oversampling():
    """Seed oversampling variants should never degrade base PageRank quality on graph9."""
    _, graph, members = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        train_nodes, test_nodes = pg.split(list(members), training_samples=2)
        training = pg.to_signal(graph, {v: 1 for v in train_nodes})
        evaluation = pg.to_signal(graph, {v: 1 for v in test_nodes})
        for metric in (pg.NDCG, pg.AUC):
            base_ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            base_result = metric(evaluation, training).evaluate(base_ranks)
            so_ranks = pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training)
            so_result = metric(evaluation, training).evaluate(so_ranks)
            bso_ranks = pg.BoostedSeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training)
            bso_result = metric(evaluation, training).evaluate(bso_ranks)
            # Each oversampling layer should be at least as good as the previous.
            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)
        # Remaining variants only need to run without raising.
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "top").rank(graph, training)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000), "neighbors").rank(graph, training)
        pg.BoostedSeedOversampling(
            pg.PageRank(max_iters=1000), 'naive', oversample_from_iteration='original'
        ).rank(graph, {"A": 1})
def test_seed_oversampling_arguments():
    """Invalid oversampling configurations or seed signals should raise."""
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    with pytest.raises(Exception):
        # unsupported oversampling method name
        pg.SeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1})
    with pytest.raises(Exception):
        # non-binary seed values are rejected
        pg.SeedOversampling(pg.PageRank()).rank(graph, {"A": 0.1, "B": 1})
    with pytest.raises(Exception):
        # unsupported boosting objective
        pg.BoostedSeedOversampling(pg.PageRank(), 'unknown').rank(graph, {"A": 1})
    with pytest.raises(Exception):
        # unsupported oversampling iteration selector
        pg.BoostedSeedOversampling(
            pg.PageRank(), 'naive', oversample_from_iteration='unknown'
        ).rank(graph, {"B": 1})
def test_postprocessor_citations():
    """Wrapping an algorithm should change its citation exactly when the wrapper adds semantics."""
    base_cite = pg.PageRank().cite()
    # Tautology is a no-op wrapper, so its citation matches the wrapped ranker's.
    assert pg.Tautology(pg.PageRank()).cite() == base_cite
    assert pg.Normalize(pg.PageRank()).cite() != base_cite
    assert pg.Normalize(pg.PageRank(), "sum").cite() != pg.Normalize(pg.PageRank(), "range").cite()
    assert pg.Ordinals(pg.PageRank()).cite() != pg.Normalize(pg.PageRank(), "sum").cite()
    assert pg.Transformer(pg.PageRank()).cite() != base_cite
    assert pg.Threshold(pg.PageRank()).cite() != base_cite
    assert pg.Sweep(pg.PageRank()).cite() != base_cite
    assert pg.BoostedSeedOversampling(pg.PageRank()).cite() != base_cite
    assert pg.SeedOversampling(pg.PageRank()).cite() != base_cite
    # Different hyperparameters of the same postprocessor also yield distinct citations.
    assert pg.SeedOversampling(pg.PageRank(), method="safe").cite() \
        != pg.SeedOversampling(pg.PageRank(), method="top").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), objective="partial").cite() \
        != pg.BoostedSeedOversampling(pg.PageRank(), objective="naive").cite()
    assert pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="previous").cite() \
        != pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="original").cite()
def test_explicit_citations():
    """Spot-check that ``cite()`` output contains the expected bibliography keys and phrases."""
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # Raw string: "\c" in a plain literal is an invalid escape sequence
    # (SyntaxWarning since Python 3.12, slated to become a SyntaxError).
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    # Wrappers should embed the wrapped algorithm's citation.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    # Fairness-aware postprocessors.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
def test_krylov_space_oversampling():
    """Oversampling should not worsen conductance even when combined with
    Krylov-space approximation and renormalization — a deliberately complicated setting."""
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    ranker = pg.HeatKernel(
        t=5,  # the number of hops away HeatKernel places maximal importance on
        krylov_dims=5,
        normalization="symmetric",
        renormalize=True,
    )
    for _ in supported_backends():
        seeds = {node: 1. for node in list(community)[:10]}
        oversampled = pg.SeedOversampling(ranker)
        pg.Normalize(oversampled)(graph, seeds)
        conductance = pg.Conductance()
        plain_quality = conductance(pg.Normalize(ranker)(graph, seeds))
        oversampled_quality = conductance(pg.Normalize(oversampled)(graph, seeds))
        # Allow a small numeric tolerance.
        assert plain_quality >= oversampled_quality - 5.E-6
# NOTE(review): this chunk begins mid-signature — the `def rank(self, graph, ...` header
# of the enclosing method (and its class, presumably the `StochasticSeedOversampling`
# referenced by the benchmark below) lies outside the visible source, so the fragment
# is documented but otherwise left untouched.
personalization: pg.GraphSignalData = None, **kwargs):
    # Convert the input to a graph signal and work on its backing graph.
    personalization = pg.to_signal(graph, personalization)
    graph = personalization.graph
    ranks = self.ranker(personalization)
    ret = 0
    # Find the smallest rank threshold whose cumulative mass exceeds 10% of the total.
    total_sum = pg.sum(ranks)
    accum_sum = 0
    for threshold in sorted(ranks.values()):
        accum_sum += threshold
        if accum_sum > total_sum * 0.1:
            break
    # Re-run the base ranker seeded from each node at or above the threshold,
    # accumulating the partial results weighted by the node's original rank.
    for i, v in enumerate(ranks):
        pg.utils.log(f"{i}/{len(ranks)}")
        if ranks[v] >= threshold:
            partial = ranks >> pg.Threshold(ranks[v], inclusive=True) >> self.ranker
            ret = partial * ranks[v] + ret
    return ret

# Benchmark the stochastic variant against the library's oversampling postprocessors.
algs = {
    "ppr": pg.PageRank(0.9),
    "ppr+so": pg.PageRank(0.9) >> pg.SeedOversampling(),
    "ppr+bso": pg.PageRank(0.9) >> pg.BoostedSeedOversampling(),
    "ppr+sso": pg.PageRank(0.9) >> StochasticSeedOversampling(),
}
loader = pg.load_datasets_one_community(["citeseer"])
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, 3))
import pygrank as pg

# Heat kernel filter used as the base node ranking algorithm.
base = pg.HeatKernel(
    t=5,  # the number of hops away HeatKernel places maximal importance on
    normalization="symmetric",
    renormalize=True,
)

# Seed nodes of a known EUCore community; nodes absent from the
# personalization dict are assumed to be zeroes by pygrank.
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
personalization = {node: 1. for node in community}

variants = {
    "HK5": base,
    "HK5+Oversampling": pg.SeedOversampling(base),
}
variants = {**variants, **pg.create_variations(variants, {"+Sweep": pg.Sweep})}
variants = pg.create_variations(variants, {"": pg.Normalize})

conductance = pg.Conductance()
for name, ranker in variants.items():
    scores = ranker(graph, personalization)  # returns a dict-like pg.GraphSignal
    print(name, conductance(scores))
import pygrank as pg

kernel = pg.HeatKernel(
    t=5,  # the number of hops to place maximal importance on
    normalization="symmetric",
    renormalize=True,
)
rankers = {
    "hk5": kernel,
    "hk5+oversampling": pg.SeedOversampling(kernel),
}
rankers = {**rankers, **pg.create_variations(rankers, {"+sweep": pg.Sweep})}
rankers = pg.create_variations(rankers, pg.Normalize)

_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
seeds = {node: 1. for node in community}  # missing scores considered zero

conductance = pg.Conductance()  # smaller means tightly-knit stochastic community
for name, ranker in rankers.items():
    scores = ranker(graph, seeds)  # returns a dict-like pg.GraphSignal
    pg.benchmark_print_line(name, conductance(scores), tabs=[20, 5])  # pretty