def test_threshold_api_types():
    """Check that gap-thresholded sweep conductance is no worse than gap-transform.

    NOTE(review): this function was originally also named ``test_threshold``,
    which is redefined later in the file; the later definition shadowed this
    one, so pytest never collected it. Renamed so both tests run.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        # Default Threshold applied through .rank(...) on a sweep-postprocessed ranker.
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(pg.PageRank())).rank(
                graph, {v: 1 for v in training}))
        # "gap" Threshold applied through .transform(...) on precomputed ranks.
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(pg.PageRank().rank(
                graph, {v: 1 for v in training})))  # try all api types
        assert cond1 <= cond2
def evaluate(graph, algorithm):
    """Evaluate ``algorithm`` for link prediction on ``graph`` via edge hold-out.

    For every node with at least 10 neighbors, one random incident edge is
    removed, the algorithm is run from that node's one-hot seed signal, and
    AUC / precision / recall / F1 of the top-10 predictions are accumulated
    against the node's original neighborhood. Removed edges are restored
    before moving to the next node; averaged metrics are printed at the end.
    """
    tprs = list()
    ppvs = list()
    f1s = list()
    aucs = list()
    for node in list(graph):
        neighbors = list(graph.neighbors(node))
        if len(neighbors) < 10:
            continue
        training = pg.to_signal(graph, {node: 1})
        test = pg.to_signal(graph, {neighbor: 1 for neighbor in neighbors})
        for neighbor in random.sample(neighbors, 1):
            assert graph.has_edge(node, neighbor)
            graph.remove_edge(node, neighbor)
            assert not graph.has_edge(node, neighbor)
            assert not graph.has_edge(neighbor, node)
        # Mask out the seed node itself so it cannot be its own prediction.
        result = (training >> algorithm) * (1 - training)
        aucs.append(pg.AUC(test, exclude=training)(result))
        top = result >> pg.Top(10) >> pg.Threshold()
        prec = pg.PPV(test, exclude=training)(top)
        rec = pg.TPR(test, exclude=training)(top)
        ppvs.append(prec)
        tprs.append(rec)
        f1s.append(pg.safe_div(2 * prec * rec, prec + rec))
        # BUG FIX: restore removed edges by iterating the *saved* neighbor list.
        # The original iterated graph.neighbors(node) after removal, which no
        # longer contains the removed neighbor, so removed edges were never
        # restored and the graph degraded across iterations.
        for neighbor in neighbors:
            if not graph.has_edge(node, neighbor):
                graph.add_edge(node, neighbor)
        print(
            f"\r{algorithm.cite()}\t AUC {sum(aucs) / len(aucs):.3f}\t f1 {sum(f1s) / len(f1s):.3f}\t prec {sum(ppvs) / len(ppvs):.3f}\t rec {sum(tprs)/len(tprs):.3f}\t",
            end="")
    print()
def test_explicit_citations():
    """Spot-check that .cite() mentions the expected bibliography entries and phrases."""
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # Raw string: "\c" is an invalid escape sequence (SyntaxWarning on modern
    # Python); r"..." yields the exact same text without the warning.
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
def test_threshold():
    """Conductance should not degrade from gap-sweep to a 0.3 threshold to a
    threshold of 1 (which keeps nothing and should give infinite conductance)."""
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        algorithm = pg.PageRank()
        conductance = pg.Conductance()
        cond1 = conductance.evaluate(
            pg.Threshold(pg.Sweep(algorithm), "gap").rank(
                graph, {v: 1 for v in training}))
        cond2 = conductance.evaluate(
            pg.Threshold(0.3).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # try all api types
        cond3 = conductance.evaluate(
            pg.Threshold(1).transform(
                algorithm.rank(graph, {v: 1 for v in training})))  # should yield infinite conductance
        # TODO: find an algorithm other than gap to outperform 0.2 threshold too
        assert cond1 <= cond2
        assert cond2 <= cond3
def test_postprocessor_citations():
    """Non-tautology postprocessors must alter the wrapped algorithm's citation,
    and different postprocessor settings must yield different citations."""
    def pr():
        # Fresh base ranker per comparison, mirroring independent constructions.
        return pg.PageRank()

    assert pg.Tautology(pr()).cite() == pr().cite()
    assert pg.Normalize(pr()).cite() != pr().cite()
    assert pg.Normalize(pr(), "sum").cite() != pg.Normalize(pr(), "range").cite()
    assert pg.Ordinals(pr()).cite() != pg.Normalize(pr(), "sum").cite()
    assert pg.Transformer(pr()).cite() != pr().cite()
    assert pg.Threshold(pr()).cite() != pr().cite()
    assert pg.Sweep(pr()).cite() != pr().cite()
    assert pg.BoostedSeedOversampling(pr()).cite() != pr().cite()
    assert pg.SeedOversampling(pr()).cite() != pr().cite()
    assert (pg.SeedOversampling(pr(), method="safe").cite()
            != pg.SeedOversampling(pr(), method="top").cite())
    assert (pg.BoostedSeedOversampling(pr(), objective="partial").cite()
            != pg.BoostedSeedOversampling(pr(), objective="naive").cite())
    assert (pg.BoostedSeedOversampling(pr(), oversample_from_iteration="previous").cite()
            != pg.BoostedSeedOversampling(pr(), oversample_from_iteration="original").cite())
def rank(self, graph: pg.GraphSignalGraph = None, personalization: pg.GraphSignalData = None, **kwargs):
    """Run the wrapped ranker, then combine re-rankings seeded from thresholded
    versions of its output, each weighted by the corresponding node score.

    NOTE(review): the ``>>`` operators are pygrank's signal-pipeline
    composition; behavior is identical to the original implementation.
    """
    personalization = pg.to_signal(graph, personalization)
    graph = personalization.graph
    base = self.ranker(personalization)
    # Find the smallest score at which the cumulative (ascending) score mass
    # first exceeds 10% of the total mass; nodes below it are skipped.
    total = pg.sum(base)
    running = 0
    for cutoff in sorted(base.values()):
        running += cutoff
        if running > total * 0.1:
            break
    combined = 0
    for i, v in enumerate(base):
        pg.utils.log(f"{i}/{len(base)}")
        if base[v] < cutoff:
            continue
        partial = base >> pg.Threshold(base[v], inclusive=True) >> self.ranker
        combined = partial * base[v] + combined
    return combined
def overlapping_community_detection(graph, known_members, top=None):
    """Expand a seed community with a sweep-postprocessed graph filter.

    When ``top`` is given, returns that many highest-ranked non-seed nodes;
    otherwise returns every non-seed node above a conductance-optimized
    score threshold.
    """
    # Small seed sets get a fixed PageRank; larger ones afford parameter tuning.
    if len(known_members) < 50:
        graph_filter = pg.PageRank(0.9)
    else:
        graph_filter = pg.ParameterTuner().tune(graph, known_members)
    seeds = pg.to_signal(graph, {v: 1 for v in known_members})
    ranks = seeds >> pg.Sweep(graph_filter) >> pg.Normalize("range")
    if top is not None:
        ranks = ranks * (1 - seeds)  # set known member scores to zero
        # Return the requested number of top predictions.
        return sorted(list(graph), key=lambda node: -ranks[node])[:top]
    threshold = pg.optimize(
        max_vals=[1],
        loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    members = set(known_members)
    return [v for v in graph if ranks[v] > threshold and v not in members]