Пример #1
0
def test_seed_oversampling():
    """Check that seed oversampling does not score below plain PageRank.

    For every value yielded by ``supported_backends()`` the ``graph9`` group
    is split with ``training_samples=2`` and both parts are turned into graph
    signals. For NDCG and AUC the test asserts
    ``base <= SeedOversampling <= BoostedSeedOversampling``. Afterwards the
    "top" and "neighbors" oversampling methods and a boosted variant with the
    'naive' objective are run only to confirm they complete without raising.
    """
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        train_nodes, test_nodes = pg.split(list(group), training_samples=2)
        training = pg.to_signal(graph, {node: 1 for node in train_nodes})
        evaluation = pg.to_signal(graph, {node: 1 for node in test_nodes})

        for measure in (pg.NDCG, pg.AUC):
            # Baseline: plain PageRank.
            base_ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            base_result = measure(evaluation, training).evaluate(base_ranks)

            # Same filter wrapped in seed oversampling.
            so_ranker = pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000))
            so_ranks = so_ranker.rank(graph, training)
            so_result = measure(evaluation, training).evaluate(so_ranks)

            # Same filter wrapped in boosted seed oversampling.
            bso_evaluator = measure(evaluation, training)
            bso_ranker = pg.BoostedSeedOversampling(
                pg.PageRank(0.9, max_iters=1000))
            bso_result = bso_evaluator.evaluate(
                bso_ranker.rank(graph, training))

            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)

        # Smoke runs: results are discarded, the calls only need to succeed.
        for method in ("top", "neighbors"):
            oversampler = pg.SeedOversampling(
                pg.PageRank(0.99, max_iters=1000), method)
            oversampler.rank(graph, training)
        boosted = pg.BoostedSeedOversampling(
            pg.PageRank(max_iters=1000),
            'naive',
            oversample_from_iteration='original')
        boosted.rank(graph, {"A": 1})
Пример #2
0
def test_seed_oversampling_arguments():
    """Verify that invalid oversampling configurations raise an exception.

    Each case builds the postprocessor and ranks inside ``pytest.raises``,
    so a failure at construction time or at ranking time both count.
    """
    _, graph, _ = next(pg.load_datasets_one_community(["graph9"]))

    # Calls are deferred with lambdas so that they execute inside the
    # ``pytest.raises`` context below.
    failing_calls = (
        # unknown SeedOversampling method
        lambda: pg.SeedOversampling(pg.PageRank(), 'unknown').rank(
            graph, {"A": 1}),
        # personalization with unequal seed values
        # NOTE(review): presumably rejected because seeds are non-binary
        lambda: pg.SeedOversampling(pg.PageRank()).rank(
            graph, {"A": 0.1, "B": 1}),
        # unknown BoostedSeedOversampling objective
        lambda: pg.BoostedSeedOversampling(pg.PageRank(), 'unknown').rank(
            graph, {"A": 1}),
        # unknown oversample_from_iteration option
        lambda: pg.BoostedSeedOversampling(
            pg.PageRank(),
            'naive',
            oversample_from_iteration='unknown').rank(graph, {"B": 1}),
    )
    for failing_call in failing_calls:
        with pytest.raises(Exception):
            failing_call()
Пример #3
0
def test_postprocessor_citations():
    """Check how wrapping PageRank in postprocessors affects ``cite()``.

    ``Tautology`` must leave the citation unchanged, every other wrapper
    checked here must change it, and differently configured wrappers must
    produce different citations.
    """
    # Tautology is expected to be invisible in the citation text.
    assert pg.Tautology(pg.PageRank()).cite() == pg.PageRank().cite()

    # Each of these wrappers must alter the base algorithm's citation.
    citation_changing = (
        pg.Normalize,
        pg.Transformer,
        pg.Threshold,
        pg.Sweep,
        pg.BoostedSeedOversampling,
        pg.SeedOversampling,
    )
    for postprocessor in citation_changing:
        assert postprocessor(pg.PageRank()).cite() != pg.PageRank().cite()

    # Pairs of configurations whose citations must not coincide.
    distinct_pairs = (
        (pg.Normalize(pg.PageRank(), "sum"),
         pg.Normalize(pg.PageRank(), "range")),
        (pg.Ordinals(pg.PageRank()),
         pg.Normalize(pg.PageRank(), "sum")),
        (pg.SeedOversampling(pg.PageRank(), method="safe"),
         pg.SeedOversampling(pg.PageRank(), method="top")),
        (pg.BoostedSeedOversampling(pg.PageRank(), objective="partial"),
         pg.BoostedSeedOversampling(pg.PageRank(), objective="naive")),
        (pg.BoostedSeedOversampling(pg.PageRank(),
                                    oversample_from_iteration="previous"),
         pg.BoostedSeedOversampling(pg.PageRank(),
                                    oversample_from_iteration="original")),
    )
    for first, second in distinct_pairs:
        assert first.cite() != second.cite()
Пример #4
0
def test_explicit_citations():
    """Pin the exact fragments that ``cite()`` must contain per algorithm.

    Every assertion checks either full equality, or that a specific
    citation key / descriptive phrase (or the citation of a wrapped
    algorithm) appears in the text returned by ``cite()``.
    """
    # Base classes, tuners and selectors.
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # Raw string: "\c" is not a valid escape sequence, so the non-raw literal
    # triggered an invalid-escape warning. The string value is unchanged.
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()

    # Oversampling wrappers must embed the citation of what they wrap.
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()

    # Constructor options that add a citation key.
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()

    # Tautology, MabsMaintain, Normalize, Ordinals, Transformer, Threshold, Sweep.
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()

    # AdHocFairness, FairWalk and FairPersonalizer.
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
Пример #5
0
def test_krylov_space_oversampling():
    """Compare conductance of a HeatKernel with and without oversampling.

    On the ``bigraph`` dataset, for every value yielded by
    ``supported_backends()``, the normalized output of the plain algorithm
    must have conductance no smaller than that of the seed-oversampled one,
    up to a tolerance of 5e-6.
    """
    # this demonstrates a highly complicated setting
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    heat_kernel_options = dict(
        t=5,  # the number of hops away HeatKernel places maximal importance on
        krylov_dims=5,
        normalization="symmetric",
        renormalize=True,
    )
    algorithm = pg.HeatKernel(**heat_kernel_options)
    tolerance = 5.E-6
    for _ in supported_backends():
        # Seed only the first ten community members.
        seed_nodes = list(community)[:10]
        personalization = dict.fromkeys(seed_nodes, 1.)
        oversampling = pg.SeedOversampling(algorithm)
        # This run's result is discarded; the same call is repeated below.
        pg.Normalize(oversampling)(graph, personalization)
        measure = pg.Conductance()
        plain_conductance = measure(
            pg.Normalize(algorithm)(graph, personalization))
        oversampled_conductance = measure(
            pg.Normalize(oversampling)(graph, personalization))
        assert plain_conductance >= oversampled_conductance - tolerance
Пример #6
0
             personalization: pg.GraphSignalData = None,
             **kwargs):
        personalization = pg.to_signal(graph, personalization)
        graph = personalization.graph
        ranks = self.ranker(personalization)
        ret = 0
        total_sum = pg.sum(ranks)
        accum_sum = 0
        for threshold in sorted(ranks.values()):
            accum_sum += threshold
            if accum_sum > total_sum * 0.1:
                break
        for i, v in enumerate(ranks):
            pg.utils.log(f"{i}/{len(ranks)}")
            if ranks[v] >= threshold:
                partial = ranks >> pg.Threshold(ranks[v],
                                                inclusive=True) >> self.ranker
                ret = partial * ranks[v] + ret
        return ret


# Algorithms to benchmark, keyed by a short label. Each entry gets its own
# PageRank(0.9) instance, piped into a different oversampling postprocessor.
algs = dict()
algs["ppr"] = pg.PageRank(0.9)
algs["ppr+so"] = pg.PageRank(0.9) >> pg.SeedOversampling()
algs["ppr+bso"] = pg.PageRank(0.9) >> pg.BoostedSeedOversampling()
algs["ppr+sso"] = pg.PageRank(0.9) >> StochasticSeedOversampling()

# Benchmark all of the above on citeseer with AUC and print the outcome.
# NOTE(review): the meaning of the positional argument 3 is not visible
# here - confirm against pg.benchmark's signature.
loader = pg.load_datasets_one_community(["citeseer"])
benchmark_results = pg.benchmark(algs, loader, pg.AUC, 3)
pg.benchmark_print(benchmark_results)
Пример #7
0
import pygrank as pg
# Take the first item yielded for the single requested dataset.
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))

# Base algorithm that every compared variation builds on.
algorithm = pg.HeatKernel(
    t=5,  # the number of hops away HeatKernel places maximal importance on
    normalization="symmetric",
    renormalize=True,
)

# Every community member gets score 1; ignored nodes assumed to be zeroes.
personalization = dict.fromkeys(community, 1.)

# Start from the plain and oversampled algorithm, add "+Sweep" variations of
# both, then wrap every entry in pg.Normalize (the empty suffix keeps names).
algorithms = {"HK5": algorithm}
algorithms["HK5+Oversampling"] = pg.SeedOversampling(algorithm)
sweep_variations = pg.create_variations(algorithms, {"+Sweep": pg.Sweep})
algorithms = algorithms | sweep_variations
algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

# Print the conductance of the scores produced by each variation.
measure = pg.Conductance()
for algorithm_name, algorithm in algorithms.items():
    # calling an algorithm returns a dict-like pg.GraphSignal
    scores = algorithm(graph, personalization)
    print(algorithm_name, measure(scores))
Пример #8
0
import pygrank as pg

# Base algorithm that every compared variation builds on.
heat_kernel_options = dict(
    t=5,  # the number of hops to place maximal importance on
    normalization="symmetric",
    renormalize=True,
)
algorithm = pg.HeatKernel(**heat_kernel_options)

# Plain and oversampled algorithm, plus "+sweep" variations of both; every
# entry is then passed through pg.create_variations with pg.Normalize.
algorithms = {"hk5": algorithm}
algorithms["hk5+oversampling"] = pg.SeedOversampling(algorithm)
sweep_variations = pg.create_variations(algorithms, {"+sweep": pg.Sweep})
algorithms = algorithms | sweep_variations
algorithms = pg.create_variations(algorithms, pg.Normalize)

# Take the first item yielded for the single requested dataset.
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
# Every community member gets score 1; missing scores considered zero.
personalization = dict.fromkeys(community, 1.)
measure = pg.Conductance()  # smaller means tightly-knit stochastic community
for algorithm_name, algorithm in algorithms.items():
    # calling an algorithm returns a dict-like pg.GraphSignal
    scores = algorithm(graph, personalization)
    conductance = measure(scores)
    pg.benchmark_print_line(algorithm_name, conductance,
                            tabs=[20, 5])  # pretty