Example #1
import pygrank as pg

def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
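    # Disparate mistreatment measure: the arithmetic mean (pg.AM) of the disparity
    # between true positive rates and between true negative rates of the sensitive
    # group and its complement; the exclude masks keep only non-excluded nodes of
    # the respective group.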
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
Example #2
import pygrank as pg

def test_autorefs():
    """
    Tests that different (base) algorithms yield different citations, that every citation
    contains at least one reference to a publication, and that wrapping the same base
    algorithm yields the same citations.
    """
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algs = {
        "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9,
                              max_iters=1000),
        "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9,
                              max_iters=1000),
        "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
        "hk5'": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
    }
    algs = algs | pg.create_variations(
        algs, {
            "+Sweep": pg.Sweep,
            "+SO": pg.SeedOversampling,
            "+BSO": pg.BoostedSeedOversampling
        })
    citations = set()
    for alg in algs.values():
        citation = alg.cite()
        assert "\\cite{" in citation
        citations.add(citation)
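    # "hk5'" duplicates "hk5", so it and its three wrapped variations repeat
    # existing citations, leaving len(algs) - 4 unique strings.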
    assert len(citations) == len(algs) - 4
Example #3
import pygrank as pg

def test_one_community_benchmarks():
    pg.load_backend("numpy")
    datasets = ["graph9", "bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=1.E-9),
        "tuned": pg.ParameterTuner(preprocessor=pre, max_iters=10000, tol=1.E-9),
    }
    # algorithms = benchmark.create_variations(algorithms, {"": pg.Tautology, "+SO": pg.SeedOversampling})
    # loader = pg.load_datasets_one_community(datasets)
    # pg.benchmark(algorithms, loader, "time", verbose=True)

    loader = pg.load_datasets_one_community(datasets)
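    # Convert per-dataset AUCs into algorithm ranks, average the ranks across
    # datasets, and print the aggregate comparison.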
    pg.benchmark_print(
        pg.benchmark_average(
            pg.benchmark_ranks(
                pg.benchmark(algorithms,
                             loader,
                             pg.AUC,
                             fraction_of_training=.8))))
Example #4
import pygrank as pg
# supported_backends() is assumed to be a helper from pygrank's test suite
# that yields each available computational backend in turn.

def test_completion():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        pg.PageRank().rank(graph)
        pg.HeatKernel().rank(graph)
        pg.AbsorbingWalks().rank(graph)
        pg.HeatKernel().rank(graph)
        assert True
Example #5
import pygrank as pg

def test_multigroup_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
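    # AlgorithmSelection holds out part of the training seeds to pick, per
    # graph, whichever of the base filters performs best.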
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(
        algorithms | tuned,
        loader,
        lambda ground_truth, exclude: pg.MultiSupervised(
            pg.AUC, ground_truth, exclude),
        fraction_of_training=.8,
        seed=list(range(1))),
                       decimals=3,
                       delimiter=" & ",
                       end_line="\\\\")
    loader = pg.load_datasets_multiple_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned,
                                    loader,
                                    pg.Modularity,
                                    sensitive=pg.pRule,
                                    fraction_of_training=.8,
                                    seed=list(range(1))),
                       decimals=3,
                       delimiter=" & ",
                       end_line="\\\\")
Example #6
import pygrank as pg
# supported_backends(): assumed test-suite helper iterating available backends.

def test_completion():
    graph = next(pg.load_datasets_graph(["graph9"]))
    for _ in supported_backends():
        pg.PageRank().rank(graph)
        pg.PageRank(normalization="both").rank(graph)
        pg.HeatKernel().rank(graph)
        pg.AbsorbingWalks().rank(graph)
        pg.SymmetricAbsorbingRandomWalks().rank(graph)
        pg.HeatKernel().rank(graph)
        assert True
Example #7
import pygrank as pg

def test_filter_citations():
    assert pg.PageRank().cite() != pg.GraphFilter().cite()
    assert pg.HeatKernel().cite() != pg.GraphFilter().cite()
    assert pg.AbsorbingWalks().cite() != pg.GraphFilter().cite()
    assert pg.HeatKernel().cite() != pg.GraphFilter().cite()
    assert pg.PageRank(alpha=0.85).cite() != pg.PageRank(alpha=0.99).cite()
    assert pg.HeatKernel(krylov_dims=0).cite() != pg.HeatKernel(krylov_dims=5).cite()
    assert pg.HeatKernel(coefficient_type="taylor").cite() != pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert pg.HeatKernel(optimization_dict=dict()).cite() != pg.HeatKernel(optimization_dict=None).cite()
Example #8
import pygrank as pg

def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
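    # The tuner must do no worse than the weaker baseline and stay within 10%
    # of the stronger one.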
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2)*0.9 <= auc3
Example #9
import pygrank as pg

def test_explicit_citations():
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    assert "with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
Example #10
import pytest
import pygrank as pg
# supported_backends(): assumed test-suite helper iterating available backends.

def test_auc_ndcg_compliance():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group, 0.5)
    for _ in supported_backends():
        scores1 = pg.PageRank()(graph, training)
        scores2 = pg.HeatKernel()(graph, training)
        AUC1 = pg.AUC(test, exclude=training)(scores1)
        AUC2 = pg.AUC(test, exclude=training)(scores2)
        NDCG1 = float(pg.NDCG(test, exclude=training)(scores1))
        NDCG2 = float(pg.NDCG(test, exclude=training)(scores2))
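        # Order compliance: both measures must agree on which score vector is
        # better, and k larger than the number of nodes must raise an error.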
        assert (AUC1 < AUC2) == (NDCG1 < NDCG2)
        with pytest.raises(Exception):
            pg.AUC(test, exclude=test, k=len(graph) + 1)(scores2)
        with pytest.raises(Exception):
            pg.NDCG(test, exclude=training, k=len(graph) + 1)(scores2)
Example #11
import pygrank as pg
# supported_backends(): assumed test-suite helper iterating available backends.

def test_krylov_space_oversampling():
    # demonstrates a deliberately involved setting: a Krylov-space heat kernel
    # with renormalization, combined with seed oversampling
    _, graph, community = next(pg.load_datasets_one_community(["bigraph"]))
    algorithm = pg.HeatKernel(
        t=5,  # the number of hops away HeatKernel places maximal importance on
        krylov_dims=5,
        normalization="symmetric",
        renormalize=True)
    for _ in supported_backends():
        personalization = {node: 1. for node in list(community)[:10]}
        oversampling = pg.SeedOversampling(algorithm)
        pg.Normalize(oversampling)(graph, personalization)
        measure = pg.Conductance()
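        # Lower conductance means a better-separated community; oversampling
        # should match or improve on the plain algorithm up to a 5.E-6 tolerance.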
        assert measure(pg.Normalize(algorithm)(
            graph, personalization)) >= measure(
                pg.Normalize(oversampling)(graph, personalization)) - 5.E-6
Example #12
import pygrank as pg

#datasets = ["acm", "amazon", "ant", "citeseer","dblp","facebook0","facebook686","log4j","maven","pubmed","squirel", "twitter"]
datasets = [
    "facebook0", "facebook686", "log4j", "ant", "eucore", "citeseer", "dblp"
]
seed_fractions = [0.3, 0.5]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")

filters = {
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=1.E-6),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=1.E-6),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=1.E-6),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=1.E-6),
}
filters = pg.create_variations(filters, {"": pg.Tautology, "+Sweep": pg.Sweep})
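# Evaluate each filter both unmodified (pg.Tautology is an identity wrapper)
# and wrapped in the sweep procedure.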

for name, filter in filters.items():
    print("=====", name, "=====")
    algorithms = {
        "None": filter,
        "Mult": pg.AdHocFairness(filter, "B"),
        "LFPRO": pg.AdHocFairness(filter, "O"),
        #"FBuck-C": pg.FairPersonalizer(filter, .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs, parameter_buckets=0),
Example #13
import pygrank as pg

datasets = ["friendster"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")  # common preprocessor
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre),
    "hk5": pg.HeatKernel(5, preprocessor=pre),
    "tuned": pg.ParameterTuner(preprocessor=pre)
}
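# With fraction_of_training=.5, half of each community's members seed the
# algorithms and the remaining half are used to compute AUC.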
loader = pg.load_datasets_one_community(datasets)
pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5))
Example #14
import pygrank as pg

datasets = ["amazon", "citeseer", "maven"]
community_size = 500

pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
convergence = {"tol": 1.E-9, "max_iters": 10000}
#convergence = {"error_type": "iters", "max_iters": 41}

algorithms = {
    "ppr0.5": pg.PageRank(alpha=0.5, preprocessor=pre, **convergence),
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, **convergence),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, **convergence),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, **convergence),
    "hk2": pg.HeatKernel(t=2, preprocessor=pre, **convergence),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, **convergence),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, **convergence),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, **convergence),
}

postprocessor = pg.Tautology
algorithms = pg.benchmarks.create_variations(algorithms, postprocessor)
measure = pg.AUC
optimization = pg.SelfClearDict()
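# pg.SelfClearDict is assumed to be a self-clearing cache: used as an
# optimization_dict, it stores intermediate filter terms and evicts stale
# entries between runs to bound memory use.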


def create_param_tuner(optimizer=pg.optimize):
    return pg.ParameterTuner(lambda params:
                              pg.Normalize(
                                  postprocessor(
                                      pg.GenericGraphFilter([1]+params,
Example #15
import pytest
import pygrank as pg

def test_filter_invalid_parameters():
    graph = next(pg.load_datasets_graph(["graph5"]))
    with pytest.raises(Exception):
        pg.HeatKernel(normalization="unknown").rank(graph)
    with pytest.raises(Exception):
        pg.HeatKernel(coefficient_type="unknown").rank(graph)
Example #16
import pygrank as pg
_, graph, community = next(pg.load_datasets_one_community(["EUCore"]))
algorithm = pg.HeatKernel(
    t=5,  # the number of hops away HeatKernel places maximal importance on
    normalization="symmetric",
    renormalize=True)
personalization = {node: 1. for node in community}  # unlisted nodes are treated as zeros
algorithms = {
    "HK5": algorithm,
    "HK5+Oversampling": pg.SeedOversampling(algorithm)
}
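# pg.Normalize rescales outputs to [0, 1], which pg.Conductance expects;
# lower conductance indicates a better-separated community.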
algorithms = algorithms | pg.create_variations(algorithms, {"+Sweep": pg.Sweep})
algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

measure = pg.Conductance()
for algorithm_name, algorithm in algorithms.items():
    scores = algorithm(graph, personalization)  # returns a dict-like pg.GraphSignal
    print(algorithm_name, measure(scores))
Example #17
import pygrank as pg

def test_filter_as_postprocessor():
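    # Chaining with >> feeds one filter into the next; the combined object is
    # an instance of the right-hand algorithm's class.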
    assert isinstance(pg.HeatKernel() >> pg.PageRank(normalization="salsa"),
                      pg.PageRank)
Example #18
import pygrank as pg

datasets = ["eucore", "citeseer", "blockmodel"]
#datasets = ["maven"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
tol = 1.E-9
optimization = pg.SelfClearDict()
algorithms = {
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
}
Example #19
import pygrank as pg
datasets = ["EUCore", "Amazon"]
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
algs = {
    "ppr.85": pg.PageRank(.85, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "ppr.99": pg.PageRank(.99, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk3": pg.HeatKernel(3, preprocessor=pre, tol=1.E-9, max_iters=1000),
    "hk5": pg.HeatKernel(5, preprocessor=pre, tol=1.E-9, max_iters=1000),
}

algs = algs | pg.create_variations(algs, {"+Sweep": pg.Sweep})
loader = pg.load_datasets_one_community(datasets)
algs["tuned"] = pg.ParameterTuner(preprocessor=pre, tol=1.E-9, max_iters=1000)
algs["selected"] = pg.AlgorithmSelection(
    pg.create_demo_filters(preprocessor=pre, tol=1.E-9,
                           max_iters=1000).values())
algs["tuned+Sweep"] = pg.ParameterTuner(
    ranker_generator=lambda params: pg.Sweep(
        pg.GenericGraphFilter(
            params, preprocessor=pre, tol=1.E-9, max_iters=1000)))
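# "selected" picks the best-performing demo filter on a validation split,
# while "tuned+Sweep" tunes the coefficients of a GenericGraphFilter that is
# wrapped in a Sweep postprocessor.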

for alg in algs.values():
    print(alg.cite())  # prints a list of algorithm citations

pg.benchmark_print(pg.benchmark(algs, loader, pg.AUC, fraction_of_training=.5),
                   delimiter=" & ",
                   end_line="\\\\")