示例#1
0
def test_threshold():
    """Compare conductance of sweep-then-threshold against gap thresholding.

    For every backend yielded by ``supported_backends()``, half of the
    community members are used as seeds. Two thresholded results are built
    through different API styles (``rank`` on a wrapped ranker versus
    ``transform`` on already computed ranks) and the first is asserted to
    have conductance no larger than the second.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        seeds = {v: 1 for v in training}

        # Postprocessor wrapped around the ranker, invoked through rank().
        swept_ranks = pg.Threshold(pg.Sweep(pg.PageRank())).rank(graph, seeds)
        cond1 = pg.Conductance().evaluate(swept_ranks)

        # Postprocessor applied to precomputed ranks (try all api types).
        plain_ranks = pg.PageRank().rank(graph, seeds)
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(plain_ranks))

        assert cond1 <= cond2
示例#2
0
def evaluate(graph, algorithm):
    """Print running link-prediction metrics of ``algorithm`` on ``graph``.

    For every node with at least 10 neighbors, one randomly chosen incident
    edge is temporarily removed, the algorithm is run with that node as the
    only seed, and AUC / precision / recall / f1 are computed against the
    node's original neighbors (the seed itself is excluded). Running averages
    are printed in place after each evaluated node.

    The removed edge is put back before moving on to the next node, even if
    the algorithm or a measure raises, so the graph is left structurally
    unchanged. NOTE(review): edges are re-added with a bare ``add_edge``;
    if the graph stores edge attributes they are not restored - confirm
    that this is acceptable for the datasets in use.

    Args:
        graph: Graph exposing ``neighbors``, ``has_edge``, ``remove_edge``
            and ``add_edge`` and iterable over its nodes.
        algorithm: Node ranking algorithm that can be applied to a signal
            with ``>>`` and that provides ``cite()``.

    Returns:
        None. Results are only printed.
    """
    tprs = []
    ppvs = []
    f1s = []
    aucs = []
    for node in list(graph):
        neighbors = list(graph.neighbors(node))
        if len(neighbors) < 10:
            continue
        training = pg.to_signal(graph, {node: 1})
        test = pg.to_signal(graph, {neighbor: 1 for neighbor in neighbors})
        removed = []
        try:
            for neighbor in random.sample(neighbors, 1):
                assert graph.has_edge(node, neighbor)
                graph.remove_edge(node, neighbor)
                removed.append(neighbor)
                assert not graph.has_edge(node, neighbor)
                assert not graph.has_edge(neighbor, node)
            # Zero out the seed so it cannot count as its own prediction.
            result = (training >> algorithm) * (1 - training)
            aucs.append(pg.AUC(test, exclude=training)(result))
            top = result >> pg.Top(10) >> pg.Threshold()
            prec = pg.PPV(test, exclude=training)(top)
            rec = pg.TPR(test, exclude=training)(top)
            ppvs.append(prec)
            tprs.append(rec)
            f1s.append(pg.safe_div(2 * prec * rec, prec + rec))
        finally:
            # Restore from the recorded removals: iterating the node's
            # current neighbors would never find a missing edge.
            for neighbor in removed:
                if not graph.has_edge(node, neighbor):
                    graph.add_edge(node, neighbor)
        print(
            f"\r{algorithm.cite()}\t AUC {sum(aucs) / len(aucs):.3f}\t f1 {sum(f1s) / len(f1s):.3f}\t prec {sum(ppvs) / len(ppvs):.3f}\t rec {sum(tprs)/len(tprs):.3f}\t",
            end="")
    print()
示例#3
0
def test_explicit_citations():
    """Check that ``cite()`` of algorithms and postprocessors mentions the
    expected description fragments or bibliography keys.

    Each assertion builds one ``pg`` component and looks for a substring
    (or a nested component's citation) in the text returned by ``cite()``.
    """
    assert "unknown node ranking algorithm" == pg.NodeRanking().cite()
    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    assert r"with parameters tuned \cite{krasanakis2021pygrank}" in pg.ParameterTuner(
        lambda params: pg.PageRank(params[0])).cite()
    assert "Postprocessor" in pg.Postprocessor().cite()
    assert pg.PageRank().cite() in pg.AlgorithmSelection().cite()
    assert "krasanakis2021pygrank" in pg.ParameterTuner().cite()
    assert "ortega2018graph" in pg.ParameterTuner().cite()
    assert pg.HeatKernel().cite() in pg.SeedOversampling(pg.HeatKernel()).cite()
    assert pg.AbsorbingWalks().cite() in pg.BoostedSeedOversampling(pg.AbsorbingWalks()).cite()
    assert "krasanakis2018venuerank" in pg.BiasedKernel(converge_to_eigenvectors=True).cite()
    assert "yu2021chebyshev" in pg.HeatKernel(coefficient_type="chebyshev").cite()
    assert "susnjara2015accelerated" in pg.HeatKernel(krylov_dims=5).cite()
    assert "krasanakis2021pygrank" in pg.GenericGraphFilter(optimization_dict=dict()).cite()
    assert "tautology" in pg.Tautology().cite()
    assert pg.PageRank().cite() == pg.Tautology(pg.PageRank()).cite()
    assert "mabs" in pg.MabsMaintain(pg.PageRank()).cite()
    assert "max normalization" in pg.Normalize(pg.PageRank()).cite()
    assert "[0,1] range" in pg.Normalize(pg.PageRank(), "range").cite()
    assert "ordinal" in pg.Ordinals(pg.PageRank()).cite()
    assert "exp" in pg.Transformer(pg.PageRank()).cite()
    assert "0.5" in pg.Threshold(pg.PageRank(), 0.5).cite()
    assert "andersen2007local" in pg.Sweep(pg.PageRank()).cite()
    assert pg.HeatKernel().cite() in pg.Sweep(pg.PageRank(), pg.HeatKernel()).cite()
    assert "LFPRO" in pg.AdHocFairness("O").cite()
    assert "LFPRO" in pg.AdHocFairness(pg.PageRank(), "LFPRO").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "B").cite()
    assert "multiplicative" in pg.AdHocFairness(pg.PageRank(), "mult").cite()
    assert "tsioutsiouliklis2020fairness" in pg.AdHocFairness().cite()
    assert "rahman2019fairwalk" in pg.FairWalk(pg.PageRank()).cite()
    assert "krasanakis2020prioredit" in pg.FairPersonalizer(pg.PageRank()).cite()
示例#4
0
def test_threshold():
    """Check the conductance ordering of three thresholding strategies.

    For every backend yielded by ``supported_backends()``, half of the
    community members seed a PageRank algorithm. The conductance of a
    sweep followed by "gap" thresholding must not exceed that of a fixed
    0.3 threshold, which in turn must not exceed that of a threshold of 1.
    """
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        seeds = {v: 1 for v in training}
        algorithm = pg.PageRank()

        # Postprocessor wrapped around the ranker, invoked through rank().
        gap_result = pg.Threshold(pg.Sweep(algorithm), "gap").rank(graph, seeds)
        cond1 = pg.Conductance().evaluate(gap_result)

        # Postprocessor applied to precomputed ranks (try all api types).
        fixed_result = pg.Threshold(0.3).transform(algorithm.rank(graph, seeds))
        cond2 = pg.Conductance().evaluate(fixed_result)

        # A threshold of 1 should yield infinite conductance.
        strict_result = pg.Threshold(1).transform(algorithm.rank(graph, seeds))
        cond3 = pg.Conductance().evaluate(strict_result)

        # TODO: find an algorithm other than gap to outperform the fixed
        # 0.3 threshold too
        assert cond1 <= cond2
        assert cond2 <= cond3
示例#5
0
def test_postprocessor_citations():
    """Check that postprocessors and their options change the citation text.

    ``Tautology`` must leave the PageRank citation unchanged; every other
    postprocessor, and every pair of differing option values compared
    below, must produce distinct ``cite()`` output.
    """
    base_citation = pg.PageRank().cite()

    # Wrapping PageRank: only the tautology keeps the citation intact.
    assert pg.Tautology(pg.PageRank()).cite() == base_citation
    assert pg.Normalize(pg.PageRank()).cite() != base_citation
    assert (pg.Normalize(pg.PageRank(), "sum").cite()
            != pg.Normalize(pg.PageRank(), "range").cite())
    assert (pg.Ordinals(pg.PageRank()).cite()
            != pg.Normalize(pg.PageRank(), "sum").cite())
    assert pg.Transformer(pg.PageRank()).cite() != base_citation
    assert pg.Threshold(pg.PageRank()).cite() != base_citation
    assert pg.Sweep(pg.PageRank()).cite() != base_citation
    assert pg.BoostedSeedOversampling(pg.PageRank()).cite() != base_citation
    assert pg.SeedOversampling(pg.PageRank()).cite() != base_citation

    # Different option values must be reflected in the citation.
    assert (pg.SeedOversampling(pg.PageRank(), method="safe").cite()
            != pg.SeedOversampling(pg.PageRank(), method="top").cite())
    assert (pg.BoostedSeedOversampling(pg.PageRank(), objective="partial").cite()
            != pg.BoostedSeedOversampling(pg.PageRank(), objective="naive").cite())
    assert (pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="previous").cite()
            != pg.BoostedSeedOversampling(pg.PageRank(), oversample_from_iteration="original").cite())
示例#6
0
 def rank(self,
          graph: pg.GraphSignalGraph = None,
          personalization: pg.GraphSignalData = None,
          **kwargs):
     """Accumulate score-weighted re-rankings of the highest-scoring nodes.

     The personalization is ranked once with ``self.ranker``. A cutoff
     score is found by walking the scores in ascending order until their
     running sum exceeds 10% of the total. For every node whose score
     reaches the cutoff, the base ranks are thresholded at that node's
     score (inclusive), ranked again with ``self.ranker``, scaled by the
     node's score and added to the result.

     Args:
         graph: Graph used to convert ``personalization`` into a signal.
         personalization: Seed data accepted by ``pg.to_signal``.
         **kwargs: Accepted but not used in this method.

     Returns:
         The accumulated result, or ``0`` if no node was processed.
     """
     personalization = pg.to_signal(graph, personalization)
     graph = personalization.graph  # rebound, but not read again below
     base_ranks = self.ranker(personalization)

     # Find the score at which the ascending running sum passes 10% mass.
     mass_limit = pg.sum(base_ranks) * 0.1
     running_mass = 0
     for cutoff in sorted(base_ranks.values()):
         running_mass += cutoff
         if running_mass > mass_limit:
             break

     accumulated = 0
     node_count = len(base_ranks)
     for position, node in enumerate(base_ranks):
         pg.utils.log(f"{position}/{node_count}")
         score = base_ranks[node]
         if score >= cutoff:
             selector = pg.Threshold(score, inclusive=True)
             partial = base_ranks >> selector >> self.ranker
             accumulated = partial * score + accumulated
     return accumulated
示例#7
0
def overlapping_community_detection(graph, known_members, top=None):
    """Predict additional community members from a set of known ones.

    Known members seed a graph filter whose output is passed through
    ``pg.Sweep`` and ``pg.Normalize("range")``. The filter is
    ``pg.PageRank(0.9)`` when fewer than 50 members are known and a
    ``pg.ParameterTuner`` tuned on the known members otherwise.

    Args:
        graph: Graph to search; iterating it yields its nodes.
        known_members: Sized collection of nodes known to be members.
        top: If given, the number of highest-scoring nodes to return.

    Returns:
        With ``top``: the first ``top`` nodes by descending score, after
        the scores of known members have been set to zero. Without it:
        all nodes outside ``known_members`` scoring above a threshold
        found by ``pg.optimize`` with a conductance-based loss.
    """
    if len(known_members) < 50:
        graph_filter = pg.PageRank(0.9)
    else:
        graph_filter = pg.ParameterTuner().tune(graph, known_members)

    seeds = {v: 1 for v in known_members}
    pipeline = pg.Sweep(graph_filter) >> pg.Normalize("range")
    ranks = pg.to_signal(graph, seeds) >> pipeline

    if top is not None:
        # set known member scores to zero
        masked = ranks * (1 - pg.to_signal(graph, seeds))

        def descending_score(node):
            return -masked[node]

        # return specific number of top predictions
        return sorted(graph, key=descending_score)[:top]

    def thresholded_conductance(params):
        return pg.Conductance(graph)(pg.Threshold(params[0]).transform(ranks))

    threshold = pg.optimize(max_vals=[1], loss=thresholded_conductance)[0]
    members = set(known_members)
    return [
        node for node in graph
        if ranks[node] > threshold and node not in members
    ]