Example #1
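All snippets on this page come from a pygrank test suite and omit their module-level imports. A minimal sketch of what they assume is shown below; the `supported_backends` helper is an assumption (it is presumably defined in the test module) and the exact backend list may differ:

import pytest
import torch
import tensorflow as tf
import pygrank as pg


def supported_backends():
    # assumed test helper: load each backend exercised by these tests and yield it,
    # so assertions in the enclosing loop run once per backend
    for backend in ["numpy", "tensorflow", "pytorch"]:
        pg.load_backend(backend)
        yield backend
    pg.load_backend("numpy")  # restore the default backend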
def test_appnp_tf():
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
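            # smooth the neural predictions over the graph with the tuned graph filter (APPNP-style propagation)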
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    model = APPNP(features.shape[1], labels.shape[1])
    with pg.Backend('tensorflow'):  # pygrank computations in tensorflow backend
        graph = pg.preprocessor(renormalize=True, cors=True)(graph)  # cors=True lets the preprocessed graph be reused across backends
        pg.gnn_train(model, features, graph, labels, training, validation,
                     optimizer=tf.optimizers.Adam(learning_rate=0.01), verbose=True, epochs=50)
        assert float(pg.gnn_accuracy(labels, model(features, graph), test)) == 1.  # dataset is super-easy to predict
Example #2
def test_gnn_errors():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    model = APPNP(features.shape[1], labels.shape[1])
    with pytest.raises(Exception):
        pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=2)
    pg.load_backend('tensorflow')
    pg.gnn_train(model, features, graph, labels, training, validation, test=test, epochs=300, patience=2)
    predictions = model(features, graph)
    pg.load_backend('numpy')
    with pytest.raises(Exception):
        pg.gnn_accuracy(labels, predictions, test)
Example #3
def test_appnp_torch():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.PageRank(0.9,
                                      renormalize=True,
                                      assume_immutability=True,
                                      error_type="iters",
                                      max_iters=10)

        def forward(self, inputs, training=False):
            graph, features = inputs
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
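            # propagate the MLP outputs through the graph with the PageRank filter (APPNP-style smoothing)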
            predict = self.ranker.propagate(
                graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5 * torch.norm(param)
            return ret

    def init_weights(m):
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    pg.load_backend('pytorch')
    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    model.apply(init_weights)
    pg.gnn_train(model,
                 graph,
                 features,
                 labels,
                 training,
                 validation,
                 epochs=50,
                 patience=2)
    # TODO: higher numbers fail only on github actions - for local tests it is fine
    assert float(pg.gnn_accuracy(labels, model([graph, features]),
                                 test)) >= 0.2
    pg.load_backend('numpy')
Example #4
def test_sweep_streaming():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            (pg.PageRank() >> pg.Sweep()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        auc3 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank() >> pg.Transformer(pg.log) >> pg.LinearSweep()
            | pg.to_signal(graph, {v: 1 for v in training}))
        assert auc1 > auc2
        assert abs(auc1 - auc3) < pg.epsilon()

    with pytest.raises(Exception):
        pg.Sweep() << "a"
Example #5
def test_seed_oversampling():
    _, graph, group = next(pg.load_datasets_one_community(["graph9"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        training = pg.to_signal(graph, {v: 1 for v in training})
        evaluation = pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.NDCG, pg.AUC]:
            ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            base_result = measure(evaluation, training).evaluate(ranks)
            ranks = pg.SeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(
                graph, training)
            so_result = measure(evaluation, training).evaluate(ranks)
            bso_result = measure(evaluation, training).evaluate(
                pg.BoostedSeedOversampling(pg.PageRank(0.9, max_iters=1000)).rank(graph, training))
            assert float(base_result) <= float(so_result)
            assert float(so_result) <= float(bso_result)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000),
                            "top").rank(graph, training)
        pg.SeedOversampling(pg.PageRank(0.99, max_iters=1000),
                            "neighbors").rank(graph, training)
        pg.BoostedSeedOversampling(pg.PageRank(max_iters=1000),
                                   'naive',
                                   oversample_from_iteration='original').rank(
                                       graph, {"A": 1})
Example #6
def test_algorithm_selection():
    for _ in supported_backends():
        _, graph, communities = next(
            pg.load_datasets_multiple_communities(["bigraph"],
                                                  max_group_number=3))
        train, test = pg.split(communities,
                               0.05)  # 5% of community members are known
        algorithms = pg.create_variations(pg.create_demo_filters(),
                                          pg.Normalize)

        supervised_algorithm = pg.AlgorithmSelection(algorithms.values(),
                                                     measure=pg.AUC,
                                                     tuning_backend="numpy")
        print(supervised_algorithm.cite())
        modularity_algorithm = pg.AlgorithmSelection(
            algorithms.values(),
            fraction_of_training=1,
            measure=pg.Modularity().as_supervised_method(),
            tuning_backend="numpy")

        supervised_aucs = list()
        modularity_aucs = list()
        for seeds, members in zip(train.values(), test.values()):
            measure = pg.AUC(members, exclude=seeds)
            supervised_aucs.append(measure(supervised_algorithm(graph, seeds)))
            modularity_aucs.append(measure(modularity_algorithm(graph, seeds)))

        assert abs(
            sum(supervised_aucs) / len(supervised_aucs) -
            sum(modularity_aucs) / len(modularity_aucs)) < 0.05
Example #7
def test_lowpass_tuning():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.1)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(lambda params: pg.GenericGraphFilter(params)).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(lambda params: pg.LowPassRecursiveGraphFilter(params)).rank(training))
    assert auc2 > auc1*0.8
Example #8
def test_hoptuner_autoregression():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.01)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC, autoregression=5).rank(training))
    assert auc3 > auc1*0.9
Example #9
def test_hoptuner_explicit_algorithm():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(lambda params: pg.GenericGraphFilter(params, krylov_dims=10), basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", krylov_dims=10, measure=pg.AUC).rank(training))
    assert abs(auc1-auc2) < 0.005
Example #10
def test_autotune_methods():
    import numpy as np
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}))
    aucs = [pg.AUC(evaluation, exclude=training)(ranker.rank(training)) for ranker in pg.create_demo_filters().values()]
    auc2 = pg.AUC(evaluation, exclude=training)(pg.AlgorithmSelection().rank(training))
    assert max(aucs)-np.std(aucs) <= auc2
Example #11
def test_autotune_manual():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    alg2 = pg.ParameterTuner(lambda params: pg.PageRank(params[0]), max_vals=[0.99], min_vals=[0.5]).tune(training)
    auc2 = pg.AUC(evaluation, exclude=training)(alg2.rank(training))
    assert auc1 <= auc2
Example #12
def test_autotune():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.PageRank().rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HeatKernel().rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.ParameterTuner(optimization_dict=dict()).rank(training))
    assert min(auc1, auc2) <= auc3 and max(auc1, auc2)*0.9 <= auc3
Example #13
def test_hoptuner_arnoldi_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="pytorch").rank(training))
    auc3 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC, tuning_backend="tensorflow").rank(training))
    assert auc1 == auc2
    assert auc1 == auc3
Example #14
def test_chebyshev():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    #  do not test with tensorflow, as it can be too slow
    training, evaluation = pg.split(pg.to_signal(graph, {v: 1 for v in group}))
    tuned_auc = pg.AUC(evaluation, training).evaluate(pg.ParameterTuner().rank(
        graph, training))
    tuned_chebyshev_auc = pg.AUC(evaluation, training).evaluate(
        pg.ParameterTuner(coefficient_type="chebyshev").rank(graph, training))
    assert (tuned_auc - tuned_chebyshev_auc) < 0.1
Example #15
def test_appnp_tf():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                tf.keras.layers.Dropout(0.5, input_shape=(num_inputs, )),
                tf.keras.layers.Dense(
                    hidden,
                    activation=tf.nn.relu,
                    kernel_regularizer=tf.keras.regularizers.L2(1.E-5)),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(num_outputs, activation=tf.nn.relu),
            ])
            self.ranker = pg.PageRank(0.9,
                                      renormalize=True,
                                      assume_immutability=True,
                                      error_type="iters",
                                      max_iters=10)
            self.input_spec = None  # prevents some versions of tensorflow from checking call inputs

        def call(self, inputs, training=False):
            graph, features = inputs
            predict = super().call(features, training=training)
            predict = self.ranker.propagate(
                graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(predict, axis=1)

    pg.load_backend('tensorflow')
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model,
                 graph,
                 features,
                 labels,
                 training,
                 validation,
                 test=test,
                 epochs=50)
    assert float(pg.gnn_accuracy(labels, model([graph, features]),
                                 test)) >= 0.5
    pg.load_backend('numpy')
Example #16
def test_autotune_backends():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    for tuner in [pg.HopTuner, pg.AlgorithmSelection, pg.ParameterTuner]:
        auc3 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence, tuning_backend="pytorch").rank(training))
        auc2 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence, tuning_backend="tensorflow").rank(training))
        auc1 = pg.AUC(evaluation, exclude=training)(tuner(measure=pg.KLDivergence).rank(training))
        # TODO: maybe fix KLDivergence implementation to not be affected by backend.epsilon()
        assert abs(auc1-auc2) < 0.005  # different results due to different backend.epsilon()
        assert abs(auc1-auc3) < 0.005
Example #17
def test_hoptuner_arnoldi():
    _, G, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    group = groups[0]
    training, evaluation = pg.split(pg.to_signal(G, {v: 1 for v in group}), training_samples=0.5)
    auc1 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(measure=pg.AUC).rank(training))
    auc2 = pg.AUC(evaluation, exclude=training)(pg.HopTuner(basis="arnoldi", measure=pg.AUC).rank(training))
    assert abs(auc1 - auc2) < 0.005
Example #18
def test_appnp_torch():
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.ParameterTuner(
                lambda par: pg.GenericGraphFilter([par[0] ** i for i in range(int(10))],
                                                  error_type="iters", max_iters=int(10)),
                max_vals=[0.95], min_vals=[0.5], verbose=False,
                measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy")

        def forward(self, features, graph, training=False):
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5*torch.norm(param)
            return ret

    def init_weights(m):
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    graph = pg.preprocessor(renormalize=True, cors=True)(graph)
    model.apply(init_weights)
    with pg.Backend('pytorch'):
        pg.gnn_train(model, features, graph, labels, training, validation, epochs=50)
Example #19
def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(pg.PageRank())).rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold("gap").transform(pg.PageRank().rank(graph, {v: 1 for v in training})))  # try all api types
        assert cond1 <= cond2
Example #20
def test_strange_input_types():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group)
    for _ in supported_backends():
        scores = pg.PageRank()(graph, {v: 1 for v in training})
        ndcg = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)(
            {v: scores[v] for v in scores})
        ndcg_biased = pg.NDCG(pg.to_signal(scores, {v: 1 for v in test}), k=3)(
            {v: scores[v] for v in test})
        assert ndcg < ndcg_biased
Example #21
def test_split():
    data = {
        "community1": ["A", "B", "C", "D"],
        "community2": ["B", "E", "F", "G", "H", "I"]
    }
    training, test = pg.split(data, 1)
    assert training == test
    training, test = pg.split(data, 0.5)
    assert len(training["community2"]) == 3
    assert len(training["community1"]) == 2
    assert len(test["community2"]) == 3
    assert len(set(training["community1"]) - set(test["community1"])) == len(
        training["community1"])
    assert len(set(training["community2"]) - set(test["community2"])) == len(
        training["community2"])
    training, test = pg.split(data, 2)
    assert len(training["community2"]) == 2
    assert len(test["community1"]) == 2
    training, test = pg.split(data["community1"], 0.75)
    assert len(training) == 3
    assert len(test) == 1
    training, test = pg.split(set(data["community1"]), 0.75)
    assert len(training) == 3
    assert len(test) == 1
Example #22
def test_auc_ndcg_compliance():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    training, test = pg.split(group, 0.5)
    for _ in supported_backends():
        scores1 = pg.PageRank()(graph, training)
        scores2 = pg.HeatKernel()(graph, training)
        AUC1 = pg.AUC(test, exclude=training)(scores1)
        AUC2 = pg.AUC(test, exclude=training)(scores2)
        NDCG1 = float(pg.NDCG(test, exclude=training)(scores1))
        NDCG2 = float(pg.NDCG(test, exclude=training)(scores2))
        assert (AUC1 < AUC2) == (NDCG1 < NDCG2)
        with pytest.raises(Exception):
            pg.AUC(test, exclude=test, k=len(graph) + 1)(scores2)
        with pytest.raises(Exception):
            pg.NDCG(test, exclude=training, k=len(graph) + 1)(scores2)
Example #23
def test_sweep():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.1)
        auc1 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.Sweep(pg.PageRank()).rank(graph, {v: 1 for v in training}))
        auc2 = pg.AUC({v: 1 for v in evaluation}, exclude=training).evaluate(
            pg.PageRank().rank(graph, {v: 1 for v in training}))
        assert auc1 > auc2
Example #24
def test_seed_top():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=2)
        original_training = set(training)
        from random import random, seed
        seed(0)
        training, evaluation = pg.to_signal(graph, {v: 1 for v in graph if v in original_training or random() < 0.5}), \
                               pg.to_signal(graph, {v: 1 for v in evaluation})
        for measure in [pg.AUC, pg.NDCG]:
            #ranks = pg.PageRank(0.9, max_iters=1000).rank(graph, training)
            #base_result = measure(evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(pg.Sweep(pg.PageRank(0.9, max_iters=1000)),
                           0.9).rank(graph, training)
            undersampled_result1 = measure(
                evaluation, list(original_training)).evaluate(ranks)
            ranks = pg.Top(2, pg.Sweep(pg.PageRank(0.9, max_iters=1000))).rank(
                graph, training)
            undersampled_result2 = measure(
                evaluation, list(original_training)).evaluate(ranks)
Example #25
def test_threshold():
    _, graph, group = next(pg.load_datasets_one_community(["bigraph"]))
    for _ in supported_backends():
        training, evaluation = pg.split(list(group), training_samples=0.5)
        algorithm = pg.PageRank()
        cond1 = pg.Conductance().evaluate(
            pg.Threshold(pg.Sweep(algorithm), "gap").rank(graph, {v: 1 for v in training}))
        cond2 = pg.Conductance().evaluate(
            pg.Threshold(0.3).transform(algorithm.rank(graph, {v: 1 for v in training})))  # try all api types
        cond3 = pg.Conductance().evaluate(
            pg.Threshold(1).transform(algorithm.rank(graph, {v: 1 for v in training})))  # should yield infinite conductance
        # TODO: find an algorithm other than gap to outperform 0.2 threshold too
        assert cond1 <= cond2
        assert cond2 <= cond3
Example #26
def test_fair_personalizer_mistreatment():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "Base": lambda G, p, s: H.rank(G, p),
        "FairPersMistreat": pg.Normalize(pg.FairPersonalizer(H, parity_type="mistreatment", pRule_weight=10)),
        "FairPersTPR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TPR", pRule_weight=10)),
        "FairPersTNR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TNR", pRule_weight=-1))  # TNR optimization increases mistreatment for this example
    }
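    # disparate mistreatment measure: the mean (pg.AM) of the TPR disparity and the TNR disparity
    # between the sensitive and non-sensitive groups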
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))])])
    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    train, test = pg.split(labels)
    # TODO: maybe try to check for greater improvement
    base_mistreatment = mistreatment(test, sensitive, train)(algorithms["Base"](graph, train, sensitive))
    for algorithm in algorithms.values():
        if algorithm != algorithms["Base"]:
            print(algorithm.cite())
            assert base_mistreatment >= mistreatment(test, sensitive, train)(algorithm(graph, train, sensitive))
Example #27
nodes = list(graph)
# graph = nx.Graph()
for node in nodes:
    graph.add_node(node)
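# add one "word" node per feature dimension and connect each document node to the
# words it contains, with edge weight equal to the corresponding feature value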
wordnames = {i: "w"+str(i) for i in range(features.shape[1])}
for i, node in enumerate(nodes):
    for j in range(features.shape[1]):
        if features[i, j] != 0:
            graph.add_edge(node, wordnames[j], weight=features[i, j])

group_lists = list(groups.values())
groups = [pg.to_signal(graph, group) for group in groups.values()]
accs = list()
for seed in range(100):
    ranker = pg.PageRank(0.85, renormalize=True, assume_immutability=True,
                         use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

    #ranker = pg.LowPassRecursiveGraphFilter([1 - .9 / (pg.log(i + 1) + 1) for i in range(10)], renormalize=True, assume_immutability=True, tol=None)

    training, test = pg.split(nodes, 0.8, seed=seed)
    training = set(training)
    ranks_set = [ranker(graph, {node: 1 for node in group if node in training}) for group in group_lists]
    options = list(range(len(ranks_set)))
    found_set = [list() for _ in training]
    tp = 0
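    # a test node counts as correct when its highest-scoring community matches its ground-truth community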
    for v in test:
        if max(options, key=lambda i: ranks_set[i][v]) == max(options, key=lambda i: groups[i][v]):
            tp += 1
    accs.append(tp/len(test))
    print(sum(accs)/len(accs))
Example #28
import pygrank as pg


def overlapping_community_detection(graph, known_members, top=None):
    # the snippet's opening was not shown; graph_filter is assumed here to be
    # personalized PageRank (the original choice of filter may differ)
    graph_filter = pg.PageRank()
    ranks = (pg.to_signal(graph, {v: 1 for v in known_members})
             >> pg.Sweep(graph_filter) >> pg.Normalize("range"))
    if top is not None:
        ranks = ranks * (1 - pg.to_signal(graph, {v: 1 for v in known_members}))  # set known member scores to zero
        return sorted(list(graph), key=lambda node: -ranks[node])[:top]  # return specific number of top predictions

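    # otherwise pick the score threshold that minimizes the conductance of the induced community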
    threshold = pg.optimize(
        max_vals=[1],
        loss=lambda p: pg.Conductance(graph)(pg.Threshold(p[0]).transform(ranks)))[0]
    known_members = set(known_members)
    return [
        v for v in graph if ranks[v] > threshold and v not in known_members
    ]


_, graph, group = next(pg.load_datasets_one_community(["citeseer"]))
print(len(group))
train, test = pg.split(group, 0.1)
found = overlapping_community_detection(graph, train)

# node-based evaluation (we work on the returned list of nodes instead of graph signals)
test = set(test)
TP = len([v for v in found if v in test])
print("Precision", TP / len(found))
print("Recall", TP / len(test))
print("Match size", len(found) / len(test))
Example #29
import pygrank as pg


def community_detection(graph, known_members_set):
    ranks_set = [
        pg.ParameterTuner()(graph, known_members)
        for known_members in known_members_set
    ]
    options = list(range(len(ranks_set)))
    found_set = [list() for _ in known_members_set]
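    # assign each node to the community whose tuned ranker scores it highest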
    for v in graph:
        found_set[max(options, key=lambda i: ranks_set[i][v])].append(v)
    return found_set


_, graph, groups = next(pg.load_datasets_multiple_communities(["citeseer"]))
train_set, test_set = pg.split(groups, 0.5)
train_set = train_set.values()
test_set = test_set.values()
found_set = community_detection(graph, train_set)
precisions = list()
recalls = list()
for found, train, test in zip(found_set, train_set, test_set):
    train, test = set(train), set(test)
    new_nodes = [v for v in found if v not in train]
    TP = len([v for v in new_nodes if v in test])
    precisions.append(TP / len(new_nodes) if new_nodes else 0)
    recalls.append(TP / len(test))
print("Avg. precision", sum(precisions) / len(precisions))
print("Avg. recall", sum(recalls) / len(recalls))
Example #30
    def call(self, inputs, training=False):
        graph, features = inputs
        predict = super().call(features, training=training)
        if not training or self.propagate_on_training:
            predict = self.ranker.propagate(
                graph,
                predict,
                graph_dropout=self.graph_dropout if training else 0)

        return tf.nn.softmax(predict, axis=1)


pg.load_backend('numpy')
graph, features, labels = pg.load_feature_dataset('cora')
for seed in range(10):
    training, test = pg.split(list(range(len(graph))), 0.8, seed=seed)
    training, validation = pg.split(training, 1 - 0.2 / 0.8, seed=seed)
    architectures = {
        "APPNP": APPNP(features.shape[1], labels.shape[1], alpha=0.9),
        #"LAPPNP": APPNP(features.shape[1], labels.shape[1], alpha=tf.Variable([0.85])),
        "APFNP": APPNP(features.shape[1], labels.shape[1], alpha="estimated")
    }

    pg.load_backend('tensorflow')
    accs = dict()
    for architecture, model in architectures.items():
        pg.gnn_train(model,
                     graph,
                     features,
                     labels,
                     training,