Example #1
import pygrank as pg


def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    # Candidate graph filters: personalized PageRank and heat kernels of varying locality.
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }

    # Also report an algorithm auto-selected per dataset on a validation split.
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}
    # Benchmark 1: AUC on each community.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    # Benchmark 2: Modularity, with pRule as the fairness-sensitive metric.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
    # Disparate mistreatment: mean disparity of TPR and TNR between the
    # sensitive group and its complement, measured on test nodes only.
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    # Benchmark 3: Modularity, with disparate mistreatment as the sensitive metric.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment, fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
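
A minimal way to run this benchmark outside a test runner (a sketch; pygrank is expected to fetch the "bigraph" dataset on first use):

# Hypothetical standalone entry point for the test above.
if __name__ == "__main__":
    test_all_communities_benchmarks()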
Example #2
    # Assumes `import pygrank as pg` and `import tensorflow as tf` at module level.
    def rank(self, graph, personalization, sensitive, *args, **kwargs):
        original_ranks = self.ranker(graph, personalization, *args,
                                     sensitive=sensitive, **kwargs)
        base_ranks = original_ranks if self.ranker == self.base_ranker else self.base_ranker(
            graph, personalization, *args, **kwargs)
        # Maximized objective: raise pRule toward the 0.8 cap while the
        # negative-weight L2 term keeps the scores close to the base ranks.
        training_objective = pg.AM()\
            .add(pg.L2(base_ranks), weight=-1.)\
            .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), weight=10., max_val=0.8)

        with pg.Backend("tensorflow"):
            ranks_var = tf.Variable(pg.to_array(original_ranks.np))
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
            best_loss = float('inf')
            best_ranks = None
            patience = 100  # early-stopping budget, refreshed on each improvement
            for epoch in range(2000):
                with tf.GradientTape() as tape:
                    ranks = pg.to_signal(original_ranks, ranks_var)
                    loss = -training_objective(ranks)
                    # optional regularizer: loss += 1.E-5 * tf.reduce_sum(ranks_var * ranks_var)
                grads = tape.gradient(loss, [ranks_var])
                optimizer.apply_gradients(zip(grads, [ranks_var]))
                validation_loss = loss  # no held-out split; training loss drives early stopping
                if validation_loss < best_loss:
                    patience = 100
                    best_ranks = ranks
                    best_loss = validation_loss
                patience -= 1
                if patience == 0:
                    break
        return best_ranks
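
For context, a hypothetical scaffold that could host the rank() method above; the class and attribute names here are assumptions, not pygrank API:

import pygrank as pg
import tensorflow as tf

# Hypothetical host class: self.ranker produces fairness-aware ranks and
# self.base_ranker the reference ranks the L2 term anchors them to.
class FairRankOptimizer:
    def __init__(self, ranker, base_ranker=None):
        self.ranker = ranker
        self.base_ranker = ranker if base_ranker is None else base_ranker

    # rank(self, graph, personalization, sensitive, *args, **kwargs) as defined above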
Example #3
    # Assumes `import pygrank as pg` and `import tensorflow as tf` at module level.
    def train_model(self, graph, personalization, sensitive, *args, **kwargs):
        original_ranks = self.ranker(graph, personalization, *args, **kwargs)
        #pretrained_ranks = None if self.pretrainer is None else self.pretrainer(graph, personalization, *args, sensitive=sensitive, **kwargs)
        # Per-node features: personalization, original rank, sensitive attribute.
        features = tf.concat([tf.reshape(personalization.np, (-1, 1)),
                              tf.reshape(original_ranks.np, (-1, 1)),
                              tf.reshape(sensitive.np, (-1, 1))], axis=1)
        # Minimized objective: stay close to the original ranks (L2) while the
        # negative-weight pRule term pushes fairness up to the 0.8 cap.
        training_objective = pg.AM()\
            .add(pg.L2(tf.cast(original_ranks.np, tf.float32)), weight=1.)\
            .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), max_val=0.8, weight=-10.)
        model = self.model()
        with pg.Backend("tensorflow"):
            best_loss = float('inf')
            best_ranks = None
            patience = 10  # early-stopping budget, refreshed on each improvement
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

            #degrade = 1
            for epoch in range(5000):
                with tf.GradientTape() as tape:
                    personalization = pg.to_signal(personalization, model(features))
                    #personalization.np = tf.nn.relu(personalization.np*2-1)
                    ranks = self.ranker(graph, personalization, *args, **kwargs)
                    loss = training_objective(ranks)
                    for var in model.trainable_variables:
                        loss = loss + 1.E-5 * tf.reduce_sum(var * var)
                    #loss = loss * degrade
                grads = tape.gradient(loss, model.trainable_variables)
                #degrade *= 0.9
                optimizer.apply_gradients(zip(grads, model.trainable_variables))
                validation_loss = training_objective(ranks)  # objective without the weight regularizer
                if validation_loss < best_loss:
                    patience = 10
                    best_ranks = ranks
                    best_loss = validation_loss
                    print("epoch", epoch, "loss", validation_loss, "prule",
                          pg.pRule(tf.cast(sensitive.np, tf.float32))(ranks))
                patience -= 1
                if patience == 0:
                    break
        return best_ranks
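
The self.model() factory is not shown in the snippet; a minimal sketch of what it could return, given that features has three columns (a hypothetical stand-in, not from the source):

import tensorflow as tf

# Hypothetical model() factory: a small MLP mapping the three per-node features
# (personalization, original rank, sensitive attribute) to one score in [0, 1].
def make_model():
    inputs = tf.keras.Input(shape=(3,))
    hidden = tf.keras.layers.Dense(16, activation="relu")(inputs)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)
    return tf.keras.Model(inputs, outputs)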
Example #4
# Assumes `import pygrank as pg` and the test module's `supported_backends()` helper.
def test_aggregated():
    y1 = [1, 1, 0]
    y2 = [1, 0, 0]
    y3 = [1, 1, 0]
    for _ in supported_backends():
        # TODO: investigate why results are not always exactly the same (numerical precision should be lower for numpy)
        epsilon = 1.E-6
        assert abs(
            float(pg.GM().add(pg.AUC(y1), max_val=0.5).add(
                pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.45**0.5) < epsilon
        assert abs(
            float(pg.AM().add(pg.AUC(y1), max_val=0.5).add(
                pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.7) < epsilon
        assert abs(
            float(pg.Disparity().add(pg.AUC(y1), max_val=0.5).add(
                pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.4) < epsilon
        assert abs(
            float(pg.Disparity().add(pg.AUC(y1), max_val=0.5).add(
                pg.AUC(y2), min_val=0.9).evaluate(y3)) +
            float(pg.Parity().add(pg.AUC(y1), max_val=0.5).add(
                pg.AUC(y2), min_val=0.9).evaluate(y3) - 1)) < epsilon
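
The expected constants can be back-derived from the asserts themselves, which also documents the clamp and aggregation semantics they rely on:

# AUC(y1) on y3 = 1.0  -> capped by max_val=0.5 to 0.5
# AUC(y2) on y3 = 0.75 -> floored by min_val=0.9 to 0.9
# GM:        (0.5 * 0.9) ** 0.5 = 0.45 ** 0.5
# AM:        (0.5 + 0.9) / 2    = 0.7
# Disparity: 0.9 - 0.5          = 0.4
# Parity:    1 - Disparity      = 0.6, so the final assert's sum vanishes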
Example #5
# Assumes `import pygrank as pg`.
def test_fair_personalizer_mistreatment():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "Base": lambda G, p, s: H.rank(G, p),
        "FairPersMistreat": pg.Normalize(pg.FairPersonalizer(H, parity_type="mistreatment", pRule_weight=10)),
        "FairPersTPR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TPR", pRule_weight=10)),
        "FairPersTNR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TNR", pRule_weight=-1))  # TNR optimization increases mistreatment for this example
    }
    # Same mistreatment measure as in Example #1, expressed on raw signals.
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))])])
    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])     # community to predict
    sensitive = pg.to_signal(graph, groups[1])  # protected attribute
    train, test = pg.split(labels)
    # TODO: maybe try to check for greater improvement
    base_mistreatment = mistreatment(test, sensitive, train)(algorithms["Base"](graph, train, sensitive))
    for algorithm in algorithms.values():
        if algorithm != algorithms["Base"]:
            print(algorithm.cite())
            assert base_mistreatment >= mistreatment(test, sensitive, train)(algorithm(graph, train, sensitive))
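
The exclusion masks inside mistreatment are worth unpacking; with 0/1 signals, each exclude argument keeps exactly one group of test nodes:

# 1 - (1 - exclude) * sensitive        -> 0 only for test nodes IN the sensitive group
# 1 - (1 - exclude) * (1 - sensitive)  -> 0 only for test nodes OUTSIDE it
# Each Disparity therefore compares TPR (or TNR) across the two groups, and
# pg.AM averages the two disparities into a single mistreatment score.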
Example #6
                            # (truncated: the lines below close what is apparently a
                            #  pg.FairPersonalizer entry of an `algorithms` dict)
                            .8,
                            pRule_weight=10,
                            max_residual=1,
                            error_type=pg.Mabs,
                            error_skewing=False,
                            parameter_buckets=1,
                            parity_type="impact")
        #"FFfix-C": pg.FairTradeoff(filter, .8, pRule_weight=10, error_type=pg.Mabs)
        #"FairTf": pg.FairnessTf(filter)
    }
    # Wrap every algorithm with rank normalization.
    algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

    #import cProfile as profile
    #pr = profile.Profile()
    #pr.enable()
    # Same mistreatment measure as in earlier examples (not used by the
    # benchmark call below, which passes sensitive=pg.pRule instead).
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])
    pg.benchmark_print(pg.benchmark(algorithms,
                                    pg.load_datasets_multiple_communities(
                                        datasets, max_group_number=2),
                                    metric=pg.AUC,
                                    sensitive=pg.pRule,
                                    fraction_of_training=seed_fractions),
                       delimiter=" & ",
                       end_line="\\\\")

    #pr.disable()
    #pr.dump_stats('profile.pstat')
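
The snippet references filter, datasets, and seed_fractions defined earlier in the original file; a hypothetical completion, purely for illustration:

import pygrank as pg

# Hypothetical stand-ins for the truncated definitions (not from the source).
filter = pg.PageRank(alpha=0.9, max_iters=10000, tol=1.E-9)
datasets = ["bigraph"]   # any pygrank community dataset
seed_fractions = [0.5]   # training seed fraction(s) forwarded to pg.benchmark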