def test_all_communities_benchmarks():
    datasets = ["bigraph"]
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    tol = 1.E-9
    optimization = pg.SelfClearDict()
    # Personalized PageRank and heat kernel variants sharing the same preprocessor and tolerance.
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, max_iters=10000, tol=tol),
        "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, max_iters=10000, tol=tol),
        "hk3": pg.HeatKernel(t=3, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk5": pg.HeatKernel(t=5, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
        "hk7": pg.HeatKernel(t=7, preprocessor=pre, max_iters=10000, tol=tol, optimization_dict=optimization),
    }
    tuned = {"selected": pg.AlgorithmSelection(algorithms.values(), fraction_of_training=0.8)}

    # Benchmark with AUC as the evaluation measure.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.AUC,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

    # Benchmark with Modularity as the evaluation measure and pRule as the sensitive-aware measure.
    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=pg.pRule,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")

    # Disparate mistreatment: average of TPR and TNR disparities between the sensitive
    # and non-sensitive groups, computed only on test (non-excluded) nodes.
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])

    loader = pg.load_datasets_all_communities(datasets, min_group_size=50)
    pg.benchmark_print(pg.benchmark(algorithms | tuned, loader, pg.Modularity, sensitive=mistreatment,
                                    fraction_of_training=.8, seed=list(range(1))),
                       decimals=3, delimiter=" & ", end_line="\\\\")
def rank(self, graph, personalization, sensitive, *args, **kwargs):
    # Obtain the fairness-aware scores and, if the ranker differs from the base ranker,
    # the base scores that serve as the proximity target.
    original_ranks = self.ranker(graph, personalization, *args, sensitive=sensitive, **kwargs)
    base_ranks = original_ranks if self.ranker == self.base_ranker \
        else self.base_ranker(graph, personalization, *args, **kwargs)

    # Objective: stay close to the base ranks (negative L2 term) while rewarding pRule up to 0.8.
    training_objective = pg.AM() \
        .add(pg.L2(base_ranks), weight=-1.) \
        .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), weight=10., max_val=0.8)

    with pg.Backend("tensorflow"):
        ranks_var = tf.Variable(pg.to_array(original_ranks.np))
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
        best_loss = float('inf')
        best_ranks = None
        patience = 100  # early-stopping patience (reset whenever the loss improves)
        for epoch in range(2000):
            with tf.GradientTape() as tape:
                ranks = pg.to_signal(original_ranks, ranks_var)
                loss = -training_objective(ranks)  # + 1.E-5*tf.reduce_sum(ranks_var*ranks_var)
            grads = tape.gradient(loss, [ranks_var])
            optimizer.apply_gradients(zip(grads, [ranks_var]))
            validation_loss = loss
            if validation_loss < best_loss:
                patience = 100
                best_ranks = ranks
                best_loss = validation_loss
            patience -= 1
            if patience == 0:
                break
    return best_ranks
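# A minimal sketch (an assumption, not shown in the excerpt above) of the wrapper class
# that the rank method could belong to: it holds a fairness-aware ranker and a base ranker
# whose scores serve as the proximity target during the post-processing optimization.
# The class name and constructor are hypothetical.
class FairRankPostprocessing:
    def __init__(self, ranker, base_ranker=None):
        self.ranker = ranker
        self.base_ranker = ranker if base_ranker is None else base_ranker

    # rank(self, graph, personalization, sensitive, *args, **kwargs) as defined above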
def train_model(self, graph, personalization, sensitive, *args, **kwargs):
    original_ranks = self.ranker(graph, personalization, *args, **kwargs)
    # pretrained_ranks = None if self.pretrainer is None else self.pretrainer(graph, personalization, *args, sensitive=sensitive, **kwargs)

    # Per-node features: the personalization, the original ranks, and the sensitive attribute.
    features = tf.concat([tf.reshape(personalization.np, (-1, 1)),
                          tf.reshape(original_ranks.np, (-1, 1)),
                          tf.reshape(sensitive.np, (-1, 1))], axis=1)

    # Objective: keep close to the original ranks while rewarding pRule up to 0.8.
    training_objective = pg.AM() \
        .add(pg.L2(tf.cast(original_ranks.np, tf.float32)), weight=1.) \
        .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), max_val=0.8, weight=-10.)

    model = self.model()
    with pg.Backend("tensorflow"):
        best_loss = float('inf')
        best_ranks = None
        patience = 10  # early-stopping patience (reset whenever the loss improves)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
        # degrade = 1
        for epoch in range(5000):
            with tf.GradientTape() as tape:
                personalization = pg.to_signal(personalization, model(features))
                # personalization.np = tf.nn.relu(personalization.np*2-1)
                ranks = self.ranker(graph, personalization, *args, **kwargs)
                loss = training_objective(ranks)
                # L2 regularization on the model's trainable parameters.
                for var in model.trainable_variables:
                    loss = loss + 1.E-5 * tf.reduce_sum(var * var)
                # loss = loss * degrade
            grads = tape.gradient(loss, model.trainable_variables)
            # degrade *= 0.9
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            validation_loss = training_objective(ranks)
            if validation_loss < best_loss:
                patience = 10
                best_ranks = ranks
                best_loss = validation_loss
                print("epoch", epoch, "loss", validation_loss,
                      "prule", pg.pRule(tf.cast(sensitive.np, tf.float32))(ranks))
            patience -= 1
            if patience == 0:
                break
    return best_ranks
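# A minimal sketch of what self.model() might construct here (an assumption; the actual
# architecture is not part of the excerpt): a small Keras MLP mapping the three per-node
# features (personalization, original rank, sensitive attribute) to one personalization score.
import tensorflow as tf

def make_editing_model(num_features=3):
    return tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),  # per-node value in [0, 1]
    ])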
def test_aggregated():
    y1 = [1, 1, 0]
    y2 = [1, 0, 0]
    y3 = [1, 1, 0]
    for _ in supported_backends():
        # TODO: investigate why results are not always exactly the same (numerical precision should be lower for numpy)
        epsilon = 1.E-6
        assert abs(float(pg.GM().add(pg.AUC(y1), max_val=0.5)
                                .add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.45 ** 0.5) < epsilon
        assert abs(float(pg.AM().add(pg.AUC(y1), max_val=0.5)
                                .add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.7) < epsilon
        assert abs(float(pg.Disparity().add(pg.AUC(y1), max_val=0.5)
                                       .add(pg.AUC(y2), min_val=0.9).evaluate(y3)) - 0.4) < epsilon
        assert abs(float(pg.Disparity().add(pg.AUC(y1), max_val=0.5)
                                       .add(pg.AUC(y2), min_val=0.9).evaluate(y3))
                   + float(pg.Parity().add(pg.AUC(y1), max_val=0.5)
                                      .add(pg.AUC(y2), min_val=0.9).evaluate(y3) - 1)) < epsilon
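# A minimal sketch (reusing only the pygrank calls from the test above) of the complementarity
# that the last assertion checks: Parity and Disparity built from the same aggregated measures
# should evaluate to values that sum to approximately 1.
import pygrank as pg

def parity_disparity_sketch():
    y1, y2, y3 = [1, 1, 0], [1, 0, 0], [1, 1, 0]
    disparity = pg.Disparity().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9)
    parity = pg.Parity().add(pg.AUC(y1), max_val=0.5).add(pg.AUC(y2), min_val=0.9)
    print(float(disparity.evaluate(y3)) + float(parity.evaluate(y3)))  # expected to be close to 1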
def test_fair_personalizer_mistreatment():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "Base": lambda G, p, s: H.rank(G, p),
        "FairPersMistreat": pg.Normalize(pg.FairPersonalizer(H, parity_type="mistreatment", pRule_weight=10)),
        "FairPersTPR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TPR", pRule_weight=10)),
        # TNR optimization increases mistreatment for this example
        "FairPersTNR": pg.Normalize(pg.FairPersonalizer(H, parity_type="TNR", pRule_weight=-1)),
    }
    # Disparate mistreatment: average of TPR and TNR disparities between the sensitive
    # and non-sensitive groups, computed only on test (non-excluded) nodes.
    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude) * sensitive_signal),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude) * (1 - sensitive_signal))])])

    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    train, test = pg.split(labels)

    # TODO: maybe try to check for greater improvement
    base_mistreatment = mistreatment(test, sensitive, train)(algorithms["Base"](graph, train, sensitive))
    for algorithm in algorithms.values():
        if algorithm != algorithms["Base"]:
            print(algorithm.cite())
            assert base_mistreatment >= mistreatment(test, sensitive, train)(algorithm(graph, train, sensitive))
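# A minimal usage sketch (an illustration reusing the calls from the test above, not additional
# library functionality): apply one fairness-aware personalizer to a single dataset and report
# the pRule of its scores. The function name is hypothetical.
import pygrank as pg

def fair_personalizer_usage_sketch():
    _, graph, groups = next(pg.load_datasets_multiple_communities(["synthfeats"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    train, test = pg.split(labels)
    base = pg.PageRank(assume_immutability=True, normalization="symmetric")
    fair = pg.Normalize(pg.FairPersonalizer(base, parity_type="mistreatment", pRule_weight=10))
    scores = fair(graph, train, sensitive)
    print("pRule", pg.pRule(sensitive)(scores))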
                                  .8, pRule_weight=10, max_residual=1, error_type=pg.Mabs,
                                  error_skewing=False, parameter_buckets=1, parity_type="impact")
        # "FFfix-C": pg.FairTradeoff(filter, .8, pRule_weight=10, error_type=pg.Mabs)
        # "FairTf": pg.FairnessTf(filter)
    }
    algorithms = pg.create_variations(algorithms, {"": pg.Normalize})

    # import cProfile as profile
    # pr = profile.Profile()
    # pr.enable()

    mistreatment = lambda known_scores, sensitive_signal, exclude: \
        pg.AM([pg.Disparity([pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TPR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))]),
               pg.Disparity([pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * sensitive_signal.np),
                             pg.TNR(known_scores, exclude=1 - (1 - exclude.np) * (1 - sensitive_signal.np))])])

    pg.benchmark_print(pg.benchmark(algorithms,
                                    pg.load_datasets_multiple_communities(datasets, max_group_number=2),
                                    metric=pg.AUC, sensitive=pg.pRule,
                                    fraction_of_training=seed_fractions),
                       delimiter=" & ", end_line="\\\\")
    # pr.disable()
    # pr.dump_stats('profile.pstat')