def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "LFPRN": lambda G, p, s: pg.Normalize()(pg.LFPR().rank(G, p, sensitive=s)),
        "LFPRP": lambda G, p, s: pg.Normalize()(pg.LFPR(redistributor="original").rank(G, p, sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    import networkx as nx
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"], graph_api=nx))  # TODO: networkx needed due to edge weighting by some algorithms
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6  # TODO: check why FairWalk fails by that much and increase the limit
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
    sensitive = 1 - sensitive.np
    for name, algorithm in algorithms.items():
        ranks = algorithm(graph, labels, sensitive)
        if name == "FairWalk":
            assert pg.pRule(sensitive)(ranks) > 0.6
        else:
            assert pg.pRule(sensitive)(ranks) > 0.98
def test_fair_heuristics():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairO": lambda G, p, s: pg.Normalize(pg.AdHocFairness(H, method="O")).rank(G, sensitive=s),
        "FairB": lambda G, p, s: pg.Normalize()(pg.AdHocFairness("B").transform(H.rank(G, p), sensitive=s)),
        "FairWalk": lambda G, p, s: pg.FairWalk(H).rank(G, p, sensitive=s)
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.6  # TODO: check why FairWalk fails by that much and increase the limit
    sensitive = 1 - sensitive.np
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.6
def test_fair_personalizer():
    H = pg.PageRank(assume_immutability=True, normalization="symmetric")
    algorithms = {
        "FairPers": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPers-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, pRule_weight=10, error_type=pg.Mabs, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, error_skewing=True, max_residual=0)).rank(G, p, sensitive=s),
        "FairPersSkew-C": lambda G, p, s: pg.Normalize(
            pg.FairPersonalizer(H, .80, error_skewing=True, pRule_weight=10, max_residual=0)).rank(G, p, sensitive=s),
    }
    _, graph, groups = next(pg.load_datasets_multiple_communities(["bigraph"]))
    labels = pg.to_signal(graph, groups[0])
    sensitive = pg.to_signal(graph, groups[1])
    for algorithm in algorithms.values():
        ranks = algorithm(graph, labels, sensitive)
        assert pg.pRule(sensitive)(ranks) > 0.79  # allow some leeway for generalization capabilities compared to the 80% target
def rank(self, graph, personalization, sensitive, *args, **kwargs):
    original_ranks = self.ranker(graph, personalization, *args, sensitive=sensitive, **kwargs)
    base_ranks = original_ranks if self.ranker == self.base_ranker \
        else self.base_ranker(graph, personalization, *args, **kwargs)
    # Objective to maximize (the loss below is its negation): fidelity to the base ranks
    # via negative L2 error, plus 10x the pRule of the sensitive group capped at 0.8.
    training_objective = pg.AM()\
        .add(pg.L2(base_ranks), weight=-1.)\
        .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), weight=10., max_val=0.8)
    with pg.Backend("tensorflow"):
        # Treat the node scores themselves as the trainable variables.
        ranks_var = tf.Variable(pg.to_array(original_ranks.np))
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
        best_loss = float('inf')
        best_ranks = None
        for epoch in range(2000):
            with tf.GradientTape() as tape:
                ranks = pg.to_signal(original_ranks, ranks_var)
                loss = -training_objective(ranks)  # + 1.E-5*tf.reduce_sum(ranks_var*ranks_var)
            grads = tape.gradient(loss, [ranks_var])
            optimizer.apply_gradients(zip(grads, [ranks_var]))
            validation_loss = loss
            # Patience-based early stopping: reset the counter whenever the loss improves.
            if validation_loss < best_loss:
                patience = 100
                best_ranks = ranks
                best_loss = validation_loss
            patience -= 1
            if patience == 0:
                break
    return best_ranks
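# The method above performs gradient-based post-processing: node scores are treated as free
# variables and ascended on an objective that trades off closeness to the base ranks (negative
# L2 error) against the pRule of the sensitive group, capped at 0.8. The standalone sketch
# below isolates that objective on made-up toy data so the sign conventions are easier to
# follow; it is only an illustrative assumption, not part of the library code, and the graph
# and signals are invented for the example.
import networkx as nx
import tensorflow as tf
import pygrank as pg

toy_graph = nx.Graph([(0, 1), (1, 2), (2, 3), (3, 0)])
toy_ranks = pg.to_signal(toy_graph, [0.9, 0.1, 0.8, 0.2])      # scores being adjusted
toy_base = pg.to_signal(toy_graph, [0.9, 0.1, 0.8, 0.2])       # reference scores to stay close to
toy_sensitive = pg.to_signal(toy_graph, [0, 1, 0, 1])          # protected group membership

toy_objective = pg.AM()\
    .add(pg.L2(toy_base), weight=-1.)\
    .add(pg.pRule(tf.cast(toy_sensitive.np, tf.float32)), weight=10., max_val=0.8)
with pg.Backend("tensorflow"):
    # Larger objective is better; rank() above minimizes its negation with Adam.
    print(toy_objective(toy_ranks))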
def train_model(self, graph, personalization, sensitive, *args, **kwargs):
    original_ranks = self.ranker(graph, personalization, *args, **kwargs)
    #pretrained_ranks = None if self.pretrainer is None else self.pretrainer(graph, personalization, *args, sensitive=sensitive, **kwargs)
    # Per-node features: original personalization, base ranks, and the sensitive attribute.
    features = tf.concat([
        tf.reshape(personalization.np, (-1, 1)),
        tf.reshape(original_ranks.np, (-1, 1)),
        tf.reshape(sensitive.np, (-1, 1))
    ], axis=1)
    # Loss to minimize: L2 distance from the original ranks minus 10x the pRule (capped at 0.8),
    # i.e. stay close to the original scores while pushing fairness up to the cap.
    training_objective = pg.AM()\
        .add(pg.L2(tf.cast(original_ranks.np, tf.float32)), weight=1.)\
        .add(pg.pRule(tf.cast(sensitive.np, tf.float32)), max_val=0.8, weight=-10.)
    model = self.model()
    with pg.Backend("tensorflow"):
        best_loss = float('inf')
        best_ranks = None
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
        #degrade = 1
        for epoch in range(5000):
            with tf.GradientTape() as tape:
                # The model rewrites the personalization, which is then re-ranked and scored.
                personalization = pg.to_signal(personalization, model(features))
                #personalization.np = tf.nn.relu(personalization.np*2-1)
                ranks = self.ranker(graph, personalization, *args, **kwargs)
                loss = training_objective(ranks)
                for var in model.trainable_variables:
                    loss = loss + 1.E-5 * tf.reduce_sum(var * var)  # L2 weight regularization
                #loss = loss * degrade
            grads = tape.gradient(loss, model.trainable_variables)
            #degrade *= 0.9
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            validation_loss = training_objective(ranks)
            # Patience-based early stopping: reset the counter whenever the loss improves.
            if validation_loss < best_loss:
                patience = 10
                best_ranks = ranks
                best_loss = validation_loss
                print("epoch", epoch, "loss", validation_loss, "prule",
                      pg.pRule(tf.cast(sensitive.np, tf.float32))(ranks))
            patience -= 1
            if patience == 0:
                break
    return best_ranks
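# train_model() above learns to rewrite the personalization vector: three per-node features
# (original personalization, base ranks, sensitive attribute) are fed to self.model(), whose
# output becomes the new personalization that is re-ranked and scored by the fairness-aware
# objective. The source does not show what self.model() constructs; the definition below is a
# hypothetical minimal choice, assuming it should map the three features to one non-negative
# value per node (its (n, 1) output may need squeezing into a one-dimensional signal).
import tensorflow as tf

def example_personalization_model():
    return tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu", input_shape=(3,)),
        tf.keras.layers.Dense(1, activation="relu"),  # non-negative per-node personalization
    ])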
def test_edge_cases():
    assert pg.pRule([0])([0]) == 0
    assert pg.Cos([0])([0]) == 0
    with pytest.raises(Exception):
        pg.Measure()([0, 1, 0])
    with pytest.raises(Exception):
        pg.AUC([0, 0, 0])([0, 1, 0])
    with pytest.raises(Exception):
        pg.AUC([1, 1, 1])([0, 1, 0])
    with pytest.raises(Exception):
        pg.KLDivergence([0], exclude={"A": 1})([1])
    with pytest.raises(Exception):
        pg.Conductance(next(pg.load_datasets_graph(["graph5"])), max_rank=0.5)([1, 1, 1, 1, 1])
    import networkx as nx
    for _ in supported_backends():
        assert pg.Conductance(nx.Graph())([]) == float("inf")  # this is indeed correct in python
        assert pg.Density(nx.Graph())([]) == 0
        assert pg.Modularity(nx.Graph())([]) == 0
        assert pg.KLDivergence([0, 1, 0])([0, 1, 0]) == 0
        assert pg.MKLDivergence([0, 1, 0])([0, 1, 0]) == 0
        assert pg.KLDivergence([0])([-1]) == 0