def _equalize(self, equality_sets):  # pylint: disable=R0914
    """Equalize the word vectors of every equality set in place.

    For each set, the normalized member vectors are averaged into a
    center. ``project_reject_vector`` splits that center, with respect to
    ``self.direction``, into two parts (presumably the projection onto the
    direction and the orthogonal remainder -- confirm against the helper).
    Each member word is then overwritten in ``self.model`` with::

        remainder + sqrt(1 - ||remainder||^2) * normalize(p - center_p)

    where ``p`` is the member's own projection and ``center_p`` is the
    center's projection.

    If ``self._verbose`` is truthy, a per-word table is printed at the end.
    The method finishes by calling ``self.model.init_sims(replace=True)``.

    ``equality_sets`` is an iterable of word collections. Each collection
    is iterated twice, so it must be re-iterable (e.g. a tuple or list).
    """
    self._is_direction_identified()

    report_rows = [] if self._verbose else None

    for set_index, set_words in enumerate(equality_sets):
        # All vectors are read before any word of this set is rewritten.
        unit_vectors = [normalize(self[word]) for word in set_words]

        center = np.mean(unit_vectors, axis=0)
        center_along, center_off = project_reject_vector(center,
                                                         self.direction)
        scaling = np.sqrt(1 - np.linalg.norm(center_off)**2)

        for word, unit_vector in zip(set_words, unit_vectors):
            along = project_vector(unit_vector, self.direction)
            centered_unit = normalize(along - center_along)

            # This follows the formula in the Bolukbasi et al. article,
            # which differs from their reference implementation:
            #   equalized_vector = rejected_center + scaling * self.direction
            # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
            # The two presumably agree only for equality sets of size 2
            # (pairs), where the centered projections of the two words are
            # opposite to each other -- not verified.
            equalized = center_off + scaling * centered_unit

            update_word_vector(self.model, word, equalized)

            if not self._verbose:
                continue

            report_rows.append({
                'equality_set_index': set_index,
                'word': word,
                'scaling': scaling,
                'projected_scalar': unit_vector @ self.direction,
                'equalized_projected_scalar': (equalized @ self.direction),
            })

    if self._verbose:
        print('Equalize Words Data '
              '(all equal for 1-dim bias space (direction):')
        report = pd.DataFrame(report_rows)
        report = report.set_index(['equality_set_index', 'word'])
        print(tabulate(report, headers='keys'))

    self.model.init_sims(replace=True)
def test_project_params():
    """Check that project_params and project_reject_vector agree.

    ``project_params(v, u)`` is compared with ``project_reject_vector(u, v)``;
    the swapped argument order is intentional (presumably the two helpers
    take their arguments in opposite order -- confirm against their
    definitions). The first element returned by ``project_params`` is
    ignored.
    """
    v = np.array([1, 2, 3])
    u = np.array([-4, 5, -6])

    _, params_projected, params_rejected = project_params(v, u)
    (direct_projected, direct_rejected) = project_reject_vector(u, v)

    # Compare the projected parts first, then the rejected parts.
    comparisons = (
        (params_projected, direct_projected),
        (params_rejected, direct_rejected),
    )
    for from_params, from_direct in comparisons:
        np.testing.assert_allclose(from_params, from_direct)
def test_project_params():
    """Check that project_params and project_reject_vector agree.

    ``project_params(v, u)`` is compared with ``project_reject_vector(u, v)``;
    the swapped argument order is intentional, hence the pylint directive
    below (presumably the two helpers take their arguments in opposite
    order -- confirm against their definitions). The first element returned
    by ``project_params`` is ignored.
    """
    # pylint: disable=arguments-out-of-order
    v = np.array([1, 2, 3])
    u = np.array([-4, 5, -6])

    _, params_projected, params_rejected = project_params(v, u)
    (direct_projected, direct_rejected) = project_reject_vector(u, v)

    # Compare the projected parts first, then the rejected parts.
    comparisons = (
        (params_projected, direct_projected),
        (params_rejected, direct_rejected),
    )
    for from_params, from_direct in comparisons:
        np.testing.assert_allclose(from_params, from_direct)
def test_equalize(gender_biased_w2v_small, is_preforming=True):
    """Test _equalize method in GenderBiasWE.

    The equality sets are built from the ``'equalize_pairs'`` and
    ``'definitional_pairs'`` entries of the fixture's ``_data`` and passed
    through ``_generate_pair_candidates``. When ``is_preforming`` is true,
    ``_equalize`` is run on them first; otherwise only the checks run.

    For every equality set the test asserts that:

    * each word vector has norm 1 (within ``ATOL``);
    * the set yields exactly two vectors, whose projected parts have
      opposite dot products with the direction;
    * the rejected parts of all vectors in the set are equal.

    Finally, ``check_all_vectors_unit_length`` is run on the fixture.
    """
    # pylint: disable=line-too-long
    bias_we = gender_biased_w2v_small

    equality_sets = {tuple(pair) for pair in bias_we._data['equalize_pairs']}
    equality_sets |= {
        tuple(pair) for pair in bias_we._data['definitional_pairs']
    }
    equality_sets = bias_we._generate_pair_candidates(equality_sets)

    if is_preforming:
        bias_we._equalize(equality_sets)

    for equality_set in equality_sets:
        projections = []
        rejections = []

        for word in equality_set:
            word_vector = bias_we[word]
            np.testing.assert_allclose(np.linalg.norm(word_vector), 1,
                                       atol=ATOL)

            projected, rejected = project_reject_vector(word_vector,
                                                        bias_we.direction)
            projections.append(projected)
            rejections.append(rejected)

        # <e1, d> == -<e2, d>, assuming equality sets of size 2.
        assert len(projections) == 2
        first_scalar = projections[0] @ bias_we.direction
        negated_second_scalar = -projections[1] @ bias_we.direction
        np.testing.assert_allclose(first_scalar, negated_second_scalar,
                                   atol=ATOL)

        # The rejected part is the same for every vector in the set.
        reference_rejection = rejections[0]
        for other_rejection in rejections[1:]:
            np.testing.assert_allclose(reference_rejection, other_rejection,
                                       atol=ATOL)

    check_all_vectors_unit_length(bias_we)