Example No. 1
    def calc_indirect_bias(self, word1, word2):
        """Calculate the indirect bias between two words.

        Based on the amount of shared projection of the words on the direction.

        Also called PairBias.
        :param str word1: First word
        :param str word2: Second word
        :return: The indirect bias between the two words
        """

        self._is_direction_identified()

        vector1 = normalize(self[word1])
        vector2 = normalize(self[word2])

        perpendicular_vector1 = reject_vector(vector1, self.direction)
        perpendicular_vector2 = reject_vector(vector2, self.direction)

        inner_product = vector1 @ vector2
        perpendicular_similarity = cosine_similarity(perpendicular_vector1,
                                                     perpendicular_vector2)

        indirect_bias = ((inner_product - perpendicular_similarity)
                         / inner_product)
        return indirect_bias
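
A minimal NumPy sketch of the indirect-bias (PairBias) computation above, using toy vectors in place of word embeddings; the helper names reject and cos_sim are illustrative stand-ins for the library's utilities.

import numpy as np

def normalize(v):
    return v / np.linalg.norm(v)

def reject(v, direction):
    # component of v orthogonal to the unit-length direction
    return v - (v @ direction) * direction

def cos_sim(u, v):
    return (u @ v) / (np.linalg.norm(u) * np.linalg.norm(v))

rng = np.random.default_rng(42)
direction = normalize(rng.normal(size=50))
v1, v2 = normalize(rng.normal(size=50)), normalize(rng.normal(size=50))

inner_product = v1 @ v2
perpendicular_similarity = cos_sim(reject(v1, direction),
                                   reject(v2, direction))
indirect_bias = (inner_product - perpendicular_similarity) / inner_product
print(indirect_bias)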
Example No. 2
    def _equalize(self, equality_sets):
        # pylint: disable=R0914

        self._is_direction_identified()

        if self._verbose:
            words_data = []

        for equality_set_index, equality_set_words in enumerate(equality_sets):
            equality_set_vectors = [normalize(self[word])
                                    for word in equality_set_words]
            center = np.mean(equality_set_vectors, axis=0)
            (projected_center,
             rejected_center) = project_reject_vector(center,
                                                      self.direction)
            scaling = np.sqrt(1 - np.linalg.norm(rejected_center)**2)

            for word, vector in zip(equality_set_words, equality_set_vectors):
                projected_vector = project_vector(vector, self.direction)

                projected_part = normalize(projected_vector - projected_center)

                # Bolukbasi's code differs from this implementation;
                # it behaves the same only for equality sets
                # of size 2 (pairs) - not verified.
                # This code follows the article, whereas Bolukbasi's code uses:
                # equalized_vector = rejected_center + scaling * self.direction
                # https://github.com/tolga-b/debiaswe/blob/10277b23e187ee4bd2b6872b507163ef4198686b/debiaswe/debias.py#L36-L37
                # For pairs, projected_part_vector1 == -projected_part_vector2,
                # and this is the same as
                # projected_part_vector1 == self.direction
                equalized_vector = rejected_center + scaling * projected_part

                update_word_vector(self.model, word, equalized_vector)

                if self._verbose:
                    words_data.append({
                        'equality_set_index': equality_set_index,
                        'word': word,
                        'scaling': scaling,
                        'projected_scalar': vector @ self.direction,
                        'equalized_projected_scalar': (equalized_vector
                                                       @ self.direction),
                    })

        if self._verbose:
            print('Equalize Words Data '
                  '(all equal for 1-dim bias space (direction)):')
            words_data_df = (pd.DataFrame(words_data)
                             .set_index(['equality_set_index', 'word']))
            print(tabulate(words_data_df, headers='keys'))

        self.model.init_sims(replace=True)
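
A minimal sketch of the equalize step for a single pair, with toy unit vectors standing in for word embeddings. It shows that both equalized vectors end up unit length, with projections on the direction of equal magnitude and opposite sign.

import numpy as np

def normalize(v):
    return v / np.linalg.norm(v)

rng = np.random.default_rng(0)
direction = normalize(rng.normal(size=50))
v1, v2 = normalize(rng.normal(size=50)), normalize(rng.normal(size=50))

center = (v1 + v2) / 2
projected_center = (center @ direction) * direction
rejected_center = center - projected_center
scaling = np.sqrt(1 - np.linalg.norm(rejected_center) ** 2)

equalized = []
for v in (v1, v2):
    projected_vector = (v @ direction) * direction
    projected_part = normalize(projected_vector - projected_center)
    equalized.append(rejected_center + scaling * projected_part)

# both are unit vectors, equidistant from the direction on opposite sides
print([np.linalg.norm(e) for e in equalized])
print([e @ direction for e in equalized])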
Example No. 3
    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.

        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
        """
        projection_data = []
        for word in words:
            vector = self[word]
            normalized_vector = normalize(vector)

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalized_vector,
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)
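
A short sketch of the decomposition that project_params appears to return above: the scalar projection, the projected vector, and the rejected vector of a normalized word vector with respect to the bias direction (toy vectors used here).

import numpy as np

def normalize(v):
    return v / np.linalg.norm(v)

rng = np.random.default_rng(1)
direction = normalize(rng.normal(size=50))
normalized_vector = normalize(rng.normal(size=50))

projection = normalized_vector @ direction               # scalar projection
projected_vector = projection * direction                # component along the direction
rejected_vector = normalized_vector - projected_vector   # orthogonal component

# the two components reconstruct the original vector
assert np.allclose(projected_vector + rejected_vector, normalized_vector)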
Example No. 4
    def generate_closest_words_indirect_bias(self,
                                             neutral_positive_end,
                                             neutral_negative_end,
                                             words=None, n_extreme=5):
        """
        Generate closest words to a neutral direction and their indirect bias.

        The direction of the neutral words is used to find
        the most extreme words.
        The indirect bias is calculated between the most extreme words
        and the closest end.

        :param str neutral_positive_end: A word that defines the positive side
                                         of the neutral direction.
        :param str neutral_negative_end: A word that defines the negative side
                                         of the neutral direction.
        :param list words: List of words to project on the neutral direction.
        :param int n_extreme: The number of most extreme words
                              (positive and negative) to show.
        :return: Data Frame of the most extreme words
                 with their projection scores and indirect biases.
        """

        neutral_direction = normalize(self[neutral_positive_end]
                                      - self[neutral_negative_end])

        vectors = [normalize(self[word]) for word in words]
        df = (pd.DataFrame([{'word': word,
                             'projection': vector @ neutral_direction}
                            for word, vector in zip(words, vectors)])
              .sort_values('projection', ascending=False))

        df = take_two_sides_extreme_sorted(df, n_extreme,
                                           'end',
                                           neutral_positive_end,
                                           neutral_negative_end)

        df['indirect_bias'] = df.apply(lambda r:
                                       self.calc_indirect_bias(r['word'],
                                                               r['end']),
                                       axis=1)

        df = df.set_index(['end', 'word'])
        df = df[['projection', 'indirect_bias']]

        return df
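
The take_two_sides_extreme_sorted helper comes from the library's utilities; below is a hedged approximation of what it does - keep the n most positive and n most negative projections and tag each row with its closest end. The real helper may differ in detail.

import pandas as pd

def take_two_sides_extreme_sorted_sketch(df, n_extreme, end_column,
                                         positive_end, negative_end):
    df = df.sort_values('projection', ascending=False)
    head = df.head(n_extreme).assign(**{end_column: positive_end})
    tail = df.tail(n_extreme).assign(**{end_column: negative_end})
    return pd.concat([head, tail])

words_df = pd.DataFrame({'word': ['a', 'b', 'c', 'd'],
                         'projection': [0.9, 0.1, -0.2, -0.8]})
print(take_two_sides_extreme_sorted_sketch(words_df, 1, 'end', 'pos', 'neg'))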
Example No. 5
    def learn_full_specific_words(self, seed_specific_words,
                                  max_non_specific_examples=None, debug=None):
        """Learn specific words given a list of seed specific wordsself.

        Using Linear SVM.

        :param list seed_specific_words: List of seed specific words
        :param int max_non_specific_examples: The number of non-specific words
                                              to sample for training
        :param bool debug: If True, also return the training matrix and labels
        :return: List of learned specific words and the classifier object
        """

        if debug is None:
            debug = False

        if max_non_specific_examples is None:
            max_non_specific_examples = MAX_NON_SPECIFIC_EXAMPLES

        data = []
        non_specific_example_count = 0

        for word in self.model.vocab:
            is_specific = word in seed_specific_words

            if not is_specific:
                non_specific_example_count += 1
                if non_specific_example_count <= max_non_specific_examples:
                    data.append((self[word], is_specific))
            else:
                data.append((self[word], is_specific))

        np.random.seed(RANDOM_STATE)
        np.random.shuffle(data)

        X, y = zip(*data)

        X = np.array(X)
        X /= np.linalg.norm(X, axis=1)[:, None]

        y = np.array(y).astype('int')

        clf = LinearSVC(C=1, class_weight='balanced',
                        random_state=RANDOM_STATE)

        clf.fit(X, y)

        full_specific_words = []
        for word in self.model.vocab:
            vector = [normalize(self[word])]
            if clf.predict(vector):
                full_specific_words.append(word)

        if not debug:
            return full_specific_words, clf

        return full_specific_words, clf, X, y
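
A toy sketch of the classifier training above: fit a linear SVM on row-normalized vectors with binary labels, then predict on new vectors. The data here is synthetic; in the original method the vectors come from the model's vocabulary and MAX_NON_SPECIFIC_EXAMPLES caps the negative examples.

import numpy as np
from sklearn.svm import LinearSVC

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 50))
X /= np.linalg.norm(X, axis=1)[:, None]   # normalize each row, as above
y = (X[:, 0] > 0).astype(int)             # stand-in labels

clf = LinearSVC(C=1, class_weight='balanced', random_state=0)
clf.fit(X, y)
print(clf.predict(X[:5]))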
Example No. 6
    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        matrix = []

        for word1, word2 in definitional_pairs:
            vector1 = normalize(self[word1])
            vector2 = normalize(self[word2])

            center = (vector1 + vector2) / 2

            matrix.append(vector1 - center)
            matrix.append(vector2 - center)

        pca = PCA(n_components=n_components)
        pca.fit(matrix)

        if self._verbose:
            table = enumerate(pca.explained_variance_ratio_, start=1)
            headers = ['Principal Component',
                       'Explained Variance Ratio']
            print(tabulate(table, headers=headers))

        return pca
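
A small self-contained sketch of the PCA-based subspace identification: for each definitional pair, both vectors' offsets from the pair's center are stacked into a matrix, and PCA is fitted on it. Toy vectors stand in for word embeddings.

import numpy as np
from sklearn.decomposition import PCA

def normalize(v):
    return v / np.linalg.norm(v)

rng = np.random.default_rng(0)
pairs = [(normalize(rng.normal(size=50)), normalize(rng.normal(size=50)))
         for _ in range(10)]

matrix = []
for vector1, vector2 in pairs:
    center = (vector1 + vector2) / 2
    matrix.append(vector1 - center)
    matrix.append(vector2 - center)

pca = PCA(n_components=10)
pca.fit(matrix)
print(pca.explained_variance_ratio_)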
Example No. 7
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end '
                             'should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The explained variance '
                                   'of the first principal component should be '
                                   'at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # Flip the direction if it points the wrong way
            # (we cannot control the sign PCA returns)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end
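
A brief sketch of the sign correction applied in the 'pca' branch above: PCA does not control the sign of its components, so the direction is flipped if the vector from negative_end to positive_end projects negatively onto it. Toy vectors stand in for the two end words.

import numpy as np

rng = np.random.default_rng(0)
v_positive_end, v_negative_end = rng.normal(size=50), rng.normal(size=50)
direction = rng.normal(size=50)
direction /= np.linalg.norm(direction)

ends_diff = v_positive_end - v_negative_end
ends_diff_projection = (ends_diff @ direction) / np.linalg.norm(ends_diff)
if ends_diff_projection < 0:
    direction = -direction   # make positive_end lie on the positive side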
Example No. 8
    def plot_most_biased_clustering(biased, debiased,
                                    seed='ends', n_extreme=500,
                                    random_state=1):
        """Plot clustering as classification of biased neutral words.

        :param biased: Biased word embedding of
                       :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param debiased: Debiased word embedding of
                         :class:`~ethically.we.bias.BiasWordEmbedding`.
        :param seed: The definition of the seed vector.
                     Either a tuple of two word ends,
                     or `'ends'` for the pre-defined ends,
                     or `'direction'` for
                     the pre-defined direction vector.
        :param n_extreme: The number of extreme biased
                          neutral words to use.
        :return: Tuple of list of ax objects of the plot,
                 and a dictionary with the most positive
                 and negative words.

        Based on:

        - Gonen, H., & Goldberg, Y. (2019).
          `Lipstick on a Pig:
          Debiasing Methods Cover up Systematic Gender Biases
          in Word Embeddings But do not Remove
          Them <https://arxiv.org/abs/1903.03862>`_.
          arXiv preprint arXiv:1903.03862.

        - https://github.com/gonenhila/gender_bias_lipstick
        """
        # pylint: disable=protected-access,too-many-locals,line-too-long

        assert biased.positive_end == debiased.positive_end, \
            'Positive ends should be the same.'
        assert biased.negative_end == debiased.negative_end, \
            'Negative ends should be the same.'

        seed_vector, _, _ = get_seed_vector(seed, biased)

        neutral_words = biased._data['neutral_words']
        neutral_word_vectors = (biased[word] for word in neutral_words)
        neutral_word_projections = [(normalize(vector) @ seed_vector, word)
                                    for word, vector
                                    in zip(neutral_words,
                                           neutral_word_vectors)]

        neutral_word_projections.sort()

        _, most_negative_words = zip(*neutral_word_projections[:n_extreme])
        _, most_positive_words = zip(*neutral_word_projections[-n_extreme:])

        most_biased_neutral_words = most_negative_words + most_positive_words

        y_bias = [False] * n_extreme + [True] * n_extreme

        _, axes = plt.subplots(1, 2, figsize=(20, 5))

        acc_biased = biased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                          y_bias,
                                                          random_state=random_state,
                                                          ax=axes[0])
        axes[0].set_title('Biased - Accuracy={}'.format(acc_biased))

        acc_debiased = debiased._plot_most_biased_one_cluster(most_biased_neutral_words,
                                                              y_bias,
                                                              random_state=random_state,
                                                              ax=axes[1])
        axes[1].set_title('Debiased - Accuracy={}'.format(acc_debiased))

        return axes, {biased.positive_end: most_positive_words,
                      biased.negative_end: most_negative_words}
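
A hedged sketch of the clustering-as-classification idea this plot relies on (following Gonen & Goldberg, 2019): cluster the most-biased words' vectors into two groups with KMeans and measure how well the clusters align with the original bias labels. The internal _plot_most_biased_one_cluster method is not shown in this snippet, so synthetic data is used here.

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(1)
vectors = rng.normal(size=(1000, 50))            # stand-in word vectors
y_bias = np.array([False] * 500 + [True] * 500)  # negative-end / positive-end labels

labels = KMeans(n_clusters=2, random_state=1).fit_predict(vectors)
alignment = (labels == y_bias).mean()
accuracy = max(alignment, 1 - alignment)         # invariant to cluster label order
print(accuracy)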