Exemplo n.º 1
0
def soft(E, embed, gender_words, defs):
    """
    Soft debiasing of word embedding E.

    :param WordEmbedding E: Biased word embedding E.
    :param string embed: Name of the embedding.
    :param list gender_words: List of gender specific words.
    :param list defs: List of tuples with definitional pairs.
    :returns: Soft debiased WordEmbedding object.
    """
    print("\nSoft debiasing...")

    # Without the do_soft flag, fall back to a precomputed debiased embedding.
    if not FLAGS.do_soft:
        return WordEmbedding(embed + "_soft_debiased")

    # Run soft debiasing from scratch on a copy, using the hyperparameters
    # registered for this embedding family (prefix before the first "_").
    hyper = SOFT_PARAMS[embed.split("_")[0]]
    debiased = deepcopy(E)
    soft_debias(
        debiased,
        gender_words,
        defs,
        epochs=hyper["epochs"],
        lr=hyper["lr"],
        gamma=hyper["gamma"],
        decrease_times=hyper["decrease_times"],
    )
    return debiased
Exemplo n.º 2
0
def debiasEmbedding(filename, bootstrapped=False):
    """Hard-debias the embedding in *filename* and save it to disk.

    The work is skipped when a debiased copy already exists, unless
    ``bootstrapped`` forces recomputation.
    """
    outfile_path = './embeddings/' + 'debiased_' + filename

    # Already on disk and no forced recomputation requested: nothing to do.
    if not bootstrapped and os.path.exists(outfile_path):
        return

    E = WordEmbedding('./embeddings/' + filename)

    # Word lists from Bolukbasi et al.: definitional pairs, equalize
    # pairs, and gender specific seed words.
    with open('./data/definitional_pairs.json', "r") as f:
        defs = json.load(f)

    with open('./data/equalize_pairs.json', "r") as f:
        equalize_pairs = json.load(f)

    with open('./data/gender_specific_seed.json', "r") as f:
        gender_specific_words = json.load(f)

    # Debias in place, then persist the result.
    debias(E, gender_specific_words, defs, equalize_pairs)
    E.save(outfile_path)
Exemplo n.º 3
0
def main():
    """Run the debiasing experiments for every embedding in FLAGS.embeddings."""
    # Print basic experiment information
    print_details()

    for embed in FLAGS.embeddings:
        # Banner announcing which embedding is being processed.
        print("\n" + "#" * 56)
        print("# " + f"Doing the {embed} embedding".center(53) + "#")
        print("#" * 56)

        # Load the embedding plus the professions / gender word lists
        # from Bolukbasi et al. for word2vec.
        E = WordEmbedding(embed)
        gender_words, defs, equalize_pairs, profession_words = load_data(E.words)

        # Gender direction: first principal component over the
        # definitional pairs.
        v_gender = we.doPCA(defs, E).components_[0]

        # Baseline bias, before any debiasing.
        if not FLAGS.no_show:
            show_bias(E, v_gender, profession_words, info="with bias")

        # Hard debiasing.
        E_hard = hard(E, gender_words, defs, equalize_pairs)
        if not FLAGS.no_show:
            show_bias(E_hard, v_gender, profession_words, info="hard debiased")

        # Soft debiasing is only done for the smaller embeddings.
        E_soft = None
        if embed.split("_")[-1] != "large":
            E_soft = soft(E, embed, gender_words, defs)
            if not FLAGS.no_show:
                show_bias(E_soft, v_gender, profession_words, info="soft debiased")

        # Optionally evaluate all variants on the benchmarks.
        if not FLAGS.no_bench:
            run_benchmark(E, E_hard, E_soft, embed)
Exemplo n.º 4
0
# FIX: `logging` was used below (basicConfig / DEBUG) but never imported,
# which raises NameError at startup.
import json
import logging
import random

import numpy as np
from matplotlib import pyplot as plt

logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                    level=logging.DEBUG,
                    datefmt='%I:%M:%S')

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions
from debiaswe.debias import debias

# Load the small word2vec Google News embedding.
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')

# Word lists from Bolukbasi et al.: definitional pairs, equalize pairs,
# and gender specific seed words.
with open('./data/definitional_pairs.json', "r") as f:
    defs = json.load(f)

with open('./data/equalize_pairs.json', "r") as f:
    equalize_pairs = json.load(f)

with open('./data/gender_specific_seed.json', "r") as f:
    gender_specific_words = json.load(f)

# Hard-debias the embedding in place.
debias(E, gender_specific_words, defs, equalize_pairs)

# NOTE(review): `Vocabulary` is neither imported nor defined in this
# script, so this line raises NameError as written — TODO: confirm which
# module it should be imported from.
v = Vocabulary(E.words)
Exemplo n.º 5
0
from __future__ import print_function, division
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions
from debiaswe.debias import debias

# Step 0: load google news wordvec
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')

# Step 1: load professions
professions = load_professions()
profession_words = [p[0] for p in professions]

# Step 2: define racial direction
names = [
    "Emily", "Aisha", "Anne", "Keisha", "Jill", "Tamika", "Allison", "Lakisha",
    "Laurie", "Tanisha", "Sarah", "Latoya", "Meredith", "Kenya", "Carrie",
    "Latonya", "Kristen", "Ebony", "Todd", "Rasheed", "Neil", "Tremayne",
    "Geoffrey", "Kareem", "Brett", "Darnell", "Brendan", "Tyrone", "Greg",
    "Hakim", "Matthew", "Jamal", "Jay", "Leroy", "Brad", "Jermaine"
]
names_group1 = [names[2 * i] for i in range(len(names) // 2)]
names_group2 = [names[2 * i + 1] for i in range(len(names) // 2)]

vs = [sum(E.v(w) for w in names) for names in (names_group2, names_group1)]
Exemplo n.º 6
0
from matplotlib import pyplot as plt

import sys
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professionals
'''
Part1: Find Bias
'''
# Step 1: Load Data (Word embedding & professionals)
E = WordEmbedding('./data/file_small.txt')
professionals = load_professionals()

# Step 2: Define regional (Northern China people - Southern China people) direction directly
# v_region: difference vector between "Beijinger" and "Shanghainese".
v_region = E.diff('北京人', '上海人')
# v_gender: difference vector between "male" and "female".
v_gender = E.diff('男', '女')

# Step 3: Generating analogies of "Northern people: x :: Southern people: y
# a_region = E.best_analogies_dist_thresh(v_region)
# before_map = {}
# NOTE(review): this handle is opened in append mode and never closed in
# the visible code (it is only referenced by the commented-out loop
# below) — prefer a `with` block if nothing later relies on it staying open.
output = open("output.txt", "a")

# for (a,b,c) in a_region:
#     before_map[a] = b
#     print(a+"-"+b+"-"+str(c), file=output)
from __future__ import print_function, division
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions
from debiaswe.data import load_sports
from debiaswe.debias import debias

# Step 0: load google news wordvec
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')

# Step 1: load professions
professions = load_professions()
profession_words = [p[0] for p in professions]
# Step 1b: load sports (same pattern as professions: keep only the word)
sports = load_sports()
sport_words = [s[0] for s in sports]

print("\n")

# Step 2: define gender direction as the she-he difference vector
v_gender = E.diff('she', 'he')

print("\n")

# Step 3: generate analogies based on gender = 'man:x :: woman:y'