def callApi(terms, gender):
    """Collect noun results for each search term and add them to ``allWords``.

    For every term, ``api.words`` is queried with ``ml=term``, ``max=1000``
    and ``md='dp'``. A result is considered only when its lower-cased word is
    not in ``discardSet`` and its ``'tags'`` list contains ``'n'``. The
    definition comes from the result's ``'defs'`` field when present,
    otherwise from ``getWordDefinition``.

    Args:
        terms: Iterable of query terms passed to ``api.words`` as ``ml``.
        gender: Forwarded unchanged to ``processWord`` for new words.

    Returns:
        None. Words gathered during the call are appended to ``allWords``.
    """
    # Entries gathered from this source. Kept in a separate list so that a
    # repeated hit for the same word can be found and given an additional
    # definition instead of a second entry.
    words = []

    for term in terms:
        for result in api.words(ml=term, max=1000, md='dp'):
            word = result['word'].lower()
            if word in discardSet:
                continue

            # Only results tagged as nouns are kept.
            if not ('tags' in result and 'n' in result['tags']):
                continue

            if 'defs' in result:
                definition = result['defs']
            else:
                definition = getWordDefinition(word)

            # ' ' , None and empty values all mean "no usable definition".
            if definition == ' ' or definition is None or len(definition) == 0:
                continue

            if word in wordSet:
                # Known word: merge the definition only if the entry was
                # created by this call (i.e. it lives in `words`).
                entry = findWordInArray(word, words)
                if entry is not None:
                    addDefinition(entry, definition)
            elif isValidWord(word):
                # NOTE(review): `source` is not defined in this function;
                # presumably a module-level name - confirm it exists.
                processWord(word, definition, source, words, gender)

    allWords.extend(words)
def createSets(words):
    """Group each word with its same-gender synonyms.

    For every entry in ``words`` a set is started with the entry's word and
    extended with synonyms returned by ``getSynonyms`` that pass the checks
    below. Only sets that end up with more than one member are returned.

    Args:
        words: Iterable of dicts that each have ``'word'`` and ``'gender'``
            keys.

    Returns:
        A list of lists; each inner list holds a word and the synonyms that
        were accepted for it (order within an inner list is unspecified
        because it is built from a set).

    Side effects:
        May append new entries to the externally defined ``all`` list (a
        name that shadows the builtin ``all``).
    """
    allSets = []
    # Words that already have an entry in `all`.
    wordsInSet = {entry['word'] for entry in all}

    for entry in words:
        word = entry['word']
        gender = entry['gender']

        # Start the group with the word itself, then add its synonyms.
        synonyms_set = {word}
        synonyms = getSynonyms(word)

        if synonyms:
            for syn in synonyms:
                syn = syn.lower()
                if not isValidWord(syn):
                    continue

                if syn in wordsInSet:
                    # Known synonym: isSameGender returns a pair whose first
                    # item replaces `syn` and whose second item is the
                    # same-gender flag.
                    result = isSameGender(word, gender, syn)
                    sameGender = result[1]
                    syn = result[0]
                    if sameGender:
                        synonyms_set.add(syn)
                    continue

                # From here on `syn` has no entry in `all` yet.
                if not isNoun(syn):
                    continue

                # NOTE(review): the definition is looked up for `word`, and
                # `word` (not `syn`) is what gets appended to `all` and added
                # to `wordsInSet` below. This looks like it was meant to be
                # `syn`; behaviour is left unchanged pending confirmation.
                definition = getWordDefinition(word)
                if definition == ' ':
                    continue

                result = searchTextForGenderedTerm(definition)
                if result is not None:
                    isGenderedTerm = result[0]
                    syn_gender = result[1]
                    if isGenderedTerm and syn_gender == gender:
                        all.append({
                            'word': word,
                            'definition': definition,
                            'gender': syn_gender
                        })
                        wordsInSet.add(word)
                        continue

                # NOTE(review): isNoun(syn) was already checked above; the
                # repeated call is kept because the helper is opaque here.
                if isNoun(syn) and isGendered(syn, gender, definition):
                    synonyms_set.add(syn)

        # A group containing only the original word carries no information.
        if len(synonyms_set) > 1:
            allSets.append(list(synonyms_set))

    return allSets
def getGSFull():
    """Load the gender-specific word list and register each usable word.

    Reads ``data/gender_specific_full.json`` and, for every item that is not
    already in ``wordSet`` or ``discardSet`` and passes ``isValidWord``,
    looks up a definition and hands the word to ``processWord`` with the
    source label ``'debiaswe'`` and the ``allWords`` list.

    Returns:
        None. Prints ``'gender specific done'`` when finished.
    """
    # JSON text is UTF-8 by specification; be explicit so the read does not
    # depend on the platform's default encoding.
    with open('data/gender_specific_full.json', 'r', encoding='utf-8') as f:
        results = json.load(f)

    source = 'debiaswe'

    # All words in this file are gendered. Words for which no definition can
    # be obtained are currently skipped here rather than recorded anywhere.
    for result in results:
        word = result.lower()
        if word in wordSet or word in discardSet or not isValidWord(word):
            continue

        definition = getWordDefinition(word)
        # ' ', None and empty values all mean "no usable definition".
        if definition != ' ' and definition is not None and len(definition) > 0:
            processWord(word, definition, source, allWords)

    print('gender specific done')
def checkEquivalent(equivalent):
    """Record ``equivalent`` as the opposite-gender counterpart, if defined.

    Looks up the definition of ``equivalent``. When one is available, an
    entry tagged with the opposite of ``gender`` and the note ``'neg'`` is
    appended to ``all`` and the word is appended to ``all_words_only``.

    ``gender`` is not a parameter; it is read from an enclosing or module
    scope.

    Args:
        equivalent: The candidate word to look up and record.

    Returns:
        ``equivalent`` when a definition was found, otherwise ``' '``.
    """
    definition = getWordDefinition(equivalent)
    if definition == ' ' or definition is None:
        return ' '

    # Pick the opposite gender and the pattern of terms that mark it.
    if gender == 'female':
        opp_gender = 'male'
        pattern = r'\bman\b|\bmale\b|\bboy\b|\bmen\b|\bboys\b|\bson\b|\b[\w]*?father\b|\bhusband\b'
    else:
        opp_gender = 'female'
        pattern = r'\b[\w]*?woman\b|\bfemale\b|\b[\w]*?girl\b|\bgirls\b|\b[\w]*?women\b|\blady\b|\b[\w]*?mother\b|\b[\w]*?daughter\b|\bwife\b'

    # NOTE(review): the search result is never consulted, so the entry below
    # is recorded whether or not the definition mentions an opposite-gender
    # term. Confirm whether the append was meant to be conditional on it.
    found = re.compile(pattern).search(definition)

    all.append({
        'word': equivalent,
        'definition': definition,
        'gender': opp_gender,
        'note': 'neg'
    })
    all_words_only.append(equivalent)
    return equivalent
def addTerms(terms, gender):
    """Register every new term as an entry with the source ``'wordnik'``.

    A term is new when it is in neither ``wordSet`` nor ``discardSet``. New
    terms are added to ``wordSet`` and passed to ``addEntry`` together with
    their definition, ``gender`` and the ``allWords`` list.

    Args:
        terms: Iterable of words to register.
        gender: Gender label forwarded to ``addEntry``.

    Returns:
        None.
    """
    for term in terms:
        # The lookup runs for every term, before the filter below; that
        # order is kept as-is because getWordDefinition is opaque here.
        definition = getWordDefinition(term)

        if term in wordSet or term in discardSet:
            continue

        wordSet.add(term)
        addEntry(term, definition, gender, 'wordnik', allWords)