Пример #1
0
 def callApi(terms, gender):
     """Query the word API for each term and collect the noun results.

     For every term, ``api.words`` is called with ``ml=term``, ``max=1000``
     and ``md='dp'``. A result is considered only when its lower-cased word
     is not in ``discardSet``, it carries the tag ``'n'``, and a usable
     definition is available (the result's own ``'defs'`` or, failing that,
     ``getWordDefinition``).

     * A word already in ``wordSet`` gets the definition added to its entry
       in this call's local list, when such an entry exists.
     * Any other word that passes ``isValidWord`` is handed to
       ``processWord`` together with ``gender``.

     The local list is appended to ``allWords`` at the end.

     NOTE(review): ``source`` is not defined inside this function;
     presumably it is a module-level name - verify against the full file.
     """
     # Entries produced by this call; also used to find a word that this
     # source returned more than once.
     collected = []
     for term in terms:
         for hit in api.words(ml=term, max=1000, md='dp'):
             candidate = hit['word'].lower()
             if candidate in discardSet:
                 continue
             # Keep only results tagged 'n' (nouns, per the original comment).
             if 'tags' not in hit or 'n' not in hit['tags']:
                 continue

             if 'defs' in hit:
                 definition = hit['defs']
             else:
                 definition = getWordDefinition(candidate)
             # ' ', None and empty values all mean "no definition".
             if (definition == ' ' or definition is None
                     or len(definition) == 0):
                 continue

             if candidate in wordSet:
                 existing = findWordInArray(candidate, collected)
                 if existing is not None:
                     addDefinition(existing, definition)
                 continue

             if isValidWord(candidate):
                 processWord(candidate, definition, source, collected,
                             gender)
     allWords.extend(collected)
Пример #2
0
def createSets(words):
    """Build synonym groups for the given word entries.

    Args:
        words: iterable of dicts that each provide a ``'word'`` and a
            ``'gender'`` key.

    Returns:
        A list of lists. Each inner list holds an entry's word plus the
        synonyms accepted for it; groups with only the word itself are
        omitted.

    Side effects:
        May append new entries to the module-level ``all`` list.
    """
    allSets = []
    # NOTE(review): `all` is used as a list of entries here, so it must be a
    # module-level name that shadows the builtin - confirm in the full file.
    wordsInSet = {entry['word'] for entry in all}
    for entry in words:
        word = entry['word']
        gender = entry['gender']
        synonyms = getSynonyms(word)
        if not synonyms:
            continue

        # The group always starts with the word itself.
        synonyms_set = {word}
        for syn in synonyms:
            syn = syn.lower()
            if not isValidWord(syn):
                continue

            if syn in wordsInSet:
                # Known word: isSameGender returns (word to add, flag).
                result = isSameGender(word, gender, syn)
                if result[1]:
                    synonyms_set.add(result[0])
                continue

            if not isNoun(syn):
                continue

            # NOTE(review): the definition looked up (and the entry appended
            # below) is for `word`, not `syn`. This looks like it may have
            # been meant to be `syn` - confirm before changing; behavior is
            # kept as in the original.
            definition = getWordDefinition(word)
            # Treat None like ' ' (no definition), as the other lookups in
            # this file do, instead of passing None on to the text search.
            if definition is None or definition == ' ':
                continue

            result = searchTextForGenderedTerm(definition)
            if result is not None:
                isGenderedTerm = result[0]
                syn_gender = result[1]
                if isGenderedTerm and syn_gender == gender:
                    all.append({
                        'word': word,
                        'definition': definition,
                        'gender': syn_gender
                    })
                    wordsInSet.add(word)
                    continue
            # isNoun(syn) is already known to be true at this point, so only
            # the gender check remains.
            if isGendered(syn, gender, definition):
                synonyms_set.add(syn)

        if len(synonyms_set) > 1:
            allSets.append(list(synonyms_set))
    return allSets
Пример #3
0
def getGSFull():
    """Process the words listed in ``data/gender_specific_full.json``.

    Each item of the loaded JSON is lower-cased. A word is handled only when
    it is in neither ``wordSet`` nor ``discardSet`` and passes
    ``isValidWord``. If ``getWordDefinition`` yields a usable definition,
    the word is passed to ``processWord`` with source ``'debiaswe'`` and the
    ``allWords`` list. Words without a usable definition are skipped here.
    """
    source = 'debiaswe'
    with open('data/gender_specific_full.json', 'r') as handle:
        raw_terms = json.load(handle)

    for raw in raw_terms:
        term = raw.lower()
        if term in wordSet or term in discardSet or not isValidWord(term):
            continue

        definition = getWordDefinition(term)
        # ' ', None and empty values all mean "no definition".
        if (definition == ' ' or definition is None
                or len(definition) == 0):
            continue

        processWord(term, definition, source, allWords)
    print('gender specific done')
Пример #4
0
 def checkEquivalent(equivalent):
     """Record ``equivalent`` as an opposite-gender entry if it has a definition.

     ``gender`` is not a parameter; it is read from an enclosing scope.
     When ``getWordDefinition`` returns something other than ``' '`` or
     ``None``, an entry with the opposite gender and note ``'neg'`` is
     appended to ``all``, the word is appended to ``all_words_only`` and the
     word is returned. Otherwise ``' '`` is returned.
     """
     definition = getWordDefinition(equivalent)
     if definition == ' ' or definition is None:
         return ' '

     # Choose the opposite gender and the pattern of terms for that gender.
     if gender == 'female':
         opp_gender = 'male'
         pattern = r'\bman\b|\bmale\b|\bboy\b|\bmen\b|\bboys\b|\bson\b|\b[\w]*?father\b|\bhusband\b'
     else:
         opp_gender = 'female'
         pattern = r'\b[\w]*?woman\b|\bfemale\b|\b[\w]*?girl\b|\bgirls\b|\b[\w]*?women\b|\blady\b|\b[\w]*?mother\b|\b[\w]*?daughter\b|\bwife\b'

     # NOTE(review): the search result is never read, so the entry below is
     # recorded whether or not the definition mentions an opposite-gender
     # term. Possibly a missing condition - confirm the intended behavior.
     termsInString = re.compile(pattern).search(definition)

     record = {
         'word': equivalent,
         'definition': definition,
         'gender': opp_gender,
         'note': 'neg'
     }
     all.append(record)
     all_words_only.append(equivalent)
     return equivalent
Пример #5
0
def addTerms(terms, gender):
    """Register each new term with its definition under the 'wordnik' source.

    A term is new when it is in neither ``wordSet`` nor ``discardSet``. New
    terms are added to ``wordSet`` and passed to ``addEntry`` together with
    ``gender`` and the ``allWords`` list.
    """
    for term in terms:
        # The definition is looked up for every term, before the membership
        # checks, exactly as in the original ordering.
        definition = getWordDefinition(term)
        if term in wordSet or term in discardSet:
            continue
        wordSet.add(term)
        addEntry(term, definition, gender, 'wordnik', allWords)