Пример #1
0
def simplify_word(a):
    """Reduce a word to a base form, or return '' when it is not recognised.

    The checks are tried in this order and the first hit wins:

    1. the present-tense form of ``a``, if ``en`` accepts it as a verb;
    2. the singular form of ``a``, if ``en`` accepts it as a noun;
    3. ``a`` itself, if it is a noun, verb, adjective, adverb or connective.

    :param a: the word to simplify.
    :returns: the simplified word, or the empty string if no check matched.
    """
    # Step 1: verb. A failing conjugation lookup simply means "not a verb",
    # so errors are ignored here -- but only ordinary exceptions, so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        try_present_verb = en.verb.present(a)
        if en.is_verb(try_present_verb):
            return try_present_verb
    except Exception:
        pass

    # Step 2: noun, reduced to its singular form.
    try_singular_noun = en.noun.singular(a)
    if en.is_noun(try_singular_noun):
        return try_singular_noun

    # Step 3: the word is already a known noun, verb, adjective, adverb
    # or connective -- keep it unchanged.
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(a) or en.is_connective(a):
        return a

    return ''
Пример #2
0
def check_verb( listVals, iVerb , verb):
  """Try to extend ``verb`` with the next non-empty entry of ``listVals``.

  Only the first truthy entry after position ``iVerb`` is considered. If
  "<verb> <entry>" is accepted by ``en.is_verb`` the combined phrase is
  returned; in every other case ``verb`` is returned unchanged.
  """
  following = ( listVals[k] for k in range( iVerb + 1, len( listVals ) ) )
  particle = next( ( tok for tok in following if tok ), None )
  if particle is None:
    # Nothing but empty entries after the verb.
    return verb
  candidate = verb + " " + particle
  if en.is_verb( candidate ):
    return candidate
  return verb
Пример #3
0
 def resolvePastTense(self, sentence):
     """Fold each '<PAST>' marker into a preceding token, editing ``sentence`` in place.

     For every '<PAST>' token the list is scanned backwards. The first token
     found that is one of the following is rewritten and the marker removed:

     * a '<VERB>'-tagged token whose word part ``en`` accepts as a verb
       (a leading 'do not' becomes 'did not', otherwise ``en.verb.past``
       is applied to the word part);
     * the bare token 'be' (becomes 'was<VERB>');
     * the bare token 'do' (becomes 'did<VERB>').

     The scan now stops after any of these rewrites. Previously the 'be' and
     'do' cases kept scanning after the marker had been popped, so a second
     match further left popped ``sentence[i]`` again and removed an unrelated
     token.

     :param sentence: list of token strings; modified in place.
     :returns: None.
     """
     i = 0
     while i < len(sentence):
         if sentence[i] == '<PAST>':
             j = i - 1
             while j >= 0:
                 prev_token = sentence[j]
                 tag_start = prev_token.find('<')
                 if '<VERB>' in prev_token and en.is_verb(prev_token[:tag_start]):
                     word = prev_token[:tag_start]
                     if prev_token.startswith('do not'):  # negated past tense
                         past_tense = 'did not' + prev_token[6:]
                     else:
                         past_tense = en.verb.past(word)
                     # Drop the word part so only the tag(s) remain.
                     replacement = past_tense + prev_token.replace(word, "")
                 elif prev_token == 'be':
                     replacement = 'was' + '<VERB>'
                 elif prev_token == 'do':
                     replacement = 'did' + '<VERB>'
                 else:
                     j -= 1
                     continue
                 sentence[j] = replacement
                 sentence.pop(i)  # the marker has been consumed
                 break
         i += 1
Пример #4
0
def get_verbs(tokens_tagged):
    """Collect verb tokens from tagged sentences, grouped by infinitive.

    :param tokens_tagged: sequence of sentences, each a sequence of
        ``(word, tag)`` pairs. A token is kept when its tag is non-empty and
        starts with 'V' and the word does not begin with a non-letter.
    :returns: a list of ``[infinitive, count, occurrences]`` entries, most
        frequent first. Each occurrence is
        ``[word, tag, infinitive, sentence_index, token_index]``.
        Infinitives that ``en`` does not accept as verbs, and the blocked
        auxiliaries ("", "be", "have", "do", "can"), are left out.
    """
    starts_with_non_letter = re.compile(r'[^a-zA-Z]')

    # Group occurrences by infinitive in a single pass instead of rescanning
    # the full occurrence list once per distinct verb.
    occurrences = defaultdict(list)
    for i, sentence in enumerate(tokens_tagged):
        for j, (w, t) in enumerate(sentence):
            if t and t.startswith('V') and not starts_with_non_letter.match(w):
                infinitive = unicode(en.verb.infinitive(w.lower()))
                occurrences[infinitive].append([w, t, infinitive, i, j])

    sorted_counts = sorted(
        [(w, len(found)) for (w, found) in occurrences.items()],
        key=operator.itemgetter(1),
        reverse=True,
    )
    blocked = ["", "be", "have", "do", "can"]

    return [
        [w, c, occurrences[w]]
        for (w, c) in sorted_counts
        if en.is_verb(w) and w not in blocked
    ]
Пример #5
0
    def find_grammatical_kind(self):
        """Label every space-separated word of the sentence with one category.

        Commas are removed from ``self.get_sentence()`` first. Each word gets
        the label of the first matching ``en`` predicate, checked in the order
        emotion, connective, verb, adjective, noun, persuasive, number; words
        matching none of them are labelled "unclear".

        :returns: list of labels, one per word, in sentence order.
        """
        sentence = re.sub(",", "", self.get_sentence())

        # Order matters: the first predicate that accepts a word wins.
        classifiers = (
            (en.noun.is_emotion, "emotion"),
            (en.is_connective, "connective"),
            (en.is_verb, "verb"),
            (en.is_adjective, "adjective"),
            (en.is_noun, "noun"),
            (en.is_persuasive, "persuasive"),
            (en.is_number, "number"),
        )

        kinds = []
        for token in sentence.split(" "):
            for accepts, label in classifiers:
                if accepts(token):
                    kinds.append(label)
                    break
            else:
                kinds.append("unclear")
        return kinds
Пример #6
0
def autoLemmatize(word):
	"""Lemmatize ``word``, treating it as a verb when ``en`` recognises it as one.

	If ``en.verb.infinitive`` yields a non-empty form that ``en.is_verb``
	accepts, that infinitive is lemmatized with part of speech 'v';
	otherwise the original word is lemmatized with the lemmatizer's default.
	"""
	lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()
	base_form = en.verb.infinitive(word)
	looks_like_verb = base_form != '' and en.is_verb(base_form)
	if not looks_like_verb:
		return lemmatizer.lemmatize(word)
	return lemmatizer.lemmatize(base_form, 'v')
Пример #7
0
def translate_indicative_x_for_assertion(brain,a):
    """Render an "<left> <verb> <prefix> for <right>" sentence for assertion ``a``.

    ``prefix`` is ``a.relation`` without its last four characters and the
    verb comes from ``get_tense``. Two shapes of ``a.r`` are handled:

    * plain string values: entries that ``en`` accepts as verbs are turned
      into present participles, then listed;
    * a "hash_array" whose first clause shares its first left-hand concept
      with ``a``: each clause is marked (in place) as present participle /
      imperative and then listed.

    :returns: the sentence passed through ``add_end_marks``, or "" when
        neither shape applies.
    """
    prefix = a.relation[:-4]
    verb = get_tense(a, "was", a.l, brain)

    if vals_are_string_things(a.r):
        gerunds = []
        for item in a.r:
            gerunds.append(en.verb.present_participle(item) if en.is_verb(item) else item)
        purpose = list_words_naturally(gerunds)
    elif get_type(a.r) == "hash_array" and a.l[0] == a.r[0]["l"][0]:
        clauses = []
        for clause in a.r:
            # The clause dicts are updated in place, as before.
            clause["tense"] = "present participle"
            clause["grammatical_mood"] = "imperative"
            clauses.append(clause)
        purpose = list_clauses_naturally(brain, clauses)
    else:
        return ""

    sentence = list_concepts_naturally(brain,a.l) + " "+verb+" " + prefix + " for " + purpose
    return add_end_marks(a, sentence)
Пример #8
0
def dict_ingest(path_to_dict):
    """Sort the words of a one-word-per-line file into part-of-speech lists.

    Each stripped line goes into exactly one list, using the first ``en``
    predicate that accepts it in the order noun, verb, adjective, adverb;
    everything else goes into the miscellaneous list. The first five entries
    of each list are printed as a quick sanity check.

    :param path_to_dict: path of the word list file.
    :returns: tuple ``(noun, verb, adjective, adverb, miscel)`` of lists.
    """
    noun = []
    verb = []
    adjective = []
    adverb = []
    miscel = []
    # The context manager closes the file even if classification fails;
    # the handle was previously never closed.
    with open(path_to_dict, 'r') as f:
        for l in f:
            word = l.strip()
            if en.is_noun(word):
                noun.append(word)
            elif en.is_verb(word):
                verb.append(word)
            elif en.is_adjective(word):
                adjective.append(word)
            elif en.is_adverb(word):
                adverb.append(word)
            else:
                miscel.append(word)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for bucket in (noun, verb, adjective, adverb, miscel):
        print(bucket[:5])
    return noun, verb, adjective, adverb, miscel
Пример #9
0
def verse(word):

    """Creates a small rhyme for a given word.

    The rhyme is based on WordNet's description for the word.
    This description is eloquated (alliterated or antonated), incorporated.

    Double quotes are stripped from every word of the gloss. Nouns are always
    passed through ``eloquate``; with ``random(100) > 60`` nouns, verbs and
    adjectives are additionally passed through ``incorporate``. A line break
    is added after every third word (indices 3, 6, 9, ...).

    Returns the rewritten gloss as one string.
    """

    g = en.noun.gloss(word)
    words = g.split(" ")

    for i in range(len(words)):

        w = words[i].replace("\"", "")

        if en.is_noun(w):
            w = eloquate(w)

        if random(100) > 60:

            if en.is_noun(w): w = incorporate(w).upper()
            if en.is_verb(w): w = incorporate(w, VERB)
            if en.is_adjective(w): w = incorporate(w, ADJECTIVE)

        # Add the line break to the rewritten word. It used to be written to
        # words[i] and then immediately overwritten by the assignment below,
        # so the rhyme never contained any line breaks.
        if i > 0 and i % 3 == 0:
            w = w + "\n"

        words[i] = w

    g = " ".join(words)
    g = g.replace("type A ", "!")
    g = g.replace("group A ", "!")
    return g
Пример #10
0
def autoWordToVerb(rawword):
	"""Collect lower-cased names of verb forms derivationally related to a word.

	The word is first replaced by its infinitive when ``en`` recognises that
	infinitive as a verb. Derivationally related forms are then gathered from
	the WordNet lemmas of the word and from the lemmas of each of its synsets;
	only related forms whose string representation contains '.v.' are kept.

	Lookups that fail for an individual lemma or synset are skipped
	(best effort), but only ordinary exceptions are ignored now, so
	KeyboardInterrupt / SystemExit are no longer swallowed.

	:returns: list of unique names, in no particular order.
	"""
	word = rawword
	infVerb = en.verb.infinitive(rawword)
	if infVerb != '' and en.is_verb(infVerb):
		word = infVerb

	found = set()

	def _collect(lemmas):
		# Resolve all related forms first, then add the verb-like names of
		# each lemma as a unit: a failure discards that lemma's names only.
		pairs = [(w, w.derivationally_related_forms()) for w in lemmas]
		for val in pairs:
			try:
				names = set([(w.name).lower() for w in val[1] if '.v.' in str(w)])
			except Exception:
				continue
			found.update(names)

	_collect(nltk.corpus.wordnet.lemmas(word))

	for synword in nltk.corpus.wordnet.synsets(word):
		try:
			_collect(synword.lemmas)
		except Exception:
			pass

	return list(found)
Пример #11
0
def stem(word):
	"""Return a base form of ``word``: verb infinitive, else noun singular, else ``word``.

	A candidate is accepted only when it is non-empty and ``en`` confirms
	it as the matching part of speech.
	"""
	attempts = (
		(en.verb.infinitive, en.is_verb),
		(en.noun.singular, en.is_noun),
	)
	for reduce_form, is_valid in attempts:
		candidate = reduce_form(word)
		if len(candidate) != 0 and is_valid(candidate):
			return candidate
	return word
Пример #12
0
 def is_a_expression(self, word):
     """Return a truthy value if ``word`` passes any of the expression checks.

     Checks run in order and stop at the first truthy result: hash tag,
     negation, then the ``en`` noun / adjective / verb / adverb tests, and
     finally the orality check, whose result is returned as-is when all
     earlier checks fail.
     """
     marker = self.is_a_hash_tag(word) or self.is_negation(word)
     if marker:
         return marker
     lexical = (en.is_noun(word) or en.is_adjective(word)
                or en.is_verb(word) or en.is_adverb(word))
     if lexical:
         return lexical
     return self.is_orality(word)
Пример #13
0
Файл: views.py Проект: mitnk/mc
def normalize(word):
    """Map ``word`` to a simpler related word, or return it unchanged.

    The first transformation that yields a *different* word of the expected
    kind (as judged by ``en``) wins. In order:

    1. verb -> present tense;
    2. noun -> singular;
    3. if ``word`` is a noun: strip "er", "r" or "ment" to get a verb, or
       "ness" to get an adjective;
    4. strip "ly", or replace "ly" with "e", to get an adjective;
    5. if ``word`` is an adjective: replace "ory"/"ive" with "e" or strip
       "ive" to get a verb, or strip "er"/"r" to get an adjective.

    Every suffix rule applies only to words that actually end with the
    suffix. Two earlier defects are fixed by this: "ness" was removed from
    anywhere in the word, and the "ory"/"ive" -> "e" rules appended "e" to
    adjectives that did not carry the suffix at all.

    :returns: the normalized word, or ``word`` itself if nothing applied.
    """
    ## TODO: make this function nicer (UT, shorter).

    def first_rewrite(rules):
        # Return the first accepted suffix replacement, or None.
        for suffix, replacement, is_expected_kind in rules:
            if not word.endswith(suffix):
                continue
            candidate = word[:len(word) - len(suffix)] + replacement
            if candidate != word and is_expected_kind(candidate):
                return candidate
        return None

    ## all verb to present
    try:
        new_word = en.verb.present(word)
        if new_word != word and en.is_verb(new_word):
            return new_word
    except KeyError:
        pass

    new_word = en.noun.singular(word)
    if new_word != word and en.is_noun(new_word):
        return new_word

    if en.is_noun(word):
        new_word = first_rewrite((
            ('er', '', en.is_verb),
            ('r', '', en.is_verb),
            ('ment', '', en.is_verb),
            ('ness', '', en.is_adjective),
        ))
        if new_word is not None:
            return new_word

    ## adv to adj
    ## TODO: is there a quick way to do this in "en" libs
    new_word = first_rewrite((
        ('ly', '', en.is_adjective),
        ('ly', 'e', en.is_adjective),
    ))
    if new_word is not None:
        return new_word

    if en.is_adjective(word):
        new_word = first_rewrite((
            ('ory', 'e', en.is_verb),
            ('ive', 'e', en.is_verb),
            ('ive', '', en.is_verb),
            ('er', '', en.is_adjective),
            ('r', '', en.is_adjective),
        ))
        if new_word is not None:
            return new_word

    return word
Пример #14
0
def verbs(list):

    """Return the stripped entries of ``list`` that ``en`` recognises as verbs.

    Input order is preserved.
    """

    stripped = (entry.strip() for entry in list)
    return [candidate for candidate in stripped if en.is_verb(candidate)]
Пример #15
0
Файл: views.py Проект: mitnk/mc
def get_gloss(word):
    """Return the gloss of ``word`` for the first part of speech it matches.

    Parts of speech are tried in the order verb, adjective, adverb, noun;
    when none matches, ``en.wordnet.gloss`` is used instead.
    """
    lookups = (
        (en.is_verb, en.verb),
        (en.is_adjective, en.adjective),
        (en.is_adverb, en.adverb),
        (en.is_noun, en.noun),
    )
    for is_kind, lexicon in lookups:
        if is_kind(word):
            return lexicon.gloss(word)
    return en.wordnet.gloss(word)
Пример #16
0
def simplify_word(a):
    """Reduce a word to a base form; unrecognised words are logged and kept.

    The checks are tried in this order and the first hit wins:

    1. the present-tense form of ``a``, if ``en`` accepts it as a verb;
    2. the singular form of ``a``, if ``en`` accepts it as a noun;
    3. ``a`` itself, if it is a noun, verb, adjective, adverb or connective.

    A word that matches nothing is appended to the module-level
    ``otherwordlist`` and returned unchanged.
    """
    # Step 1: verb. The result of en.is_verb() used to be thrown away, so
    # every word that could be conjugated at all was returned as a verb and
    # the noun check below was never reached for it. Only ordinary
    # exceptions are ignored, not KeyboardInterrupt / SystemExit.
    try:
        present = en.verb.present(a)
        if en.is_verb(present):
            return present
    except Exception:
        pass

    # Step 2: noun, reduced to its singular form.
    singular = en.noun.singular(a)
    if en.is_noun(singular):
        return singular

    # Step 3: already a known noun, verb, adjective, adverb or connective.
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(a) or en.is_connective(a):
        return a

    otherwordlist.append(a)
    return a
Пример #17
0
def generate_word(list, pos):
    """Return a word for part of speech ``pos``, sometimes inventing a new one.

    With probability ``percentage_chance`` a new word is derived from WordNet:
    a random word from ``list`` is looked up, one of its synsets is chosen
    (one time in four its hypernym, one time in four its hyponym, when
    available), and a random lemma name without "_" or "-" is taken. The
    word is accepted only if ``en`` confirms the requested part of speech
    and, for verbs, if it can be conjugated. An accepted word that is new is
    appended to ``list``. Otherwise a random existing word is returned.

    Seed words without any synset for ``pos`` are now skipped; they used to
    raise ValueError from ``random.randint(0, -1)``.

    NOTE: as before, the search loop repeats until a word is accepted, so it
    does not terminate if no word in ``list`` can ever yield one.

    :param list: non-empty list of known words; may be extended in place.
    :param pos: one of ``wn.NOUN``, ``wn.VERB``, ``wn.ADJ``.
    :returns: the chosen word.
    """
    if random.random() >= percentage_chance:
        # Just select a random word from the existing ones.
        return random.choice(list)

    while True:
        synsets = wn.synsets(random.choice(list), pos=pos)
        if not synsets:
            # This seed has no synset for the requested pos; try another.
            continue
        synset = random.choice(synsets)

        ran = random.randint(0, 3)
        if ran == 0 and synset.hypernyms():
            synset = random.choice(synset.hypernyms())
        elif ran == 1 and synset.hyponyms():
            synset = random.choice(synset.hyponyms())

        # Names containing "_" or "-" are excluded (per the original note,
        # they make the en library misbehave).
        candidates = [name for name in synset.lemma_names()
                      if "_" not in name and "-" not in name]
        if not candidates:
            continue
        word = random.choice(candidates)

        matches_pos = ((pos == wn.NOUN and en.is_noun(word)) or
                       (pos == wn.VERB and en.is_verb(word)) or
                       (pos == wn.ADJ and en.is_adjective(word)))
        if not matches_pos:
            continue

        if pos == wn.VERB:
            # Make sure the verb has a conjugation before accepting it.
            try:
                en.verb.present(word, person=3, negate=False)
            except KeyError:
                continue

        if word not in list:
            list.append(word)
        return word
Пример #18
0
def get_wh_structure(q, wh_tag):
    """Rearrange the words of a wh-question into an answer-shaped word order.

    The code reads ``q.words`` and, per word, ``.string``, ``.type``,
    ``.index`` and ``.lemma``; ``search`` and ``english_pack`` come from
    outside this function (presumably the pattern library -- TODO confirm).

    :param q: the parsed question.
    :param wh_tag: tag placed at the start of the search pattern.
    :returns: ``(answer, main_part)`` where ``main_part`` is the list of
        rearranged word strings and ``answer`` is those strings joined with
        the words before the wh-word and the words after the match.
    """
    
    # Pattern: the wh-word, a modal or verb, anything, then another verb.
    search_string = wh_tag + " MD|VB* *+ VB|VB*"

    m = search(search_string, q)
    if len(m) == 0:
        #This solves ambiguity. If a verb can also be a noun and is misclassified, we change it back to a verb
        # Only the first such word is retagged before searching again.
        for i in range(len(q.words)):
            if english_pack.is_verb(q.words[i].string) and not q.words[i].type.startswith("V"):
                q.words[i].type = "VB"
                break
        m = search(search_string, q)

    # NOTE(review): if the second search is also empty, m[0] fails here.
    m = m[0]
    wh_word = m.words[0]

    #Creates part of answer that was before the where clause(contextual information)
    initial_aux = ""
    if wh_word.index != 0:
        initial_aux = " ".join([q.words[w].string for w in range(wh_word.index)])
        
    #Create final part of answer, that comes after the verb
    # A trailing "?" token is left out.
    final_aux = ""
    if m[-1].index < len(q.words) -1:
        final_aux = " ".join([w.string for w in q.words[m[-1].index+1:] if w.string != "?"])

    #Find NP between the two verbs

    # Drop the wh-word, then swap the leading word rightwards one position
    # at a time until the word after it is a verb that does not start with
    # an upper-case letter.
    m = m[1:]
    current = 0
    while not m[current +1].type.startswith("V") or  m[current+1].string[0].isupper():
        m[current],m[current+1] = m[current+1],m[current]
        current +=1

    #Handles 'do' in the past on in the third form

    # When the moved word is a form of "do", it is removed and its tense or
    # third-person form is applied to the last word of the match instead.
    if m[current].lemma.lower() == "do":
        v = m[current]
        m = m[:current] + m[current+1:]
        if english_pack.verb.is_past(v.string):
            m[-1].string = english_pack.verb.past(m[-1].string)
        elif english_pack.verb.is_present(v.string, person=3):
            m[-1].string = english_pack.verb.present(m[-1].string,person  = 3)
    
    main_part  = [w.string for w in m]

    answer = " ".join(main_part)
 
    # Empty prefix/suffix parts leave stray spaces, removed by strip().
    answer = " ".join([initial_aux,answer,final_aux]).strip()

    return answer, main_part
Пример #19
0
 def resolveCompoundVerbs(self, sentence):
     """Merge adjacent '<VERB>'-tagged tokens into one token, in place.

     The list is walked from the end towards the start. For a pair of
     neighbouring '<VERB>' tokens:

     * if the earlier word ends with ' to', the two are joined as
       "<earlier word> <later token>";
     * otherwise, if both words are verbs according to ``en`` (or the
       earlier one contains 'do not ' / 'did not'), they are joined as
       "<earlier word> <present participle of later word><tags>".

     In both cases the earlier token is removed from the list.

     After a ' to' merge the index now moves back by one. It used to stay
     put, so when the merged pair sat at the end of the list the next
     iteration read ``sentence[i]`` past the end and raised IndexError.

     :param sentence: list of token strings; modified in place.
     :returns: None.
     """
     i = len(sentence) - 1
     while i > 0:
         j = i - 1
         currToken = sentence[i]
         prevToken = sentence[j]
         step = 1
         if '<VERB>' in currToken and '<VERB>' in prevToken:
             currWord = currToken[:currToken.find('<')]
             prevWord = prevToken[:prevToken.find('<')]
             if prevWord.endswith(' to'):
                 sentence[i] = prevWord + ' ' + currToken
                 sentence.pop(j)  # merged token now lives at index i - 1
             elif (en.is_verb(prevWord) or 'do not ' in prevWord or 'did not' in prevWord) and en.is_verb(currWord):
                 sentence[i] = prevWord + ' ' + en.verb.present_participle(currWord) + currToken[currToken.find('<'):]
                 sentence.pop(j)
                 step = 2  # unchanged: skip past the merged token
         i -= step
    def add_forms(self):
        """Build inflected ``Word`` forms for every phrase in ``self.phrases``.

        For a phrase whose name ``en`` accepts as a verb, five forms are
        added: infinitive (VB), past (VBD), first-person present (VBP),
        third-person present (VBZ) and present participle (VBG). If any
        conjugation fails, a message is printed and none of that verb's
        forms are added. For a noun, only the plural (NNS) is added.

        :returns: list of the generated ``Word`` objects.
        """
        forms = []
        for w in self.phrases:
            if en.is_verb(w.name):
                try:
                    # All forms are computed before any is appended, so a
                    # failure leaves no partial set behind. The unused
                    # second-person lookup has been dropped.
                    conjugated = [
                        Word(en.verb.infinitive(w.name), "VB"),
                        Word(en.verb.past(w.name), "VBD"),
                        Word(en.verb.present(w.name, person = 1), "VBP"),
                        Word(en.verb.present(w.name, person = 3), "VBZ"),
                        Word(en.verb.present_participle(w.name), "VBG"),
                    ]
                except Exception:
                    # Narrowed from a bare except; print(...) with a single
                    # argument behaves the same on Python 2 and Python 3.
                    print("Error in conjugation for verb:" + w.name)
                else:
                    forms.extend(conjugated)
            elif en.is_noun(w.name):
                nns = en.noun.plural(w.name)
                forms.append(Word(nns, "NNS"))

        return forms
Пример #21
0
def translate_x_of_assertion(brain,a):
    """Render an "<x> of" assertion as a sentence.

    ``prefix`` is ``a.relation`` without its last three characters. The
    wording depends on what ``en`` makes of it:

    * noun (by its singular form): "<left> <verb> <article+prefix> of <right>",
      with the plural of the prefix used instead when the left side is plural;
    * verb (by a non-empty infinitive): "<left> <prefix> <owner> of <right>",
      the owner defaulting to "everyone";
    * adjective: "<left> <verb> <prefix> of <right>".

    In all other cases the sentence is empty. The result always goes through
    ``add_end_marks``.
    """
    prefix = a.relation[:-3]
    prefix_article = en.noun.article(prefix)
    verb = get_tense(a, "was", a.l, brain)

    if en.is_noun(en.noun.singular(prefix)):
        head = en.noun.plural(prefix) if is_plural(a.l, brain) else prefix_article
        sentence = list_concepts_naturally(brain,a.l) + " "+verb+" " + head + " of " + list_words_naturally(a.r)
    else:
        infinitive = en.verb.infinitive(prefix)
        if en.is_verb(infinitive) and infinitive != "":
            has_owner = hasattr(a,"owner") and len(a.owner)>0
            owner = list_concepts_naturally(brain, a.owner) if has_owner else "everyone"
            sentence = list_concepts_naturally(brain, a.l) + " "+prefix +" "+owner+ " of " + list_concepts_naturally(brain, a.r)
        elif en.is_adjective(prefix):
            # TODO for capable_of >> deal with action, action_object, action_recipient...
            # Similar for used_for >> when used_for is action / verbs
            sentence = list_concepts_naturally(brain,a.l) + " "+verb+" " + prefix + " of " + list_words_naturally(a.r)
        else:
            sentence = ""

    return add_end_marks(a, sentence)
Пример #22
0
def getHeadVerbTense(question_parts):
  """Classify the tense of the first verb found in ``question_parts``.

  The head verb is the first word whose present-tense form ``en`` accepts as
  a verb; words that cannot be conjugated are skipped.

  Returns:
    "NO_HEAD_VERB" if no verb is found, ``PAST_TENSE`` if the verb's tense
    name contains "past", ``PRESENT_TENSE`` if it contains "present" or
    "infinitive", and None otherwise.
  """
  headVerb = None

  for origWord in question_parts:
    try:
      word = en.verb.present(origWord)
    except Exception:
      # Not conjugable: skip this word. Narrowed from a bare except so that
      # KeyboardInterrupt / SystemExit are no longer swallowed.
      continue

    if en.is_verb(word):
      headVerb = origWord
      break

  if headVerb is None:
    return "NO_HEAD_VERB"

  tense = en.verb.tense(headVerb)

  if "past" in tense:
    return PAST_TENSE
  if "present" in tense or "infinitive" in tense:
    return PRESENT_TENSE
  return None
Пример #23
0
def get_frequncy_dist(dir_path):
    """Compute a frequency table of the less common words in a folder of .srt files.

    The word counts go through successive filters (lemmatization, stop words
    and names, WordNet membership, part-of-speech tag, ``en.basic.words``,
    a personal basic-word list). Every word dropped by a filter is written to
    ``wfd.csv`` inside ``dir_path`` with the reason; the surviving words are
    written last with type "lexicon". Counts and differences are logged at
    debug level after each stage.

    Paths are built with a backslash separator.

    :param dir_path: directory containing the ``.srt`` files; also receives
        the ``wfd.csv`` output.
    :returns: dict mapping each surviving word to its frequency.
    """
    files = os.listdir(dir_path)

    # Stage 1: count lower-cased, purely alphabetic words in every .srt file.
    all_words = 0
    words_wt_freq = {}   
    '''get words'''
    for filename in files:
        if (filename.endswith('.srt')):
            file_handler = open(dir_path + '\\' + filename, 'r')
            for line in file_handler :
                for word in line.strip().split():
                    # Surrounding punctuation is stripped before the check.
                    sword = word.strip(punctuation)
                    if (sword.isalpha()):
                        lword = sword.lower()
                        words_wt_freq[lword] = words_wt_freq.get(lword, 0) + 1
                        all_words += 1
            file_handler.close()
    # NOTE(review): the logged total is all_words - 1; reason not evident here.
    logger.debug('# all words: ' + str (all_words - 1))
    logger.debug('# unique words: ' + str (len(words_wt_freq.keys())))
    lexical_diversity_for_freq(words_wt_freq.values())
    
    # Stage 2: merge counts of words that lemmatize to a different form.
    lemmatized_words_wt_freq = {}
    for word in words_wt_freq.keys():
        lemmatized_word = nltk.WordNetLemmatizer().lemmatize(word)
        if (word != lemmatized_word and lemmatized_word != None):
            lemmatized_words_wt_freq[lemmatized_word] = lemmatized_words_wt_freq.get(lemmatized_word, 0) + words_wt_freq.get(word)
            #print(lemmatized_word, word)
        else:
            # NOTE(review): this assignment replaces any count already
            # accumulated for the same key by the branch above.
            lemmatized_words_wt_freq[word] = words_wt_freq.get(word)
    lemmatized_size = len(lemmatized_words_wt_freq.keys())            
    logger.debug ('# words after lemmatized: ' + str (lemmatized_size) + " diff: " + str (len(words_wt_freq.keys()) - lemmatized_size))
    lexical_diversity_for_freq(lemmatized_words_wt_freq.values())
    words_wt_freq = {} # Save memory

    
    # Stage 3: drop words of length <= 2 and words found in the stop-word,
    # name and Swadesh lists.
    stopwords_en = stopwords.words('english')
    male_names = names.words('male.txt')
    female_names = names.words('female.txt')
    comparative = swadesh.words('en')
    ignore_list = [] ;
    ignore_list.extend(stopwords_en)
    ignore_list.extend(male_names)
    ignore_list.extend(female_names)
    ignore_list.extend(comparative)            
    filtered_words = []

    out_file = open(dir_path + '\\wfd.csv', 'w')
    out_file.write ('Word, Type, Frequency \n')
        
    for word in lemmatized_words_wt_freq.keys():
        if len(word) > 2 and word not in ignore_list:
            filtered_words.append(word)   
        else:
            out_file.write(word + ',stop words,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering stop words: ' + str (len(filtered_words)) + " diff: " + str (len(lemmatized_words_wt_freq.keys()) - len(filtered_words)))
    ignore_list = [] #save memory

    # Stage 4: keep only words that have at least one WordNet synset.
    '''wordnet has 155k'''                                 
    usual_words = []
    for word in  filtered_words:
        if (len(wordnet.synsets(word)) != 0):
            usual_words.append(word)
        else:
            out_file.write(word + ',not in wordnet,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering unused words: ' + str (len(usual_words)) + " diff: " + str (lemmatized_size - len(usual_words)))
    filtered_words = [] # save memory 

    # Stage 5: drop unwanted part-of-speech tags. Words that en recognises
    # as adverb, adjective, verb or noun keep their count as-is; the rest
    # are reduced by tag-specific suffix rules and merged under the
    # reduced form.
    tag_filtered_words_wt_freq = {}
    words_wt_tags = nltk.pos_tag(usual_words)
    for (word, tag) in words_wt_tags:
        if (tag not in ['EX', 'DET', 'CNJ', 'FW', 'MD', 'NP', 'NUM', 'PRO', 'P', 'TO', 'UH', 'WH', 'WP', 'NNP', 'MOD']):
            if(en.is_adverb(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('ADV,' + word)
            elif (en.is_adjective(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('ADJ,' + word)
            elif (en.is_verb(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('VB,' + word)
            elif (en.is_noun(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('N,' + word) 
            else:
                # Not recognised by en: a VBZ/NNS word not ending in 's', or
                # a JJS word not ending in 'est', is dropped without being
                # written to the CSV.
                if (tag in ['VBZ', 'NNS']):
                    if word.endswith('s'):
                        new_word = word[:-1]
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)
                        #print (word , new_word,tag)    
                elif (tag == 'VBG'):
                    new_word = en.verb.infinitive(word)
                    if new_word != None and word != new_word:
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)
                elif (tag == 'JJS'):
                    if word.endswith('est'):
                        new_word = word[:-3]
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)     
                else:
                    tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]        
                    #print (word,tag)   
        else:
            out_file.write(word + ',unwanted pos,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering unwanted pos:' + str (len(tag_filtered_words_wt_freq.keys())) + " diff: " + str (len(usual_words) - len(tag_filtered_words_wt_freq.keys())))
    lexical_diversity_for_freq(tag_filtered_words_wt_freq.values())
    lemmatized_words_wt_freq = {} # save memory
    usual_words = [] #save memory

    # Stage 6: remove words contained in en.basic.words.
    basic_english_vocab = en.basic.words
    non_basic_words = set(tag_filtered_words_wt_freq.keys()).difference(basic_english_vocab)
    non_basic_words_wt_freq = {}
    for non_basic_word in non_basic_words:
        non_basic_words_wt_freq[non_basic_word] = tag_filtered_words_wt_freq[non_basic_word] 
    words_in_both = set(tag_filtered_words_wt_freq.keys()).intersection(basic_english_vocab)
    for word in words_in_both:
        out_file.write(word + ',en.basic.words,' + str(tag_filtered_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering basic words: ' + str (len(non_basic_words_wt_freq.keys())) + " diff: " + str (len(tag_filtered_words_wt_freq.keys()) - len(non_basic_words_wt_freq.keys())))
    lexical_diversity_for_freq(non_basic_words_wt_freq.values())
    tag_filtered_words_wt_freq = {} #save memory


    # Stage 7: remove words listed in the application's own
    # etc\basic_words.csv (whitespace-separated, compared lower-cased).
    fh = open(os.path.join(base.app_root(), 'etc\\basic_words.csv'), 'r')
    my_words = [word.lower() for line in fh for word in line.strip().split()]
    fh.close()
    new_words = set(non_basic_words).difference(my_words)
    words_in_both = set(non_basic_words).intersection(my_words)
    for word in words_in_both:
        out_file.write(word + ',en.basic.words.mine,' + str(non_basic_words_wt_freq.get(word)) + '\n')    
    new_words_wt_freq = {}
    for new_word in new_words:
        new_words_wt_freq[new_word] = non_basic_words_wt_freq[new_word] 
    logger.debug ('# words after filtering my words: ' + str (len(new_words_wt_freq.keys())) + " diff: " + str (len(non_basic_words_wt_freq.keys()) - len(new_words_wt_freq.keys())))
    lexical_diversity_for_freq(new_words_wt_freq.values())
    
    # Write the survivors sorted by frequency, then alphabetically.
    sorted_words = sorted(new_words_wt_freq.items(), key=itemgetter(1, 0))
    for (word, frequency) in sorted_words:
        out_file.write (word + ',lexicon,' + str(frequency) + '\n')
    out_file.close()
    
    return new_words_wt_freq
Пример #24
0
def tonoun(term):
    """Record the lemmatized form of a verb as a variation of ``term``.

    Does nothing unless ``en`` recognises ``term`` as a verb. The lemma from
    ``lmtzr.lemmatize`` is added to ``variations[term]`` only when it differs
    from ``term``.
    """
    if not en.is_verb(term):
        return
    nterm = lmtzr.lemmatize(term)
    # Compare by value. ``is not`` compared object identity, which is also
    # true for two equal strings held in different objects, so a term could
    # be recorded as a variation of itself.
    if nterm != term:
        variations[term].add(nterm)
Пример #25
0
import re
import en

if __name__ == "__main__":
    # Small demo of the en library: three part-of-speech checks, then
    # tagging and pattern matching on one sample text.
    sample_text = "The day after today, before yesterday. And in pase years, later"

    print(en.is_adjective("accomplished"))
    print(en.is_noun("wizard"))
    print(en.is_verb("accomplish"))

    tagged = en.parser.sentence_tag(sample_text)
    print(tagged)

    # The match result is not used, as before.
    en.parser.matches(sample_text, "JJ NN")
Пример #26
0
 def check_verb( self, ID ):
   """Try to combine the word at ``ID`` with the following word into one verb.

   The current word is reduced to its infinitive first. Returns
   ``(phrase, next_id)`` when "<infinitive> <next word>" is accepted by
   ``en.is_verb``, otherwise ``(infinitive, ID)``.
   """
   entry = self.IDs[ID]
   base = verbConj.infinitive( entry['w'] )
   idNext = entry['nw']
   phrase = base + " " + self.IDs[ idNext ]['w']
   if en.is_verb( phrase ):
     return ( phrase, idNext )
   return ( base , ID )
Пример #27
0
def is_verb_assertion(a):
    """Return whether ``en`` recognises the assertion's relation as a verb."""
    relation = a.relation
    return en.is_verb(relation)
Пример #28
0
import en

# Build a table of verb infinitive -> past tense from a one-word-per-line
# dictionary file and write it out as "infinitive,past" lines.
chosenWords = {}
with open("bigOlDictionary.txt") as rawdata:
	for line in rawdata:
		# Remove only the line terminator. Slicing off the last character
		# truncated the final word when the file did not end with a newline.
		word = line.rstrip('\n')
		if en.is_verb(word):
			infinitive = en.verb.infinitive(word)
			if infinitive != '':
				chosenWords[infinitive] = en.verb.past(infinitive)

# Both files are now closed deterministically, so the output is flushed
# to disk when the script ends.
with open("chosenWords2.txt", 'w') as data:
	for word in chosenWords:
		data.write(word + ',' + chosenWords[word] + '\n')
Пример #29
0
def tonoun(term):
    """Record the lemmatized form of a verb as a variation of ``term``.

    Does nothing unless ``en`` recognises ``term`` as a verb. The lemma from
    ``lmtzr.lemmatize`` is added to ``variations[term]`` only when it differs
    from ``term``.
    """
    if not en.is_verb(term):
        return
    nterm = lmtzr.lemmatize(term)
    # Compare by value. ``is not`` compared object identity, which is also
    # true for two equal strings held in different objects, so a term could
    # be recorded as a variation of itself.
    if nterm != term:
        variations[term].add(nterm)
Пример #30
0
def stemWord( word ):
  """Stem ``word``, reducing verbs to their infinitive first.

  Non-ASCII bytes are dropped (``decode`` with "ignore") before the word is
  handed to ``stemmer.stem``.
  """
  base = verbConj.infinitive( word ) if en.is_verb( word ) else word
  ascii_only = base.decode( "ascii", "ignore" )
  return stemmer.stem( ascii_only )
Пример #31
0
 def is_major(word):
     """Return a truthy value if ``word`` is a verb, adjective, adverb or modal verb.

     The ``en`` checks run in that order and the first truthy result is
     returned; if all fail, the result is membership in ``MODAL_VERBS``.
     """
     for predicate in (en.is_verb, en.is_adjective, en.is_adverb):
         verdict = predicate(word)
         if verdict:
             return verdict
     return word in MODAL_VERBS
Пример #32
0
import en

# Demo of assorted en library calls; each result is printed on its own line.

# Emotion and persuasion checks.
print(en.is_basic_emotion("anxious"))
print(en.is_persuasive("money"))
print(en.noun.is_emotion("anger"))


print(en.adjective.is_emotion("anxious", boolean=False))

# Noun check on a misspelt word, then a spelling suggestion.
print(en.is_noun("comptuer"))
print(en.spelling.suggest("computer")[0])

# Verb helpers.
print(en.verb.is_emotion("love", boolean=False))
print(en.verb.infinitive("announced"))
print(en.verb.infinitive("dont"))
print(en.is_verb("went"))
a = en.verb.infinitive("dont")
print(en.verb.is_emotion(a, boolean=False))
print(en.is_noun("people"))

print(en.is_noun(en.noun.singular("adore")))

# Lexical category names for a few nouns and one adverb.
for _noun in ("book", "music", "water", "fear", "love", "like", "hate", "overcome"):
    print(en.noun.lexname(_noun))
print(en.adverb.lexname("actually"))

Пример #33
0
def is_verb_assertion(a):
    """Return whether ``en`` recognises the assertion's relation as a verb."""
    relation = a.relation
    return en.is_verb(relation)