def test_number(self):
    """Check that en.number() inverts en.numerals() for random floats.

    Draws 100 values with random.random(), spells each one out with
    en.numerals(x, round=10), parses the words back with en.number() and
    asserts that the round trip agrees with the input to 10 decimal places.
    """
    for _ in range(100):
        x = random.random()
        y = en.number(en.numerals(x, round=10))
        self.assertAlmostEqual(x, y, places=10)
    # Call form of print: emits the same text on Python 2 and Python 3.
    print("pattern.en.number()")
def test_number(self):
    """Round-trip random floats through en.numerals() and en.number().

    Each of the 100 samples is written out in words (rounded to 10 decimals)
    and parsed back; the parsed value must match the sample to 10 places.
    """
    trials_left = 100
    while trials_left:
        sample = random.random()
        spelled_out = en.numerals(sample, round=10)
        parsed = en.number(spelled_out)
        self.assertAlmostEqual(sample, parsed, places=10)
        trials_left -= 1
    print("pattern.en.number()")
# Disabled tail of the chatbot loop, kept for reference:
# else:
#     sent_tokens.append(user_response)
#     word_tokens=word_tokens+nltk.word_tokenize(user_response)
#     final_words=list(set(word_tokens))
#     print("ROBO: ",end="")
#     print(response(user_response))
#     sent_tokens.remove(user_response)
# else:
#     flag=False
#     print("ROBO: Bye! take care..")

# =============================== IDENTIFY ORDINAL NUMBER ===============================
import spacy
from spacy import displacy
import en_core_web_sm

# Load the small English spaCy pipeline and run it over a sample sentence.
nlp = en_core_web_sm.load()
doc = nlp(
    'European authorities fined Google a record $5.1 billion on Wednesday '
    'for abusing its power in the mobile phone market and ordered the '
    'company to alter its practices'
)

# Per-token entity annotation: (token, IOB flag, entity type).
print([(token, token.ent_iob_, token.ent_type_) for token in doc])
# Recognised entity spans with their labels.
print([(entity.text, entity.label_) for entity in doc.ents])

# (text, part of speech, lemma) for every token that is neither a stop word
# nor punctuation.  The value is not stored or printed.
[
    (token.orth_, token.pos_, token.lemma_)
    for token in doc
    if not token.is_stop and token.pos_ != 'PUNCT'
]

from pattern.en import number

# Parse a written-out quantity; the value is not stored or printed.
number('two thousand fifty and a half')
# Degree of certainty of the sentence prepared earlier in the script.
print(modality(sent))

# ### Spelling Corrections
from pattern.en import suggest

print(suggest("Whitle"))
print(suggest("Fracture"))

# ### Working with Numbers
from pattern.en import number, numerals

# Words -> number, then number -> words (rounded to two decimals).
print(number("one hundred and twenty two"))
print(numerals(256.390, round=2))

from pattern.en import quantify

# quantify() accepts a list of words, a word -> count mapping,
# or a single word together with an explicit amount.
print(quantify(['apple'] * 3 + ['banana'] * 3 + ['mango'] * 2))
print(quantify({'strawberry': 200, 'peach': 15}))
print(quantify('orange', amount=1200))
import os
import sys

# Resolve the repository root from this script's location rather than from the
# current working directory, so the example also runs when started elsewhere.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.en import number, numerals, quantify, reflect

# Every print below uses the single-argument call form, which produces the
# same output on Python 2 and Python 3.

# The number() command returns an int or float from a written representation.
# This is useful, for example, in combination with a parser
# to transform "CD" parts-of-speech to actual numbers.
# The algorithm ignores words that aren't recognized as numerals.
print(number("two thousand five hundred and eight"))
print(number("two point eighty-five"))
print("")

# The numerals() command returns a written representation from an int or float.
print(numerals(1.249, round=2))
print(numerals(1.249, round=3))
print("")

# The quantify() commands uses pluralization + approximation to enumerate words.
# This is useful to generate a human-readable summary of a set of strings.
print(quantify(["goose", "goose", "duck", "chicken", "chicken", "chicken"]))
print(quantify(["penguin", "polar bear"]))
print(quantify(["carrot"] * 1000))
print(quantify("parrot", amount=1000))
print(quantify({"carrot": 100, "parrot": 20}))
print("")

# The quantify() command only works with words (strings).
# To quantify a set of Python objects, use reflect().
# This will first create a human-readable name for each object and then quantify these.
print(reflect([0, 1, {}, False, reflect]))
print(reflect(os.path))
from __future__ import print_function from __future__ import unicode_literals from builtins import str, bytes, dict, int import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) from pattern.en import number, numerals, quantify, reflect # The number() command returns an int or float from a written representation. # This is useful, for example, in combination with a parser # to transform "CD" parts-of-speech to actual numbers. # The algorithm ignores words that aren't recognized as numerals. print(number("two thousand five hundred and eight")) print(number("two point eighty-five")) print("") # The numerals() command returns a written representation from an int or float. print(numerals(1.249, round=2)) print(numerals(1.249, round=3)) print("") # The quantify() commands uses pluralization + approximation to enumerate words. # This is useful to generate a human-readable summary of a set of strings. print(quantify(["goose", "goose", "duck", "chicken", "chicken", "chicken"])) print(quantify(["penguin", "polar bear"])) print(quantify(["carrot"] * 1000)) print(quantify("parrot", amount=1000)) print(quantify({"carrot": 100, "parrot": 20}))
def extract_information(sentence):
    """Guess the page range and copy count asked for in a print request.

    The sentence is tokenised with preProcessText(), ordinal suffixes are
    stripped from every token, spelling variants are mapped onto the canonical
    keywords "copies", "from", "to" and "page" using the word lists returned
    by readCorpora(), and the keywords are chunked with parseValues().
    Chunks labelled "TO" supply the page range, chunks labelled "NP" the
    number of copies.  The guess is reported on stdout.

    Args:
        sentence: raw request text, handed unchanged to preProcessText().

    Returns:
        None on every path.
    """
    not_understood = ("I didnt quite understand what you said "
                      "can you rephrase your sentence?")
    from_ = 0
    to = 0
    copies = 1
    keywords = preProcessText(sentence)

    # Strip ordinal suffixes so that e.g. "3rd" and "5th" become "3" and "5".
    for index, k in enumerate(keywords):
        keywords[index] = re.sub(r'nd$|th$|rd$|st$', "", k)

    wordlists = readCorpora()
    # Rewrite the command so that short forms and misspellings collapse onto
    # one canonical keyword.  Later vocabularies override earlier ones, as the
    # token is always compared in its pre-replacement form.
    vocabularies = (
        (wordlists.words("copy_words"), "copies"),
        (wordlists.words("from_words"), "from"),
        (wordlists.words("to_words"), "to"),
        (wordlists.words("page_words"), "page"),
    )
    for index, key in enumerate(keywords):
        for vocabulary, canonical in vocabularies:
            if key in vocabulary:
                keywords[index] = canonical
    if DEBUG:
        print("Keywords = ")
        print(keywords)

    # NOTE(review): the line breaks inside this grammar are reconstructed
    # (one rule per line is assumed) -- confirm against the original layout.
    result = parseValues(
        keywords,
        '''
        NP: {<NN.>}
        CP: {<CD>}
        TO: {<TO>}
        VP: {<VB.> | <NN>}
        ''',
        True)
    if DEBUG:
        print((result))

    # NOTE(review): this function reached review with its indentation
    # flattened; the nesting below (in particular which `if` owns the `else`
    # in the "NP" branch) is reconstructed from statement order -- confirm.
    for index, res in enumerate(result.subtrees()):
        if res.label() == "TO":
            if DEBUG:
                print("TO")
                print(result[index - 2].leaves())
            # The chunk two positions back is read as the first page ...
            for leaf in result[index - 2]:
                from_ = number(leaf[0])
                if DEBUG:
                    print(from_)
            # NOTE(review): this guard (and its early return) only runs when
            # DEBUG is on; without DEBUG a failing lookup raises from the
            # loop below instead.
            if DEBUG:
                try:
                    print(result[index].leaves())
                except Exception:
                    print(not_understood)
                    return
            # ... and the chunk at this position as the last page.
            for leaf in result[index]:
                to = number(leaf[0])
                if DEBUG:
                    print(to)
            if DEBUG:
                print("TO")

        if res.label() == "NP":
            if DEBUG:
                print("COPIES")
            try:
                if DEBUG:
                    try:
                        print(result[index - 2].leaves())
                    except Exception:
                        print(not_understood)
                        return
                for leaf in result[index - 2]:
                    if to != number(leaf[0]):
                        # A cardinal that is not the last page is the count.
                        if leaf[1] == "CD":
                            copies = number(leaf[0])
                            if DEBUG:
                                print("if copies")
                                print(copies)
                    else:
                        for leaf in result[index]:
                            if leaf[1] == "CD":
                                copies = number(leaf[0])
                                if DEBUG:
                                    print("else copies")
                                    print(copies)
            except Exception:
                # Best-effort fallbacks: look at the current chunk, and if
                # that fails too reuse the last page number as the count.
                try:
                    for leaf in result[index]:
                        if leaf[1] == "CD":
                            copies = number(leaf[0])
                            if DEBUG:
                                print("except copies")
                                print(copies)
                except Exception:
                    copies = to
                    if DEBUG:
                        print("TO = COPIES")

    if from_ == 0 or to == 0:
        # No usable range was found: report a whole-document request.
        print("\n")
        print("\n")
        print("\n")
        print("it seems like you want to print " + str(copies) +
              " copies of the whole document")
        from_ = 1
        to = 1
        return

    print("\n")
    print("\n")
    print("\n")
    print("it seems like you want to print " + str(copies) +
          " copies of the document " + "from pages " + str(from_) +
          " to " + str(to))
    print("\n")
    print("Have I guessed correctly :)?")
    # NOTE(review): no `global` declaration is visible in this function, so
    # these three names are locals that are discarded on return -- confirm
    # whether module-level state was meant to be updated.
    FROM_ = from_
    TO_ = to
    COPIES_ = copies
def _page_request(from_, to, copies):
    """Pack a guess into the all-strings dict returned by extract_information."""
    return {'from': str(from_), 'to': str(to), 'copies': str(copies)}


def extract_information(sentence):
    """Guess the page range and copy count asked for in a print request.

    The sentence is tokenised with preProcessText(), ordinal suffixes are
    stripped from every token and the keywords are chunked with
    parseValues().  When the module-level isSinglePageRange is truthy, a "CP"
    chunk next to a "page" token sets both ends of the range; otherwise "TO"
    chunks supply the range.  "NP" chunks supply the number of copies.  The
    guess is reported on stdout and returned.

    Args:
        sentence: raw request text, handed unchanged to preProcessText().

    Returns:
        dict with the keys 'from', 'to' and 'copies', every value a string.
        'from' and 'to' are both '-1' when no keywords were produced, when
        no page range could be determined (whole document), or when a
        DEBUG-mode lookup failed.
    """
    not_understood = ("I didnt quite understand what you said "
                      "can you rephrase your sentence?")
    fromToSet = False
    from_ = 0
    to = 0
    copies = 1
    keywords = preProcessText(sentence)

    # Strip ordinal suffixes so that e.g. "3rd" and "5th" become "3" and "5".
    for index, k in enumerate(keywords):
        keywords[index] = re.sub(r'nd$|th$|rd$|st$', "", k)
    if DEBUG:
        print("Keywords = ")
        print(keywords)
    if not keywords:
        return _page_request(-1, -1, 1)

    # NOTE(review): the line breaks inside this grammar are reconstructed
    # (one rule per line is assumed) -- confirm against the original layout.
    result = parseValues(
        keywords,
        '''
        NP: {<NN.>}
        CP: {<CD>}
        TO: {<TO>}
        VP: {<VB.> | <NN>}
        ''',
        True)
    if DEBUG:
        print((result))

    # NOTE(review): this function reached review with its indentation
    # flattened; the nesting below (which `if` owns each `else`, and the "NP"
    # branch applying in both page modes) is reconstructed from statement
    # order -- confirm.
    for index, res in enumerate(result.subtrees()):
        if isSinglePageRange:
            if res.label() == "CP":
                if DEBUG:
                    print("PAGE CP")
                    print(fromToSet)
                # Only the first number adjacent to a "page" token counts.
                if not fromToSet:
                    if ("page" in result[index-2].leaves()[0][0]
                            or "page" in result[index].leaves()[0][0]):
                        for leaf in res.leaves():
                            from_ = number(leaf[0])
                            to = number(leaf[0])
                            fromToSet = True
        else:
            if res.label() == "TO":
                if DEBUG:
                    print("TO")
                    print(result[index-2].leaves())
                # The chunk two positions back is read as the first page ...
                for leaf in result[index-2]:
                    from_ = number(leaf[0])
                    if DEBUG:
                        print(from_)
                # NOTE(review): this guard (and its early return) only runs
                # when DEBUG is on; without DEBUG a failing lookup raises
                # from the loop below instead.
                if DEBUG:
                    try:
                        print(result[index].leaves())
                    except Exception:
                        print(not_understood)
                        return _page_request(-1, -1, 1)
                # ... and the chunk at this position as the last page.
                for leaf in result[index]:
                    to = number(leaf[0])
                    if DEBUG:
                        print(to)
                if DEBUG:
                    print("TO")

        if res.label() == "NP":
            if DEBUG:
                print("COPIES")
            try:
                if DEBUG:
                    try:
                        print(result[index-2].leaves())
                    except Exception:
                        print(not_understood)
                        return _page_request(-1, -1, 1)
                for leaf in result[index-2]:
                    if to != number(leaf[0]):
                        # A cardinal that is not the last page is the count.
                        if leaf[1] == "CD":
                            copies = number(leaf[0])
                            if DEBUG:
                                print("if copies")
                                print(copies)
                    else:
                        for leaf in result[index]:
                            if leaf[1] == "CD":
                                copies = number(leaf[0])
                                if DEBUG:
                                    print("else copies")
                                    print(copies)
            except Exception:
                # Best-effort fallbacks: look at the current chunk, and if
                # that fails too reuse the last page number as the count.
                try:
                    for leaf in result[index]:
                        if leaf[1] == "CD":
                            copies = number(leaf[0])
                            if DEBUG:
                                print("except copies")
                                print(copies)
                except Exception:
                    copies = to
                    if DEBUG:
                        print("TO = COPIES")

    print("\n")
    print("\n")
    print("\n")
    if from_ == 0 or to == 0:
        # No usable range was found: report a whole-document request and use
        # -1 for both ends of the range.
        print("it seems like you want to print " + str(copies) +
              " copies of the whole document")
        from_ = -1
        to = -1
    else:
        # Only announce a range when one was found; previously this line was
        # also printed after the whole-document message, as "-1 to -1".
        print("it seems like you want to print " + str(copies) +
              " copies of the document " + "from pages " + str(from_) +
              " to " + str(to))
    print("\n")
    print("Have I guessed correctly :)?")
    return _page_request(from_, to, copies)
def convertword2num(self, sent_list):
    """Replace written-out numbers in each sentence with their numeric value.

    For every sentence the tokens produced by _timelex() are scanned for
    words found in self.num_list or self.sim_list.  The run of such words is
    collected (inserting an implied "one" or "hundred" where the rules below
    apply), parsed with number(), and the collected text is replaced in the
    sentence by the parsed value.

    Params:
        Input:
            sent_list - list of broken sentences
        Output:
            new_list - same list of sentences, but with numbers
    """
    new_list = []
    print(sent_list)
    # Combined vocabulary, built once instead of once per token.
    numeral_words = self.num_list + self.sim_list

    # NOTE(review): this method reached review with its indentation
    # flattened; the nesting below is reconstructed from statement order --
    # confirm against the original layout.
    for sent in sent_list:
        is_a_part = False
        words = []
        temp_sent = [x for x in _timelex(sent) if x != ' ']

        def implies_hundred(ind):
            # True when the token at `ind` parses to 1..20 and is directly
            # followed by a num_list word that parses to more than 9; the
            # callers then insert " hundred" after the token.
            return (ind != len(temp_sent) - 1 and
                    number(temp_sent[ind]) in range(1, 21) and
                    temp_sent[ind + 1] in self.num_list and
                    number(temp_sent[ind + 1]) > 9)

        for ind, wrd in enumerate(temp_sent):
            if wrd in numeral_words:
                if not is_a_part:
                    # First word of a new run of number words.
                    is_a_part = True
                    if wrd in self.sim_list:
                        # A sim_list word that opens the sentence, or follows
                        # a token that parses to 0, gets an explicit "one".
                        if ind == 0:
                            sent = sent.replace(wrd, u'one ' + wrd)
                            wrd = u'one ' + wrd
                            words.append(wrd)
                        elif number(temp_sent[ind - 1]) == 0:
                            sent = sent.replace(wrd, u'one ' + wrd)
                            wrd = u'one ' + wrd
                            words.append(wrd)
                    if implies_hundred(ind):
                        sent = sent.replace(wrd, wrd + u' hundred')
                        wrd = wrd + u' hundred'
                        words.append(wrd)
                    elif wrd not in words:
                        # Skip words already appended by the "one" rule.
                        words.append(wrd)
                else:
                    if implies_hundred(ind):
                        sent = sent.replace(wrd, wrd + u' hundred')
                        wrd = wrd + u' hundred'
                        words.append(wrd)
                    else:
                        words.append(wrd)
            elif wrd == 'and' and temp_sent[ind - 1] in self.sim_list:
                # Keep an "and" that joins two parts of the same number.
                if is_a_part:
                    try:
                        if temp_sent[ind + 1] in self.num_list:
                            words.append(wrd)
                    except IndexError:
                        # "and" is the last token: nothing follows it.
                        pass
            else:
                # Any other token ends the current run.
                is_a_part = False

        # Re-tokenise the collected words so the spacing is normalised.
        word = " ".join(words)
        word = ' '.join(x for x in _timelex(word) if x != ' ')
        try:
            num = number(word)
            print(num)
        except Exception:
            num = word
        else:
            if word:
                sent = sent.replace(word, str(num))
        new_list.append(sent)
    return new_list
print comparative('bad') print superlative('bad') # verb conjugation print lexeme('purr') print lemma('purring') print conjugate('purred', '3sg') # he / she / it print 'p' in tenses('purred') # By alias. print PAST in tenses('purred') print(PAST, 1, PL) in tenses('purred') # rule-based conjugation print 'google' in verbs.infinitives print 'googled' in verbs.inflections print conjugate('googled', tense=PARTICIPLE, parse=False) print conjugate('googled', tense=PARTICIPLE, parse=True) # quantification print number("seventy-five point two") # "seventy-five point two" => 75.2 print numerals(2.245, round=2) # 2.245 => "two point twenty-five" print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken']) print quantify({'carrot': 100, 'parrot': 20}) print quantify('carrot', amount=1000) # spelling print suggest("parot") # n-grams print ngrams("I am eating pizza.", n=2) # bigrams print ngrams("I am eating pizza.", n=3, punctuation=".,;:!?()[]{}`''\"@#$^&*+-|=~_", continuous=False) # parser print parse( 'I eat pizza with a fork.',
from builtins import str, bytes, dict, int

import os
import sys

# Make the repository copy of the package importable from this script's folder.
sys.path.insert(
    0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))

from builtins import str, bytes, dict, int

from pattern.en import article, referenced
from pattern.en import pluralize, singularize
from pattern.en import comparative, superlative
from pattern.en import conjugate, lemma, lexeme, tenses
from pattern.en import NOUN, VERB, ADJECTIVE
from pattern.en import number, numerals, quantify, reflect
from pattern.en import parse, pprint, tag
from pattern.en import parse, Text

#'''
# singularize() returns the singular form of a plural noun (or adjective).
# It is slightly less robust than the pluralize() function.
plural_forms = (
    "parts-of-speech",
    "children",
    "dogs'",
    "wolves",
    "bears",
    "kitchen knives",
    "octopodes",
    "matrices",
    "matrixes",
)
for word in plural_forms:
    print(singularize(word))
print(singularize("our", pos=ADJECTIVE))
print("")

# number() is applied to a full sentence and to a decimal expression.
print(number("I am two thousand five hundred and eight years old"))
print(number("two point eighty-five"))
print("")
#'''