Exemplo n.º 1
0
 def test_number(self):
     """Check that en.number() inverts en.numerals() on random floats.

     One hundred values from random.random() are spelled out with
     en.numerals() (rounded to 10 decimals), parsed back with en.number(),
     and compared with the original value to 10 decimal places.
     """
     for _ in range(100):
         x = random.random()
         y = en.number(en.numerals(x, round=10))
         self.assertAlmostEqual(x, y, places=10)
     # Parenthesised so the line is valid on Python 2 and Python 3 alike;
     # with a single argument both print exactly the same text.
     print("pattern.en.number()")
Exemplo n.º 2
0
 def test_number(self):
     """Round-trip 100 random floats through numerals() and number().

     The written form is produced with 10 decimals of rounding, so the parsed
     value is required to match the original to the same 10 places.
     """
     precision = 10
     for _ in range(100):
         expected = random.random()
         spelled = en.numerals(expected, round=precision)
         parsed = en.number(spelled)
         self.assertAlmostEqual(expected, parsed, places=precision)
     print("pattern.en.number()")
Exemplo n.º 3
0
# 			else:
# 				sent_tokens.append(user_response)
# 				word_tokens=word_tokens+nltk.word_tokenize(user_response)
# 				final_words=list(set(word_tokens))
# 				print("ROBO: ",end="")
# 				print(response(user_response))
# 				sent_tokens.remove(user_response)
# 	else:
# 		flag=False
# 		print("ROBO: Bye! take care..")

# ===============================================IDENTIFY ORDINAL NUMBER======================================================
import spacy
from spacy import displacy
import en_core_web_sm

# Load the en_core_web_sm pipeline and run it over one sample sentence.
nlp = en_core_web_sm.load()

doc = nlp('European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices')

# Token-level view: each token with its IOB entity flag and entity type.
token_entities = [(token, token.ent_iob_, token.ent_type_) for token in doc]
print(token_entities)

# Entity-level view: the text and label of every recognised entity span.
entity_labels = [(ent.text, ent.label_) for ent in doc.ents]
print(entity_labels)

# (text, part of speech, lemma) for every token that is neither a stop word
# nor punctuation. The list is built and discarded: nothing stores or prints it.
[(token.orth_, token.pos_, token.lemma_)
 for token in doc
 if not token.is_stop and token.pos_ != 'PUNCT']

from pattern.en import number

# Parse a spelled-out quantity; the return value is discarded here as well.
number('two thousand fifty and a half')
Exemplo n.º 4
0
# `modality` and `sent` are bound earlier in the script, outside this section.
sentence_modality = modality(sent)
print(sentence_modality)

# ### Spelling corrections

from pattern.en import suggest

# First call: a misspelled word.
misspelled_suggestions = suggest("Whitle")
print(misspelled_suggestions)

from pattern.en import suggest
# Second call: a correctly spelled word.
correct_suggestions = suggest("Fracture")
print(correct_suggestions)

# ### Working with numbers

from pattern.en import number, numerals

# Words -> number, then number -> words (rounded to two decimals).
parsed_value = number("one hundred and twenty two")
print(parsed_value)
spelled_value = numerals(256.390, round=2)
print(spelled_value)

from pattern.en import quantify

# quantify() over a plain list of words: three apples, three bananas, two mangoes.
fruit_basket = ['apple'] * 3 + ['banana'] * 3 + ['mango'] * 2
print(quantify(fruit_basket))

from pattern.en import quantify

# quantify() over a word -> count mapping, and over one word with an amount.
fruit_counts = {'strawberry': 200, 'peach': 15}
print(quantify(fruit_counts))
print(quantify('orange', amount=1200))
Exemplo n.º 5
0
import os
import sys

# Put the directory two levels up (relative to the working directory) at the
# front of the import path before importing pattern.en.
sys.path.insert(0, os.path.join("..", ".."))

from pattern.en import number, numerals, quantify, reflect

# Every print below is written as print(<one expression>), which produces the
# same output on Python 2 and Python 3; a bare Python 2 `print` (blank line)
# is spelled print("").

# The number() command returns an int or float from a written representation.
# This is useful, for example, in combination with a parser
# to transform "CD" parts-of-speech to actual numbers.
# The algorithm ignores words that aren't recognized as numerals.
print(number("two thousand five hundred and eight"))
print(number("two point eighty-five"))
print("")

# The numerals() command returns a written representation from an int or float.
print(numerals(1.249, round=2))
print(numerals(1.249, round=3))
print("")

# The quantify() command uses pluralization + approximation to enumerate words.
# This is useful to generate a human-readable summary of a set of strings.
print(quantify(["goose", "goose", "duck", "chicken", "chicken", "chicken"]))
print(quantify(["penguin", "polar bear"]))
print(quantify(["carrot"] * 1000))
print(quantify("parrot", amount=1000))
print(quantify({"carrot": 100, "parrot": 20}))
print("")

# The quantify() command only works with words (strings).
# To quantify a set of Python objects, use reflect().
# This will first create a human-readable name for each object and then quantify these.
print(reflect([0, 1, {}, False, reflect]))
print(reflect(os.path))
Exemplo n.º 6
0
from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.en import number, numerals, quantify, reflect

# number() turns a written-out quantity into an int or float. Together with a
# parser it can convert "CD" part-of-speech tokens into real numbers; words
# that are not recognized as numerals are ignored.
for phrase in ("two thousand five hundred and eight", "two point eighty-five"):
    print(number(phrase))
print("")

# numerals() goes the other way: int or float in, written representation out.
# The same value is shown rounded to two and then three decimals.
for digits in (2, 3):
    print(numerals(1.249, round=digits))
print("")

# quantify() combines pluralization with approximation to enumerate words,
# giving a human-readable summary of a set of strings.
word_samples = (
    ["goose", "goose", "duck", "chicken", "chicken", "chicken"],
    ["penguin", "polar bear"],
    ["carrot"] * 1000,
)
for sample in word_samples:
    print(quantify(sample))
# It also accepts a single word with an explicit amount, or a word -> count dict.
print(quantify("parrot", amount=1000))
print(quantify({"carrot": 100, "parrot": 20}))
Exemplo n.º 7
0
def extract_information(sentence):
    # Guess which pages and how many copies a print request asks for, then
    # print that guess to stdout. No value is returned on any path.
    #
    # sentence: the raw request text; it is only passed to preProcessText().
    #
    # DEBUG, preProcessText, readCorpora, parseValues and number are not
    # defined in this function and are read from the enclosing scope.
    # NOTE(review): `result` is used like an NLTK chunk tree (subtrees(),
    # label(), leaves()) -- confirm against parseValues().
    from_ = 0
    to = 0
    copies = 1
    # keywords has to be a mutable sequence: its items are overwritten by
    # index in the two loops below.
    keywords = preProcessText(sentence)
    for index, k in enumerate(keywords):
        'Checking if the number given is ordinal viz. 3rd 5th etc.'
        # The pattern is applied to every keyword, not only to numerals.
        # NOTE(review): ordinary words ending in nd/th/rd/st are trimmed too
        # (e.g. "and" becomes "a") -- confirm that is intended.
        keywords[index] = re.sub(r'nd$|th$|rd$|st$', "", (k))
    wordlists = readCorpora()
    #print ("file Ids = ")
    #print wordlists.fileids()

    #rewriting the command so that if user  has shortforms or any spelling mistake
    copy_words = wordlists.words("copy_words")
    from_words = wordlists.words("from_words")
    to_words = wordlists.words("to_words")
    page_words = wordlists.words("page_words")

    # Map every synonym onto one canonical keyword. All four tests look at the
    # original `key`, so if a word is in several lists the last match wins.
    for index, key in enumerate(keywords):
        if (key in copy_words):
            keywords[index] = "copies"

        if (key in from_words):
            keywords[index] = "from"
        if (key in to_words):
            keywords[index] = "to"
        if (key in page_words):
            keywords[index] = "page"

    if (DEBUG):
        print("Keywords = ")
        print keywords

    # The second argument is a chunk grammar with the labels NP, CP, TO and VP;
    # the meaning of the trailing True is defined by parseValues().
    result = parseValues(
        keywords, '''
            NP: {<NN.>}
            CP: {<CD>}
            TO: {<TO>}
            VP: {<VB.> | <NN>}
            ''', True)
    if (DEBUG):
        print((result))
    #result.draw()
    # `index` counts subtrees in the order subtrees() yields them, yet below
    # it is used to index `result` directly.
    # NOTE(review): confirm that these two positions really line up.
    for index, res in enumerate(result.subtrees()):
        if res.label() == "TO":
            if (DEBUG):
                print "TO"
                print result[index - 2].leaves()
            # Start page: read from result[index - 2]; every leaf overwrites
            # the previous one, so the last leaf wins.
            for leaf in result[index - 2]:
                from_ = number(leaf[0])
                if (DEBUG):
                    print from_
            # This guarded lookup (and its early return) only exists when
            # DEBUG is truthy; with DEBUG off a failing result[index] raises
            # from the for-loop just below instead.
            if (DEBUG):
                try:
                    print result[index].leaves()
                except:
                    print "I didnt quite understand what you said can you rephrase your sentence?"
                    return
            # End page: read from result[index], last leaf wins.
            for leaf in result[index]:
                to = number(leaf[0])
                if (DEBUG):
                    print to
            if (DEBUG):
                print "TO"
        if res.label() == "NP":
            if (DEBUG):
                print "COPIES"
            try:
                if (DEBUG):
                    try:
                        print result[index - 2].leaves()
                    except:
                        print "I didnt quite understand what you said can you rephrase your sentence?"
                        return
                # A leaf of result[index - 2] whose value differs from `to`
                # and is tagged CD becomes the copy count. A leaf equal to
                # `to` instead triggers a scan of result[index] for a CD leaf.
                for leaf in result[index - 2]:
                    if (to != number(leaf[0])):
                        if (leaf[1] == "CD"):
                            copies = number(leaf[0])
                            if (DEBUG):
                                print "if copies"
                                print copies
                    else:
                        for leaf in result[index]:
                            if (leaf[1] == "CD"):
                                copies = number(leaf[0])
                                if (DEBUG):
                                    print "else copies"
                                    print copies
            except:
                # Any failure above: fall back to a CD leaf in result[index];
                # if that fails as well, reuse the end page as the copy count.
                try:
                    for leaf in result[index]:
                        if (leaf[1] == "CD"):
                            copies = number(leaf[0])
                            if (DEBUG):
                                print "except copies"
                                print copies
                except:
                    copies = to
                    if (DEBUG):
                        print "TO = COPIES"

    # Either bound still at its initial 0 is treated as "no page range given".
    if (from_ == 0 or to == 0):
        print "\n"
        print "\n"
        print "\n"
        print "it seems like you want to print " + str(
            copies) + " copies of the whole document"
        # These two values are never read: the function returns right away.
        from_ = 1
        to = 1
        return
    print "\n"
    print "\n"
    print "\n"
    print "it seems like you want to print " + str(
        copies) + " copies of the document " + "from pages " + str(
            from_) + " to " + str(to)
    print "\n"
    print "Have I guessed correctly :)?"
    # There is no `global` statement in this function, so these three
    # assignments bind local names that disappear when the function ends.
    # NOTE(review): if module-level FROM_/TO_/COPIES_ were meant to be
    # updated, a `global` declaration is missing.
    FROM_ = from_
    TO_ = to
    COPIES_ = copies
Exemplo n.º 8
0
def extract_information(sentence):
    # Guess which pages and how many copies a print request asks for, print
    # that guess to stdout, and return it.
    #
    # sentence: the raw request text; it is only passed to preProcessText().
    #
    # Returns a dict with the keys 'from', 'to' and 'copies'; every value is
    # passed through str(). 'from' and 'to' are "-1" when keywords is empty,
    # when one of the DEBUG-guarded lookups fails, or when either bound is
    # still 0 after parsing.
    #
    # DEBUG, isSinglePageRange, preProcessText, parseValues and number are not
    # defined in this function and are read from the enclosing scope.
    # NOTE(review): `result` is used like an NLTK chunk tree (subtrees(),
    # label(), leaves()) -- confirm against parseValues().
    fromToSet = False
    from_ = 0
    to = 0
    copies = 1
    # keywords has to be a mutable sequence: its items are overwritten by index.
    keywords = preProcessText(sentence)
    for index,k in enumerate(keywords):
        'Checking if the number given is ordinal viz. 3rd 5th etc.'
        # The pattern is applied to every keyword, not only to numerals.
        # NOTE(review): ordinary words ending in nd/th/rd/st are trimmed too
        # (e.g. "and" becomes "a") -- confirm that is intended.
        keywords[index] = re.sub(r'nd$|th$|rd$|st$',"",(k))
    

    if(DEBUG):
        print ("Keywords = ")
        print keywords
    # Nothing to parse: report "unknown" page bounds and a single copy.
    if not keywords:
        from_ = -1
        to = -1
        copies = 1
        d = {}
        d['from'] = str(from_)
        d['to'] = str(to)
        d['copies'] = str(copies)
        return d
    # The second argument is a chunk grammar with the labels NP, CP, TO and VP;
    # the meaning of the trailing True is defined by parseValues().
    result = parseValues(keywords,'''
            NP: {<NN.>}
            CP: {<CD>}
            TO: {<TO>}
            VP: {<VB.> | <NN>}
            ''',True)
    if(DEBUG):
        print((result))
    #result.draw()
    # `index` counts subtrees in the order subtrees() yields them, yet below
    # it is used to index `result` directly.
    # NOTE(review): confirm that these two positions really line up.
    for index,res in enumerate(result.subtrees()):
        
        if(isSinglePageRange):
            if(res.label() == "CP"):
                # Everything down to the `else: pass` is nested under this
                # DEBUG test, so a single page is only picked up when DEBUG
                # is truthy.
                # NOTE(review): the fromToSet logic looks like it was meant
                # to sit outside the DEBUG block -- confirm.
                if(DEBUG):
                    print "PAGE CP"
                    print fromToSet
                    if(fromToSet == False):
                        # Substring test against the first element of the
                        # first leaf of the neighbouring node: "page" before
                        # the number (index-2) or at position `index`.
                        if("page" in result[index-2].leaves()[0][0]):
                            # Single page: both bounds get the same value.
                            for leaf in res.leaves():
                                from_ = number(leaf[0])
                                to = number(leaf[0])
                            fromToSet = True
                        elif("page" in result[index].leaves()[0][0]):
                            for leaf in res.leaves():
                                from_ = number(leaf[0])
                                to = number(leaf[0])
                            fromToSet = True
                        else:
                            pass
        else:
            if res.label() == "TO":
                if(DEBUG):
                    print "TO" 
                    print result[index-2].leaves()
                # Start page: read from result[index-2], last leaf wins.
                for leaf in result[index-2]:
                    from_ = number(leaf[0])
                    if(DEBUG):
                        print from_
                # This guarded lookup (and its early return) only exists when
                # DEBUG is truthy; with DEBUG off a failing result[index]
                # raises from the for-loop just below instead.
                if(DEBUG):
                    try:
                        print result[index].leaves()
                    except:
                        print "I didnt quite understand what you said can you rephrase your sentence?"
                        from_ = -1
                        to = -1
                        copies = 1
                        d = {}
                        d['from'] = str(from_)
                        d['to'] = str(to)
                        d['copies'] = str(copies)
                        return d
                # End page: read from result[index], last leaf wins.
                for leaf in result[index]:
                    to = number(leaf[0])
                    if(DEBUG):
                        print to
                if(DEBUG):
                    print "TO"
        # The NP (copy count) handling runs for both settings of
        # isSinglePageRange.
        if res.label() == "NP":
            if(DEBUG):
                print "COPIES"
            try: 
                if(DEBUG):
                    try:
                        print result[index-2].leaves()
                    except:
                        print "I didnt quite understand what you said can you rephrase your sentence?"
                        from_ = -1
                        to = -1
                        copies = 1
                        d = {}
                        d['from'] = str(from_)
                        d['to'] = str(to)
                        d['copies'] = str(copies)
                        return d
                # A leaf of result[index-2] whose value differs from `to` and
                # is tagged CD becomes the copy count. A leaf equal to `to`
                # instead triggers a scan of result[index] for a CD leaf.
                for leaf in result[index-2]:
                    if(to!=number(leaf[0])):
                        if(leaf[1]=="CD"):
                            copies = number(leaf[0])
                            if(DEBUG):
                                print "if copies"
                                print copies
                    else:
                        for leaf in result[index]:
                            if(leaf[1]=="CD"):
                                copies = number(leaf[0])
                                if(DEBUG):
                                    print "else copies"
                                    print copies
            except:
                    # Any failure above: fall back to a CD leaf in
                    # result[index]; if that fails as well, reuse the end page
                    # as the copy count.
                    try:
                        for leaf in result[index]:
                            if(leaf[1]=="CD"):
                                copies =  number(leaf[0])
                                if(DEBUG):
                                    print "except copies"
                                    print copies
                    except:
                        copies = to
                        if(DEBUG):
                            print "TO = COPIES"
    
    # Either bound still at its initial 0 is treated as "no page range given".
    # There is no return inside this branch, so the page-range message below
    # is printed as well, with -1 for both pages.
    # NOTE(review): confirm that printing both messages is intended.
    if(from_ == 0 or to == 0):
        print "\n"
        print "\n"
        print "\n"
        print "it seems like you want to print " + str(copies)+" copies of the whole document"
        from_=-1
        to=-1
    print "\n"
    print "\n"
    print "\n"
    print "it seems like you want to print " + str(copies) +" copies of the document " + "from pages " + str(from_) + " to " + str(to)
    print "\n"
    print "Have I guessed correctly :)?"
    d = {}
    d['from'] = str(from_)
    d['to'] = str(to)
    d['copies'] = str(copies)
    return d
Exemplo n.º 9
0
    def convertword2num(self, sent_list):
        """
        Replace spelled-out numbers in each sentence with their numeric value.

        Every sentence is tokenised with _timelex(). Consecutive tokens found
        in self.num_list / self.sim_list are collected into one number phrase,
        the phrase is parsed with number(), and the result is substituted back
        into the sentence. Sentences whose phrase cannot be parsed are kept
        as they are.

        Params:
            Input:
                sent_list - list of broken sentences
            Output:
                new_list - same list of sentences, but with numbers
        """
        new_list = []
        # NOTE(review): this print and the print(num) below look like debug
        # output; they are kept so stdout stays exactly as before.
        print(sent_list)
        # Loop-invariant: build the combined vocabulary once, not per token.
        numeric_words = self.num_list + self.sim_list
        for sent in sent_list:
            is_a_part = False  # True while inside a run of number words
            words = []
            temp_sent = [x for x in _timelex(sent) if x != ' ']
            last_ind = len(temp_sent) - 1
            for ind, wrd in enumerate(temp_sent):
                if wrd in numeric_words:
                    starts_run = not is_a_part
                    if starts_run:
                        is_a_part = True
                        # A sim_list word (presumably a magnitude word such as
                        # "hundred") that opens the sentence, or follows a
                        # token number() evaluates to 0, gets an explicit
                        # leading "one".
                        if wrd in self.sim_list and (
                                ind == 0 or number(temp_sent[ind - 1]) == 0):
                            sent = sent.replace(wrd, u'one ' + wrd)
                            wrd = u'one ' + wrd
                            words.append(wrd)
                    # A value of 1..20 directly followed by a num_list word
                    # worth more than 9 is read as an implied "hundred"
                    # (e.g. "nineteen eighty"). The test uses the original
                    # token, not the possibly prefixed `wrd`.
                    if ind != last_ind and \
                            number(temp_sent[ind]) in range(1, 21) and \
                            temp_sent[ind + 1] in self.num_list and \
                            number(temp_sent[ind + 1]) > 9:
                        sent = sent.replace(wrd, wrd + u' hundred')
                        wrd = wrd + u' hundred'
                        words.append(wrd)
                    elif not starts_run or wrd not in words:
                        # The first word of a run is skipped when the "one"
                        # prefix above has already appended it.
                        words.append(wrd)
                elif wrd == 'and' and temp_sent[ind - 1] in self.sim_list:
                    # Keep "and" only inside a run and only when a num_list
                    # word follows. The explicit bounds check replaces a bare
                    # `except: pass` around temp_sent[ind + 1].
                    if is_a_part and ind != last_ind and \
                            temp_sent[ind + 1] in self.num_list:
                        words.append(wrd)
                else:
                    is_a_part = False

            # Re-tokenise the collected phrase so its spacing is normalised
            # the same way temp_sent was.
            word = " ".join(words)
            word = ' '.join(x for x in _timelex(word) if x != ' ')
            try:
                num = number(word)
                print(num)
            except Exception:
                # The phrase could not be parsed: leave the sentence untouched.
                num = word
            else:
                if word:
                    sent = sent.replace(word, str(num))
            new_list.append(sent)

        return new_list
Exemplo n.º 10
0
# Every print below is written as print(<one expression>), which produces the
# same output on Python 2 and Python 3.
print(comparative('bad'))
print(superlative('bad'))
# verb conjugation
print(lexeme('purr'))
print(lemma('purring'))
print(conjugate('purred', '3sg'))  # he / she / it
print('p' in tenses('purred'))  # By alias.
print(PAST in tenses('purred'))
# Double parentheses: the printed value is the membership test on the tuple,
# not the tuple itself.
print((PAST, 1, PL) in tenses('purred'))
# rule-based conjugation
print('google' in verbs.infinitives)
print('googled' in verbs.inflections)
print(conjugate('googled', tense=PARTICIPLE, parse=False))
print(conjugate('googled', tense=PARTICIPLE, parse=True))
# quantification
print(number("seventy-five point two"))  # "seventy-five point two" => 75.2
print(numerals(2.245, round=2))  # 2.245 => "two point twenty-five"
print(quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken']))
print(quantify({'carrot': 100, 'parrot': 20}))
print(quantify('carrot', amount=1000))
# spelling
print(suggest("parot"))
# n-grams
print(ngrams("I am eating pizza.", n=2))  # bigrams
print(ngrams("I am eating pizza.",
             n=3,
             punctuation=".,;:!?()[]{}`''\"@#$^&*+-|=~_",
             continuous=False))
# parser
print parse(
    'I eat pizza with a fork.',
from builtins import str, bytes, dict, int

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
from builtins import str, bytes, dict, int
from pattern.en import article, referenced
from pattern.en import pluralize, singularize
from pattern.en import comparative, superlative
from pattern.en import conjugate, lemma, lexeme, tenses
from pattern.en import NOUN, VERB, ADJECTIVE
from pattern.en import number, numerals, quantify, reflect
from pattern.en import parse, pprint, tag
from pattern.en import parse, Text
#'''
# singularize() maps a plural noun (or adjective) back to its singular form.
# It is slightly less robust than pluralize().
plural_samples = (
    "parts-of-speech", "children", "dogs'", "wolves", "bears",
    "kitchen knives", "octopodes", "matrices", "matrixes",
)
for word in plural_samples:
    singular = singularize(word)
    print(singular)
# Adjectives need the part of speech spelled out.
print(singularize("our", pos=ADJECTIVE))
print("")

# number() is called once with a whole sentence and once with a bare numeral
# phrase.
number_samples = (
    "I am two thousand five hundred and eight years old",
    "two point eighty-five",
)
for phrase in number_samples:
    print(number(phrase))
print("")
#'''