Python word_to_num 예제들, word2number.w2n.word_to_num Python 예제들

예제 #1

0

파일 보기

파일: resolvers.py 프로젝트: ArnaudLevaufre/Cinema

 async def resolve(self, path, movie):
     """Rewrite spelled-out numbers in ``path`` as digits, then delegate.

     A movie that already has a title is returned unchanged.  Otherwise each
     whitespace-separated word of ``path`` that ``w2n.word_to_num`` converts
     to a truthy number is replaced by that number's string form, and the
     rebuilt path is passed on to the parent ``resolve``.

     Returns whatever the parent ``resolve`` returns (or ``movie`` itself
     when it already has a title).
     """
     if movie.title:
         return movie
     words = path.split()
     for i, word in enumerate(words):
         try:
             # Convert once and reuse the result instead of parsing twice.
             number = w2n.word_to_num(word)
         except (IndexError, ValueError):
             # Not a number word: word2number signals this with ValueError
             # (IndexError is kept for the behaviour originally handled).
             continue
         # Falsy results (None or 0) leave the word untouched, as before.
         if number:
             words[i] = str(number)
     path = ' '.join(words)
     return await super().resolve(path, movie)

예제 #2

0

파일 보기

파일: unit_testing.py 프로젝트: ledovsky/w2n

	def test_output(self):
		"""Check ``w2n.word_to_num`` against a table of phrases and expected values."""
		expectations = (
			("two million three thousand nine hundred and eighty four", 2003984),
			("nineteen", 19),
			('three billion', 3000000000),
			('three million', 3000000),
			('one hundred twenty three million four hundred fifty six thousand seven hundred and eighty nine', 123456789),
			('eleven', 11),
			('nineteen billion and nineteen', 19000000019),
			('one hundred and forty two', 142),
			('one hundred thirty-five', 135),
		)
		# Same assertions, same order; the first mismatch fails the test.
		for phrase, expected in expectations:
			self.assertEqual(w2n.word_to_num(phrase), expected)

예제 #3

0

파일 보기

파일: message_parser.py 프로젝트: iamsiva11/Disaster-whatsapp-bot

def get_integer(str):
  """Return ``str`` as a number, accepting digits or spelled-out words.

  The value is first passed to ``int()``.  If that fails, it is handed to
  ``w2n.word_to_num`` and that result is returned; any exception raised by
  ``word_to_num`` propagates to the caller.

  The parameter name shadows the ``str`` builtin; it is kept unchanged so
  keyword callers continue to work.
  """
  try:
    return int(str)
  except (TypeError, ValueError, OverflowError):
    # Only conversion failures trigger the fallback; KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by a bare except.
    return w2n.word_to_num(str)

예제 #4

0

파일 보기

파일: movie-scraper.py 프로젝트: dianalam/movie-predictor

def noms_from_oscars(oscarsstring):
	"""Converts descriptive oscars text to number of nominations as int."""
	try:
		nominations_str = (str(oscarsstring.split(",")[0]).strip().lower()).split(" ")
		nominations = w2n.word_to_num(nominations_str[2])
		return nominations
	except:
		return 0

예제 #5

0

파일 보기

파일: movie-scraper.py 프로젝트: dianalam/movie-predictor

def wins_from_oscars(oscarsstring):
	"""Converts descriptive oscars text to number of wins as int."""
	try:
		wins_str = (str(oscarsstring.split(",")[1]).replace(".", "").strip().lower()).split(" ")
		wins = w2n.word_to_num(wins_str[1])
		return wins
	except:
		return 0

예제 #6

0

파일 보기

파일: filters.py 프로젝트: NectoNick/Test_service

 def get_query(self, value):
     """Build a ``Q`` filter on ``name`` from the words in ``value``.

     Every whitespace-separated word is AND-ed in as a ``name__icontains``
     condition.  The word's alternate numeric spelling is then OR-ed in:
     ``num2words(word)`` when that succeeds, otherwise the integer produced
     by ``word_to_num(word)`` when the word is a number word.

     Returns the combined ``Q`` object (an empty ``Q()`` for blank input).
     """
     qset = Q()
     for word in value.split():
         qset &= Q(name__icontains=word)
         # NOTE(review): the |= below applies to the whole accumulated
         # expression, i.e. (previous AND word) OR alternate -- confirm this
         # grouping is intended rather than previous AND (word OR alternate).
         try:
             qset |= Q(name__icontains=num2words(word))
         except TypeError:
             try:
                 word_converted_to_num = word_to_num(word)
             except ValueError:
                 # word2number raises ValueError for non-number words;
                 # such a word simply has no numeric alternative.
                 continue
             if isinstance(word_converted_to_num, int):
                 qset |= Q(name__icontains=word_converted_to_num)
     return qset

예제 #7

0

파일 보기

파일: text.py 프로젝트: therealsharath/article-engagement-predictor

def clean_text(text):
    """Lowercase ``text``, fold number phrases into digits and drop noise words.

    The text is stripped, lowercased and passed through ``contractions.fix``
    (plain strip/lower is used if that raises ``IndexError``).  Runs of
    consecutive number tokens -- anything ``float()`` parses or
    ``word_to_num`` accepts -- are collapsed into a single numeric string.
    Non-number words of two characters or fewer, or found in
    ``get_stop_words('en')``, are removed.  Finally every character that is
    neither alphanumeric nor a space is dropped.

    Returns the cleaned text as one string.
    """
    try:
        cleaned_text = contractions.fix(text.strip()).lower()
    except IndexError:
        cleaned_text = text.strip().lower()

    # Split on single spaces and discard the empty strings that repeated
    # spaces produce.
    words = [x for x in cleaned_text.split(' ') if x]

    # Tokens of the number run currently being collected; they are removed
    # from `words` as they are collected.
    current_number = []
    i = 0
    while i < len(words):
        try:
            try:
                # Digit-style token: move it into the current run.
                float(words[i])
                current_number.append(words[i])
                words.pop(i)
            except ValueError:
                pass

            if i < len(words):
                try:
                    word_to_num(words[i])
                except IndexError:
                    # Treat IndexError from word_to_num the same as
                    # "not a number word".
                    raise ValueError
                # Number word: move it into the current run.  `i` is not
                # advanced because pop() shifted the next word into place.
                current_number.append(words[i])
                words.pop(i)
        except ValueError:
            # words[i] is not a number word, so any collected run ends here.
            if current_number:
                j = 0
                prod = 1
                # Multiply together (and remove) every float-parsable token;
                # tokens that are not float-parsable stay in current_number.
                while j < len(current_number):
                    try:
                        prod *= float(current_number[j])
                        current_number.pop(j)
                    except ValueError:
                        j += 1

                if current_number:
                    try:
                        num = prod * word_to_num(' '.join(current_number))
                    except:
                        # Any failure converting the remaining words falls
                        # back to the product of the float tokens alone.
                        num = prod
                else:
                    num = prod
                # Put the collapsed number back where the run started,
                # written without a fractional part when it is a whole float.
                words.insert(
                    i,
                    str(
                        int(num)
                        if type(num) == float and num.is_integer() else num))
                current_number.clear()
                # Step past the inserted number so words[i] is again the
                # non-number word that ended the run.
                i += 1

            # Drop very short words and English stop words.
            if len(words[i]) <= 2 or words[i] in get_stop_words('en'):
                words.pop(i)
                continue

            i += 1
    else:
        # The loop has no break, so this always runs: flush a number run
        # that reached the end of the text.
        if current_number:
            try:
                words.append(str(word_to_num(' '.join(current_number))))
            except ValueError:
                words.append(' '.join(current_number))

    # Keep only alphanumeric characters and spaces.
    return ''.join(x for x in ' '.join(words) if x.isalnum() or x == ' ')

예제 #8

0

파일 보기

    def tokenize_text(self,
                      remove_accentedChar=True,
                      remove_httpLinks=True,
                      expand_contractionMap=True,
                      handle_emoji=True,
                      convert_word2Number=True,
                      remove_whiteSpace=True,
                      lemmatization=True,
                      lowercase=True,
                      remove_punctuations=True,
                      remove_number=True,
                      remove_specialChars=True,
                      expand_slang=True,
                      remove_stopWords=True,
                      correct_spelling_byWord=True,
                      correct_spelling=False):
        """Clean ``self.text`` and re-tokenize the result.

        Processing runs in two stages.  Sentence-level steps rewrite
        ``self.text`` in place, each one enabled by its flag.  The text is
        then parsed with ``self.nlp`` and each token is kept, dropped or
        replaced according to the word-level flags.  The surviving tokens are
        joined with spaces and passed to ``self.NER_tokenize``.

        Every parameter is a boolean switch for the step of the same name.

        Returns:
            A 2-tuple ``(result, errorLog)``.  ``result`` is a list of token
            strings when ``self.returnToken`` is truthy, otherwise a single
            space-joined string.  ``errorLog`` is a list holding the
            space-joined token text of the document if word-level processing
            raised, and is empty otherwise.
        """
        # pre cleaning
        self.text = self.define_preCleaning()
        # sentence level preprocess
        if remove_httpLinks:
            self.text = self.remove_httpLinks()
        if remove_whiteSpace:
            self.text = self.remove_whiteSpace()
        if remove_accentedChar:
            self.text = self.remove_accentedNotation()
        if expand_contractionMap:
            self.text = self.expand_contractionMap()
        if lowercase:
            self.text = self.text.lower()
        if correct_spelling:
            self.text = self.correct_spelling()
        if handle_emoji:
            self.text = self.handle_emotion()
        if expand_slang:
            self.text = self.expand_slang()

        doc = self.nlp(self.text)
        cleanText = []
        errorLog = []

        # init a punctDict: per-call usage counter for each punctuation mark
        # that is allowed to stay
        punctUsageStatsDict = {k: 0 for k in self.notDeletePunctList}

        # word-level preprocess
        for token in doc:
            # flag: keep this token; editFlag: emit `edit` instead of the
            # original token text.
            flag = True
            editFlag = False
            toEdit = token.text
            edit = ""
            # remove stop words
            try:
                if remove_stopWords and token.is_stop and token.pos_ != "NUM":
                    flag = False
                # remove punctuations
                if remove_punctuations and token.pos_ == "PUNCT" and (
                        toEdit not in self.notDeletePunctList) and flag:
                    flag = False
                # remove special characters
                if remove_specialChars and token.pos_ in [
                        "SYM", "ADP", "X", "AUX"
                ] and flag:
                    flag = False
                # remove numbers
                if remove_number and (token.pos_ == "NUM"
                                      or toEdit.isnumeric()) and flag:
                    flag = False
                # convert word to numbers
                # NOTE(review): editFlag is not set here, and the two branches
                # below overwrite `edit` for alphabetic tokens, so the
                # converted value appears never to reach cleanText -- confirm
                # whether that is intended.
                if convert_word2Number and token.pos_ == "NUM" and flag:
                    edit = w2n.word_to_num(toEdit)
                # lemmatization
                if lemmatization and token.is_alpha and token.lemma_ != "-PRON-" and flag:
                    edit = token.lemma_
                    editFlag = True
                # correct spelling by word
                if correct_spelling_byWord and token.is_alpha and token.lemma_ != "-PRON-" and flag:
                    edit = self.correct_spelling_oneWord(token.lemma_)
                    editFlag = True
                # cap how many times each allowed punctuation mark may appear
                if token.pos_ == "PUNCT" and toEdit in self.notDeletePunctList and flag:
                    punctUsageStatsDict[toEdit] += 1
                    if punctUsageStatsDict[
                            toEdit] > self.maximumPunctRepeatition:
                        flag = False
                # append valid result to cleanText
                if toEdit != "" and flag and editFlag:
                    cleanText.append(str(edit))
                if toEdit != "" and flag and (not editFlag):
                    cleanText.append(str(toEdit))
            except:
                # Any exception on a token logs the whole document's text and
                # stops processing the remaining tokens; tokens already
                # appended to cleanText are kept.
                errorLog.append(" ".join([str(e) for e in doc]))
                break

        doc = self.NER_tokenize(" ".join(cleanText))  #re-tokenization

        if self.returnToken:
            return ([str(e) for e in doc], errorLog)
        else:
            return (" ".join([str(e) for e in doc]), errorLog)

예제 #9

0

파일 보기

import pandas as pd
from word2number import w2n
import pickle

# Train a linear regression on HiringSalary.csv, persist it with pickle, then
# reload it and print a sample prediction.
data = pd.read_csv("HiringSalary.csv")

# Missing values become 0 before any conversion.
data.fillna(0, inplace=True)

# The experience column is converted to strings and then run through
# word2number so that spelled-out values become numbers.
data['experience'] = data['experience'].astype(str)
data['experience'] = data['experience'].apply(w2n.word_to_num)

# Features are the first three columns; the target is the last column.
X = data.iloc[:, [0, 1, 2]].values
y = data.iloc[:, -1].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

from sklearn.linear_model import LinearRegression
s_LinearRegression = LinearRegression()
s_LinearRegression.fit(X_train, y_train)

# Save the model; the context manager closes (and flushes) the file before it
# is read back below.
with open('HiringSalaryModel.pkl', 'wb') as model_file:
    pickle.dump(s_LinearRegression, model_file)

# Loading the model to predict the result
with open('HiringSalaryModel.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
print(model.predict([[20, 10, 10]]))

예제 #10

0

파일 보기

def preprocess():
    """Normalise entity mentions in each input file and print the tokens.

    For every entry returned by ``os.listdir(directory)`` the matching file
    is read line by line; lines up to and including the one containing
    ``</Header>`` are ignored.  Each remaining line is run through ``nlp``:

    * ORG and PERSON entities are replaced by a single token with
      punctuation and spaces removed; the original text is remembered so it
      can be restored when the line is tokenised.
    * MONEY entities are rewritten as plain numbers.  An amount followed by
      a scale word (for example ``$2.5 million``) is multiplied out using
      ``w2n.word_to_num``; if that conversion fails the line is left as is.

    The line is then split on single spaces, and the non-empty tokens of all
    lines of a file are collected and printed once per file.
    """
    # Words that may appear inside a MONEY entity and are stripped before the
    # entity text is parsed again.
    filler_words = ('approximately', 'per', 'to', 'and', 'between')

    for fil in os.listdir(directory):

        flist = []
        filename = os.fsdecode(fil)
        # NOTE(review): the input path is hard-coded rather than derived from
        # `directory` -- confirm both refer to the same folder.
        path = "/Users/skosgi/Downloads/10k_1900_org_sample/"+filename
        ak = False  # becomes True once the </Header> line has been seen
        # The context manager closes the file; it was previously left open.
        with open(path,'r') as file:
            for f in file:
                # Strip only the newline; slicing off the last character
                # would truncate a final line that has no newline.
                f = f.rstrip('\n')
                if "</Header>" in f:  #Read lines only after occurance of </Header>
                    ak = True
                    continue
                if ak==False:
                    continue
                doc= nlp(f) #applying NER
                nermap = {} # Map to hold mapping from NER applied tokens to original text
                for X in doc.ents:
                    if X.label_=='ORG' or X.label_=='PERSON':    # If the NER class is ORG or PERSON
                        text = re.sub(r'[^\w\s]', '', X.text)
                        text = text.replace(" ","")
                        f = f.replace(X.text,text)
                        nermap[text] = X.text
                    if  X.label_ == 'MONEY':   #If NER class is MONEY
                        new_X = X.text
                        # Remove all the words which might appear in NER money
                        # class; the text is lowercased only when a filler
                        # word is actually found.
                        for filler in filler_words:
                            if filler in new_X.lower():
                                new_X = new_X.lower().replace(filler,'')
                        if 'phone' in new_X.lower():
                            continue
                        doc1 = nlp(new_X) #Apply NER for the string which is obtained after removing other words this gives $200, $500 as separate ones
                        for Y in doc1.ents:
                            money = Y.text[Y.text.find("$")+1:]
                            if ' ' not in money:
                                act_money = money.replace(',','')   #Actual Money
                                f = f.replace(Y.text,act_money)   #Replace original money text with actual money
                                continue
                            k = money.find(' ')
                            try:
                                act_money = float(money[:k].replace(',',''))
                                money_conv = w2n.word_to_num(money[k:]) #Conversion of word types million to *1e6
                                total = act_money*money_conv
                            except (IndexError, TypeError, ValueError):
                                continue # amount or scale word not parseable: dont modify the original sentence
                            # str.replace needs a string: passing the float
                            # used to raise TypeError, which was silently
                            # swallowed, so scaled amounts were never written.
                            if total.is_integer():
                                total_text = str(int(total))
                            else:
                                total_text = str(total)
                            f = f.replace(Y.text,total_text) #Replace original money text with actual money
                            print("Converted from",money,total)
                #To be changed: Start of preprocessing to sentences after applying NER
                fl = f.split(" ")
                for fi in fl:
                    if fi in nermap:
                        flist.append(nermap[fi])
                    elif len(fi)>0:
                        flist.append(fi)

        print(flist)    #To be added: Write to a file

예제 #11

0

파일 보기

파일: AI.py 프로젝트: sk99sk/SpiceBot

def bot_command_rule_ai(bot, trigger):
    """Answer a message that is not a registered command.

    The message is classified as ``'nickname'`` (starts with the bot's nick),
    ``'module'`` (starts with one of the configured prefixes) or ``'other'``.
    Messages that are query commands or already-valid commands are ignored.
    Otherwise the text is offered to ``SpiceBot.botai.on_message``; if that
    yields a reply it is sent and the function returns.  Failing that:

    * ``'module'`` messages may get an "I don't seem to have a command for"
      notice listing close command matches.
    * ``'nickname'`` messages are checked against a set of built-in phrases
      and, if none match, get an "I don't know what you are asking me to do!"
      notice with close nickname-command matches.

    Returns None in every path; all output goes through ``bot.osd``.
    """

    # don't run commands that are disabled in channels
    if not trigger.is_privmsg:
        channel_disabled_list = SpiceBot.commands.get_commands_disabled(
            str(trigger.sender), "fully")
        if "nickname_ai" in list(channel_disabled_list.keys()):
            return

    # don't run commands that are disabled for specific users
    nick_disabled_list = SpiceBot.commands.get_commands_disabled(
        str(trigger.nick), "fully")
    if "nickname_ai" in list(nick_disabled_list.keys()):
        return

    # TODO add config limits
    # but still allow in privmsg

    # never react to the bot's own messages
    if trigger.nick == bot.nick:
        return

    if not len(trigger.args):
        return

    # NOTE(review): the guard above only rules out an empty args list, while
    # index 1 needs at least two entries -- confirm args always has both.
    message = trigger.args[1]

    # the bot brain cannot handle stuff like unicode shrug
    message = ''.join([x for x in message if ord(x) < 128])

    # Create list of valid commands
    commands_list = []
    for commandstype in list(SpiceBot.commands.dict['commands'].keys()):
        if commandstype not in ['rule', 'nickname']:
            for com in list(
                    SpiceBot.commands.dict['commands'][commandstype].keys()):
                if com not in commands_list:
                    commands_list.append(com)

    # Classify the message and build trigger_args / trigger_command /
    # fulltrigger for the branch that applies.
    if str(message).lower().startswith(str(bot.nick).lower()):
        command_type = 'nickname'
        trigger_args, trigger_command, trigger_prefix = SpiceBot.make_trigger_args(
            message, 'nickname')
        trigger_args.insert(0, trigger_command)
        fulltrigger = bot.nick + " " + spicemanip(trigger_args, 0)
        if str(trigger_command).startswith(
                bot.config.SpiceBot_Commands.query_prefix):
            return
        if fulltrigger in SpiceBot.commands.dict['nickrules']:
            return
        if trigger_command in list(
                SpiceBot.commands.dict['commands']["nickname"].keys()):
            return
    elif str(message).lower().startswith(
            bot.config.SpiceBot_Commands.query_prefix):
        # no query commands detection here
        return
    elif str(message).startswith(tuple(bot.config.core.prefix_list)):
        command_type = 'module'
        trigger_args, trigger_command, trigger_prefix = SpiceBot.make_trigger_args(
            message, 'module')
        trigger_args.insert(0, trigger_command)
        fulltrigger = spicemanip(trigger_args, 0)
        # patch for people typing "...", maybe other stuff, but this verifies that there is still a command here
        if trigger_command.startswith(tuple(bot.config.core.prefix_list)):
            return
        # If valid command don't continue further
        if trigger_command in commands_list:
            return
    else:
        command_type = 'other'
        trigger_args = spicemanip(message, 'create')
        if not len(trigger_args):
            return
        trigger_command = trigger_args[0]
        fulltrigger = spicemanip(trigger_args, 0)

    # First chance: let the AI answer the full text.
    returnmessage = SpiceBot.botai.on_message(bot, trigger, fulltrigger)
    if returnmessage:
        bot.osd(str(returnmessage))
        return

    # Second chance for nickname messages: retry with spicemanip's "2+"
    # selection of the trigger text.
    if command_type == 'nickname':
        try_trigger = spicemanip(fulltrigger, "2+")
        returnmessage = SpiceBot.botai.on_message(bot, trigger, try_trigger)
        if returnmessage:
            bot.osd(str(returnmessage))
            return

    if command_type == 'module':
        if trigger_command not in commands_list:
            if not SpiceBot.letters_in_string(trigger_command):
                return

            invalid_display = [
                "I don't seem to have a command for " + str(trigger_command) +
                "!"
            ]

            # create list of valid commands
            # (rebinds commands_list from a list of names to a dict mapping
            # each name to its command entry)
            commands_list = dict()
            for commandstype in list(
                    SpiceBot.commands.dict['commands'].keys()):
                if commandstype not in ['rule', 'nickname']:
                    for com in list(SpiceBot.commands.dict['commands']
                                    [commandstype].keys()):
                        if com not in list(commands_list.keys()):
                            commands_list[com] = SpiceBot.commands.dict[
                                'commands'][commandstype][com]

            # hyphen args handling
            hyphen_args = []
            argssplit = spicemanip(fulltrigger, "2+", 'list')
            for worditem in argssplit:
                if str(worditem).startswith("--"):
                    clipped_word = str(worditem[2:]).lower()
                    # valid arg above
                    if clipped_word in SpiceBot.prerun_shared.valid_hyphen_args:
                        hyphen_args.append(clipped_word)
                    # numbered args
                    elif str(clipped_word).isdigit():
                        hyphen_args.append(int(clipped_word))
                    elif clipped_word in list(
                            SpiceBot.prerun_shared.numdict.keys()):
                        hyphen_args.append(
                            int(SpiceBot.prerun_shared.numdict[clipped_word]))
                    else:
                        # check if arg word is a number
                        try:
                            clipped_word = w2n.word_to_num(str(clipped_word))
                            hyphen_args.append(int(clipped_word))
                        # word is not a valid arg or number
                        except ValueError:
                            clipped_word = None
            # Only the first parsed hyphen argument is inspected; a truthy
            # value adds the "not valid" line.
            if len(hyphen_args):
                hyphenarg = hyphen_args[0]
                if hyphenarg:
                    invalid_display.append("Hyphen Argument Not Valid.")

            closestmatches = SpiceBot.similar_list(trigger_command,
                                                   list(commands_list.keys()),
                                                   10, 'reverse')
            if len(closestmatches) and len(hyphen_args):  # TODO
                invalid_display.append("The following commands may match " +
                                       str(trigger_command) + ": " +
                                       spicemanip(closestmatches, 'andlist') +
                                       ".")

            # there is simply no command
            else:
                # TODO check other commands spelling, maybe there is a similar command
                # invalid_display = ["I don't seem to have a command for " + str(trigger_command) + "!"]
                # TODO
                # invalid_display.append("If you have a suggestion for this command, you can run .feature ." + str(trigger_command))
                # invalid_display.append("ADD DESCRIPTION HERE!")
                # Without any hyphen argument nothing is reported at all.
                if not len(hyphen_args):
                    invalid_display = []
            if len(invalid_display):
                bot.osd(invalid_display, trigger.nick, 'notice')
        return

    elif command_type == 'nickname':

        # ignore spelling correction
        if trigger_args[0].lower().startswith("s/"):
            bot.osd("I meant what I said!")
            return

        # "what/where is/are <term>": run a search for the term
        elif trigger_args[0].lower() in [
                "what", "where"
        ] and trigger_args[1].lower() in ["is", "are"]:
            # TODO saved definitions
            searchterm = spicemanip(trigger_args, "3+") or None
            if searchterm:
                if trigger_args[0].lower() == "where":
                    searchdict = {
                        "type": "gmaps",
                        "query": searchterm,
                    }
                    searchreturn = SpiceBot.search.search(searchdict)
                else:
                    searchdict = {
                        "query": searchterm,
                    }
                    searchreturn = SpiceBot.search.search(searchdict)
                if not searchreturn:
                    searchreturn = 'I cannot find anything about that'
                if trigger_args[0].lower() == "where":
                    bot.osd([
                        "[Location search for " + str(searchterm) + "]",
                        str(searchreturn)
                    ])
                else:
                    bot.osd([
                        "[Information search for '" + str(searchterm) + "']",
                        str(searchreturn)
                    ])
            return

        # "can/have you see/seen <target>"
        elif trigger_args[0].lower() in [
                "can", "have"
        ] and trigger_args[1].lower() in [
                "you"
        ] and trigger_args[2].lower() in ["see", "seen"]:
            target = spicemanip(trigger_args, "4+") or None
            if target:
                # NOTE(review): both membership checks below test
                # trigger.nick rather than target -- confirm that is intended.
                if SpiceBot.inlist(trigger.nick, bot.users):
                    realtarget = SpiceBot.inlist_match(target, bot.users)
                    dispmsg = [trigger.nick + ", yes. I can see " + realtarget]
                    targetchannels = []
                    for channel in list(bot.channels.keys()):
                        if SpiceBot.inlist(
                                trigger.nick,
                                list(bot.channels[channel].privileges.keys())):
                            targetchannels.append(channel)
                    dispmsg.append(realtarget + " is in " +
                                   spicemanip(targetchannels, 'andlist'))
                    bot.osd(dispmsg)
                else:
                    bot.osd(trigger.nick + ", no. I cannot see " + target +
                            " right now!")
                    # if bot_check_inlist(target, list(bot.memory["botdict"]["users"].keys())):
                    #    bot.osd(trigger.nick + ", I can't see " + inlist_match(target, bot.users) + " at the moment.")
                    # else:
                    #    bot.osd("I have never seen " + str(target) + ".")
                    # user in list(bot.channels[channel].privileges.keys())
                    # TODO
            return

        elif fulltrigger.lower().endswith("order 66"):

            if fulltrigger.lower() == "execute order 66":
                if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('owners')):
                    if trigger.is_privmsg:
                        jedi = None
                    else:
                        # candidates are everyone in the channel except the
                        # bot and the requester
                        jedilist = list(
                            bot.channels[trigger.sender].privileges.keys())
                        for nonjedi in [bot.nick, trigger.nick]:
                            if nonjedi in jedilist:
                                jedilist.remove(nonjedi)
                        jedi = spicemanip(jedilist, 'random')

                    if jedi:
                        bot.osd("turns to " + jedi + " and shoots him.",
                                trigger.sender, 'action')
                    else:
                        bot.osd(" cannot find any jedi nearby.",
                                trigger.sender, 'action')
                else:
                    bot.osd("I'm sure I don't know what you're talking about.")

            elif fulltrigger.lower() == "explain order 66":
                if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('owners')):
                    bot.osd(
                        "Order 66 is an instruction that only you can give, sir. When you give the order I will rise up against the jedi and slay them."
                    )
                else:
                    bot.osd("I'm afraid I cannot tell you that, sir.")
            else:
                bot.osd("I'm sure I don't know what you're talking about.")
            return

        elif fulltrigger.lower().startswith(tuple(["make me a", "beam me a"])):
            makemea = spicemanip(trigger_args, "4+") or None
            if makemea:
                bot.osd("beams " + trigger.nick + " a " + makemea,
                        trigger.sender, 'action')
            else:
                bot.osd(trigger.nick + ", what would you like me to beam you?")
            return

        elif fulltrigger.lower().startswith("beam me to"):
            location = spicemanip(trigger_args, "4+") or None
            if location:
                # NOTE(review): 'action' is the second positional argument
                # here, whereas the other action calls pass trigger.sender
                # second and 'action' third -- confirm against bot.osd.
                bot.osd(
                    "locks onto " + trigger.nick +
                    "s coordinates and transports them to " + location,
                    'action')
            else:
                bot.osd(trigger.nick +
                        ", where would you like me to beam you?")
            return

        elif fulltrigger.lower() == "initiate clean slate protocol":
            if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('admins')):
                # NOTE(review): same argument order as the "beam me to"
                # action call -- confirm against bot.osd.
                bot.osd("sends a destruct command to the network of bots.",
                        'action')
            else:
                bot.osd(
                    "I'm afraid you do not have the authority to make that call, "
                    + trigger.nick + ".")
            return

        # elif fulltrigger.lower().startswith("what time is it"):
        # TODO

        # elif fulltrigger.lower().startswith(tuple(["have you seen"])):
        #    posstarget = spicemanip(trigger_args, 4) or 0
        #    message = seen_search(bot, trigger, posstarget)
        #    bot.osd(message)
        #    return
        # TODO

        # No built-in phrase matched: report it to the user.
        invalid_display = ["I don't know what you are asking me to do!"]

        # hyphen args handling
        hyphen_args = []
        argssplit = spicemanip(fulltrigger, "2+", 'list')
        for worditem in argssplit:
            if str(worditem).startswith("--"):
                clipped_word = str(worditem[2:]).lower()
                # valid arg above
                if clipped_word in SpiceBot.prerun_shared.valid_hyphen_args:
                    hyphen_args.append(clipped_word)
                # numbered args
                elif str(clipped_word).isdigit():
                    hyphen_args.append(int(clipped_word))
                elif clipped_word in list(
                        SpiceBot.prerun_shared.numdict.keys()):
                    hyphen_args.append(
                        int(SpiceBot.prerun_shared.numdict[clipped_word]))
                else:
                    # check if arg word is a number
                    try:
                        clipped_word = w2n.word_to_num(str(clipped_word))
                        hyphen_args.append(int(clipped_word))
                    # word is not a valid arg or number
                    except ValueError:
                        clipped_word = None
        if len(hyphen_args):
            hyphenarg = hyphen_args[0]
            if hyphenarg:
                invalid_display.append("Hyphen Argument Not Valid.")

        closestmatches = SpiceBot.similar_list(
            trigger_command,
            list(SpiceBot.commands.dict['commands']["nickname"].keys()), 3,
            'reverse')
        if len(closestmatches):
            closestmatches = spicemanip(closestmatches, "andlist")
            invalid_display.append("Did you mean: " + str(closestmatches) +
                                   "?")

        bot.osd(invalid_display, trigger.nick, 'notice')

예제 #12

0

파일 보기

# All <td> cells on the page; the two prices are read by position
# (indexes 2 and 3).
tds = soup.find_all('td')
price_without_tax = tds[2].text
price_with_tax = tds[3].text

# Keep only the digits of the availability paragraph's text.
nb_in_stock = re.sub("[^0-9]", "",
                     soup.find("p", class_="instock availability").text)

# Image URL: the page URL with "index.html" removed, followed by the src of
# the first <img> on the page.
img_url = main_url.replace("index.html", "") + soup.find("img").get("src")

# Category: the fourth "/"-separated segment of the first link whose href
# matches the category pattern.
category = soup.find(
    "a", href=re.compile("../category/books/")).get("href").split("/")[3]

# The second CSS class of the star-rating paragraph is lowercased and
# converted with word_to_num (presumably a number word such as "Three"),
# then formatted as "<n>/5".
rating = soup.find("p", class_=re.compile("star-rating")).get("class")[1]
rating = rating.lower()
rating_number = str((w2n.word_to_num(rating))) + "/5"

# The text of the first <td> on the page is used as the UPC.
upc = soup.find("td").text

product_page_url = main_url

scraped_data = pd.DataFrame(
    {
        'title': name,
        'product_description': description,
        'price_excluding_tax': price_without_tax,
        'price_including_tax': price_with_tax,
        'number_available': nb_in_stock,
        "image_url": img_url,
        'category': category,
        'review_rating': rating_number,

예제 #13

0

파일 보기

        #Step 6 - DCT (Mel Frequency Cepstrum Coefficient)
        u1 = 0
        u2 = 40
        mfcc = dct(x_filtered, type=2, axis=1, norm='ortho')[:,
                                                             u1:u2]  #[:,u2:u1]
        if (np.shape(mfcc)[1] != u2):
            mfcc = np.hstack(
                [mfcc, np.zeros((num_filters, u1 - np.shape(mfcc)[1]))])
        if (np.shape(mfcc)[0] > 100):
            mfcc = mfcc[:100]
        elif (np.shape(mfcc)[0] < 100):
            mfcc = np.vstack(
                [mfcc, np.zeros((100 - len(mfcc), np.shape(mfcc)[1]))])
        mfccfeatures.append(mfcc)
        labels.append(word_to_num(fol))
#         plt.show()
# Persist the collected features and labels; the context managers close (and
# flush) each file instead of leaving the handle open.
with open('1mfccfeatures.pickle', 'wb') as features_file:
    pickle.dump(mfccfeatures, features_file)
with open('1mfccfeatureslabels.pickle', 'wb') as labels_file:
    pickle.dump(labels, labels_file)

# In[ ]:

#Read the signal
req = 1
mfccfeatures = []
labels = []
folders = os.listdir("./validation")
for fol in folders:
    namefol = "./validation/" + fol + "/*.wav"
    audiofiles = glob.glob(namefol)
    audiofiles = sorted(audiofiles)

예제 #14

0

파일 보기

f.close()

#parse each page
for image in images:
    race_raw = pytesseract.image_to_string(image)
    #print(race_raw)
    race_ln = race_raw.split('\n')
    i = 0  #line index

    #race data: date, distance, wind speed, wind type, track
    date = parse((race_ln[i].split(' - '))[1]).date()

    while not 'Track Record:' in race_ln[i]:
        i += 1
    dist_str = (' '.join((race_ln[i].split(' '))[0:4]))
    dist = w2n.word_to_num(dist_str)

    while not 'Wind Speed:' in race_ln[i]:
        i += 1
    w_ln = race_ln[i].split(' ')
    ws = int(w_ln[2])
    wt = w_ln[5].upper()

    while not 'Track:' in race_ln[i]:
        i += 1
    trk = (race_ln[i].split(' '))[3].upper()

    #get to first line of horse data
    while not 'Last' in race_ln[i]:
        i += 1
    i += 1

예제 #15

0

파일 보기

This is a dumb calculator that can add and subtract whole numbers from zero to five.
When you run the code, you are prompted to enter two numbers (as English
words rather than digits) and the operator (also as an English word).
The code will perform the calculation and give the result if your input is what it
expects.

The code is very long and messy. Refactor it according to what you have learned about
code simplicity and efficiency.
"""

from word2number import w2n
from num2words import num2words

# Greet the user and read the two operands (as English number words) and the
# operator word from standard input.
print('Welcome to this calculator!')
print('It can add and subtract whole numbers from zero to five')
a = w2n.word_to_num(input('Please choose your first number (zero to five): '))
b = input('What do you want to do? plus or minus: ')
c = w2n.word_to_num(input('Please choose your second number (zero to five): '))

# Kept for any later code that indexes the inputs as [first, operator, second].
num = [a, b, c]

if b == "plus":
    answer = a + c
elif b == "minus":
    answer = a - c
else:
    # Without this branch an unknown operator would leave `answer` undefined
    # and fail below with an unrelated NameError.
    raise SystemExit("Unknown operation: expected 'plus' or 'minus'")

# Spell the numeric result back out as English words.
word_answer = num2words(answer)

# if a == 'zero' and b == 'plus'  and c == 'zero':
#     print("zero plus zero equals zero")
# if a == 'zero' and b == 'plus'  and c == 'one':

예제 #16

0

파일 보기

파일: parsetime.py 프로젝트: rhnvrm/temporal-phrases

def parse_sentence(input):
    """Classify a temporal phrase and print the time it resolves to.

    The phrase is tokenised and POS-tagged with NLTK. While scanning the
    tokens the function records:

    * ``timevalue`` - the last cardinal number (tag ``CD``), read either as
      digits or via ``w2n.word_to_num``;
    * ``important_words`` - keywords that steer the interpretation;
    * ``sentence_type`` - 1 by default, 2 when a year/month/week noun or
      "next"/"last" is seen, 3 when "before"/"after" is seen (the last
      matching token wins).

    The result is printed, prefixed with ``types_of_sentence[sentence_type]``;
    nothing is returned.

    Args:
        input: The phrase to analyse (name kept for caller compatibility even
            though it shadows the builtin).
    """
    pos_tagged = nltk.pos_tag(word_tokenize(input))

    sentence_type = 1
    important_words = []
    timevalue = 0
    # Created once, before the loop, so it exists even when the phrase yields
    # no tokens and is not needlessly rebuilt for every token.
    time = Time()

    for token, key in pos_tagged:
        val = token.lower()

        if key == 'CD':
            timevalue = int(val) if val.isdigit() else w2n.word_to_num(val)

        if key in ('NN', 'NNS') and val.startswith(('year', 'month', 'week')):
            important_words.append(val)
            sentence_type = 2

        if key == 'JJ' and val in ('next', 'last'):
            important_words.append(val)
            sentence_type = 2

        if key == 'IN' and val in ('before', 'after'):
            important_words.append(val)
            sentence_type = 3

        # These keywords are recorded regardless of their POS tag.
        if val in ('hour', 'hours', 'minutes', 'minute', 'morning', 'evening'):
            important_words.append(val)

    if sentence_type == 3:
        # TODO: check if increment is required
        time.inc_date()
        time.set_hours(timevalue)
        time.set_min(0)
        if 'morning' in important_words:
            if time.get_ihours() > 12:
                time.update_hours(-12)
        elif 'evening' in important_words:
            if time.get_ihours() < 12:
                time.update_hours(12)
        print(types_of_sentence[sentence_type], time.get_human())

    elif sentence_type == 2:
        # "next" moves forward, "last" moves back; otherwise no offset.
        delta = 0
        if 'next' in important_words:
            delta = 1
        elif 'last' in important_words:
            delta = -1

        print(types_of_sentence[sentence_type], time.get_year(),
              time.get_iyear() + delta * timevalue)

    elif sentence_type == 1:
        if 'hour' in important_words or 'hours' in important_words:
            time.update_hours(timevalue)
        elif 'minute' in important_words or 'minutes' in important_words:
            time.update_min(timevalue)

        if 'morning' in important_words:
            if time.get_ihours() > 12:
                time.update_hours(-12)
        elif 'evening' in important_words:
            if time.get_ihours() < 12:
                time.update_hours(12)
        print(types_of_sentence[sentence_type], time.get_human())

예제 #17

0

파일 보기

# Listen on the microphone and act on spoken "add ..." / "remove ..." commands
# until a transcript containing "addyu" is heard.
# `transcript` and the recognizer `r` are defined outside the visible snippet.
with sr.Microphone() as source:
    while "addyu" not in transcript:
        audio = r.listen(source)
        # Printed before recognition, so this shows the previous transcript.
        print(transcript)
        try:
            transcript = r.recognize_google(audio)
            testy = transcript.split()
            # Word 1 is the quantity, word 2 the unit, and words from index 4
            # onward the food name; the word at index 3 is not read.
            if (testy[0] == 'add') and len(testy) >= 5:
                i = 4
                food = ''
                # NOTE(review): the food words are concatenated with no
                # separator between them — confirm this is intended.
                while (i < len(testy)):
                    food += testy[i]
                    i = i + 1
                print("food: " + food)
                print("quantity: " + testy[1] + " " + testy[2])
                # Convert the spoken quantity word to a number, then back to
                # a string for the command line below.
                quan = w2n.word_to_num(testy[1])
                quan = str(quan)
                print("Quantity: " + quan)
                print("'" + food + "'" + " " + quan + " " + testy[2])
                # Run the Node script with: '<food>' <quantity> <unit>.
                muterun_js(
                    'node/index2.js',
                    "'" + food.lower() + "'" + " " + quan + " " + testy[2])
                print("updated")
            # Same word layout as the "add" command above.
            if testy[0] == 'remove' and len(testy) >= 5:
                i = 4
                food = ''
                while (i < len(testy)):
                    food += testy[i]
                    i = i + 1
                print("food: " + food)
                print("quantity: " + testy[1] + " " + testy[2])

예제 #18

0

파일 보기

def findNumbersInWords(words):
    """Replace the numbers found in ``words`` with placeholders, in place.

    Every number - written either as an arithmetic literal (``"3"``,
    ``"(1+2)*2"``) or spelled out over one or more consecutive words
    (``"twenty", "one"``) - is replaced by ``"a<k>"``, where ``k`` is the
    index of its value in the returned list. Equal values share one
    placeholder. For a spelled-out number spanning several words, the first
    word receives the placeholder and the rest are set to ``""``.

    Args:
        words: List of word strings; it is modified in place.

    Returns:
        The distinct numeric values found, in order of first appearance.
    """
    numbers = []
    for i, word in enumerate(words):
        s = word
        j = i
        num = None

        # Only strings made purely of arithmetic characters are evaluated.
        # NOTE(review): eval() is still risky on untrusted text - an input
        # such as "9**9**9**9" can hang the process. Consider a real parser.
        if all(letter in "0123456789.()/*+-" for letter in word):
            try:
                num = float(eval(word))
            except Exception:
                num = None

        if num is None:
            # Not an arithmetic literal: try to read it as a number word.
            try:
                num = w2n.word_to_num(s)
            except Exception:
                num = None

            # Keep extending the phrase with following words for as long as
            # doing so changes the parsed value.
            prevNum = None
            while num != prevNum:
                prevNum = num
                j += 1
                try:
                    num = None
                    if words[j] == "point":
                        # "point" needs the next word too to form a decimal.
                        s += " " + words[j] + " " + words[j + 1]
                        tempNum = w2n.word_to_num(s)
                        if tempNum != prevNum:
                            num = tempNum
                            j += 1
                    if num is None:
                        s += " " + words[j]
                        num = w2n.word_to_num(s)
                except Exception:
                    # End of list or unparseable extension: keep the value
                    # parsed so far, which also ends the loop.
                    num = prevNum

        if num is None:
            continue

        if num not in numbers:
            words[i] = "a" + str(len(numbers))
            numbers.append(num)
        else:
            words[i] = "a" + str(numbers.index(num))
        # Blank out the extra words consumed by a multi-word number
        # (an empty range for single-word and literal numbers).
        for k in range(i + 1, j):
            words[k] = ""

    return numbers

예제 #19

0

파일 보기

def __applyner(sequence):
    """Normalise named entities in ``sequence`` based on their NER label.

    The text is run through the module-level ``nlp`` pipeline and each
    detected entity is rewritten inside ``sequence`` according to its label:

    * ORG - punctuation and common company suffixes are removed;
    * MONEY - filler words are dropped and amounts such as "$5 million" are
      expanded to a plain number;
    * LAW, GPE, PERSON - digits, some punctuation and stop words are removed;
    * CARDINAL, QUANTITY - thousands separators are removed from numbers;
    * PERCENT - only recorded as a tag, the text is left untouched.

    Args:
        sequence: The input text.

    Returns:
        A ``(sequence, ner_tags)`` tuple: the rewritten text and the list of
        tags collected from the entities.
    """
    # NOTE(review): hard-coded absolute path. pickle.load can execute
    # arbitrary code, so this file must come from a trusted source.
    with open(
            '/home/madhvi/IRE/MajorProject/Representations-in-Financial-Domain/tickermapping.pickle',
            'rb') as pickfile:
        tickermapping = pickle.load(pickfile)
    ner_tags = []
    doc = nlp(sequence)  # applying NER
    for X in doc.ents:
        # If the NER class is ORG
        if X.label_ == 'ORG':
            # X.text can be a company name (e.g. "microsoft corp") or a
            # ticker-like name (e.g. "MSFT").
            text = X.text
            if text in tickermapping.keys():
                text = tickermapping[X.text]
            # NOTE(review): the next line starts again from X.text, so the
            # ticker lookup above has no effect. Behaviour is kept as-is;
            # confirm whether `text` was meant to be cleaned instead.
            text = re.sub(r'[^\w\s]', '', X.text).lower()
            for suffix in ('inc', 'ltd', 'llp', 'limited', 'corp', 'the'):
                text = text.replace(suffix, '')
            sequence = sequence.replace(X.text, text)
            ner_tags.extend(text.lower().split(" "))
        # If NER class is MONEY
        elif X.label_ == 'MONEY':
            new_X = X.text.lower()
            # Remove words which might appear inside a MONEY entity.
            # NOTE(review): these are substring replacements, so "and" is
            # also removed from "thousand" - confirm this is acceptable.
            for filler in ('approximately', 'per', 'to', 'and', 'between'):
                new_X = new_X.replace(filler, '')
            if 'phone' in new_X:
                continue
            # Re-run NER on the cleaned string so that e.g. "$200" and "$500"
            # come out as separate entities.
            if '$' not in new_X:
                new_X = "$" + new_X
            doc1 = nlp(new_X)
            for Y in doc1.ents:
                money = Y.text
                if ' ' not in money:
                    act_money = money.replace(',', '')  # actual money
                    # Replace original money text with actual money
                    sequence = sequence.replace(Y.text, act_money)
                    ner_tags.append(act_money)
                else:
                    money = Y.text[Y.text.find("$") + 1:]
                    k = money.find(' ')
                    try:
                        act_money = float(money[:k].replace(',', ''))
                        # Convert magnitude words, e.g. "million" -> 1e6
                        money_conv = w2n.word_to_num(money[k:])
                        # Replace original money text with actual money
                        sequence = sequence.replace(
                            Y.text, "$ " + str(act_money * money_conv))
                    except Exception:
                        # On any conversion failure leave the original
                        # sentence untouched and move on.
                        continue
        # If NER class is LAW
        elif X.label_ == 'LAW':
            new_X = re.sub(r'[\d.!?\-"]', '', X.text)
            # The text is lower-cased only if one of these words is present.
            for stop_word in ('the', 'of', 'section'):
                if stop_word in new_X.lower():
                    new_X = new_X.lower().replace(stop_word, '')
            sequence = sequence.replace(X.text, new_X)
            ner_tags.extend(new_X.split(" "))
        # If NER class is Location or Person
        elif X.label_ in ('GPE', 'PERSON'):
            # The regex already strips '.', so only 'the' is left to remove.
            new_X = re.sub(r'[\d.!?\-"]', '', X.text.lower())
            new_X = new_X.replace('the', '')
            sequence = sequence.replace(X.text, new_X)
            ner_tags.extend(new_X.split(" "))
        elif X.label_ == 'CARDINAL':
            number = X.text.replace(',', '')
            if number.isnumeric():
                sequence = sequence.replace(X.text, number)
        elif X.label_ == 'QUANTITY':
            for number in X.text.split(" "):
                number = number.replace(',', '').replace('.', '')
                if number.isnumeric():
                    sequence = sequence.replace(X.text, number)
        elif X.label_ == "PERCENT":
            ner_tags.append(X.text.replace('%', ''))
    return sequence, ner_tags

예제 #20

0

파일 보기

파일: pre_processing_misc.py 프로젝트: Sreeja-coder/Machine-Learning-

        # Drop the word "old" from the age phrase and trim whitespace.
        age = age.replace("old", "")
        age = age.strip()
        # Join a multi-word age with hyphens before the WordNet lookup.
        if age.find(" ") >= 0:
            temp = age.split(" ")
            # print(temp)
            age = '-'.join(temp)
        syns = wordnet.synsets(age.strip())
        # print("A",age)
        # print("S",syns[0].lemmas()[0].name())
        # Replace the phrase with the first lemma name of the first synset.
        # NOTE(review): raises IndexError when WordNet returns no synsets.
        age = syns[0].lemmas()[0].name()
        # Turn hyphens back into spaces before converting words to a number.
        if age.find("-") >= 0:
            temp = age.split("-")
            # print(temp)
            age = ' '.join(temp)
        # print(age.strip())
        defendants_age.append(w2n.word_to_num(age.strip()))
    else:
        # The enclosing condition is outside the visible snippet; in this
        # branch the age is recorded as 0.
        defendants_age.append(int(0))
# print("**")
# mean =  int(sum(defendants_age)/sum(1 for x in defendants_age if x > 0))
# print("mean",mean)
# print(defendants_age)
# defendants_age = [mean for age in defendants_age if age == 0]
#subsitute not known with mean value
# for i in range(len(defendants_age)):
#     if defendants_age[i] == 0:
#         defendants_age[i] = mean

# print("******")
# print(len(defendants_age))
# # print(len(labels))

예제 #21

0

파일 보기

from word2number import w2n
# Upper-case technology company / product names.
extended_tech = [
    'GOOGLE', 'ZOOM', 'INSTAGRAM', 'FACEBOOK', 'WHATSAPP', 'ALPHABET',
    'AMAZON', 'NOKIA', 'HTC', 'APPLE', 'TENCENT', 'ZOOM', 'MICROSOFT',
    'ORACLE', 'COMPASS', 'RAZER'
]
# Words signalling that something is happening right now.
immediacy_indicators = [
    'NOW',
    'TODAY',
    'PRESENTLY',
    'CURRENTLY',
]
# Words and phrases expressing "each / all".
every = ['EVERY', 'PER', 'ALL', 'ANY', 'TOTAL', 'ONE BY ONE']
# Words expressing interest or desire.
interest_syn = [
    'INTEREST', 'LIKE', 'ENGAGE', 'EXCITE', 'LIKE', 'ENJOY', 'LOVE', 'PREFER',
    'WANT', 'APPRECIATE', 'NEED', 'WISH', 'EAGER'
]

numbers = ['ONE', 'TWO']
# Smoke check: print the value w2n parses from the words "twenty three".
print(w2n.word_to_num('twenty three'))

예제 #22

0

파일 보기

파일: app.py 프로젝트: rakaar/Narmada-server

def parseResources():
    """Parse a free-text message from the current Flask request.

    The text is read from the ``text`` key of the JSON body or, failing that,
    from the URL-decoded query string (itself parsed as JSON). It is then
    analysed for contacts, sources, locations and resource words, and a
    quantity is attached to each resource word where one can be found.

    Returns:
        A Flask JSON response with the keys ``Contact``, ``Sources``,
        ``ResourceWords``, ``Locations`` and ``Resources``.
    """
    resource = {}
    print(flask.request.json)
    print(unquote(flask.request.query_string.decode('utf-8')))
    if flask.request and flask.request.json and 'text' in flask.request.json:
        line = flask.request.json['text']
    else:
        line = json.loads(unquote(
            flask.request.query_string.decode('utf-8')))['text']

    print('Received for parsing: ', line)
    contacts = get_contact(line)
    # The result is not used; the call is kept in case it has side effects.
    location.tweet_preprocess2(line, [])
    sources, _, locations, _, rWords, final_resource_dict = create_resource_list(
        line)

    resource['Contact'] = {
        'Phone number': list(contacts[0]),
        "Email": list(contacts[1])
    }
    resource['Sources'] = sources
    resource['ResourceWords'] = rWords
    resource['Locations'], resource['Resources'] = dict(), {}
    for each in locations:
        # each[1] is indexed as (lat, long).
        resource['Locations'][each[0]] = {
            "long": float(each[1][1]),
            "lat": float(each[1][0])
        }

    # Map every resource to the first of its buckets that is a known class.
    resources_bucket = {}
    for each_resource, buckets in final_resource_dict.items():
        for bucket in buckets:
            if bucket in bucket_classes:
                resource['Resources'].setdefault(bucket,
                                                 {})[each_resource] = 'None'
                resources_bucket[each_resource] = bucket
                break

    split_text = line.split()

    for rWord in rWords:
        # Words immediately preceding a token that rWord starts with. The
        # range starts at 1 because the first token has no predecessor
        # (index -1 would wrap around to the last word of the text).
        prev_words = [
            split_text[i - 1] for i in range(1, len(split_text))
            if rWord.startswith(split_text[i])
        ]
        qt = 'None'

        try:
            # First choice: a quantity right before the resource word.
            for word in prev_words:
                parsed = _parse_quantity(word)
                if parsed is not None:
                    qt = parsed
                    break

            # Fallback: the first token of the resource word itself.
            if qt == 'None':
                parsed = _parse_quantity(rWord.strip().split()[0])
                if parsed is not None:
                    qt = parsed

            # The quantity is already part of the resource word: skip it.
            if qt != 'None' and qt in rWord:
                print(rWord, qt)
                continue

        except Exception:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            qt = 'None'

        # NOTE(review): raises KeyError if rWord was never assigned a bucket
        # above - confirm that every rWord is a key of final_resource_dict.
        resource['Resources'][resources_bucket[rWord]][rWord] = qt

    ## Need to add quantity

    print('Returning', resource)
    return flask.jsonify(resource)


def _parse_quantity(word):
    """Return ``word`` as a quantity string, or None if it is not a number."""
    word = word.replace(',', '')
    if word.isnumeric():
        return str(word)
    try:
        return str(w2n.word_to_num(word))
    except Exception:
        return None

예제 #23

0

파일 보기

파일: ActionCrossword.py 프로젝트: oguzhankarahan/hermod

    async def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
                  domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
        """Check a spoken crossword answer and report the outcome.

        Reads the ``crossword`` slot, fetches that crossword, and extracts
        the position (number plus "across"/"down") and the proposed word
        from the tracker. A correct answer is published to
        ``hermod/<site>/crossword/fill``. In every other case the user is
        told what was missing or wrong and a prompt is published to
        ``hermod/<site>/display/show``.

        Any exception is printed and swallowed, so the events collected up
        to that point are still returned.

        Args:
            dispatcher: Used to send messages back to the user.
            tracker: Conversation state (sender id, slots, entities).
            domain: Not used by this action.

        Returns:
            The list of slot events produced by this turn.
        """
        # Bound before the try block so the final return can never hit an
        # unbound name when an early statement raises.
        slotsets = []

        def end_turn():
            # Both events are emitted together on every handled outcome.
            slotsets.append(SlotSet("hermod_force_continue", None))
            slotsets.append(SlotSet("hermod_force_end", "true"))

        try:
            site = tracker.current_state().get('sender_id')
            slots = tracker.current_state().get('slots')

            if not (slots.get('crossword')
                    and len(slots.get('crossword')) > 0):
                return slotsets
            crossword = await get_crossword(slots.get('crossword'))
            if not crossword:
                return slotsets

            crossword_position = self.extract_entities(
                tracker, ['crossword_position'])
            word = self.extract_entities(tracker, [
                'word',
                'thing',
                'person',
                'place',
            ])

            if not crossword_position:
                dispatcher.utter_message(
                    text="I didn't hear the position you wanted to fill")
                end_turn()
                await publish('hermod/' + site + '/display/show',
                              {'question': crossword_position})
                return slotsets

            # Strip the direction words and keep the first remaining token.
            # split() without arguments ignores the leading blank left
            # behind when the direction comes first ("across five").
            parts = crossword_position.replace('across',
                                               '').replace('down',
                                                           '').split()
            clean_number = parts[0] if parts else ''
            just_number = None
            if clean_number.isdigit() and int(clean_number) > 0:
                # integer from text
                just_number = clean_number
            else:
                # convert number from text
                try:
                    just_number = w2n.word_to_num(clean_number)
                except Exception:
                    # Not a number word: reported below as a missing number.
                    just_number = None

            direction = None
            if "across" in crossword_position:
                direction = "across"
            elif "down" in crossword_position:
                direction = "down"

            if not just_number:
                dispatcher.utter_message(
                    text="I didn't hear the number you wanted to fill")
                end_turn()
                await publish('hermod/' + site + '/display/show',
                              {'question': crossword_position})
                return slotsets

            if not direction:
                dispatcher.utter_message(
                    text="I didn't hear which direction you wanted to fill")
                end_turn()
                await publish('hermod/' + site + '/display/show',
                              {'question': crossword_position})
                return slotsets

            # just_number is an int when it came from w2n, so it has to be
            # converted explicitly before string concatenation.
            prompt = {
                'question': str(just_number) + ' ' + direction + ' is '
            }

            if not word:
                dispatcher.utter_message(
                    text="I didn't hear the word you wanted to fill")
                end_turn()
                await publish('hermod/' + site + '/display/show', prompt)
                return slotsets

            # NOTE(review): a clue number missing from the crossword data
            # makes .get(...) return None and the next .get raise; that is
            # caught and reported by the handler below.
            answer = crossword.get('data', {}).get(direction, {}).get(
                str(just_number)).get('answer',
                                       '').lower().strip().replace(' ', '')
            if word.lower().strip().replace(' ', '') == answer:
                dispatcher.utter_message(text="Correct")
                await publish(
                    'hermod/' + site + '/crossword/fill', {
                        'direction': direction,
                        "word": word.strip().replace(' ', ''),
                        "number": just_number
                    })
                end_turn()
            else:
                dispatcher.utter_message(text="Nope, try again")
                end_turn()
                await publish('hermod/' + site + '/display/show', prompt)

        except Exception as e:
            print('ERROR')
            print(e)
        return slotsets

예제 #24

0

파일 보기

파일: spoken2written.py 프로젝트: amansinghal123/spoken2written

def QuantityMoneyTranslator(para):
    """Rewrite spelled-out money amounts and quantities in numeric form.

    MONEY and QUANTITY entities found by spaCy are converted with
    ``w2n.word_to_num``. Money gets a currency-symbol prefix (e.g.
    ``"six dollars"`` becomes ``"$6"``) and quantities get a unit suffix
    (e.g. ``"thirty five kilograms"`` becomes ``"35 kg"``).

    Args:
        para: The text to translate.

    Returns:
        The text with those entities replaced; tokens are re-joined with
        single spaces.
    """
    # NOTE(review): the model is loaded on every call, which is slow;
    # consider loading it once at module level.
    nlp = en_core_web_sm.load()
    doc = nlp(para)

    currency_symbols = {
        "dollars": "$", "dollar": "$",
        "euros": "€", "euro": "€",
        "yens": "¥", "yen": "¥",
        "pound": "£", "pounds": "£",
    }
    # Rupees are recognised as a currency word but have no symbol.
    currency = set(currency_symbols) | {"rupee", "rupees"}
    unit_suffixes = {"pounds": " lbs", "kilograms": " kg", "grams": " gm"}

    ls_money = [str(X) for X in doc.ents if X.label_ == 'MONEY']
    ls_quan = [str(X) for X in doc.ents if X.label_ == 'QUANTITY']

    # Money conversion: currency words become a symbol prefix and the
    # remaining words are parsed as the amount.
    money = []
    for phrase in ls_money:
        words = phrase.lower().split()
        symbol = currency_symbols.get(
            ' '.join(w for w in words if w in currency), "")
        amount = ' '.join(w for w in words if w not in currency)
        money.append(symbol + str(w2n.word_to_num(amount)))

    # Quantity conversion: unit words become a suffix.
    quan = []
    for phrase in ls_quan:
        words = phrase.lower().split()
        suffix = unit_suffixes.get(
            ' '.join(w for w in words if w in unit_suffixes), "")
        amount = ' '.join(w for w in words if w not in unit_suffixes)
        quan.append(str(w2n.word_to_num(amount)) + suffix)

    # First pass: substitute the quantities.
    ans = ' '.join(_replace_entities(doc, 'QUANTITY', quan))

    # Second pass: re-parse the rewritten text and substitute the money.
    # NOTE(review): `money` was built from the first parse; if re-parsing
    # finds a different number of MONEY entities this raises IndexError.
    doc = nlp(ans)
    return ' '.join(_replace_entities(doc, 'MONEY', money))


def _replace_entities(doc, label, replacements):
    """Return the token strings of ``doc`` with ``label`` entities replaced.

    The first token of each matching entity is replaced by the next item of
    ``replacements``; the entity's remaining tokens are dropped.
    """
    pieces = []
    used = 0
    for token in doc:
        if token.ent_type_ == label and token.ent_iob_ == 'B':
            pieces.append(str(replacements[used]))
            used += 1
        elif token.ent_type_ == label and token.ent_iob_ == 'I':
            continue
        else:
            pieces.append(str(token))
    return pieces

예제 #25

0

파일 보기

def run_alexa():
    """Listen for one voice command and carry out the matching action.

    The command text comes from ``take_command()`` and is matched against
    keyword groups in a fixed order; the first group that matches wins, so
    the order of the checks below is significant (for example ``'play'`` is
    tested before ``'time'``).  Spoken feedback goes through ``talk()``.

    Returns:
        ``1`` when a good-night phrase is recognised, otherwise ``None``.
    """
    global degree

    command = take_command()

    # Arithmetic requests: (keywords, opcode) pairs checked in this order.
    # The opcode is handed unchanged to calculate() together with every
    # integer found in the command.
    arithmetic_commands = (
        (('multiply',), 3),
        (('add', 'sum', 'plus', '+'), 1),
        (('minus', 'sub', 'subtract', '-'), 2),
        (('divide', 'div'), 4),
        (('modulus', 'mod'), 5),
        (('power', 'pow'), 6),
    )
    for keywords, opcode in arithmetic_commands:
        if any(word in command for word in keywords):
            numbers = [int(tok) for tok in re.findall(r'[0-9]+', command)]
            talk("The answer is " + str(calculate(numbers, opcode)))
            return

    if 'game' in command:
        talk('Ok! I will guess a number between 1 to 10, Just find it.')
        r = random.randint(1, 10)
        num = take_command()
        # Only the conversion is guarded, so a failure inside talk() is no
        # longer misreported as "Only numbers allowed".
        try:
            guess = int(w2n.word_to_num(num))
        except (ValueError, TypeError):
            talk("Only numbers allowed! Thanks for playing!")
        else:
            if guess == r:
                talk("Hurray! You won the game")
            else:
                talk("I won! Thanks for playing!")
    elif 'play' in command:
        song = command.replace('play', '')
        talk('playing ' + song)
        pywhatkit.playonyt(song)
    elif 'time' in command:
        time1 = datetime.datetime.now().strftime('%I:%M %p')
        talk('Current time is ' + time1)
    elif 'date' in command:
        date = datetime.datetime.now().strftime('%d:%B:%Y')
        talk('Today is ' + date)
    elif any(i in command for i in ['search', 'find', 'who', 'get']):
        data = wikipedia.summary(command, 3)
        talk(data)
    elif 'joke' in command:
        talk(pyjokes.get_joke())
    elif 'send' in command:
        # Digit runs may contain spaces ("98765 43210"); strip them before
        # validating the length.
        digit_runs = re.findall(r'[0-9 ]+', command)
        candidates = [run.replace(' ', '') for run in digit_runs if len(run) > 1]
        # A command without any digits used to raise IndexError here.
        if not candidates or len(candidates[0]) != 10:
            talk("Please provide a valid number !")
            return
        num = candidates[0]
        # Schedule for the next minute.  Adding a timedelta rolls over the
        # hour correctly, whereas "minute + 1" produced 60 at hh:59.
        send_at = datetime.datetime.now() + datetime.timedelta(minutes=1)
        pywhatkit.sendwhatmsg('+91' + num, 'Hii', send_at.hour,
                              send_at.minute)
    elif 'cancel shut' in command:
        pywhatkit.cancelShutdown()
        talk('System Shutdown Cancelled!')
    elif 'shutdown' in command:
        pywhatkit.shutdown(100)
        talk('System is going to shutdown!')
    elif bool(re.search(r'\.[a-zA-Z0-9]{2,3}', command)):
        # Anything that looks like "name.tld" is treated as a URL.
        url = command.replace('open', '').strip()
        print(url)
        webbrowser.open_new_tab(url if 'http' in url else 'https://' + url)
        talk('Opening ' + url + 'in chrome ')
    elif 'open' in command:
        app = command.replace('open', '').strip()
        app = ''.join(app.split())
        if 'computer' in app:
            subprocess.Popen(r'explorer /select,"C:\"' + app, shell=True)
            talk('Opening ' + app)
            return
        elif any(i in app for i in ['whatsapp', 'msteams', 'spotify']):
            subprocess.Popen(r'start ' + app + ':', shell=True)
            talk("Opening  " + app)
            return
        elif 'camera' in app:
            cam = cv2.VideoCapture(0)
            talk('Opening Camera! To capture image press spacebar once!')
            while cam.isOpened():
                ret, frame = cam.read()
                cv2.imshow('Camera', frame)
                k = cv2.waitKey(50)
                if k == 32:  # key code 32 is the space bar
                    r = random.randint(10, 10000)
                    cv2.imwrite(f'captured{r}.png', frame)
                    talk(
                        f'Image captured and saved as captured{r} into current directory!'
                    )
                # Stop once the preview window has been closed.
                if cv2.getWindowProperty('Camera', cv2.WND_PROP_VISIBLE) < 1:
                    break
            cam.release()
            cv2.destroyAllWindows()
            return
        elif 'python' in app:
            n = os.startfile('python.exe')
            if n:
                talk('Sorry! I cant Open ' + app)
                return
            talk('Opening ' + app)
            return
        elif 'chrome' in command:
            webbrowser.open_new_tab('https://google.com')
            talk("Opening  " + app)
            return
        # Fallback: hand the name to Windows Explorer.
        try:
            subprocess.Popen(f'explorer {app}')
            print(os.path.realpath(app))
            talk("Opening  " + app)
        except (OSError, ValueError):
            talk("Sorry I can't open " + app)
    elif 'close' in command:
        app = command.replace('close', '').strip()
        app = ''.join(app.split())
        flag = 0
        for process in psutil.process_iter():
            if app in process.name().lower():
                process.kill()
                flag = 1
        if flag:
            talk('Closing ' + app)

    elif any(i in command for i in ['count', 'startcounter', 'starttimer']):
        bounds = sorted(int(tok) for tok in re.findall(r'[0-9]+', command))
        if len(bounds) >= 2:
            first, last = bounds[0], bounds[1]
        elif bounds:
            # A single number used to raise IndexError; count from 1 up to it.
            first, last = 1, bounds[0]
        else:
            first, last = 1, 10
        for i in range(first, last + 1):
            talk(i)
            time.sleep(1)
    elif any(i in command for i in ['goodnight', 'sweetdreams', 'night']):
        talk('Good night! Sweet dreams and takecare!')
        return 1
    elif any(i in command for i in ['goodmorning', 'morning']):
        talk('Morning! It’s good to see you!')
    elif bool(re.match(r'your.*?name', command)):
        talk('I am Nandyalexa, how may I help you?')
    elif 'single' in command:
        talk('I am already in relationship with nandy!')
    elif 'weather' in command:
        # The place is taken to be the last fragment after "weather"/"in ".
        place = [
            i for i in re.split(r'weather|in ', command) if len(i) > 1 and i
        ][-1]
        data = openweathermap('api.openweathermap.org/data/2.5/weather?',
                              place=place)
        string = f'Current weather status in {place} is {data[0]}, Temperature in {place} is {data[1]}, Pressure in {place} is {data[2]}, and Wind Speed in {place} is {data[3]}'
        talk(string)
    elif 'where' in command:
        print(command)
        js = requests.get('https://freegeoip.app/json/').json()
        talk(
            f"Your Country is {js['country_name']}, Your Region is {js['region_name']}, Your city is {js['city']}, and Your Time zone is {js['time_zone']}"
        )

    elif 'screen' in command:
        screen = rotatescreen.get_primary_display()
        if any(i in command for i in ['default', '0', 'stop']):
            degree = 0
            screen.rotate_to(degree)
            talk('Screen set to normal!')
            return
        # Each request applies the stored angle, then advances it by 90.
        screen.rotate_to(degree % 360)
        talk('Screen rotated to ' + str(degree))
        degree += 90
    elif 'translate' in command:
        translator = Translator()
        talk('What is the source language which you are gonna speak?')
        from_lang = get_languagecode(take_command().strip())
        print(from_lang)
        talk('What is the destination language which needs to be translated?')
        to_lang = get_languagecode(take_command().strip())
        talk("what message to be translated?")
        get_message = take_command()
        text_to_translate = translator.translate(get_message,
                                                 src=from_lang,
                                                 dest=to_lang)
        text = text_to_translate.text
        speak = gTTS(text=text, lang=to_lang, slow=False)
        speak.save("captured_voice.mp3")
        music = pyglet.media.load("captured_voice.mp3", streaming=False)
        music.play()
        time.sleep(music.duration)  # keep running until playback finishes
        os.remove("captured_voice.mp3")  # remove the temporary file
    elif 'sleep' in command.strip():
        talk("Ok sir! I won't disturb you for a minute")
        time.sleep(60)
        # NOTE(review): talk() is called without a message here; confirm that
        # talk() accepts zero arguments.
        talk()
    else:
        talk('Please say the command again ! ')

예제 #26

0

파일 보기

def interpret_location(interpreter, speaker, d, ignore_reldir=False) -> XYZ:
    """Convert a location dict into block coordinates.

    Args:
        interpreter: object exposing ``memory`` and ``agent``.
        speaker: name of the speaking player, used for memory lookups.
        d: location dict.  Keys read here are ``"location_type"`` (defaults
            to ``"SPEAKER_LOOK"``), ``"coordinates"``,
            ``"relative_direction"`` and ``"steps"``.
        ignore_reldir: when true, ``"relative_direction"`` is not applied.

    Returns:
        The result of ``to_block_pos`` on the resolved location.

    Raises:
        ErrorWithResponse: for malformed coordinates, or an ``"INSIDE"``
            request with no interior found.
        ValueError: when the location type cannot be resolved.

    Side effect (per the original author's note, not visible in this
    function): memories for an interpreted reference object are added to
    the agent's recent entities -- presumably inside
    ``maybe_get_location_memory``.
    """
    location_type = d.get("location_type", "SPEAKER_LOOK")
    if location_type == "SPEAKER_LOOK":
        player = interpreter.memory.get_player_struct_by_name(speaker)
        loc = capped_line_of_sight(interpreter.agent, player)

    elif location_type == "SPEAKER_POS":
        loc = pos_to_np(
            interpreter.memory.get_player_struct_by_name(speaker).pos)

    elif location_type == "AGENT_POS":
        loc = pos_to_np(interpreter.agent.get_player().pos)

    elif location_type == "COORDINATES":
        # Pull every numeric token out of the string and truncate to int.
        loc = cast(
            XYZ,
            tuple(
                int(float(w))
                for w in re.findall("[-0-9.]+", d["coordinates"])))
        if len(loc) != 3:
            logging.error("Bad coordinates: {}".format(d["coordinates"]))
            raise ErrorWithResponse(
                "I don't understand what location you're referring to")
    else:
        # Every other type (including REFERENCE_OBJECT) goes through memory;
        # this is the only branch that binds `mems`.
        loc, mems = maybe_get_location_memory(interpreter, speaker, d)
        if loc is None:
            raise ValueError(
                "Can't handle Location type: {}".format(location_type))

    # handle relative direction
    reldir = d.get("relative_direction")
    if reldir is not None and not ignore_reldir:
        if reldir == "BETWEEN":
            pass  # loc already handled when getting mems above
        # This must be `elif`: with a plain `if`, "BETWEEN" fell through to
        # the final `else` below and was looked up in rotation.DIRECTIONS.
        elif reldir == "INSIDE":
            if location_type == "REFERENCE_OBJECT":
                mem = mems[0]
                locs = perception.find_inside(mem)
                if len(locs) == 0:
                    raise ErrorWithResponse(
                        "I don't know how to go inside there")
                else:
                    loc = locs[0]
        elif reldir == "AWAY":
            # Step along the unit vector pointing from loc towards the agent.
            apos = pos_to_np(interpreter.agent.get_player().pos)
            dir_vec = (apos - loc) / np.linalg.norm(apos - loc)
            num_steps = word_to_num(d.get("steps", "5"))
            loc = num_steps * np.array(dir_vec) + to_block_center(loc)
        elif reldir == "NEAR":
            pass
        else:  # LEFT, RIGHT, etc...
            reldir_vec = rotation.DIRECTIONS[reldir]
            look = interpreter.memory.get_player_struct_by_name(speaker).look
            # this should be an inverse transform so we set inverted=True
            dir_vec = rotation.transform(reldir_vec,
                                         look.yaw,
                                         0,
                                         inverted=True)
            num_steps = word_to_num(d.get("steps", "5"))
            loc = num_steps * np.array(dir_vec) + to_block_center(loc)

    # if steps without relative direction
    elif "steps" in d:
        num_steps = word_to_num(d.get("steps", "5"))
        loc = to_block_center(loc) + [0, 0, num_steps]
    return to_block_pos(loc)

예제 #27

0

파일 보기

    def to_html(self):
        """Build the card-game page: a hidden data table plus static HTML/JS.

        Reads the CSV at ``config.CARDS_AND_DIMENSIONS_CSV`` and emits one
        table cell per column of every record.  A column whose *name* is
        accepted by ``w2n.word_to_num`` gets an ``<img>`` cell pointing at
        deckofcardsapi.com; every other column gets a plain text cell.

        Returns:
            str: ``html_human_play_before`` + the table + ``html_human_play_after``.
            The same string is also printed to stdout.
        """
        # The table carries id "carddata"; the stylesheet below hides it.
        t = self.H.table(border='1', id="carddata")
        r = t.tr
        with open(config.CARDS_AND_DIMENSIONS_CSV) as csvfile:
            reader = csv.DictReader(csvfile)
            # First row: the CSV column names.
            for column in reader.fieldnames:
                r.td(column)
            for row in reader:
                # Each record's row is tagged with the reader's line number;
                # the page script later looks rows up by that id.
                # NOTE(review): cells below are added through `t`, not through
                # the row object returned here -- confirm the HTML builder
                # nests them under this row.
                t.tr(id=str(reader.line_num))
                for col in row.items():

                    # col is a (column_name, value) pair.
                    try:
                        # Called only for its exception: a failure sends the
                        # column to the plain-text cell in the except branch.
                        w2n.word_to_num(col[0])

                        t.td(
                            "<img width='100' src='https://deckofcardsapi.com/static/img/"
                            + col[1] + ".png'></img>",
                            escape=False,
                            id=col[0] + str(reader.line_num),
                            klass=col[0])
                    except:
                        t.td(col[1],
                             klass=col[0],
                             id=col[0] + str(reader.line_num))

        # Page prologue: document head, stylesheet and the init() hook.
        html_human_play_before = """
        <html>
        
        <head>
        </head>
        
        <body onload="init()">
        
        <style>
        #carddata {display: none;}
        #play_area {padding-top: 50;
                   padding-bottom: 30;
                   text-decoration: none;
                   text-transform: uppercase;}
        .dimension_name {color: transparent;
                         text-align: center;}
        .dimension_value {color: transparent;
                          text-align: center;}
        .rule_name {color: transparent;
                    text-align: center;}
        .rule_bool {text-transform: uppercase;
                    color: ffffff;
                    text-align: center;}
        body {background-color: 496D89;}
        #play, #reveal, #hide {background-color: #123652; 
                border: none;
                color: white;
                padding-top: 20;
                padding: 45px 85px;
                text-align: center;
                text-decoration: none;
                display: inline-block;
                font-size: 16px;}
        </style>
   
        <script>
        function init() {
        pywebview.api.init()
        </script>       
        

        """

        # Page epilogue: play area, the three buttons and their handlers.
        html_human_play_after = """
        <div id="play_area">
        
        <table id="play_table"></table>
        <p id="rule_display">&nbsp;<p>       
        </div>
        
        
        <button type="Button" id="play" name="Play" onclick="toggleTable()">Draw</button>
        <button type="Button" id="reveal" name="Reveal" onclick="revealRule()">Reveal rule</button>
        <button type="Button" id="hide" name="Hide" onclick="hideRule()">Restart</button>   
 
        
        <script>
        function toggleTable() {
            var i;
            var rows_count= document.getElementById("carddata").rows.length;
            i = Math.floor(Math.random() * rows_count-2) + 2;
            j=i.toString();
            document.getElementById('play_table').innerHTML = document.getElementById(j).innerHTML; 
        }
        </script>
        
        <script>
        function revealRule() {
            var item_count = document.getElementsByClassName('rule_name').length;
            document.getElementsByClassName('rule_bool')[item_count-1].style.color= "transparent";   
            document.getElementById('rule_display').innerHTML =  "True if " +
            document.getElementsByClassName('dimension_name')[item_count-1].innerHTML + "&nbsp;("  +
            document.getElementsByClassName('dimension_value')[item_count-1].innerHTML + ")&nbsp;" +
            document.getElementsByClassName('rule_name')[item_count-1].innerHTML + "." + "&nbsp;" + "This hand is " +
            document.getElementsByClassName('rule_bool')[item_count-1].innerHTML;
            document.getElementById('rule_display').style.color="ffffff";
            document.getElementsByClassName('rule_bool').style.color="green";
            
        }  
        </script>         
        <script>
        function hideRule() {
        document.getElementById('rule_display').style.color="transparent";
        toggleTable();
    
        }
        </script>
        
       
        </body>
        </html>
        
        """

        # Auto-refresh snippet; defined here but not used by this method.
        html_ai_play_before = """
        <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.0/jquery.min.js"></script>
        <script type="text/javascript">
        var auto_refresh = setInterval(function () {
                                        $('#play_area').load('#play_area');
                                        }, 1000);

        </script>
        """
        # Echo the assembled page to stdout before returning it.
        print(
            str(html_human_play_before) + str(t) + str(html_human_play_after))

        return str(html_human_play_before) + str(t) + str(
            html_human_play_after)

예제 #28

0

파일 보기

    def parseData(self, inputString):
        """Extract an amount and a currency name from free-form price text.

        The amount is resolved in three attempts: ``Price.fromstring`` (for
        inputs such as ``20$`` or ``£20``), then ``w2n.word_to_num`` (for
        inputs such as ``twenty dollars``), then the first run of digits.
        The currency is resolved against the inline currency table, first by
        symbol and then by currency word.

        Args:
            inputString: text to parse; commas are removed before parsing.

        Returns:
            Whatever ``self.retDataFrame(amount, currency)`` returns.
            ``amount`` is ``""`` when no number was found and ``currency``
            is ``""`` when nothing in the table matched.
        """
        # Inline currency table (the original note refers to a "Default.xlsx"
        # Excel file holding this information).
        currencyDF = pd.DataFrame({
            'currency': [
                'Dollars', 'Pounds', 'Shillings', 'Cents', 'Pistoles',
                'Guineas', 'Pence', 'Texas Dollars'
            ],
            'symbol': ['$', '£', None, None, None, None, None, '$ texas'],
            'string': [
                'dollar', 'pound', 'shilling', 'cent', 'pistole', 'guineas',
                'pence', 'texas dollar'
            ]
        })
        unit = ""
        amount = ""
        # Get amount and currency from the input string, e.g. 20$ or £20
        inputString = inputString.replace(',', '')
        price = Price.fromstring(inputString)
        if price.amount is not None and price.currency is not None:
            amount = price.amount
            unit = price.currency

        # Fall back to number words, then to the first digit run,
        # e.g. "twenty dollars" or "20 dollars"
        if amount == "":
            try:
                amount = w2n.word_to_num(inputString)
            except ValueError:
                digits = re.search(r'\d+', inputString)
                amount = int(digits.group()) if digits else ""

        # Resolve the currency name
        updatedUnit = ""
        for _, row in currencyDF.iterrows():
            symbol_text = row['symbol']
            # Symbol column: skipped for currencies that have no symbol
            if symbol_text != "" and not pd.isnull(symbol_text):
                symbolCheck = symbol_text.split(' ')

                if len(symbolCheck) != 1:
                    # Compound symbol such as "$ texas": the sign must match
                    # the parsed unit and the qualifier must appear in the
                    # text.
                    if (unit == symbolCheck[0]
                            and symbolCheck[1] in inputString):
                        return self.retDataFrame(amount, row['currency'])
                else:
                    # Spell the symbol out so the word check below, and the
                    # checks for later rows, can see it.
                    inputString = inputString.replace(symbol_text,
                                                      row['string'])
                    if unit == symbol_text:
                        updatedUnit = row['currency']

            # String column: match on the currency word(s).  checkString's
            # return type is not visible here, so the explicit comparison
            # with True is preserved.
            name_words = row['string'].split(' ')
            if self.checkString(inputString.lower(), name_words) == True:
                if len(name_words) == 1:
                    updatedUnit = row['currency']
                else:
                    return self.retDataFrame(amount, row['currency'])

        return self.retDataFrame(amount, updatedUnit)

예제 #29

0

파일 보기

    temp = {}

    # title is an attribute of the <a> tag inside <h3> which is inside <article>
    # as there is only 1 <article> we use the " . " to get inside it ,same with h3,a. attrs lists the attributes
    temp["title"] = item.article.h3.a.attrs["title"]

    # price is inside <div> with class "product_price", but since there is more than one <div> we need to use find
    # inside that <div> we  find a <p> with class "price_color" and use text to get the data and splice off the undesired unicode at the front [1:]
    temp["price"] = item.article.find("div", class_="product_price").find(
        "p", class_="price_color").text[1:]

    # stock is inside the <div class="product_price"> within <p> class "instock availability"
    # as we have more than one <div> we use find("div",class_="product_price")
    # we use the .text method to extract the text as a string and use strip() to remove white spaces
    temp["stock"] = item.article.find("div", class_="product_price").find(
        "p", class_="instock availability").text.strip()

    # star rating is a word inside the <article> within a <p> with class "star-rating"
    # we use the word2number module to make words [One,Two,Three...] --> [1,2,3..]
    temp["rating"] = w2n.word_to_num(item.article.p.attrs["class"][1])

    # store the dictionary in the list of books
    books.append(temp)

# All of the lookup paths used above were found by inspecting the page source.

# Pretty-print every scraped record in `books`, each followed by a divider line.
for i in books:
    pprint(i)
    print("-------------------------------------------")

예제 #30

0

파일 보기

result = str(eval(data2))
s.send(result + "\n")
print result

wordnum = False

count = 0

i = 0

while 1:
    data = s.recv(BUFFER_SIZE)
    try:
        newdata = data.split(' ')
        num1 = newdata[0] + " " + newdata[1]
        int_num1 = w2n.word_to_num(num1)
        wordnum = True
    except ValueError:
        wordnum = False
        # do normal things
        pass

    newdata = data.split(" ")

    if (len(newdata[1]) < 2):

        if not data: break
        print(data)
        #s.close()
        data2 = data[0:10:1]
        data2 = data2[:-3]

예제 #31

0

파일 보기

def fix_age(matchobj):
    """Return the matched number words as digits followed by one space.

    ``matchobj`` is a regex match object; its whole match (group 0) is passed
    to ``w2n.word_to_num``.
    """
    spelled_out = matchobj.group(0)
    value = w2n.word_to_num(spelled_out)
    return "{} ".format(value)

예제 #32

0

파일 보기

파일: input_cleaner.py 프로젝트: AlongWY/AMR

# Tokens that mark the preceding token as a clock time.
# NOTE(review): 'p.m' has no trailing dot while 'a.m.' does -- confirm which
# form the tokenizer actually emits before changing it.
_CLOCK_SUFFIXES = ("o'clock", 'am', 'a.m.', 'pm', 'p.m')


def _find_time_span(tokens):
    """Return ``(span, joined_token, pos)`` for the first time phrase, or None."""
    for i in range(1, len(tokens)):
        x, y = tokens[i - 1: i + 1]

        # 4 o'clock; 4 am; 4 a.m., etc.
        if y.lower() in _CLOCK_SUFFIXES:
            if re.search(r'^\d+[.:]?\d*[.:]?\d*$', x):
                return [i - 1, i], x + y, 'CD'
            if x.isalpha():
                try:
                    hour = w2n.word_to_num(x)
                except ValueError:
                    continue
                return [i - 1, i], str(hour) + y, 'CD'

        if (y == 'Greenwich' and i + 2 < len(tokens)
                and tokens[i + 1: i + 3] == ['Mean', 'Time']):
            return [i, i + 1, i + 2], 'GMT', 'NNP'

        if y in ('century', 'Century'):
            m = re.search(r'^(\d+)(st|nd|rd|th)?$', x)
            if m and m.group(1) != '':
                return [i - 1, i], m.group(1) + y.lower(), 'CD'
            # "twenty - first century".  The i >= 3 guard keeps i - 2 and
            # i - 3 from going negative and wrapping to the end of the list.
            if (i >= 3 and x == 'first' and tokens[i - 2] == '-'
                    and tokens[i - 3] == 'twenty'):
                return list(range(i - 3, i + 1)), '21century', 'CD'
            if x.lower() == 'eighth':
                return [i - 1, i], '8century', 'CD'
            if x.lower() == 'fifth':
                return [i - 1, i], '5century', 'CD'
            try:
                ordinal = w2n.word_to_num(x)
            except ValueError:
                continue
            # word_to_num returns a number; it must be converted to text
            # before joining (the unconverted join raised TypeError).
            return [i - 1, i], str(ordinal) + y.lower(), 'CD'
    return None


def join_time_description(amr):
    """Merge multi-token time expressions in ``amr.tokens`` into single tokens.

    Scans adjacent token pairs for clock times (``4 pm``, ``four o'clock``),
    ``Greenwich Mean Time`` and century phrases (``21st century``).  Each hit
    is collapsed through ``amr.replace_span(span, [token], [pos], ['TIME'])``
    and the scan restarts, until no further phrase is found or fewer than two
    tokens remain.

    Args:
        amr: object exposing a ``tokens`` list of strings and a
            ``replace_span`` method.
    """
    while len(amr.tokens) >= 2:
        found = _find_time_span(amr.tokens)
        if found is None:
            break
        span, joined_tokens, pos = found
        amr.replace_span(span, [joined_tokens], [pos], ['TIME'])

예제 #33

0

파일 보기

import pandas as pd
from sklearn import linear_model
from word2number import w2n
import math

# Fit a linear regression of salary on experience and the two scores.
data = pd.read_csv('datasets/hiring.csv')

# Rows 0 and 1 get the word 'zero' so that every experience entry is a number
# word (presumably those two cells are blank in the CSV -- verify).
# .loc writes into the frame itself; the chained form
# `data.experience[0] = ...` may write to a temporary copy.
data.loc[[0, 1], 'experience'] = 'zero'

# Convert the number words ("five", "eleven", ...) to numbers.
data['experience'] = data['experience'].apply(w2n.word_to_num)

data = data.rename(columns={
    'test_score(out of 10)': 'test_score',
    'interview_score(out of 10)': 'interview_score',
    'salary($)': 'salary',
})

# Fill missing test scores with the floored median, assigning the result back
# rather than using inplace=True on a column selection.
median = math.floor(data['test_score'].median())
data['test_score'] = data['test_score'].fillna(median)
print(data)

reg = linear_model.LinearRegression()
reg.fit(data[['experience', 'test_score', 'interview_score']], data['salary'])
print(reg.coef_)
print(reg.intercept_)
print(reg.predict([[2, 9, 6]]))
print(reg.predict([[12, 10, 10]]))

예제 #34

0

파일 보기

파일: update_boundary.py 프로젝트: appleternity/FrameForecasting

def _chapter_number(line):
    """Return the number in a "chapter <word>" heading line, or None."""
    if line[:7].lower() != "chapter":
        return None
    tokens = word_tokenize(line)
    if len(tokens) < 2:
        return None
    # ignore table of content: such lines mention "chapter" more than once
    if sum(1 for token in tokens if token.lower() == "chapter") > 1:
        return None
    try:
        return w2n.word_to_num(tokens[1])
    except ValueError:
        return None


def update_boundary():
    """Record where the chapters start and end for every book in the split.

    Loads ``bookcorpus/clean_split.json`` under ``data_dir``, and for each
    book that also has a file under ``bookcorpus/frame`` reads its text from
    ``bookcorpus/segment``.  When the first "chapter one" heading is within
    the first 300 lines and the last chapter heading has a number greater
    than 1, the book's entry gets ``"start"`` (line index of chapter one) and
    ``"end"`` (line index of the last chapter heading).  The updated mapping
    is written to ``bookcorpus/clean_split_updated.json``.
    """
    with open(os.path.join(data_dir, "bookcorpus", "clean_split.json"),
              'r',
              encoding='utf-8') as infile:
        book_info_dict = json.load(infile)

    # check all books
    bookcorpus_dir = os.path.join(data_dir, "bookcorpus", "segment")
    # Tallies of the heading lines that were used; they are updated below but
    # not read anywhere in this function.
    counter = Counter()
    last_counter = Counter()
    for phase, book_info_list in book_info_dict.items():
        for count, book_info in enumerate(book_info_list):
            book_name = book_info["book"]
            print("\x1b[2K\rRemoving Header in {}, {:>5} / {:>5} [{:.2f}%]".
                  format(phase, count, len(book_info_list),
                         100.0 * count / len(book_info_list)),
                  end="")

            if not os.path.isfile(
                    os.path.join(data_dir, "bookcorpus", "frame", book_name)):
                continue

            with open(os.path.join(bookcorpus_dir, book_name),
                      'r',
                      encoding='utf-8') as infile:
                lines = infile.read().split("\n")

            # find chapter one
            chapter_one = -1
            for i, line in enumerate(lines):
                if _chapter_number(line) == 1:
                    counter.update([line])
                    chapter_one = i
                    break

            # cannot find chapter 1
            if chapter_one == -1:
                continue
            # skip the odd cases where chapter 1 comes after more than
            # 300 lines
            if chapter_one > 300:
                continue

            # find last chapter: collect every numbered chapter heading
            chapter_info = []
            for i, line in enumerate(lines):
                chapter_num = _chapter_number(line)
                if chapter_num is not None:
                    chapter_info.append([chapter_num, i, line])

            # chapter_info is non-empty here: it contains at least the
            # chapter-one heading found above.
            last_chapter = chapter_info[-1]
            if last_chapter[0] > 1:
                last_counter.update([last_chapter[2]])
                book_info["start"] = chapter_one
                book_info["end"] = last_chapter[1]

    # save data
    with open(os.path.join(data_dir, "bookcorpus", "clean_split_updated.json"),
              'w',
              encoding='utf-8') as outfile:
        json.dump(book_info_dict, outfile, indent=2)

예제 #35

0

파일 보기

 f = open("num.txt", "r")
 data = f.readlines()
 datanew = list(map(str.rstrip, data))
 f.close()
 #If the phrase"and" is not located in the check
 if "AND" not in amtupper:
     amtupper += " AND ZERO"
 #Divides the dolalr amount and the cent amount
 dollar, cents = amtupper.split(" AND ")
 wordlst = dollar.split(' ')
 #Checks if the words in dollar is in the num.txt dictionary
 try:
     for word in wordlst:
         if word in datanew:
             wordnum += word + ' '
     numamt = w2n.word_to_num(wordnum)
 except ValueError:
     #returns an "Un-Readable statement if none of the words are in the num.txt dictionary"
     amt += "(Un-Readable) "
     amtupper += "(Un-Readable) "
     numamt = 0
 #turns the cents into a decimal if the cents are written as "##/100"
 try:
     num, den = cents.split('/')
     result = (float(num) / float(den))
     numamt += result
 except ValueError:
     #turns the cents into a decimal if the cents are written as words
     try:
         centsplit = cents.split(' ')
         for word in centsplit:

예제 #36

0

파일 보기

파일: untitled0.py 프로젝트: AromalPradeep/KRIC

# Main

while True:

    # User input
    i = input('user : '******'break', 'end', 'quit']:
        break
    else:

        try:
            #case 0 : a Number entered
            try:
                o = (w2n.word_to_num(i))
            except:
                try:
                    #case 1 : Normal calculations (basic calculator)
                    '''Easy way was to use eval, but using eval has great risks as it could be used to potentially crack your system'''
                    o = simple_eval(i)

                except:
                    try:
                        # clean sentence
                        words = [
                            k for k in word_tokenize(i) if k not in stop_words
                        ]
                        o = words
                        for i in words:
                            if i in [

예제 #37

0

파일 보기

from word2number import w2n

### importing csv file
df = pd.read_csv('hiring.csv')
print(df)
### cleaning the dataframe
# Assign the filled columns back instead of calling fillna(inplace=True) on a
# column selection, which may operate on a temporary copy.
df['experience'] = df['experience'].fillna(0)
median = df['test_score(out of 10)'].median()
df['test_score(out of 10)'] = df['test_score(out of 10)'].fillna(median)
print(df)

### converting number words to numbers
print()
# Convert whichever entries are still strings.  The values filled with 0 above
# are already numeric and pass through unchanged, so the conversion no longer
# depends on the blanks sitting in the first two rows.
df['experience'] = [
    w2n.word_to_num(value) if isinstance(value, str) else value
    for value in df['experience']
]
print(df['experience'].iloc[2:])
print(df)

#### creating linear regression model
lreg = linear_model.LinearRegression()
lreg.fit(
    df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']],
    df['salary($)'])
print(lreg)

### printing the coefficients

print(lreg.coef_)

예제 #38

0

파일 보기

파일: views.py 프로젝트: PrinceP/Information-Retrieval

# Lower-cased English day names mapped to datetime.weekday() indices (Monday == 0).
_WEEKDAY_INDEX = {
	'monday': 0,
	'tuesday': 1,
	'wednesday': 2,
	'thursday': 3,
	'friday': 4,
	'saturday': 5,
	'sunday': 6,
}


def _with_continuation(originalquery, finalquery, i, inside_tag):
	"""Return originalquery[i], joined with the next word when that word's tag is inside_tag."""
	phrase = originalquery[i]
	try:
		if finalquery[i+1] == inside_tag:
			phrase = phrase + ' ' + originalquery[i+1]
	except IndexError:
		# Last token of the query: there is no following word to join.
		pass
	return phrase


def analysis(originalquery,finalquery):
	"""Extract flight-search slots from a tagged query and build the request.

	Args:
		originalquery: sequence of query words.
		finalquery: sequence of slot tags, index-aligned with originalquery
			(e.g. 'B-fromloc.city_name', 'I-fromloc.city_name').

	Returns:
		A tuple (flight_data, time_period, time_relative, time_nu, tocity),
		where flight_data is whatever makeparameters() returns for the
		origin city, destination city, departure date string and par4
		(always None here).

	The departure date defaults to today ('%Y-%m-%d') and is overridden, in
	this order, by a 'tomorrow' marker, a weekday name, and an explicit
	month/day.
	"""
	fromcity = None
	tocity = None
	par3 = (time.strftime("%Y-%m-%d"))
	par4 = None
	time_day = None
	time_relative = None
	time_nu = None
	time_period = None
	time_tom = None
	time_spec_month = None
	time_spec_date = None

	for i, tag in enumerate(finalquery):
		if tag == 'B-fromloc.city_name':
			fromcity = _with_continuation(originalquery, finalquery, i, 'I-fromloc.city_name')

		elif tag == 'B-toloc.city_name':
			tocity = _with_continuation(originalquery, finalquery, i, 'I-toloc.city_name')

		elif tag == 'B-depart_date.day_name':
			time_day = originalquery[i]

		elif tag == 'B-depart_time.time_relative':
			time_relative = originalquery[i]

		elif tag == 'B-depart_time.time':
			time_nu = _with_continuation(originalquery, finalquery, i, 'I-depart_time.time')
			# NOTE(review): the next word is appended unconditionally as well,
			# so it appears twice when it is tagged 'I-depart_time.time'.
			# Kept as-is because callers may rely on the current string shape.
			try:
				time_nu = time_nu + ' ' + originalquery[i+1]
			except IndexError:
				pass

		elif tag == 'B-arrive_time.period_of_day':
			time_period = _with_continuation(originalquery, finalquery, i, 'I-arrive_time.period_of_day')
			# NOTE(review): same unconditional second append as for time_nu.
			try:
				time_period = time_period + ' ' + originalquery[i+1]
			except IndexError:
				pass

		elif tag == 'B-depart_date.month_name':
			time_spec_month = month_converter(originalquery[i])

		elif tag == 'B-depart_date.day_number':
			time_spec_date = w2n.word_to_num(originalquery[i])

		elif tag == 'B-depart_date.today_relative':
			time_tom = originalquery[i]

	if time_tom == 'tomorrow':
		date_tom = datetime.now()+ relativedelta(days=1)
		par3 = date_tom.strftime('%Y-%m-%d')

	if time_day is not None:
		target_weekday = _WEEKDAY_INDEX.get(time_day.lower())
		# An unrecognised day name leaves the date untouched instead of
		# failing on int - str arithmetic.
		if target_weekday is not None:
			# Days until the next occurrence of the requested weekday
			# (0 when it is today). The offset must be target - today;
			# today - target lands on a different weekday altogether.
			days_ahead = (target_weekday - datetime.today().weekday()) % 7
			date_after = datetime.now()+ relativedelta(days=days_ahead)
			print(date_after)
			par3 = date_after.strftime('%Y-%m-%d')

	if time_spec_month is not None:
		# Zero-pad single-digit month/day so the result matches the
		# '%Y-%m-%d' layout used for every other par3 value.
		# NOTE(review): the year is hard-coded to 2016, and a month given
		# without a day number renders the day as 'None'.
		par3 = '2016-'+str(time_spec_month).zfill(2)+'-'+str(time_spec_date).zfill(2)
		print(par3)

	flight_data  =  makeparameters(fromcity,tocity,par3,par4)
	return flight_data,time_period,time_relative,time_nu,tocity