async def resolve(self, path, movie):
    if movie.title:
        return movie
    words = path.split()
    for i, word in enumerate(words):
        try:
            if w2n.word_to_num(word):
                words[i] = str(w2n.word_to_num(word))
        except (ValueError, IndexError):
            # word_to_num signals "not a number word" with ValueError
            # (some versions also raise IndexError); skip those words
            pass
    path = ' '.join(words)
    return await super().resolve(path, movie)
def test_output(self):
    self.assertEqual(w2n.word_to_num("two million three thousand nine hundred and eighty four"), 2003984)
    self.assertEqual(w2n.word_to_num("nineteen"), 19)
    self.assertEqual(w2n.word_to_num('three billion'), 3000000000)
    self.assertEqual(w2n.word_to_num('three million'), 3000000)
    self.assertEqual(w2n.word_to_num('one hundred twenty three million four hundred fifty six thousand seven hundred and eighty nine'), 123456789)
    self.assertEqual(w2n.word_to_num('eleven'), 11)
    self.assertEqual(w2n.word_to_num('nineteen billion and nineteen'), 19000000019)
    self.assertEqual(w2n.word_to_num('one hundred and forty two'), 142)
    self.assertEqual(w2n.word_to_num('one hundred thirty-five'), 135)
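# Most snippets in this collection wrap w2n.word_to_num in try/except because the
# function raises ValueError when the input contains no recognizable number words.
# A minimal sketch of that error path (the exact message may differ between
# word2number versions):
from word2number import w2n

try:
    w2n.word_to_num("no numbers here")
except ValueError as err:
    print("not a number:", err)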
def get_integer(text):
    # Accept either a digit string ("42") or a spelled-out number ("forty two").
    try:
        return int(text)
    except (TypeError, ValueError):
        return w2n.word_to_num(text)
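# Quick usage sketch of the get_integer helper above; both calls return 42,
# the first via int(), the second via w2n.word_to_num().
print(get_integer("42"))         # -> 42
print(get_integer("forty two"))  # -> 42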
def noms_from_oscars(oscarsstring):
    """Converts descriptive oscars text to number of nominations as int."""
    try:
        nominations_str = (str(oscarsstring.split(",")[0]).strip().lower()).split(" ")
        nominations = w2n.word_to_num(nominations_str[2])
        return nominations
    except Exception:
        return 0


def wins_from_oscars(oscarsstring):
    """Converts descriptive oscars text to number of wins as int."""
    try:
        wins_str = (str(oscarsstring.split(",")[1]).replace(".", "").strip().lower()).split(" ")
        wins = w2n.word_to_num(wins_str[1])
        return wins
    except Exception:
        return 0
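# Worked example for the two Oscars parsers above, using a hypothetical input string
# in the shape they expect (comma-separated nominations and wins, with the number
# word at a fixed position in each half):
text = "nominated for two oscars, won one oscar."  # hypothetical format
print(noms_from_oscars(text))  # split(",")[0] -> "nominated for two oscars" -> word [2] -> 2
print(wins_from_oscars(text))  # split(",")[1] -> "won one oscar"            -> word [1] -> 1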
def get_query(self, value):
    qset = Q()
    for word in value.split():
        qset &= Q(name__icontains=word)
        try:
            qset |= Q(name__icontains=num2words(word))
        except TypeError:
            word_converted_to_num = word_to_num(word)
            if type(word_converted_to_num) == int:
                qset |= Q(name__icontains=word_converted_to_num)
    return qset
def clean_text(text):
    try:
        cleaned_text = contractions.fix(text.strip()).lower()
    except IndexError:
        cleaned_text = text.strip().lower()
    words = [x for x in cleaned_text.split(' ') if x]
    current_number = []
    i = 0
    while i < len(words):
        try:
            # collect consecutive numeric tokens (digits or number words)
            try:
                float(words[i])
                current_number.append(words[i])
                words.pop(i)
            except ValueError:
                pass
            if i < len(words):
                try:
                    word_to_num(words[i])
                except IndexError:
                    raise ValueError
                current_number.append(words[i])
                words.pop(i)
        except ValueError:
            # current token is not numeric: flush any collected number first
            if current_number:
                j = 0
                prod = 1
                while j < len(current_number):
                    try:
                        prod *= float(current_number[j])
                        current_number.pop(j)
                    except ValueError:
                        j += 1
                if current_number:
                    try:
                        num = prod * word_to_num(' '.join(current_number))
                    except:
                        num = prod
                else:
                    num = prod
                words.insert(
                    i,
                    str(int(num) if type(num) == float and num.is_integer() else num))
                current_number.clear()
                i += 1
            # drop short tokens and stop words, otherwise move on
            if len(words[i]) <= 2 or words[i] in get_stop_words('en'):
                words.pop(i)
                continue
            i += 1
    else:
        # flush a number left over at the end of the sentence
        if current_number:
            try:
                words.append(str(word_to_num(' '.join(current_number))))
            except ValueError:
                words.append(' '.join(current_number))
    return ''.join(x for x in ' '.join(words) if x.isalnum() or x == ' ')
def tokenize_text(self, remove_accentedChar=True, remove_httpLinks=True, expand_contractionMap=True, handle_emoji=True, convert_word2Number=True, remove_whiteSpace=True, lemmatization=True, lowercase=True, remove_punctuations=True, remove_number=True, remove_specialChars=True, expand_slang=True, remove_stopWords=True, correct_spelling_byWord=True, correct_spelling=False): # pre cleaning self.text = self.define_preCleaning() # sentence level preprocess if remove_httpLinks: self.text = self.remove_httpLinks() if remove_whiteSpace: self.text = self.remove_whiteSpace() if remove_accentedChar: self.text = self.remove_accentedNotation() if expand_contractionMap: self.text = self.expand_contractionMap() if lowercase: self.text = self.text.lower() if correct_spelling: self.text = self.correct_spelling() if handle_emoji: self.text = self.handle_emotion() if expand_slang: self.text = self.expand_slang() doc = self.nlp(self.text) cleanText = [] errorLog = [] # init a punctDict punctUsageStatsDict = {k: 0 for k in self.notDeletePunctList} # word-level preprocess for token in doc: flag = True editFlag = False toEdit = token.text edit = "" # remove stop words try: if remove_stopWords and token.is_stop and token.pos_ != "NUM": flag = False # remove punctuations if remove_punctuations and token.pos_ == "PUNCT" and ( toEdit not in self.notDeletePunctList) and flag: flag = False # remove special characters if remove_specialChars and token.pos_ in [ "SYM", "ADP", "X", "AUX" ] and flag: flag = False # remove numbers if remove_number and (token.pos_ == "NUM" or toEdit.isnumeric()) and flag: flag = False # convert word to numbers if convert_word2Number and token.pos_ == "NUM" and flag: edit = w2n.word_to_num(toEdit) # lemmatization if lemmatization and token.is_alpha and token.lemma_ != "-PRON-" and flag: edit = token.lemma_ editFlag = True # correct spelling by word if correct_spelling_byWord and token.is_alpha and token.lemma_ != "-PRON-" and flag: edit = self.correct_spelling_oneWord(token.lemma_) editFlag = True # constraint repeatition times of allowed punctuations if token.pos_ == "PUNCT" and toEdit in self.notDeletePunctList and flag: punctUsageStatsDict[toEdit] += 1 if punctUsageStatsDict[ toEdit] > self.maximumPunctRepeatition: flag = False # append valid result to cleanText if toEdit != "" and flag and editFlag: cleanText.append(str(edit)) if toEdit != "" and flag and (not editFlag): cleanText.append(str(toEdit)) except: errorLog.append(" ".join([str(e) for e in doc])) break doc = self.NER_tokenize(" ".join(cleanText)) #re-tokenization if self.returnToken: return ([str(e) for e in doc], errorLog) else: return (" ".join([str(e) for e in doc]), errorLog)
import pandas as pd
from word2number import w2n
import pickle

data = pd.read_csv("HiringSalary.csv")
data.fillna(0, inplace=True)
data['experience'] = data['experience'].astype(str)
data['experience'] = data['experience'].apply(lambda x: w2n.word_to_num(x))

X = data.iloc[:, [0, 1, 2]].values
y = data.iloc[:, -1].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

from sklearn.linear_model import LinearRegression
s_LinearRegression = LinearRegression()
s_LinearRegression.fit(X_train, y_train)

# Save the model
pickle.dump(s_LinearRegression, open('HiringSalaryModel.pkl', 'wb'))

# Loading the model to predict the result
model = pickle.load(open('HiringSalaryModel.pkl', 'rb'))
print(model.predict([[20, 10, 10]]))
def preprocess(): for fil in os.listdir(directory): flist = [] filename = os.fsdecode(fil) file = open("/Users/skosgi/Downloads/10k_1900_org_sample/"+filename,'r') ak = False for f in file: f = f[:-1] if "</Header>" in f: #Read lines only after occurance of </Header> ak = True continue if ak==False: continue doc= nlp(f) #applying NER nermap = {} # Map to hold mapping from NER applied tokens to original text for X in doc.ents: if X.label_=='ORG' or X.label_=='PERSON': # If the NER class is ORG or PERSON text = re.sub(r'[^\w\s]', '', X.text) text = text.replace(" ","") f = f.replace(X.text,text) nermap[text] = X.text if X.label_ == 'MONEY': #If NER class is MONEY new_X = X.text if 'approximately' in new_X.lower(): #Remove all the words which might appear in NER money class new_X = new_X.lower().replace('approximately','') if 'per' in new_X.lower(): new_X = new_X.lower().replace('per','') if 'to' in new_X.lower(): new_X = new_X.lower().replace('to','') if 'and' in new_X.lower(): new_X = new_X.lower().replace('and','') if 'between' in new_X.lower(): new_X = new_X.lower().replace('between','') if 'phone' in new_X.lower(): continue doc1 = nlp(new_X) #Apply NER for the string which is obtained after removing other words this gives $200, $500 as separate ones for Y in doc1.ents: money = Y.text[Y.text.find("$")+1:] if ' ' not in money: act_money = money.replace(',','') #Actual Money f = f.replace(Y.text,act_money) #Replace original money text with actual money #print(act_money) else: k = money.find(' ') try: act_money = float(money[:k].replace(',','')) money_conv = w2n.word_to_num(money[k:]) #Conversion of word types million to *1e6 f = f.replace(Y.text,act_money) #Replace original money text with actual money print("Converted from",money,act_money*money_conv) except: continue # if any exception dont modify the original sentence and continue #To be changed: Start of preprocessing to sentences after applying NER fl = f.split(" ") for fi in fl: if fi in nermap.keys(): flist.append(nermap[fi]) else: if len(fi)>0: flist.append(fi) print(flist) #To be added: Write to a file
def bot_command_rule_ai(bot, trigger): # don't run commands that are disabled in channels if not trigger.is_privmsg: channel_disabled_list = SpiceBot.commands.get_commands_disabled( str(trigger.sender), "fully") if "nickname_ai" in list(channel_disabled_list.keys()): return # don't run commands that are disabled for specific users nick_disabled_list = SpiceBot.commands.get_commands_disabled( str(trigger.nick), "fully") if "nickname_ai" in list(nick_disabled_list.keys()): return # TODO add config limits # but still allow in privmsg if trigger.nick == bot.nick: return if not len(trigger.args): return message = trigger.args[1] # the bot brain cannot handle stuff like unicode shrug message = ''.join([x for x in message if ord(x) < 128]) # Create list of valid commands commands_list = [] for commandstype in list(SpiceBot.commands.dict['commands'].keys()): if commandstype not in ['rule', 'nickname']: for com in list( SpiceBot.commands.dict['commands'][commandstype].keys()): if com not in commands_list: commands_list.append(com) if str(message).lower().startswith(str(bot.nick).lower()): command_type = 'nickname' trigger_args, trigger_command, trigger_prefix = SpiceBot.make_trigger_args( message, 'nickname') trigger_args.insert(0, trigger_command) fulltrigger = bot.nick + " " + spicemanip(trigger_args, 0) if str(trigger_command).startswith( bot.config.SpiceBot_Commands.query_prefix): return if fulltrigger in SpiceBot.commands.dict['nickrules']: return if trigger_command in list( SpiceBot.commands.dict['commands']["nickname"].keys()): return elif str(message).lower().startswith( bot.config.SpiceBot_Commands.query_prefix): # no query commands detection here return elif str(message).startswith(tuple(bot.config.core.prefix_list)): command_type = 'module' trigger_args, trigger_command, trigger_prefix = SpiceBot.make_trigger_args( message, 'module') trigger_args.insert(0, trigger_command) fulltrigger = spicemanip(trigger_args, 0) # patch for people typing "...", maybe other stuff, but this verifies that there is still a command here if trigger_command.startswith(tuple(bot.config.core.prefix_list)): return # If valid command don't continue further if trigger_command in commands_list: return else: command_type = 'other' trigger_args = spicemanip(message, 'create') if not len(trigger_args): return trigger_command = trigger_args[0] fulltrigger = spicemanip(trigger_args, 0) returnmessage = SpiceBot.botai.on_message(bot, trigger, fulltrigger) if returnmessage: bot.osd(str(returnmessage)) return if command_type == 'nickname': try_trigger = spicemanip(fulltrigger, "2+") returnmessage = SpiceBot.botai.on_message(bot, trigger, try_trigger) if returnmessage: bot.osd(str(returnmessage)) return if command_type == 'module': if trigger_command not in commands_list: if not SpiceBot.letters_in_string(trigger_command): return invalid_display = [ "I don't seem to have a command for " + str(trigger_command) + "!" 
] # create list of valid commands commands_list = dict() for commandstype in list( SpiceBot.commands.dict['commands'].keys()): if commandstype not in ['rule', 'nickname']: for com in list(SpiceBot.commands.dict['commands'] [commandstype].keys()): if com not in list(commands_list.keys()): commands_list[com] = SpiceBot.commands.dict[ 'commands'][commandstype][com] # hyphen args handling hyphen_args = [] argssplit = spicemanip(fulltrigger, "2+", 'list') for worditem in argssplit: if str(worditem).startswith("--"): clipped_word = str(worditem[2:]).lower() # valid arg above if clipped_word in SpiceBot.prerun_shared.valid_hyphen_args: hyphen_args.append(clipped_word) # numbered args elif str(clipped_word).isdigit(): hyphen_args.append(int(clipped_word)) elif clipped_word in list( SpiceBot.prerun_shared.numdict.keys()): hyphen_args.append( int(SpiceBot.prerun_shared.numdict[clipped_word])) else: # check if arg word is a number try: clipped_word = w2n.word_to_num(str(clipped_word)) hyphen_args.append(int(clipped_word)) # word is not a valid arg or number except ValueError: clipped_word = None if len(hyphen_args): hyphenarg = hyphen_args[0] if hyphenarg: invalid_display.append("Hyphen Argument Not Valid.") closestmatches = SpiceBot.similar_list(trigger_command, list(commands_list.keys()), 10, 'reverse') if len(closestmatches) and len(hyphen_args): # TODO invalid_display.append("The following commands may match " + str(trigger_command) + ": " + spicemanip(closestmatches, 'andlist') + ".") # there is simply no command else: # TODO check other commands spelling, maybe there is a similar command # invalid_display = ["I don't seem to have a command for " + str(trigger_command) + "!"] # TODO # invalid_display.append("If you have a suggestion for this command, you can run .feature ." + str(trigger_command)) # invalid_display.append("ADD DESCRIPTION HERE!") if not len(hyphen_args): invalid_display = [] if len(invalid_display): bot.osd(invalid_display, trigger.nick, 'notice') return elif command_type == 'nickname': # ignore spelling correction if trigger_args[0].lower().startswith("s/"): bot.osd("I meant what I said!") return elif trigger_args[0].lower() in [ "what", "where" ] and trigger_args[1].lower() in ["is", "are"]: # TODO saved definitions searchterm = spicemanip(trigger_args, "3+") or None if searchterm: if trigger_args[0].lower() == "where": searchdict = { "type": "gmaps", "query": searchterm, } searchreturn = SpiceBot.search.search(searchdict) else: searchdict = { "query": searchterm, } searchreturn = SpiceBot.search.search(searchdict) if not searchreturn: searchreturn = 'I cannot find anything about that' if trigger_args[0].lower() == "where": bot.osd([ "[Location search for " + str(searchterm) + "]", str(searchreturn) ]) else: bot.osd([ "[Information search for '" + str(searchterm) + "']", str(searchreturn) ]) return elif trigger_args[0].lower() in [ "can", "have" ] and trigger_args[1].lower() in [ "you" ] and trigger_args[2].lower() in ["see", "seen"]: target = spicemanip(trigger_args, "4+") or None if target: if SpiceBot.inlist(trigger.nick, bot.users): realtarget = SpiceBot.inlist_match(target, bot.users) dispmsg = [trigger.nick + ", yes. I can see " + realtarget] targetchannels = [] for channel in list(bot.channels.keys()): if SpiceBot.inlist( trigger.nick, list(bot.channels[channel].privileges.keys())): targetchannels.append(channel) dispmsg.append(realtarget + " is in " + spicemanip(targetchannels, 'andlist')) bot.osd(dispmsg) else: bot.osd(trigger.nick + ", no. 
I cannot see " + target + " right now!") # if bot_check_inlist(target, list(bot.memory["botdict"]["users"].keys())): # bot.osd(trigger.nick + ", I can't see " + inlist_match(target, bot.users) + " at the moment.") # else: # bot.osd("I have never seen " + str(target) + ".") # user in list(bot.channels[channel].privileges.keys()) # TODO return elif fulltrigger.lower().endswith("order 66"): if fulltrigger.lower() == "execute order 66": if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('owners')): if trigger.is_privmsg: jedi = None else: jedilist = list( bot.channels[trigger.sender].privileges.keys()) for nonjedi in [bot.nick, trigger.nick]: if nonjedi in jedilist: jedilist.remove(nonjedi) jedi = spicemanip(jedilist, 'random') if jedi: bot.osd("turns to " + jedi + " and shoots him.", trigger.sender, 'action') else: bot.osd(" cannot find any jedi nearby.", trigger.sender, 'action') else: bot.osd("I'm sure I don't know what you're talking about.") elif fulltrigger.lower() == "explain order 66": if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('owners')): bot.osd( "Order 66 is an instruction that only you can give, sir. When you give the order I will rise up against the jedi and slay them." ) else: bot.osd("I'm afraid I cannot tell you that, sir.") else: bot.osd("I'm sure I don't know what you're talking about.") return elif fulltrigger.lower().startswith(tuple(["make me a", "beam me a"])): makemea = spicemanip(trigger_args, "4+") or None if makemea: bot.osd("beams " + trigger.nick + " a " + makemea, trigger.sender, 'action') else: bot.osd(trigger.nick + ", what would you like me to beam you?") return elif fulltrigger.lower().startswith("beam me to"): location = spicemanip(trigger_args, "4+") or None if location: bot.osd( "locks onto " + trigger.nick + "s coordinates and transports them to " + location, 'action') else: bot.osd(trigger.nick + ", where would you like me to beam you?") return elif fulltrigger.lower() == "initiate clean slate protocol": if SpiceBot.inlist(trigger.nick, SpiceBot.bot_privs('admins')): bot.osd("sends a destruct command to the network of bots.", 'action') else: bot.osd( "I'm afraid you do not have the authority to make that call, " + trigger.nick + ".") return # elif fulltrigger.lower().startswith("what time is it"): # TODO # elif fulltrigger.lower().startswith(tuple(["have you seen"])): # posstarget = spicemanip(trigger_args, 4) or 0 # message = seen_search(bot, trigger, posstarget) # bot.osd(message) # return # TODO invalid_display = ["I don't know what you are asking me to do!"] # hyphen args handling hyphen_args = [] argssplit = spicemanip(fulltrigger, "2+", 'list') for worditem in argssplit: if str(worditem).startswith("--"): clipped_word = str(worditem[2:]).lower() # valid arg above if clipped_word in SpiceBot.prerun_shared.valid_hyphen_args: hyphen_args.append(clipped_word) # numbered args elif str(clipped_word).isdigit(): hyphen_args.append(int(clipped_word)) elif clipped_word in list( SpiceBot.prerun_shared.numdict.keys()): hyphen_args.append( int(SpiceBot.prerun_shared.numdict[clipped_word])) else: # check if arg word is a number try: clipped_word = w2n.word_to_num(str(clipped_word)) hyphen_args.append(int(clipped_word)) # word is not a valid arg or number except ValueError: clipped_word = None if len(hyphen_args): hyphenarg = hyphen_args[0] if hyphenarg: invalid_display.append("Hyphen Argument Not Valid.") closestmatches = SpiceBot.similar_list( trigger_command, list(SpiceBot.commands.dict['commands']["nickname"].keys()), 3, 'reverse') if 
len(closestmatches): closestmatches = spicemanip(closestmatches, "andlist") invalid_display.append("Did you mean: " + str(closestmatches) + "?") bot.osd(invalid_display, trigger.nick, 'notice')
tds = soup.find_all('td')
price_without_tax = tds[2].text
price_with_tax = tds[3].text
nb_in_stock = re.sub("[^0-9]", "",
                     soup.find("p", class_="instock availability").text)
img_url = main_url.replace("index.html", "") + soup.find("img").get("src")
category = soup.find(
    "a", href=re.compile("../category/books/")).get("href").split("/")[3]
rating = soup.find("p", class_=re.compile("star-rating")).get("class")[1]
rating = rating.lower()
rating_number = str((w2n.word_to_num(rating))) + "/5"
upc = soup.find("td").text
product_page_url = main_url
scraped_data = pd.DataFrame(
    {
        'title': name,
        'product_description': description,
        'price_excluding_tax': price_without_tax,
        'price_including_tax': price_with_tax,
        'number_available': nb_in_stock,
        "image_url": img_url,
        'category': category,
        'review_rating': rating_number,
# Step 6 - DCT (Mel Frequency Cepstrum Coefficient)
# (this block appears to run once per training folder, with `fol` as the label)
u1 = 0
u2 = 40
mfcc = dct(x_filtered, type=2, axis=1, norm='ortho')[:, u1:u2]  # [:,u2:u1]
if (np.shape(mfcc)[1] != u2):
    # pad the coefficient axis out to u2 columns; the original padded with
    # "u1 - np.shape(mfcc)[1]" columns, which is negative (u1 == 0), so
    # "u2 - ..." is assumed to be the intended width
    mfcc = np.hstack(
        [mfcc, np.zeros((num_filters, u2 - np.shape(mfcc)[1]))])
if (np.shape(mfcc)[0] > 100):
    mfcc = mfcc[:100]
elif (np.shape(mfcc)[0] < 100):
    mfcc = np.vstack(
        [mfcc, np.zeros((100 - len(mfcc), np.shape(mfcc)[1]))])
mfccfeatures.append(mfcc)
labels.append(word_to_num(fol))  # the folder name is a number word
# plt.show()

pickle.dump(mfccfeatures, open('1mfccfeatures.pickle', 'wb'))
pickle.dump(labels, open('1mfccfeatureslabels.pickle', 'wb'))

# In[ ]:

# Read the signal
req = 1
mfccfeatures = []
labels = []
folders = os.listdir("./validation")
for fol in folders:
    namefol = "./validation/" + fol + "/*.wav"
    audiofiles = glob.glob(namefol)
    audiofiles = sorted(audiofiles)
f.close()

# parse each page
for image in images:
    race_raw = pytesseract.image_to_string(image)
    # print(race_raw)
    race_ln = race_raw.split('\n')
    i = 0  # line index

    # race data: date, distance, wind speed, wind type, track
    date = parse((race_ln[i].split(' - '))[1]).date()
    while 'Track Record:' not in race_ln[i]:
        i += 1
    dist_str = (' '.join((race_ln[i].split(' '))[0:4]))
    dist = w2n.word_to_num(dist_str)
    while 'Wind Speed:' not in race_ln[i]:
        i += 1
    w_ln = race_ln[i].split(' ')
    ws = int(w_ln[2])
    wt = w_ln[5].upper()
    while 'Track:' not in race_ln[i]:
        i += 1
    trk = (race_ln[i].split(' '))[3].upper()

    # get to first line of horse data
    while 'Last' not in race_ln[i]:
        i += 1
    i += 1
This is a dumb calculator that can add and subtract whole numbers from zero to five.
When you run the code, you are prompted to enter two numbers (in the form of an English
word instead of a number) and the operator sign (also in the form of an English word).
The code will perform the calculation and give the result if your input is what it
expects. The code is very long and messy. Refactor it according to what you have
learned about code simplicity and efficiency.
"""
from word2number import w2n
from num2words import num2words

print('Welcome to this calculator!')
print('It can add and subtract whole numbers from zero to five')

a = w2n.word_to_num(input('Please choose your first number (zero to five): '))
b = input('What do you want to do? plus or minus: ')
c = w2n.word_to_num(input('Please choose your second number (zero to five): '))

num = [a, b, c]
if num[1] == "plus":
    answer = num[0] + num[2]
elif num[1] == "minus":  # `else` with a condition is a syntax error; `elif` is intended
    answer = num[0] - num[2]
word_answer = num2words(answer)

# if a == 'zero' and b == 'plus' and c == 'zero':
#     print("zero plus zero equals zero")
# if a == 'zero' and b == 'plus' and c == 'one':
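# The docstring above asks for a refactor, so here is a minimal sketch of one way to
# tighten the calculator: an operator lookup table instead of the if/elif chain. The
# prompts and the zero-to-five framing come from the original; everything else
# (names, the fallback message) is illustrative.
import operator
from word2number import w2n
from num2words import num2words

OPS = {"plus": operator.add, "minus": operator.sub}

print('Welcome to this calculator!')
print('It can add and subtract whole numbers from zero to five')

first = w2n.word_to_num(input('Please choose your first number (zero to five): '))
op = input('What do you want to do? plus or minus: ').strip().lower()
second = w2n.word_to_num(input('Please choose your second number (zero to five): '))

if op in OPS:
    # convert the numeric result back to words, as the original does with num2words
    print(num2words(OPS[op](first, second)))
else:
    print('Sorry, I only understand "plus" and "minus".')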
def parse_sentence(input):
    # tx = timex.tag(input)
    pos_tagged = nltk.pos_tag(word_tokenize(input))
    # print(pos_tagged)
    sentence_type = 1
    important_words = []
    timevalue = 0
    for pos in pos_tagged:
        val = pos[0].lower()
        key = pos[1]
        time = Time()
        if (key == 'CD'):
            if (val.isdigit()):
                timevalue = int(val)
            else:
                timevalue = w2n.word_to_num(val)
        if (key == 'NN' or key == 'NNS'):
            if (val[:4] == 'year' or val[:5] == 'month' or val[:4] == 'week'):
                important_words += [val]
                sentence_type = 2
            if (val == 'minutes'):
                important_words += [val]
        if (key == 'JJ'):
            if (val == 'next' or val == 'last'):
                important_words += [val]
                sentence_type = 2
        if (key == 'IN'):
            if (val == 'before' or val == 'after'):
                important_words += [val]
                sentence_type = 3
        if (val in ['hour', 'hours', 'minutes', 'minute', 'morning', 'evening']):
            important_words += [val]
    # print(important_words)
    if (sentence_type == 3):
        # TODO: check if increment is required
        time.inc_date()
        # print(timevalue)
        time.set_hours(timevalue)
        time.set_min(0)
        if ('morning' in important_words):
            if (time.get_ihours() > 12):
                time.update_hours(-12)
        elif ('evening' in important_words):
            if (time.get_ihours() < 12):
                time.update_hours(12)
        print(types_of_sentence[sentence_type], time.get_human())
    if (sentence_type == 2):
        delta = 0
        if ('next' in important_words):
            delta = 1
        elif ('last' in important_words):
            delta = -1
        print(types_of_sentence[sentence_type], time.get_year(),
              time.get_iyear() + delta * timevalue)
    if (sentence_type == 1):
        if ('hour' in important_words or 'hours' in important_words):
            time.update_hours(timevalue)
        elif ('minute' in important_words or 'minutes' in important_words):
            time.update_min(timevalue)
        if ('morning' in important_words):
            if (time.get_ihours() > 12):
                time.update_hours(-12)
        elif ('evening' in important_words):
            if (time.get_ihours() < 12):
                time.update_hours(12)
        print(types_of_sentence[sentence_type], time.get_human())
with sr.Microphone() as source:
    while "addyu" not in transcript:
        audio = r.listen(source)
        print(transcript)
        try:
            transcript = r.recognize_google(audio)
            testy = transcript.split()
            if (testy[0] == 'add') and len(testy) >= 5:
                i = 4
                food = ''
                while (i < len(testy)):
                    food += testy[i]
                    i = i + 1
                print("food: " + food)
                print("quantity: " + testy[1] + " " + testy[2])
                quan = w2n.word_to_num(testy[1])
                quan = str(quan)
                print("Quantity: " + quan)
                print("'" + food + "'" + " " + quan + " " + testy[2])
                muterun_js(
                    'node/index2.js',
                    "'" + food.lower() + "'" + " " + quan + " " + testy[2])
                print("updated")
            if testy[0] == 'remove' and len(testy) >= 5:
                i = 4
                food = ''
                while (i < len(testy)):
                    food += testy[i]
                    i = i + 1
                print("food: " + food)
                print("quantity: " + testy[1] + " " + testy[2])
def findNumbersInWords(words):
    ind = 0
    numbers = []
    for i in range(len(words)):
        word = words[i]
        s = word
        j = i
        prevNum = None
        couldBeNum = True
        num = None
        for letter in word:
            if letter not in "0123456789.()/*+-":
                couldBeNum = False
                break
        if couldBeNum:
            try:
                num = eval(word)
                num = float(num)
            except:
                num = None
        if num != None:
            # digit / arithmetic token: replace with a placeholder "a<index>"
            if num not in numbers:
                words[i] = "a" + str(ind)
                numbers.append(num)
                ind += 1
            else:
                tempInd = numbers.index(num)
                words[i] = "a" + str(tempInd)
        else:
            # spelled-out number: greedily extend the span while it still parses
            try:
                num = w2n.word_to_num(s)
            except:
                num = None
            while (num != prevNum):
                prevNum = num
                j += 1
                try:
                    num = None
                    if words[j] == "point":
                        s += " " + words[j] + " " + words[j + 1]
                        tempNum = w2n.word_to_num(s)
                        if tempNum != prevNum:
                            num = tempNum
                            j += 1
                    if num == None:
                        s += " " + words[j]
                        num = w2n.word_to_num(s)
                except:
                    num = prevNum
            if num != None:
                if num not in numbers:
                    words[i] = "a" + str(ind)
                    numbers.append(num)
                    ind += 1
                else:
                    tempInd = numbers.index(num)
                    words[i] = "a" + str(tempInd)
                for k in range(i + 1, j):
                    words[k] = ""
    return numbers
def __applyner(sequence): ######################################################################################### # This method applies NER and returns the sequence according to the operation performed based on NER tag. ######################################################################################### pickfile = open( '/home/madhvi/IRE/MajorProject/Representations-in-Financial-Domain/tickermapping.pickle', 'rb') tickermapping = pickle.load(pickfile) ner_tags = [] doc = nlp(sequence) # applying NER for X in doc.ents: # If the NER class is ORG if X.label_ == 'ORG': "X.text can take microsoft corp or abcd name MSFT" text = X.text if text in tickermapping.keys(): text = tickermapping[X.text] text = re.sub(r'[^\w\s]', '', X.text).lower() if 'inc' in text: text = text.replace('inc', '') if 'ltd' in text: text = text.replace('ltd', '') if 'llp' in text: text = text.replace('llp', '') if 'limited' in text: text = text.replace('limited', '') if 'corp' in text: text = text.replace('corp', '') if 'the' in text.lower(): text = text.replace('the', '') sequence = sequence.replace(X.text, text) ner_tags.extend(text.lower().split(" ")) # If NER class is MONEY if X.label_ == 'MONEY': new_X = X.text.lower() if 'approximately' in new_X: # Remove all the words which might appear in NER money class new_X = new_X.replace('approximately', '') if 'per' in new_X: new_X = new_X.replace('per', '') if 'to' in new_X: new_X = new_X.replace('to', '') if 'and' in new_X: new_X = new_X.replace('and', '') if 'between' in new_X: new_X = new_X.replace('between', '') if 'phone' in new_X: continue # Apply NER for the string which is obtained after removing other words this gives $200, $500 as separate ones if '$' not in new_X: new_X = "$" + new_X doc1 = nlp(new_X) for Y in doc1.ents: money = Y.text if ' ' not in money: act_money = money.replace(',', '') # Actual Money #act_money = act_money.replace('.','') sequence = sequence.replace( Y.text, act_money ) # Replace original money text with actual money ner_tags.append(act_money) # print(act_money) else: money = Y.text[Y.text.find("$") + 1:] k = money.find(' ') try: act_money = float(money[:k].replace(',', '')) #act_money = act_money.replace('.','') money_conv = w2n.word_to_num( money[k:] ) # Conversion of word types million to *1e6 sequence = sequence.replace( Y.text, "$ " + str(act_money * money_conv) ) # Replace original money text with actual money #print("Converted from", money, act_money * money_conv) except: continue # if any exception dont modify the original sentence and continue # If NER class is LAW if X.label_ == 'LAW': new_X = X.text new_X = re.sub(r'[\d.!?\-"]', '', new_X) if 'the' in new_X.lower(): new_X = new_X.lower().replace('the', '') if 'of' in new_X.lower(): new_X = new_X.lower().replace('of', '') if 'section' in new_X.lower(): new_X = new_X.lower().replace('section', '') sequence = sequence.replace(X.text, new_X) ner_tags.extend(new_X.split(" ")) # If NER class is Location if X.label_ == 'GPE': new_X = X.text.lower() new_X = re.sub(r'[\d.!?\-"]', '', new_X) if 'the' in new_X.lower(): new_X = new_X.lower().replace('the', '') if '.' in new_X.lower(): new_X = new_X.lower().replace('.', '') sequence = sequence.replace(X.text, new_X) ner_tags.extend(new_X.split(" ")) # If NER class is Person if X.label_ == 'PERSON': new_X = X.text.lower() new_X = re.sub(r'[\d.!?\-"]', '', new_X) if 'the' in new_X.lower(): new_X = new_X.lower().replace('the', '') if '.' 
in new_X.lower(): new_X = new_X.lower().replace('.', '') sequence = sequence.replace(X.text, new_X) ner_tags.extend(new_X.split(" ")) if X.label_ == 'CARDINAL': number = X.text number = number.replace(',', '') #number = number.replace('.','') if number.isnumeric(): sequence = sequence.replace(X.text, number) if X.label_ == 'QUANTITY': quantity = X.text.split(" ") for number in quantity: number = number.replace(',', '') number = number.replace('.', '') if number.isnumeric(): sequence = sequence.replace(X.text, number) if X.label_ == "PERCENT": percent = X.text.replace('%', '') ner_tags.append(percent) return sequence, ner_tags
        age = age.replace("old", "")
        age = age.strip()
        if age.find(" ") >= 0:
            temp = age.split(" ")
            # print(temp)
            age = '-'.join(temp)
        syns = wordnet.synsets(age.strip())
        # print("A", age)
        # print("S", syns[0].lemmas()[0].name())
        age = syns[0].lemmas()[0].name()
        if age.find("-") >= 0:
            temp = age.split("-")
            # print(temp)
            age = ' '.join(temp)
        # print(age.strip())
        defendants_age.append(w2n.word_to_num(age.strip()))
    else:
        defendants_age.append(int(0))
        # print("**")

# mean = int(sum(defendants_age)/sum(1 for x in defendants_age if x > 0))
# print("mean", mean)
# print(defendants_age)
# defendants_age = [mean for age in defendants_age if age == 0]  # substitute not known with mean value
# for i in range(len(defendants_age)):
#     if defendants_age[i] == 0:
#         defendants_age[i] = mean
# print("******")
# print(len(defendants_age))
# # print(len(labels))
from word2number import w2n

extended_tech = [
    'GOOGLE', 'ZOOM', 'INSTAGRAM', 'FACEBOOK', 'WAHATSAPP', 'ALPHABET',
    'AMAZON', 'NOKIA', 'HTC', 'APPLE', 'TENCENT', 'ZOOM', 'MICROSOFT',
    'ORACLE', 'COMPASS', ' RAZER'
]
immediacy_indicators = [
    'NOW', 'TODAY', 'PRESENTLY', 'CURRENTLY',
]
every = ['EVERY', 'PER', 'ALL', 'ANY', 'TOTAL', 'ONE BY ONE']
interest_syn = [
    'INTEREST', 'LIKE', 'ENGAGE', 'EXCITE', 'LIKE', 'ENJOY', 'LOVE',
    'PREFER', 'WANT', 'APPRECIATE', 'NEED', 'WISH', 'EAGER'
]
numbers = ['ONE', 'TWO']

print(w2n.word_to_num('twenty three'))
def parseResources(): global_resource_list = {} # print(request.body) resource, line = {}, '' print(flask.request.json) print(unquote(flask.request.query_string.decode('utf-8'))) if flask.request and flask.request.json and 'text' in flask.request.json: line = flask.request.json['text'] else: line = json.loads(unquote( flask.request.query_string.decode('utf-8')))['text'] print('Received for parsing: ', line) contacts = get_contact(line) t2 = location.tweet_preprocess2(line, []) sources, b, locations, modified_array, rWords, final_resource_dict = create_resource_list( line) # source_list,final_resource_keys,loc_list ,dup_final_resource_keys => post_process ## source_list, final_resource_keys, loc_list_2, modified_array?, dup_final_resource_keys, final_resource_dict? # resource['x']=((line,a,b,c,modified_array,d, final_resource_dict)) resource['Contact'] = { 'Phone number': list(contacts[0]), "Email": list(contacts[1]) } resource['Sources'] = sources resource['ResourceWords'] = rWords resource['Locations'], resource['Resources'] = dict(), {} # resource['Locations'] = locations for each in locations: # print(each[0], "<>", each[1]) resource['Locations'][each[0]] = { "long": float(each[1][1]), "lat": float(each[1][0]) } # f is Resources type resources_bucket = {} for each_resource in final_resource_dict: buckets = final_resource_dict[each_resource] assigned = False for bucket in buckets: if bucket in bucket_classes and not assigned: if bucket not in resource['Resources']: resource['Resources'][bucket] = {} resource['Resources'][bucket][each_resource] = 'None' assigned = True resources_bucket[each_resource] = bucket split_text = line.split() class_list = {} for rWord in rWords: s = {} prev_words = [ split_text[i - 1] for i in range(0, len(split_text)) if rWord.startswith(split_text[i]) ] qt = 'None' try: for word in prev_words: word = word.replace(',', '') if word.isnumeric() == True: qt = str(word) break else: try: qt = str(w2n.word_to_num(word)) break except Exception as e: continue if qt == 'None': elems = rWord.strip().split() word = elems[0] rWord2 = " ".join(elems[1:]) word = word.replace(',', '') if word.isnumeric() == True: qt = str(word) else: try: qt = str(w2n.word_to_num(word)) except Exception as e: pass if qt != 'None' and qt in rWord: print(rWord, qt) continue except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] print(exc_type, fname, exc_tb.tb_lineno) qt = 'None' # class_list[rWord]= qt resource['Resources'][resources_bucket[rWord]][rWord] = qt # print(class_list) ## Need to add quantity ## Ritam yaha dekh # print('=>', resource['contact'], '\na=>', a, '\nb=>', b, '\nc=>', c, '\nm=>', modified_array, '\nd=>', d, '\nf=>', final_resource_dict) # print(final_resource_dict) print('Returning', resource) return flask.jsonify(resource)
async def run(self, dispatcher: CollectingDispatcher, tracker: Tracker, domain: Dict[Text, Any]) -> List[Dict[Text, Any]]: try: site = tracker.current_state().get('sender_id') # dispatcher.utter_message(text="crossword") slots = tracker.current_state().get('slots') slotsets = [] if slots.get('crossword') and len(slots.get('crossword')) > 0: crossword = await get_crossword(slots.get('crossword')) if crossword: crossword_position = self.extract_entities( tracker, ['crossword_position']) word = self.extract_entities(tracker, [ 'word', 'thing', 'person', 'place', ]) if crossword_position: just_number = None clean_number = crossword_position.replace( 'across', '').replace('down', '') parts = clean_number.split(' ') clean_number = parts[0] # integer from text if clean_number.isdigit() > 0 and int( clean_number) > 0: just_number = clean_number.strip() # convert number from text else: try: just_number = w2n.word_to_num(clean_number) except: pass # print(just_number) direction = None if "across" in crossword_position: direction = "across" elif "down" in crossword_position: direction = "down" if just_number: if direction: if word: # print(crossword.get('data',{})) answer = crossword.get('data', {}).get( direction, {}).get(str(just_number)).get( 'answer', '').lower().strip().replace( ' ', '') # print([word,answer]) if word.lower().strip().replace( ' ', '') == answer: dispatcher.utter_message( text="Correct") # print([' CROSSWORD ',answer,crossword_position,just_number,word]) await publish( 'hermod/' + site + '/crossword/fill', { 'direction': direction, "word": word.strip().replace(' ', ''), "number": just_number }) slotsets.append( SlotSet("hermod_force_continue", None)) slotsets.append( SlotSet("hermod_force_end", "true")) else: dispatcher.utter_message( text="Nope, try again") slotsets.append( SlotSet("hermod_force_continue", None)) slotsets.append( SlotSet("hermod_force_end", "true")) await publish( 'hermod/' + site + '/display/show', { 'question': just_number + ' ' + direction + ' is ' }) else: dispatcher.utter_message( text= "I didn't hear the word you wanted to fill" ) slotsets.append( SlotSet("hermod_force_continue", None)) slotsets.append( SlotSet("hermod_force_end", "true")) await publish( 'hermod/' + site + '/display/show', { 'question': just_number + ' ' + direction + ' is ' }) else: dispatcher.utter_message( text= "I didn't hear which direction you wanted to fill" ) slotsets.append( SlotSet("hermod_force_continue", None)) slotsets.append( SlotSet("hermod_force_end", "true")) await publish( 'hermod/' + site + '/display/show', {'question': crossword_position}) else: dispatcher.utter_message( text= "I didn't hear the number you wanted to fill") slotsets.append( SlotSet("hermod_force_continue", None)) slotsets.append(SlotSet("hermod_force_end", "true")) await publish('hermod/' + site + '/display/show', {'question': crossword_position}) else: dispatcher.utter_message( text="I didn't hear the position you wanted to fill" ) slotsets.append(SlotSet("hermod_force_continue", None)) slotsets.append(SlotSet("hermod_force_end", "true")) await publish('hermod/' + site + '/display/show', {'question': crossword_position}) #hermod/+/crossword/fill except Exception as e: print('ERROR') print(e) return slotsets
def QuantityMoneyTranslator(para): nlp = en_core_web_sm.load() #nlp1= spacy.load('en') #text='European authorities fined Google a record sixty five million dollars on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices. Furthermore, My weight was thirty five kilograms in 2018. A chocolate costs six dollars.' #text1='C M of Maharashtra spent two thousand and fouty two dollars.' doc = nlp(para) currency = [ "dollars", "dollar", "euro", "euros", "yens", "yen", "rupee", "rupees", "pound", "pounds" ] quantity = ["pounds", "kilograms", "grams"] ls_money = [] ls_no = [] ls_quan = [] for X in doc.ents: if X.label_ == 'MONEY': ls_money.append(str(X)) for X in doc: if X.ent_type_ == 'CARDINAL': ls_no.append(str(X)) for X in doc.ents: if X.label_ == X.label_ == 'QUANTITY': ls_quan.append(str(X)) #dollar value conversion number = [] money = [] quan = [] for a in ls_money: a = a.lower() a = a.split() b = [word for word in a if word in set(currency)] b = ' '.join(b) if b == "dollars" or b == "dollar": symbol = "$" elif b == "euros" or b == "euro": symbol = "€" elif b == "yens" or b == "yen": symbol = "¥" elif b == "pound" or b == "pounds": symbol = "£" else: symbol = "" a = [word for word in a if word not in set(currency)] a = ' '.join(a) p = symbol + str(w2n.word_to_num(a)) money.append(p) for a in ls_no: number.append(str(w2n.word_to_num(a))) for a in ls_quan: a = a.lower() a = a.split() b = [word for word in a if word in set(quantity)] b = ' '.join(b) if b == "pounds": symbol = " lbs" elif b == "kilograms": symbol = " kg" elif b == "grams": symbol = " gm" else: symbol = "" a = [word for word in a if word not in set(quantity)] a = ' '.join(a) quan.append(str(str(w2n.word_to_num(a)) + symbol)) j = 0 final_str_spacyv1 = [] for Y in doc: if Y.ent_iob_ == 'B' and Y.ent_type_ == 'QUANTITY': final_str_spacyv1.append(str(quan[j])) j = j + 1 elif Y.ent_iob_ == 'I' and Y.ent_type_ == 'QUANTITY': final_str_spacyv1 = final_str_spacyv1 else: final_str_spacyv1.append(str(Y)) ans = ' '.join(final_str_spacyv1) doc = ans doc = nlp(doc) k = 0 final_str_spacy = [] for Y in doc: if Y.ent_iob_ == 'B' and Y.ent_type_ == 'MONEY': final_str_spacy.append(str(money[k])) k = k + 1 elif Y.ent_iob_ == 'I' and Y.ent_type_ == 'MONEY': final_str_spacy = final_str_spacy else: final_str_spacy.append(str(Y)) ans = ' '.join(final_str_spacy) return ans
def run_alexa(): command = take_command() if 'multiply' in command: numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 3))) elif any(i in command for i in ['add', 'sum', 'plus', '+']): numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 1))) elif any(i in command for i in ['minus', 'sub', 'subtract', '-']): numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 2))) elif any(i in command for i in ['divide', 'div']): numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 4))) elif any(i in command for i in ['modulus', 'mod']): numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 5))) elif any(i in command for i in ['power', 'pow']): numbers = map(int, re.findall(r'[0-9]+', command)) talk("The answer is " + str(calculate(list(numbers), 6))) elif 'game' in command: talk('Ok! I will guess a number between 1 to 10, Just find it.') r = random.randint(1, 10) num = take_command() try: n = w2n.word_to_num(num) n = int(n) if n == r: talk("Hurray! You won the game") else: talk("I won! Thanks for playing!") except: talk("Only numbers allowed! Thanks for playing!") elif 'play' in command: song = command.replace('play', '') talk('playing ' + song) pywhatkit.playonyt(song) elif 'time' in command: time1 = datetime.datetime.now().strftime('%I:%M %p') talk('Current time is ' + time1) elif 'date' in command: date = datetime.datetime.now().strftime('%d:%B:%Y') talk('Today is ' + date) elif any(i in command for i in ['search', 'find', 'who', 'get']): data = wikipedia.summary(command, 3) talk(data) elif 'joke' in command: talk(pyjokes.get_joke()) elif 'send' in command: number = re.findall(r'[0-9 ]+', command) num = [i.replace(' ', '') for i in number if len(i) > 1][0] if len(num) != 10: talk("Please provide a valid number !") return datet = datetime.datetime.now() pywhatkit.sendwhatmsg('+91' + num, 'Hii', int(datet.strftime('%H')), int(datet.strftime('%M')) + 1) elif 'cancel shut' in command: pywhatkit.cancelShutdown() talk('System Shutdown Cancelled!') elif 'shutdown' in command: pywhatkit.shutdown(100) talk('System is going to shutdown!') elif bool(re.search(r'\.[a-zA-Z0-9]{2,3}', command)): url = command.replace('open', '').strip() print(url) webbrowser.open_new_tab(url if 'http' in url else 'https://' + url) talk('Opening ' + url + 'in chrome ') elif 'open' in command: app = command.replace('open', '').strip() app = ''.join(app.split()) if 'computer' in app: subprocess.Popen(r'explorer /select,"C:\"' + app, shell=True) talk('Opening ' + app) return elif any(i in app for i in ['whatsapp', 'msteams', 'spotify']): subprocess.Popen(r'start ' + app + ':', shell=True) talk("Opening " + app) return elif 'camera' in app: cam = cv2.VideoCapture(0) talk('Opening Camera! To capture image press spacebar once!') while cam.isOpened(): ret, frame = cam.read() cv2.imshow('Camera', frame) k = cv2.waitKey(50) if k == 32: r = random.randint(10, 10000) cv2.imwrite(f'captured{r}.png', frame) talk( f'Image captured and saved as captured{r} into current directory!' ) if cv2.getWindowProperty('Camera', cv2.WND_PROP_VISIBLE) < 1: break cam.release() cv2.destroyAllWindows() return elif 'python' in app: n = os.startfile('python.exe') if n: talk('Sorry! 
I cant Open ' + app) return talk('Opening ' + app) return elif 'chrome' in command: webbrowser.open_new_tab('https://google.com') talk("Opening " + app) return try: # n = subprocess.Popen(app, stderr=subprocess.PIPE) n = subprocess.Popen(f'explorer {app}') print(os.path.realpath(app)) talk("Opening " + app) except: talk("Sorry I can't open " + app) elif 'close' in command: app = command.replace('close', '').strip() app = ''.join(app.split()) # print(app) flag = 0 for process in (process for process in psutil.process_iter() if app in process.name().lower()): process.kill() flag = 1 if flag: talk('Closing ' + app) elif any(i in command for i in ['count', 'startcounter', 'starttimer']): num = re.findall(r'[0-9]+', command) num = sorted(map(int, num)) if num: for i in range(num[0], num[1] + 1): talk(i) time.sleep(1) else: for i in range(1, 11): talk(i) time.sleep(1) elif any(i in command for i in ['goodnight', 'sweetdreams', 'night']): talk('Good night! Sweet dreams and takecare!') return 1 elif any(i in command for i in ['goodmorning', 'morning']): talk('Morning! It’s good to see you!') elif bool(re.match(r'your.*?name', command)): talk('I am Nandyalexa, how may I help you?') elif 'single' in command: talk('I am already in relationship with nandy!') elif 'weather' in command: place = [ i for i in re.split(r'weather|in ', command) if len(i) > 1 and i ][-1] data = openweathermap('api.openweathermap.org/data/2.5/weather?', place=place) # print(data) string = f'Current weather status in {place} is {data[0]}, Temperature in {place} is {data[1]}, Pressure in {place} is {data[2]}, and Wind Speed in {place} is {data[3]}' talk(string) elif 'where' in command: print(command) js = requests.get('https://freegeoip.app/json/').json() talk( f"Your Country is {js['country_name']}, Your Region is {js['region_name']}, Your city is {js['city']}, and Your Time zone is {js['time_zone']}" ) elif 'screen' in command: global degree screen = rotatescreen.get_primary_display() if any(i in command for i in ['default', '0', 'stop']): degree = 0 screen.rotate_to(degree) talk('Screen set to normal!') return screen.rotate_to(degree % 360) talk('Screen rotated to ' + str(degree)) degree += 90 elif 'translate' in command: translator = Translator() talk('What is the source language which you are gonna speak?') from_lang = get_languagecode(take_command().strip()) print(from_lang) talk('What is the destination language which needs to be translated?') to_lang = get_languagecode(take_command().strip()) talk("what message to be translated?") get_message = take_command() text_to_translate = translator.translate(get_message, src=from_lang, dest=to_lang) text = text_to_translate.text speak = gTTS(text=text, lang=to_lang, slow=False) speak.save("captured_voice.mp3") music = pyglet.media.load("captured_voice.mp3", streaming=False) music.play() time.sleep(music.duration) # prevent from killing os.remove("captured_voice.mp3") # remove temperory file elif 'sleep' in command.strip(): talk("Ok sir! I won't disturb you for a minute") time.sleep(60) talk() else: talk('Please say the command again ! ')
def interpret_location(interpreter, speaker, d, ignore_reldir=False) -> XYZ: """Location dict -> coordinates Side effect: adds mems to agent_memory.recent_entities if a reference object is interpreted; and loc to memory """ location_type = d.get("location_type", "SPEAKER_LOOK") if location_type == "SPEAKER_LOOK": player = interpreter.memory.get_player_struct_by_name(speaker) loc = capped_line_of_sight(interpreter.agent, player) elif location_type == "SPEAKER_POS": loc = pos_to_np( interpreter.memory.get_player_struct_by_name(speaker).pos) elif location_type == "AGENT_POS": loc = pos_to_np(interpreter.agent.get_player().pos) elif location_type == "COORDINATES": loc = cast( XYZ, tuple( int(float(w)) for w in re.findall("[-0-9.]+", d["coordinates"]))) if len(loc) != 3: logging.error("Bad coordinates: {}".format(d["coordinates"])) raise ErrorWithResponse( "I don't understand what location you're referring to") else: loc, mems = maybe_get_location_memory(interpreter, speaker, d) if loc is None: raise ValueError( "Can't handle Location type: {}".format(location_type)) # handle relative direction reldir = d.get("relative_direction") if reldir is not None and not ignore_reldir: if reldir == "BETWEEN": pass # loc already handled when getting mems above if reldir == "INSIDE": if location_type == "REFERENCE_OBJECT": mem = mems[0] locs = perception.find_inside(mem) if len(locs) == 0: raise ErrorWithResponse( "I don't know how to go inside there") else: loc = locs[0] elif reldir == "AWAY": apos = pos_to_np(interpreter.agent.get_player().pos) dir_vec = (apos - loc) / np.linalg.norm(apos - loc) num_steps = word_to_num(d.get("steps", "5")) loc = num_steps * np.array(dir_vec) + to_block_center(loc) elif reldir == "NEAR": pass else: # LEFT, RIGHT, etc... reldir_vec = rotation.DIRECTIONS[reldir] look = interpreter.memory.get_player_struct_by_name(speaker).look # this should be an inverse transform so we set inverted=True dir_vec = rotation.transform(reldir_vec, look.yaw, 0, inverted=True) num_steps = word_to_num(d.get("steps", "5")) loc = num_steps * np.array(dir_vec) + to_block_center(loc) # if steps without relative direction elif "steps" in d: num_steps = word_to_num(d.get("steps", "5")) loc = to_block_center(loc) + [0, 0, num_steps] return to_block_pos(loc)
def to_html(self): t = self.H.table(border='1', id="carddata") r = t.tr with open(config.CARDS_AND_DIMENSIONS_CSV) as csvfile: reader = csv.DictReader(csvfile) for column in reader.fieldnames: r.td(column) for row in reader: t.tr(id=str(reader.line_num)) for col in row.items(): try: w2n.word_to_num(col[0]) t.td( "<img width='100' src='https://deckofcardsapi.com/static/img/" + col[1] + ".png'></img>", escape=False, id=col[0] + str(reader.line_num), klass=col[0]) except: t.td(col[1], klass=col[0], id=col[0] + str(reader.line_num)) html_human_play_before = """ <html> <head> </head> <body onload="init()"> <style> #carddata {display: none;} #play_area {padding-top: 50; padding-bottom: 30; text-decoration: none; text-transform: uppercase;} .dimension_name {color: transparent; text-align: center;} .dimension_value {color: transparent; text-align: center;} .rule_name {color: transparent; text-align: center;} .rule_bool {text-transform: uppercase; color: ffffff; text-align: center;} body {background-color: 496D89;} #play, #reveal, #hide {background-color: #123652; border: none; color: white; padding-top: 20; padding: 45px 85px; text-align: center; text-decoration: none; display: inline-block; font-size: 16px;} </style> <script> function init() { pywebview.api.init() </script> """ html_human_play_after = """ <div id="play_area"> <table id="play_table"></table> <p id="rule_display"> <p> </div> <button type="Button" id="play" name="Play" onclick="toggleTable()">Draw</button> <button type="Button" id="reveal" name="Reveal" onclick="revealRule()">Reveal rule</button> <button type="Button" id="hide" name="Hide" onclick="hideRule()">Restart</button> <script> function toggleTable() { var i; var rows_count= document.getElementById("carddata").rows.length; i = Math.floor(Math.random() * rows_count-2) + 2; j=i.toString(); document.getElementById('play_table').innerHTML = document.getElementById(j).innerHTML; } </script> <script> function revealRule() { var item_count = document.getElementsByClassName('rule_name').length; document.getElementsByClassName('rule_bool')[item_count-1].style.color= "transparent"; document.getElementById('rule_display').innerHTML = "True if " + document.getElementsByClassName('dimension_name')[item_count-1].innerHTML + " (" + document.getElementsByClassName('dimension_value')[item_count-1].innerHTML + ") " + document.getElementsByClassName('rule_name')[item_count-1].innerHTML + "." + " " + "This hand is " + document.getElementsByClassName('rule_bool')[item_count-1].innerHTML; document.getElementById('rule_display').style.color="ffffff"; document.getElementsByClassName('rule_bool').style.color="green"; } </script> <script> function hideRule() { document.getElementById('rule_display').style.color="transparent"; toggleTable(); } </script> </body> </html> """ html_ai_play_before = """ <script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.3.0/jquery.min.js"></script> <script type="text/javascript"> var auto_refresh = setInterval(function () { $('#play_area').load('#play_area'); }, 1000); </script> """ print( str(html_human_play_before) + str(t) + str(html_human_play_after)) return str(html_human_play_before) + str(t) + str( html_human_play_after)
def parseData(self, inputString):
    # Default.xlsx : Excel file that contains currency information
    currencyDF = pd.DataFrame({
        'currency': [
            'Dollars', 'Pounds', 'Shillings', 'Cents', 'Pistoles', 'Guineas',
            'Pence', 'Texas Dollars'
        ],
        'symbol': ['$', '£', None, None, None, None, None, '$ texas'],
        'string': [
            'dollar', 'pound', 'shilling', 'cent', 'pistole', 'guineas',
            'pence', 'texas dollar'
        ]
    })
    unit = ""
    amount = ""

    # Get amount and currency from the input string : ex: 20$ or £20
    inputString = inputString.replace(',', '')
    price = Price.fromstring(inputString)
    if (price.amount != None and price.currency != None):
        amount = price.amount
        unit = price.currency

    # Get currency data from string : ex: 20 dollars or twenty dollars
    if (amount == ""):
        try:
            amount = w2n.word_to_num(inputString)
        except:
            try:
                amount = int(re.search(r'\d+', inputString).group())
            except:
                amount = ""

    # Converting currency string
    updatedUnit = ""
    # if (amount != "") :
    for index, row in currencyDF.iterrows():
        # Checking symbol columns from the Currency Excel File
        if (row['symbol'] != "" and pd.isnull(row['symbol']) != True):
            symbolCheck = row['symbol'].split(' ')
            symbol = symbolCheck[0]
            if (len(symbolCheck) != 1):
                if (unit == symbolCheck[0] and symbolCheck[1] in inputString):
                    return self.retDataFrame(amount, row['currency'])
            else:
                inputString = inputString.replace(row['symbol'], row['string'])
                if (unit == row['symbol']):
                    updatedUnit = row['currency']
        # Checking string columns from the Currency Excel File
        if (self.checkString(inputString.lower(), row['string'].split(' ')) == True):
            stringCheck = row['string'].split(' ')
            if (len(stringCheck) == 1):
                updatedUnit = row['currency']
            else:
                return self.retDataFrame(amount, row['currency'])
    return self.retDataFrame(amount, updatedUnit)
temp = {}

# title is an attribute of the <a> tag inside <h3> which is inside <article>
# as there is only 1 <article> we use the "." to get inside it, same with h3, a. attrs lists the attributes
temp["title"] = item.article.h3.a.attrs["title"]

# price is inside <div> with class "product_price", but since there is more than one <div> we need to use find
# inside that <div> we find a <p> with class "price_color" and use text to get the data and splice off the undesired unicode at the front [1:]
temp["price"] = item.article.find("div", class_="product_price").find(
    "p", class_="price_color").text[1:]

# stock is inside the <div class="product_price"> within <p> class "instock availability"
# as we have more than one <div> we use find("div", class_="product_price")
# we use the .text method to extract the text as a string and use strip() to remove white spaces
temp["stock"] = item.article.find("div", class_="product_price").find(
    "p", class_="instock availability").text.strip()

# star rating is a word inside the <article> within a <p> with class "star-rating"
# we use the word2number module to make words [One,Two,Three...] --> [1,2,3..]
temp["rating"] = w2n.word_to_num(item.article.p.attrs["class"][1])

# store the dictionary in the list of books
books.append(temp)

# All above paths to the data were found by inspecting the source

# Print all the dictionaries on the page with the details
for i in books:
    pprint(i)
    print("-------------------------------------------")
result = str(eval(data2))
s.send(result + "\n")
print(result)

wordnum = False
count = 0
i = 0
while 1:
    data = s.recv(BUFFER_SIZE)
    try:
        # treat the first two tokens as a spelled-out number, e.g. "forty two"
        newdata = data.split(' ')
        num1 = newdata[0] + " " + newdata[1]
        int_num1 = w2n.word_to_num(num1)
        wordnum = True
    except ValueError:
        wordnum = False
        # do normal things
        pass
    newdata = data.split(" ")
    if (len(newdata[1]) < 2):
        if not data:
            break
        print(data)
        # s.close()
        data2 = data[0:10:1]
        data2 = data2[:-3]
def fix_age(matchobj):
    # print(matchobj.group(0))
    return str(w2n.word_to_num(matchobj.group(0))) + " "
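# Small usage sketch of fix_age as a re.sub callback; the pattern below is only an
# illustrative stand-in for whatever age expression the original code matches
# (note it keeps the trailing space, matching the "+ ' '" in the return value).
import re
from word2number import w2n

pattern = r"\b(?:twenty five|twenty one|thirty|forty two) "
print(re.sub(pattern, fix_age, "She was twenty five years old."))
# -> "She was 25 years old."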
def join_time_description(amr):
    # 4 o'clock; 4 am; 4 a.m., etc.
    while True:
        span = None
        if len(amr.tokens) < 2:
            break
        for i in range(1, len(amr.tokens)):
            x, y = amr.tokens[i - 1: i + 1]
            if y.lower() in ("o'clock", 'am', 'a.m.', 'pm', 'p.m') and re.search(r'^\d+[.:]?\d*[.:]?\d*$', x):
                span = list(range(i - 1, i + 1))
                joined_tokens = ''.join([x, y])
                pos = 'CD'
                ner = 'TIME'
                break
            if y.lower() in ("o'clock", 'am', 'a.m.', 'pm', 'p.m') and x.isalpha():
                try:
                    x = w2n.word_to_num(x)
                except:
                    continue
                x = str(x)
                span = list(range(i - 1, i + 1))
                joined_tokens = ''.join([x, y])
                pos = 'CD'
                ner = 'TIME'
                break
            if y == 'Greenwich' and i + 2 < len(amr.tokens) and amr.tokens[i + 1: i + 3] == ['Mean', 'Time']:
                span = list(range(i, i + 3))
                joined_tokens = 'GMT'
                pos = 'NNP'
                ner = 'TIME'
                break
            if y in ('century', 'Century'):
                m = re.search(r'^(\d+)(st|nd|rd|th)?$', x)
                if m and m.group(1) != '':
                    span = list(range(i - 1, i + 1))
                    joined_tokens = ''.join([m.group(1), y.lower()])
                    pos = 'CD'
                    ner = 'TIME'
                    break
                elif x == 'first' and amr.tokens[i - 2] == '-' and amr.tokens[i - 3] == 'twenty':
                    span = list(range(i - 3, i + 1))
                    joined_tokens = '21century'
                    pos = 'CD'
                    ner = 'TIME'
                    break
                elif x.lower() == 'eighth':
                    span = list(range(i - 1, i + 1))
                    joined_tokens = '8century'
                    pos = 'CD'
                    ner = 'TIME'
                    break
                elif x.lower() == 'fifth':
                    span = list(range(i - 1, i + 1))
                    joined_tokens = '5century'
                    pos = 'CD'
                    ner = 'TIME'
                    break
                else:
                    try:
                        x = w2n.word_to_num(x)
                    except:
                        continue
                    span = list(range(i - 1, i + 1))
                    joined_tokens = ''.join([str(x), y.lower()])  # str() needed: word_to_num returns an int
                    pos = 'CD'
                    ner = 'TIME'
                    break
        else:
            break
        amr.replace_span(span, [joined_tokens], [pos], [ner])
import math

import pandas as pd
from sklearn import linear_model
from word2number import w2n

data = pd.read_csv('datasets/hiring.csv')
# fill the first two experience values with 'zero' before converting words to numbers
data.experience[0] = 'zero'
data.experience[1] = 'zero'
a = 0
for i in data.experience:
    data.experience[a] = w2n.word_to_num(i)
    a = a + 1
data.rename(columns={'test_score(out of 10)': 'test_score',
                     'interview_score(out of 10)': 'interview_score',
                     'salary($)': 'salary'}, inplace=True)
median = math.floor(data.test_score.median())
data.test_score.fillna(median, inplace=True)
print(data)

reg = linear_model.LinearRegression()
reg.fit(data[['experience', 'test_score', 'interview_score']], data.salary)
print(reg.coef_)
print(reg.intercept_)
print(reg.predict([[2, 9, 6]]))
print(reg.predict([[12, 10, 10]]))
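# A hedged alternative sketch (not from the original): converting the whole
# 'experience' column in one pass with apply(), instead of overwriting entries
# while iterating over the Series. Assumes the same hiring.csv layout with an
# 'experience' column that holds number words or missing values.
import pandas as pd
from word2number import w2n


def experience_to_int(value):
    # Missing experience counts as zero; number words go through word2number.
    if pd.isna(value):
        return 0
    if isinstance(value, (int, float)):
        return int(value)
    return w2n.word_to_num(str(value))


data = pd.read_csv('datasets/hiring.csv')
data['experience'] = data['experience'].apply(experience_to_int)
print(data['experience'])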
def update_boundary():
    with open(os.path.join(data_dir, "bookcorpus", "clean_split.json"),
              'r', encoding='utf-8') as infile:
        book_info_dict = json.load(infile)

    # check all books
    bookcorpus_dir = os.path.join(data_dir, "bookcorpus", "segment")
    counter = Counter()
    last_counter = Counter()
    book_info = []
    for phase, book_info_list in book_info_dict.items():
        for count, book_info in enumerate(book_info_list):
            book_name = book_info["book"]
            print("\x1b[2K\rRemoving Header in {}, {:>5} / {:>5} [{:.2f}%]".format(
                phase, count, len(book_info_list),
                100.0 * count / len(book_info_list)), end="")
            if not os.path.isfile(
                    os.path.join(data_dir, "bookcorpus", "frame", book_name)):
                continue
            with open(os.path.join(bookcorpus_dir, book_name),
                      'r', encoding='utf-8') as infile:
                lines = infile.read().split("\n")

            # find chapter one
            chapter_one = -1
            for i, line in enumerate(lines):
                if line[:7].lower() == "chapter":
                    tokens = word_tokenize(line)
                    if len(tokens) < 2:
                        continue
                    # ignore the table of contents
                    if sum([1 for token in tokens if token.lower() == "chapter"]) > 1:
                        continue
                    # find chapter one
                    try:
                        chapter_num = w2n.word_to_num(tokens[1])
                    except ValueError:
                        chapter_num = None
                    if chapter_num == 1:
                        counter.update([line])
                        chapter_one = i
                        break

            # cannot find chapter 1
            if chapter_one == -1:
                continue
            # skip the weird cases where chapter 1 comes after more than 300 lines
            if chapter_one > 300:
                continue

            # find the last chapter
            chapter_info = []
            for i, line in enumerate(lines):
                if line[:7].lower() == "chapter":
                    tokens = word_tokenize(line)
                    if len(tokens) < 2:
                        continue
                    # ignore the table of contents
                    if sum([1 for token in tokens if token.lower() == "chapter"]) > 1:
                        continue
                    try:
                        chapter_num = w2n.word_to_num(tokens[1])
                    except ValueError:
                        chapter_num = None
                    if chapter_num is not None:
                        chapter_info.append([chapter_num, i, line])

            if chapter_info[-1][0] > 1:
                start = chapter_one
                end = chapter_info[-1][1]
                last_counter.update([chapter_info[-1][2]])
                book_info["start"] = start
                book_info["end"] = end

    # save data
    with open(os.path.join(data_dir, "bookcorpus", "clean_split_updated.json"),
              'w', encoding='utf-8') as outfile:
        json.dump(book_info_dict, outfile, indent=2)
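# A small, assumed check (not part of the original script) of how the chapter
# headings above are recognised: word_tokenize splits the heading and w2n maps
# the second token to a number when it is a number word or a digit string.
# Assumes NLTK's 'punkt' tokenizer data has been downloaded.
from nltk.tokenize import word_tokenize
from word2number import w2n

for heading in ["Chapter One", "CHAPTER TWO", "Chapter 3", "Chapter the Last"]:
    tokens = word_tokenize(heading)
    try:
        print(heading, "->", w2n.word_to_num(tokens[1]))
    except ValueError:
        print(heading, "-> not a recognised chapter number")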
f = open("num.txt", "r") data = f.readlines() datanew = list(map(str.rstrip, data)) f.close() #If the phrase"and" is not located in the check if "AND" not in amtupper: amtupper += " AND ZERO" #Divides the dolalr amount and the cent amount dollar, cents = amtupper.split(" AND ") wordlst = dollar.split(' ') #Checks if the words in dollar is in the num.txt dictionary try: for word in wordlst: if word in datanew: wordnum += word + ' ' numamt = w2n.word_to_num(wordnum) except ValueError: #returns an "Un-Readable statement if none of the words are in the num.txt dictionary" amt += "(Un-Readable) " amtupper += "(Un-Readable) " numamt = 0 #turns the cents into a decimal if the cents are written as "##/100" try: num, den = cents.split('/') result = (float(num) / float(den)) numamt += result except ValueError: #turns the cents into a decimal if the cents are written as words try: centsplit = cents.split(' ') for word in centsplit:
# Main
while True:
    # User input
    i = input('user : ')
    if i in ['break', 'end', 'quit']:
        break
    else:
        try:
            # case 0 : a number entered (possibly as words)
            try:
                o = (w2n.word_to_num(i))
            except:
                try:
                    # case 1 : normal calculations (basic calculator)
                    '''Easy way was to use eval, but using eval has great risks
                    as it could be used to potentially crack your system'''
                    o = simple_eval(i)
                except:
                    try:
                        # clean the sentence
                        words = [
                            k for k in word_tokenize(i) if k not in stop_words
                        ]
                        o = words
                        for i in words:
                            if i in [
import pandas as pd
from sklearn import linear_model
from word2number import w2n

### importing the csv file
df = pd.read_csv('hiring.csv')
print(df)

### cleaning the dataframe
df['experience'].fillna(0, inplace=True)
median = df['test_score(out of 10)'].median()
df['test_score(out of 10)'].fillna(median, inplace=True)
print(df)

### converting number words to numbers
print()
num_list = []
for i in df['experience'].iloc[2:]:
    i = w2n.word_to_num(i)
    num_list.append(i)
df.loc[2:, ('experience')] = num_list
print(df['experience'].iloc[2:])
print(df)

### creating the linear regression model
lreg = linear_model.LinearRegression()
lreg.fit(
    df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']],
    df['salary($)'])
print(lreg)

### printing the coefficients
print(lreg.coef_)
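# A small follow-up sketch (assumed, not in the original): querying the fitted
# lreg model from the snippet above for a candidate with 2 years of experience,
# a test score of 9 and an interview score of 6. Passing a DataFrame with the
# same column names avoids sklearn's "X does not have valid feature names"
# warning that a bare list would trigger.
import pandas as pd

candidate = pd.DataFrame(
    [[2, 9, 6]],
    columns=['experience', 'test_score(out of 10)', 'interview_score(out of 10)'])
print(lreg.predict(candidate))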
def analysis(originalquery, finalquery):
    fromcity = None
    tocity = None
    par3 = (time.strftime("%Y-%m-%d"))
    par4 = None
    time_day = None
    time_relative = None
    time_nu = None
    time_period = None
    time_tom = None
    time_spec_month = None
    time_spec_date = None
    for i in range(0, len(finalquery)):
        if finalquery[i] == 'B-fromloc.city_name':
            fromcity = originalquery[i]
            try:
                if finalquery[i + 1] == 'I-fromloc.city_name':
                    fromcity = fromcity + ' ' + originalquery[i + 1]
            except IndexError:
                pass
        if finalquery[i] == 'B-toloc.city_name':
            tocity = originalquery[i]
            try:
                if finalquery[i + 1] == 'I-toloc.city_name':
                    tocity = tocity + ' ' + originalquery[i + 1]
            except IndexError:
                pass
        # print(fromcity, tocity)
        if finalquery[i] == 'B-depart_date.day_name':
            time_day = originalquery[i]
        if finalquery[i] == 'B-depart_time.time_relative':
            time_relative = originalquery[i]
        if finalquery[i] == 'B-depart_time.time':
            time_nu = originalquery[i]
            try:
                if finalquery[i + 1] == 'I-depart_time.time':
                    time_nu = time_nu + ' ' + originalquery[i + 1]
            except IndexError:
                pass
            try:
                time_nu = time_nu + ' ' + originalquery[i + 1]
            except IndexError:
                pass
        if finalquery[i] == 'B-arrive_time.period_of_day':
            time_period = originalquery[i]
            try:
                if finalquery[i + 1] == 'I-arrive_time.period_of_day':
                    time_period = time_period + ' ' + originalquery[i + 1]
            except IndexError:
                pass
            try:
                time_period = time_period + ' ' + originalquery[i + 1]
            except IndexError:
                pass
        if finalquery[i] == 'B-depart_date.month_name':
            time_spec_month = originalquery[i]
            time_spec_month = month_converter(time_spec_month)
        if finalquery[i] == 'B-depart_date.day_number':
            time_spec_date = originalquery[i]
            time_spec_date = w2n.word_to_num(time_spec_date)
        if finalquery[i] == 'B-depart_date.today_relative':
            time_tom = originalquery[i]

    if time_tom is not None:
        if time_tom == 'tomorrow':
            date_tom = datetime.now() + relativedelta(days=1)
            par3 = date_tom.strftime('%Y-%m-%d')
    if time_day is not None:
        x = 0
        if time_day.lower() == 'monday':
            time_day = 0
        elif time_day.lower() == 'tuesday':
            time_day = 1
        elif time_day.lower() == 'wednesday':
            time_day = 2
        elif time_day.lower() == 'thursday':
            time_day = 3
        elif time_day.lower() == 'friday':
            time_day = 4
        elif time_day.lower() == 'saturday':
            time_day = 5
        elif time_day.lower() == 'sunday':
            time_day = 6
        x = int(datetime.today().weekday()) - time_day
        date_after = datetime.now() + relativedelta(days=int(x))
        print(date_after)
        par3 = date_after.strftime('%Y-%m-%d')
    if time_spec_month is not None:
        # zero-pad single-digit months so the date string stays YYYY-MM-DD
        if len(str(time_spec_month)) == 1:
            time_spec_month = '0' + str(time_spec_month)
        par3 = '2016-' + str(time_spec_month) + '-' + str(time_spec_date)
        print(par3)
    flight_data = makeparameters(fromcity, tocity, par3, par4)
    return flight_data, time_period, time_relative, time_nu, tocity