def search():
    """Look up a word in the ``words`` collection and render the outcome.

    The word is read from the form on POST and from the query string on GET.
    A blank or missing word renders the plain search page.  When the lemma is
    stored, ``results.html`` receives the entry with an empty suggestion; when
    it is not, the spelling suggestion from ``correct`` is shown instead, or
    ``notfound.html`` if there is no suggestion either.

    Returns:
        The rendered template for the matching case.
    """
    # Use .get(): indexing a missing key aborts the request with 400, which
    # made the blank search page unreachable without a ``word`` parameter.
    if request.method == 'POST':
        word = request.form.get('word', '')
    elif request.method == 'GET':
        word = request.args.get('word', '')
    else:
        word = ''
    word = word.strip()
    if not word:
        return render_template('search.html')

    result = {}
    entries = g.mongodb.words.find_one({"lemma": word})
    if entries is None:
        suggest = correct(word)
        if suggest is None:
            return render_template('notfound.html')
        result["suggest"] = suggest
    else:
        result['suggest'] = ''
    # Always present so the template can test it; None when only a
    # suggestion is available.
    result['entries'] = entries
    return render_template('results.html', result=result)
def search():
    """Look up a word in the ``words`` collection and render the outcome.

    The word is read from the form on POST and from the query string on GET.
    A blank or missing word renders the plain search page.  When the lemma is
    stored, ``results.html`` receives the entry with an empty suggestion; when
    it is not, the spelling suggestion from ``correct`` is shown instead, or
    ``notfound.html`` if there is no suggestion either.

    Returns:
        The rendered template for the matching case.
    """
    # Use .get(): indexing a missing key aborts the request with 400, which
    # made the blank search page unreachable without a ``word`` parameter.
    if request.method == 'POST':
        word = request.form.get('word', '')
    elif request.method == 'GET':
        word = request.args.get('word', '')
    else:
        word = ''
    word = word.strip()
    if not word:
        return render_template('search.html')

    result = {}
    entries = g.mongodb.words.find_one({"lemma": word})
    if entries is None:
        suggest = correct(word)
        if suggest is None:
            return render_template('notfound.html')
        result["suggest"] = suggest
    else:
        result['suggest'] = ''
    # Always present so the template can test it; None when only a
    # suggestion is available.
    result['entries'] = entries
    return render_template('results.html', result=result)
def checkSpelling():
    """Spell-check the submitted ``word`` form field.

    Returns:
        ``("", 200)`` when ``spellcheck.correct`` hands the word back
        unchanged, otherwise the JSON-encoded correction with status 303.
    """
    submitted = request.form['word']
    suggestion = spellcheck.correct(submitted)
    if suggestion != submitted:
        return jsonify(suggestion), 303
    return "", 200
def queryImgSubmit(input):
    """Render the first page of image results for a search string.

    Each space-separated token is spell-corrected and looked up with
    ``get_images``.  Images are ranked by how many distinct query words
    matched them; the full ranking is stored in the beaker session under
    ``img_relevance`` and at most the first 5 entries are handed to the
    template.
    """
    search_words = input.split(" ")

    spelled_words = []   # corrected token per query token, blanks kept blank
    img_to_word = {}     # image -> distinct query words that returned it
    for word in search_words:
        spelled_words.append(correct(word) if word != "" else "")
        for img in get_images(word.lower()):
            matched = img_to_word.setdefault(img, [])
            if word not in matched:
                matched.append(word)

    isSpelledCorrect = spelled_words == list(search_words)

    # Most-matched images first; ties keep their dictionary order.
    img_relevance = sorted(
        [(img, len(words)) for img, words in img_to_word.items()],
        key=lambda pair: pair[1],
        reverse=True,
    )
    pg_tot = len(img_relevance)
    u_ = min(5, pg_tot)

    # place image-rank in a session variable for quick access
    session = request.environ.get('beaker.session')
    session['img_relevance'] = img_relevance
    session.save()

    user = None
    signin_state = "Sign in with Google+"
    link = "sign-in"
    if use_google_login:
        session = request.environ.get('beaker.session')
        user = session.get('user', None)
        if user is not None:
            signin_state = "Sign Out"
            link = "sign-out"

    return template(
        'templates/img_results.tpl',
        spellcheck=isSpelledCorrect,
        corrected_search=" ".join(spelled_words),
        correct_link="?image_keywords=" + "+".join(spelled_words),
        queryInput=input,
        user=user,
        link=link,
        signin_state=signin_state,
        imgList=img_relevance[0:u_],
        page=1,
        pg_tot=pg_tot,
    )
def correct_text(line):
    """Return the text of ``line``, cleaned up when correction is enabled.

    With ``options.corrected`` set, the text has the 'hb' misread restored to
    'lu-', lyric dashes removed, its first word passed through
    ``force_correct`` and every word spell-corrected.  Words whose correction
    is 's' are dropped.  The de-dashed text is also written back to
    ``line['text']``.  Without the option the text is returned untouched.

    Args:
        line: mapping with a ``'text'`` entry.

    Returns:
        The corrected words joined by single spaces, ``''`` when the text
        contains no words, or the original text when correction is off.
    """
    # check if text output should be corrected or not
    if not options.corrected:
        return line['text']

    text = line['text']
    # fix strange problem where 'lu-' is read as 'hb'
    text = replace(text, 'hb', 'lu-')
    # remove dashes from text
    text = replace(text, '- ', '')
    text = replace(text, '-', '')
    line['text'] = text

    words = text.split()
    if not words:
        # Nothing to correct; indexing words[0] below would raise IndexError.
        return ''

    # correct common spelling errors that the spell-checker cannot catch
    words[0] = force_correct(words[0])

    # Run the spell-checker once per word.  Short words sometimes get
    # corrected to 's', so those results are discarded.
    corrected = []
    for word in words:
        fixed = correct(lower(word))
        if fixed != 's':
            corrected.append(fixed)
    return ' '.join(corrected)
def queryVidSubmit(input):
    """Render the first page of YouTube results for a search string.

    Each space-separated token is spell-corrected; the raw input is sent to
    ``youtube_search`` (up to 40 results) unless it is empty.  The full video
    list is stored in the beaker session under ``vidList`` and at most the
    first 5 videos are handed to the template.

    Args:
        input: the raw query string.

    Returns:
        The rendered ``video_results.tpl`` template.
    """
    search_words = input.split(" ")
    # Build the corrected list from the same tokens it is compared against,
    # so an empty query is no longer reported as misspelled.
    spelled_words = [correct(word) if word != "" else "" for word in search_words]
    isSpelledCorrect = spelled_words == search_words

    if input == '':
        vidList = []
    else:
        vidList = youtube_search({'q': input, 'max_results': 40})

    # place videos in a session variable for quick access
    session = request.environ.get('beaker.session')
    session['vidList'] = vidList
    session.save()

    pg_tot = len(vidList)
    u_ = min(5, pg_tot)

    user = None
    signin_state = "Sign in with Google+"
    link = "sign-in"
    if use_google_login:
        session = request.environ.get('beaker.session')
        user = session.get('user', None)
        if user is not None:
            signin_state = "Sign Out"
            link = "sign-out"

    return template(
        'templates/video_results.tpl',
        spellcheck=isSpelledCorrect,
        corrected_search=" ".join(spelled_words),
        correct_link="?video_keywords=" + "+".join(spelled_words),
        queryInput=input,
        user=user,
        link=link,
        signin_state=signin_state,
        vidList=vidList[0:u_],
        page=1,
        pg_tot=pg_tot,
    )
def spelltest(tests, verbose=False, bias=None):
    """Run ``correct`` over a set of misspellings and summarise the results.

    Args:
        tests: mapping of target word -> whitespace-separated misspellings.
        verbose: when true, print every misspelling that was not corrected to
            its target, with the ``NWORDS`` counts of both words.
        bias: optional amount added to ``NWORDS[target]`` for every target
            before testing.

    Returns:
        dict with ``n`` (misspellings tried), ``bad`` (wrong corrections),
        ``unknown`` (wrong corrections whose target is not in ``NWORDS``),
        ``pct`` (share corrected properly, e.g. ``"74%"``; ``"0%"`` when
        nothing was tested), ``bias`` and ``secs`` (elapsed whole seconds).
    """
    import time
    # time.clock() was removed in Python 3.8; fall back to it only where
    # perf_counter does not exist.
    timer = getattr(time, 'perf_counter', None) or time.clock
    n, bad, unknown, start = 0, 0, 0, timer()
    if bias:
        for target in tests:
            NWORDS[target] += bias
    for target, wrongs in tests.items():
        for wrong in wrongs.split():
            n += 1
            w = correct(wrong)
            if w != target:
                bad += 1
                unknown += (target not in NWORDS)
                if verbose:
                    print('%r => %r (%d); expected %r (%d)' % (
                        wrong, w, NWORDS[w], target, NWORDS[target]))
    # An empty test set has no meaningful accuracy; report 0 instead of
    # dividing by zero.
    pct = int(100. - 100. * bad / n) if n else 0
    return dict(n=n, pct="{}%".format(pct), bad=bad, unknown=unknown,
                bias=bias, secs=int(timer() - start))
def signup():
    """Handle a search submission: spell-correct, query Solr, build snippets.

    Reads the ``autocomplete`` (query) and ``algorithm`` form fields.  The
    lower-cased query is spell-corrected word by word; if any word changes,
    the corrected query replaces the original.  The query is sent to the
    local Solr core (sorted by ``pageRankFile`` when the algorithm is
    ``"pagerank"``).  For each of the first 10 documents, the file named by
    the document ``id`` is opened and the first sentence containing the query
    (or, failing that, one of its terms) becomes the snippet, cut to 160
    characters.  The document's description is used when no sentence matches.

    Returns:
        The rendered ``search.html`` template.
    """
    query = request.form['autocomplete']
    radio = request.form["algorithm"]
    print("radio " + radio)
    print("Query Term '" + query + "'")

    # Keep the words that were already spelled correctly; skipping them
    # reduced the rewritten query to just the misspelled words.
    original_words = query.lower().split(" ")
    corrected_words = [correct(q) for q in original_words]
    isCorrected = corrected_words != original_words
    new_query = []
    if isCorrected:
        new_query = " ".join(corrected_words)
        query = new_query
    print(new_query)

    # Let requests encode the parameters so spaces, '&' or '#' in the user's
    # query cannot break or alter the Solr request.
    params = {'q': query}
    if radio == "pagerank":
        params['sort'] = 'pageRankFile desc'
    reply = requests.get("http://localhost:8983/solr/myexample/select",
                         params=params)
    r = json.loads(reply.content)['response']
    print(r)

    # Whole query first, then its individual terms.
    q_terms = [query] + query.split()
    result = []
    for doc in r['docs'][:10]:
        with open(doc["id"]) as f:
            text = text_from_html(f.read())
        print(text)
        text = " ".join(re.split(r"\s+", text))
        sentences = nltk.sent_tokenize(text)

        snippet = ""
        for term in q_terms:
            needle = term.lower()
            match = next((s for s in sentences if needle in s.lower()), None)
            if match is not None:
                snippet = match[0:160]
                break

        dicto = {}
        dicto['title'] = doc['title'][0]
        dicto['url'] = doc['og_url'][0] if "og_url" in doc else ""
        if snippet == "" and 'description' in doc:
            dicto['snippet'] = doc['description'][0]
        else:
            dicto['snippet'] = snippet
        result.append(dicto)

    return render_template('search.html', new_query=new_query, results=result,
                           display=True, query=query, display_new=isCorrected)
def chat():
    """Run the interactive weather chatbot loop on the console.

    Reads the word lists from the data files, greets the user and then keeps
    reading lines with ``raw_input`` until an exit word is entered.  Each line
    is optionally spell-corrected, then scanned for a country, a "next N days"
    span, a weather keyword, a time word, a city and the word "temperature".
    Lines that carry none of these are answered from
    ``predefined_responses`` (the user is asked to teach a reply when none is
    known).  Otherwise the forecast is fetched with ``weather.get_weather``
    and a sentence is printed.  Location, time and keyword carry over from
    one turn to the next.

    On exit, every taught input/response pair is appended to
    ``predefined_responses.txt`` and the whole transcript is passed to
    ``log.log``.
    """
    # keyword conditions
    # NOTE(review): condnext, condweather and condresponse are assigned but
    # never read inside this function.
    condnext = False
    condweather = False
    condtime = False
    condlocation = False
    condtemp = False
    condkey = False
    condresponse = False
    foundinfo = False
    condtrain = False
    condcountry = False
    condspellcheck = True

    # conversation state (local to this function, kept across loop turns)
    conversation = []
    location = ''
    prevlocation = location
    time = 'today'
    key = ''
    keytemplate = []
    fulltime = ''
    numdays = ''
    logstr = ''
    printstr = ''
    # Dictionary to hold all inputs without predefined responses. This
    # dictionary will be written into predefined_responses.txt before exiting
    # the program.
    responsedict = {}

    # read data files
    # Rows are indexed below as i[0], i[1], ... -- presumably one list of
    # fields per line; verify against readfile.readfile.
    citylist = readfile.readfile('cities.txt')
    keylist = readfile.readfile('keywords.txt')
    timelist = readfile.readfile('time.txt')
    condlist = readfile.readfile('conditions.txt')  # NOTE(review): never read below
    numlist = readfile.readfile('numbers.txt')
    countrylist = readfile.readfile('countries.txt')
    exitlist = ['exit', 'quit', 'bye', 'ok']

    # Greeting message
    printstr = 'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
    print printstr
    logstr += '\n\n' + printstr

    # Start main loop
    while True :
        foundinfo = False
        condtrain = False
        condcountry = False

        # read input from user
        input = raw_input('\nMe > ')
        logstr += '\nMe > ' + input + '\nBot > '

        # 'ok' is only treated as an exit word after confirmation
        if input in exitlist:
            if input == 'ok':
                exitans = raw_input("Do you want to quit? (y/n)")
                if exitans in ('y','Y','Yes','YES','yes'):
                    break
                else:
                    continue
            break

        if input == 'disable spellcheck':
            condspellcheck = False
            continue
        if input == 'enable spellcheck':
            condspellcheck = True
            continue

        # Spell-correct word by word; the corrected line replaces the input
        # only when at least one word changed.
        condcorrected = False
        if condspellcheck:
            corrected_input = ''
            for i in input.split():
                # NOTE(review): this binds the name `str` as a local of chat(),
                # hiding the builtin; the later `str(numdays)` call therefore
                # cannot reach the builtin and will raise.
                str = spellcheck.correct(i)
                if str != i:
                    condcorrected = True
                corrected_input += str + ' '
            if condcorrected:
                print 'did you mean: \"' + corrected_input + '\"?'
                input = corrected_input

        currentstring = input.split()
        conversation.append(currentstring)

        # Start searching input for each of the keywords

        # Training mode: read "input|response" pairs until 'exit'
        if input == 'train':
            condtrain = True
            printstr = 'Entering training mode. Enter input and response seperated by a "|": input|response. Type "exit" to quit training mode'
            print printstr
            logstr += '\n' + printstr + '\n'
            while True:
                traininput = raw_input('>')
                if traininput == 'exit':
                    break
                if traininput.find('|') < 0:
                    printstr = 'Format error: use input|response'
                    print printstr
                    logstr += '\n' + printstr + '\n'
                    continue
                traininput = traininput.split('|')
                responsedict[traininput[0]] = traininput[1]
        if condtrain:
            continue

        # A country name alone is too vague: ask for a city
        for i in countrylist:
            for j in currentstring:
                if lower(i[0]) == lower(j):
                    printstr = 'Which city in ' + i[0] + '?'
                    condcountry = True
                    foundinfo = True
                    break
        if condcountry:
            print printstr
            logstr += printstr
            continue

        # "next <n>": the word after 'next' is taken as the number of days,
        # translated through numlist when it is spelled out
        # NOTE(review): 'next' is tested as a substring of the line but looked
        # up as a whole word, and the word after it is assumed to exist.
        if 'next' in input:
            foundinfo = True
            condnext = True
            condtime = False
            numdays = currentstring[currentstring.index('next') + 1]
            for i in numlist:
                if numdays == i[0]:
                    numdays = i[1]
                    break
            if re.match('[0-9]*$',numdays):
                numdays = int(numdays)
            else:
                numdays = ''

        # A general "weather" question clears any specific keyword
        if 'weather' in input:
            foundinfo = True
            condweather = True
            condkey = False
            condtemp = False
            key = ''
            keytemplate = []

        # get key from input
        for i in keylist:
            if i[0] in input:
                # do not mistake the 'sun' in 'sunday' for the keyword
                if 'sunday' in lower(input) and i[0] == 'sun':
                    break
                else:
                    foundinfo = True
                    condkey = True
                    condweather = False
                    condtemp = False
                    key = i[0]
                    keytemplate = i
                    break

        # get time from input
        for i in timelist:
            if lower(i[0]) in input:
                foundinfo = True
                condtime = True
                numdays = ''
                if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
                    # other entries: i[1] is compared with the forecast's
                    # day_of_week below, i[0] is the form shown to the user
                    time = i[1]
                    fulltime = i[0]
                    break
                else:
                    time = i[0]
                    fulltime = time
                    break
        if fulltime == '':
            fulltime = time

        if numdays != '':
            condtime = True
            if numdays > 4:
                printstr = 'Forecast is available only for the next 4 days.'
                print printstr
                logstr += '\n' + printstr + '\n'
            else:
                time = ''
                fulltime = ''
                count = numdays

        # get location from input
        for i in citylist:
            if lower(i[0]) in input:
                foundinfo = True
                condlocation = True
                location = i[0]
                break

        # find if a new location has been mentioned. if not, don't fetch data again
        if location != prevlocation:
            newlocation = True
            condlocation = True
            prevlocation = location
        else:
            newlocation = False

        # NOTE(review): `is ''` tests identity, not equality; `== ''` is
        # presumably what is meant.
        if location is '':
            if prevlocation is '':
                condlocation = False
            else:
                location = prevlocation
                newlocation = False

        location = location.replace(' ','-') #Google requires a '-' in 2-word city names
        result = False

        # get temperature from input
        if 'temperature' in input:
            foundinfo = True
            condtemp = True

        # User gave no infomation about weather. Switching to general predefined response based chat
        if not foundinfo:
            response = predefined_responses.respond(input, responsedict)
            if response == '':
                # No known reply: ask the user to teach one
                printstr = "I don't know what that means. If I asked you the same question, what would you reply?"
                print printstr
                logstr += printstr
                responseinput = raw_input('Me > ')
                logstr += '\nMe > ' + responseinput
                if not responseinput in ('exit', 'quit'):
                    responsedict[input] = responseinput
                    print 'response learnt'
            else:
                printstr = response
                print printstr
                logstr += printstr
            continue

        if condlocation:
            if newlocation: #If location hasn't changed, don't fetch data again. It's already available
                printstr = 'Fetching weather information from Google...'
                print printstr
                logstr += printstr
                # Call Google weather to get current weather conditions
                google_result = weather.get_weather(location)
                if google_result == {}:
                    print 'Could not get data from google.'
                    continue

            # We have a valid location. Get further information

            # User has asked about temperature. Return temperature information and continue
            if condtemp:
                printstr = temperature.temperature(google_result, time)
                print printstr
                logstr += printstr
                continue

            # User has asked about a specific weather condition. Print information.
            # There are 2 possibilities:
            # 1. Find the condition in the next n days
            # 2. Find the condition in a specified day
            if condkey:
                # 1. User has asked about a specific condition in the 'next x days'. Return appropriate response
                printstr = ''
                timecounter = 0  # NOTE(review): never read
                day_of_week = ''
                condition = ''  # NOTE(review): assigned below but never read
                if numdays != '':
                    # look at no more than `count` forecast entries
                    for i in google_result['forecasts']:
                        count -= 1
                        if count < 0:
                            break
                        if key in lower(i['condition']):
                            result = True
                            day_of_week = i['day_of_week']
                            condition = i['condition']
                            break
                    # translate the forecast's day name to the display form
                    for i in timelist:
                        if i[0] != 'today' and i[0] != 'tomorrow':
                            if i[1] == day_of_week:
                                fulltime = i[0]
                                break
                    # keytemplate[3] / [4] are used as the "will" / "will not"
                    # sentence openers -- presumably; verify against keywords.txt
                    if result:
                        printstr = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
                    else:
                        printstr = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'
                    print printstr
                    logstr += printstr
                    continue

                # 2. User has asked about a particular condition on a particular day. Return appropriate response
                if time != 'today' and time != 'tomorrow':
                    for i in google_result['forecasts']:
                        if i['day_of_week'] == time:
                            if key in lower(i['condition']):
                                printstr = keytemplate[3] + keytemplate[0] + ' on'
                            else:
                                printstr = keytemplate[4] + keytemplate[0] + ' on'
                elif time == 'today':
                    fulltime = time
                    if key in lower(google_result['current_conditions']['condition']):
                        printstr = keytemplate[1] + keytemplate[0]
                    else:
                        printstr = keytemplate[2] + keytemplate[0]
                elif time == 'tomorrow':
                    fulltime = time
                    if key in lower(google_result['forecasts'][1]['condition']):
                        printstr = keytemplate[3] + keytemplate[0]
                    else:
                        printstr = keytemplate[4] + keytemplate[0]
                printstr = printstr + ' ' + fulltime
                print printstr
                logstr += printstr
                continue

            # User is asking about today's weather. Print details
            elif time == '' or time == 'today' :
                printstr = sentence.sentence(google_result['current_conditions']['condition'], time)
                printstr += ' ' + fulltime + '. ' + google_result['current_conditions']['humidity'] + ' '
                if google_result['current_conditions'].has_key('wind_condition'):
                    printstr += google_result['current_conditions']['wind_condition']
                print printstr
                logstr += printstr
                continue

            # User is asking about weather of a particular day. Print details
            elif time == 'tomorrow':
                printstr = sentence.sentence(google_result['forecasts'][1]['condition'], time)
                printstr += ' ' + fulltime
                print printstr
                logstr += printstr
            else:
                # search the first four forecast entries for the requested day
                found = False
                for i in range(4):
                    if google_result['forecasts'][i]['day_of_week'] == time:
                        printstr = sentence.sentence(google_result['forecasts'][i]['condition'], time)
                        printstr += " on" + ' ' + fulltime
                        print printstr
                        logstr += printstr
                        found = True
                if not found:
                    printstr = "Forecast for " + time + " is not available currently."
                    print printstr
                    logstr += printstr
            continue
        else:
            printstr = 'What\'s the location?'
            print printstr
            logstr += printstr
    # End of outermost while loop.

    # Print message before exiting program
    dictcount = 0
    for i in responsedict:
        dictcount += 1
    if dictcount > 0:
        printstr = 'Writing new entries to database...'
        print printstr
        logstr += printstr

    # Append every learnt pair as "input|response"; the input is stripped of
    # everything except letters, digits and spaces.
    # NOTE(review): datafile is never closed explicitly.
    datafile = file('predefined_responses.txt', 'a')
    for i in responsedict.keys():
        trimmedi = re.sub('[^a-zA-Z0-9 ]+','', i)
        string = trimmedi + '|' + responsedict[i] + '\n'
        datafile.write(string)
    log.log(logstr)
    print 'Ending the program...'
    print 'Bye!'
    # End of function chat()
def querySubmit(input, math=None):
    """Render the first page of links to pages that contain the input string.

    Each space-separated token is spell-corrected and looked up with
    ``get_site_info``.  Every matching site is listed once, ordered by
    relevance (the number of distinct query words that returned it) and,
    among equally relevant sites, by page rank.  The full ranking is stored in
    the beaker session under ``site_relevance``; only the first 5 entries are
    handed to the template, the rest is left for pagination.

    Args:
        input: the raw query string.
        math: optional value passed through to the template unchanged.

    Returns:
        The rendered ``results.tpl`` template.
    """
    def _field(info, key):
        # Value stored under key, or "" when it is missing or falsy.
        return info[key] if key in info and info[key] else ""

    def _rank_value(rank):
        # Numeric sort key for a page rank; missing or non-numeric ranks
        # sort last instead of being compared as strings against numbers.
        try:
            return float(rank)
        except (TypeError, ValueError):
            return 0.0

    search_words = input.split(" ")
    spelled_words = []
    sites_matched = []   # one record per distinct URL, in first-seen order
    url_to_word = {}     # URL -> distinct query words that returned it
    for word in search_words:
        spelled_words.append(correct(word) if word != "" else "")
        for site in get_site_info(word):
            url = site['Url']
            if url not in url_to_word:
                url_to_word[url] = [word]
                # Record the site only the first time its URL shows up, so a
                # page matching several words is not listed several times.
                sites_matched.append(site)
            elif word not in url_to_word[url]:
                url_to_word[url].append(word)

    isSpelledCorrect = spelled_words == search_words

    # Rank the sites
    site_rank = [(s_info['Url'], _field(s_info, 'PageRank'),
                  _field(s_info, 'Title'), _field(s_info, 'Description'))
                 for s_info in sites_matched]
    # Sort urls based on rank
    site_rank.sort(key=lambda tup: _rank_value(tup[1]), reverse=True)

    # Calculate the relevance of sites based on searched words.  The sort is
    # stable, so the page-rank order above breaks ties.
    site_relevance = [(url, len(url_to_word[url]), title, description)
                      for url, _rank, title, description in site_rank]
    site_relevance.sort(key=lambda tup: tup[1], reverse=True)

    pg_tot = len(site_relevance)
    u_ = min(5, pg_tot)

    # place urls-rank in a session variable for quick access
    session = request.environ.get('beaker.session')
    session['site_relevance'] = site_relevance
    session.save()

    user = None
    signin_state = "Sign in with Google+"
    link = "sign-in"
    if use_google_login:
        session = request.environ.get('beaker.session')
        user = session.get('user', None)
        if user is not None:
            signin_state = "Sign Out"
            link = "sign-out"

    return template(
        'templates/results.tpl',
        spellcheck=isSpelledCorrect,
        corrected_search=" ".join(spelled_words),
        correct_link="?keywords=" + "+".join(spelled_words),
        queryInput=input,
        user=user,
        link=link,
        signin_state=signin_state,
        siteList=site_relevance[0:u_],
        page=1,
        pg_tot=pg_tot,
        math=math,
    )
def spellcheck(name=" "):
    """Wrap the result of ``correct(name)`` in a ``{"candidates": ...}`` dict.

    ``name`` defaults to a single space.
    """
    suggestion = correct(name)
    return {"candidates": suggestion}
def localDictSearch(words):
    """Classify and normalise each entry of ``words`` in place.

    Entries are indexed as ``[flag, text]``.  For every entry:

    * If ``removePunctuation`` leaves nothing, the text is looked up in the
      ``slang`` table (emoticons) and replaced by the first match; the flag is
      set to 0 either way.
    * Otherwise, when the stripped word's flag is 2, the word is looked up in
      ``dictionary``, then in ``slang``, then spell-corrected and looked up in
      ``dictionary`` again.  The first lookup that hits sets the flag (0 for a
      lower-case dictionary entry or a slang hit, 1 otherwise) and, for slang
      and spell-corrected hits, swaps the word inside the text while keeping
      the surrounding punctuation.

    Nothing is returned; ``words`` is modified in place.
    """
    no_row = object()

    def first_row(query, key):
        # Run the lookup and hand back its first row only (no_row if empty).
        CURSOR.execute(query, (key,))
        return next(iter(CURSOR), no_row)

    def noun_flag(row):
        # Dictionary returns proper nouns that match the word:
        # 0 = not a proper noun, 1 = proper noun.
        return 0 if row[0].islower() else 1

    for count, word_old in enumerate(words):
        word = removePunctuation(word_old)

        # As this is the first step of the filter we have to check for words
        # with no letters.
        if word == "":
            row = first_row('select data from slang where key=?', word_old[1])  # checks for emoticons
            if row is not no_row:
                words[count][1] = row[0]  # first match in dictionary
            # Guarantees that letterless words/emoticons are ignored in
            # future except for punctuation.
            words[count][0] = 0
            continue

        if word[0] != 2:
            continue

        # Plain dictionary hit: only the flag changes.
        row = first_row('select data from dictionary where key=?', word[1])
        if row is not no_row:
            words[count][0] = noun_flag(row)
            continue

        # First check if it is an abbreviation/internet slang.
        row = first_row('select data from slang where key=?', word[1])
        if row is not no_row:
            before, _, after = words[count][1].partition(word[1])
            words[count][1] = before + row[0] + after  # keep punctuation around the translation
            words[count][0] = 0  # ignored in future except for punctuation
            continue

        # Second, spellcheck the word.
        word_new = spellcheck.correct(word[1])
        row = first_row('select data from dictionary where key=?', word_new)
        if row is not no_row:
            words[count][0] = noun_flag(row)
            before, _, after = words[count][1].partition(word[1])
            words[count][1] = before + word_new + after
from spellcheck import correct

# Demo: spell-correct a sample phrase word by word and print it before and
# after.  The builtins `dict` and `list` are deliberately not rebound at
# module level, so code elsewhere in the module can still call them.
lrs = 'speling is cool'
lss = correct(lrs)  # whole-phrase correction; result is not used below
words = lrs.split()
newlist = [correct(word) for word in words]

print(lrs)
print(' '.join(newlist))
def evaluate(event):
    """Show the spelling suggestion for the current ``entry`` text in ``res``.

    ``event`` is accepted but not used.
    """
    typed = entry.get()
    suggestion = str(correct(typed))
    res.configure(text="Did you mean:\n" + suggestion)