Пример #1
0
def search():
    """Look up a word in the ``words`` collection and render the outcome.

    The word is taken from the form body on POST and from the query string
    on GET.  A missing or blank word renders the plain search form instead
    of failing the request.

    Returns:
        The rendered ``search.html`` when no word was supplied,
        ``notfound.html`` when the word is unknown and ``correct`` has no
        suggestion, and ``results.html`` otherwise.  The ``result`` mapping
        handed to ``results.html`` carries ``entries`` (the matching
        document or ``None``) and ``suggest`` (a suggestion, or ``''`` when
        the word was found).
    """
    # .get() keeps a request without a 'word' field on the "show the search
    # form" path; plain indexing would reject such a request outright.
    if request.method == 'POST':
        word = request.form.get('word', '')
    elif request.method == 'GET':
        word = request.args.get('word', '')
    else:
        word = ''

    word = word.strip()
    if not word:
        return render_template('search.html')

    entries = g.mongodb.words.find_one({"lemma": word})
    result = {'suggest': '', 'entries': entries}

    if entries is None:
        suggest = correct(word)
        if suggest is None:
            return render_template('notfound.html')
        result['suggest'] = suggest

    return render_template('results.html', result=result)
Пример #2
0
def search():
    """Search the ``words`` collection for a lemma and render the result page.

    POST requests supply the word in the form body, GET requests in the
    query string.  When the word is absent or only whitespace the search
    form is rendered.

    Returns:
        ``search.html`` for an empty query; ``notfound.html`` when the lemma
        is not stored and ``correct`` returns ``None``; otherwise
        ``results.html`` with a ``result`` mapping holding ``entries`` and
        ``suggest`` (``''`` when the lemma was found).
    """
    word = ''
    # Use .get() so that a request lacking the 'word' field falls through to
    # the search form rather than being rejected by the key lookup.
    if request.method == 'POST':
        word = request.form.get('word', '')
    elif request.method == 'GET':
        word = request.args.get('word', '')
    word = word.strip()

    if word == '':
        return render_template('search.html')

    words = g.mongodb.words
    entries = words.find_one({"lemma": word})

    result = {}
    if entries is not None:
        result['suggest'] = ''
    else:
        suggest = correct(word)
        if suggest is None:
            return render_template('notfound.html')
        result['suggest'] = suggest
    result['entries'] = entries

    return render_template('results.html', result=result)
Пример #3
0
def checkSpelling():
    """Spell-check the submitted ``word`` form field.

    Returns:
        An empty body with status 200 when ``spellcheck.correct`` returns
        the word unchanged, otherwise the correction as JSON with status
        303.
    """
    word = request.form['word']
    suggestion = spellcheck.correct(word)
    if suggestion == word:
        return "", 200
    return jsonify(suggestion), 303
Пример #4
0
def queryImgSubmit(input):
    """Render the first page of image results for the search string ``input``.

    Every space-separated word is spell-corrected and looked up with
    ``get_images``.  Images are ranked by how many distinct search words
    matched them; the full ranking is stored in the beaker session under
    ``img_relevance`` and the first five entries are passed to the template.
    """
    search_words = input.split(" ")
    spelled_words = []
    img_to_word = {}

    for term in search_words:
        spelled_words.append(correct(term) if term != "" else "")
        for img in get_images(term.lower()):
            matched_terms = img_to_word.setdefault(img, [])
            if term not in matched_terms:
                matched_terms.append(term)

    isSpelledCorrect = spelled_words == list(search_words)

    # Relevance of an image is the number of distinct words that matched it.
    img_relevance = sorted(
        [(img, len(terms)) for img, terms in img_to_word.items()],
        key=lambda pair: pair[1],
        reverse=True)

    pg_tot = len(img_relevance)
    u_ = min(5, pg_tot)

    # Keep the ranking in the session so later pages can reuse it.
    session = request.environ.get('beaker.session')
    session['img_relevance'] = img_relevance
    session.save()

    user = None
    link = "sign-in"
    signin_state = "Sign in with Google+"
    if use_google_login:
        session = request.environ.get('beaker.session')
        user = session.get('user', None)
        if user is not None:
            link = "sign-out"
            signin_state = "Sign Out"

    return template('templates/img_results.tpl',
                    spellcheck=isSpelledCorrect,
                    corrected_search=" ".join(spelled_words),
                    correct_link="?image_keywords=" + "+".join(spelled_words),
                    queryInput=input,
                    user=user,
                    link=link,
                    signin_state=signin_state,
                    imgList=img_relevance[:u_],
                    page=1,
                    pg_tot=pg_tot)
Пример #5
0
def correct_text(line):
    """Return the text of ``line``, cleaned up when ``options.corrected`` is set.

    With correction enabled the text stored in ``line['text']`` is rewritten
    in place ('hb' becomes 'lu-', then dashes are removed), the first word
    goes through ``force_correct`` and every word is lower-cased and
    spell-corrected.  Words whose correction is ``'s'`` are dropped.

    Args:
        line: mapping with a ``'text'`` entry.

    Returns:
        The corrected text, ``''`` when the cleaned text contains no words,
        or the untouched ``line['text']`` when correction is disabled.
    """
    # check if text output should be corrected or not
    if not options.corrected:
        return line['text']

    # fix strange problem where 'lu-' is read as 'hb'
    line['text'] = replace(line['text'], 'hb', 'lu-')
    # remove dashes from text
    line['text'] = replace(line['text'], '- ', '')
    line['text'] = replace(line['text'], '-', '')

    words = line['text'].split()
    if not words:
        # Nothing left to correct; indexing words[0] below would fail.
        return ''

    # correct common spelling errors that the spell-checker cannot catch
    words[0] = force_correct(words[0])

    # Run the spell-checker once per word and drop corrections equal to 's'
    # (short words sometimes get corrected to 's').
    corrected = [correct(lower(word)) for word in words]
    return ' '.join(word for word in corrected if word != 's')
Пример #6
0
def queryVidSubmit(input):
    """Render the first page of YouTube video results for ``input``.

    Each space-separated word is spell-corrected so the template can offer a
    corrected query.  The video list returned by ``youtube_search`` (empty
    for an empty query) is stored in the beaker session under ``vidList``
    and its first five entries are passed to the template.
    """
    search_words = input.split(" ")
    # Build the corrected words from the same split that is used for the
    # comparison below, so an empty query is not reported as misspelled.
    spelled_words = [correct(word) if word != "" else ""
                     for word in search_words]
    isSpelledCorrect = spelled_words == search_words

    if input == '':
        vidList = []
    else:
        vidList = youtube_search({'q': input, 'max_results': 40})

    # place videos in a session variable for quick access
    session = request.environ.get('beaker.session')
    session['vidList'] = vidList
    session.save()

    pg_tot = len(vidList)
    u_ = min(5, pg_tot)

    user = None
    signin_state = "Sign in with Google+"
    link = "sign-in"
    if use_google_login:
        user = session.get('user', None)
        if user is not None:
            signin_state = "Sign Out"
            link = "sign-out"

    return template('templates/video_results.tpl',
                    spellcheck=isSpelledCorrect,
                    corrected_search=" ".join(spelled_words),
                    correct_link="?video_keywords=" + "+".join(spelled_words),
                    queryInput=input,
                    user=user,
                    link=link,
                    signin_state=signin_state,
                    vidList=vidList[0:u_],
                    page=1,
                    pg_tot=pg_tot)
Пример #7
0
def spelltest(tests, verbose=False, bias=None):
    """Run ``correct`` over a set of known misspellings and summarise the result.

    Args:
        tests: mapping of the expected word to a whitespace-separated string
            of misspellings of that word.
        verbose: when true, print every misspelling that was not corrected
            to its target.
        bias: optional amount added to ``NWORDS[target]`` for every target
            before testing.

    Returns:
        A dict with ``n`` (misspellings tried), ``pct`` (share corrected to
        the target, as a string such as ``"74%"``; ``"100%"`` when nothing
        was tried), ``bad`` (misses), ``unknown`` (misses whose target is
        not in ``NWORDS``), ``bias`` and ``secs`` (elapsed whole seconds).
    """
    import time
    # time.clock no longer exists on current Python 3; fall back to it only
    # where perf_counter is unavailable.
    timer = getattr(time, 'perf_counter', None) or time.clock
    n, bad, unknown, start = 0, 0, 0, timer()
    if bias:
        for target in tests:
            NWORDS[target] += bias
    for target, wrongs in tests.items():
        for wrong in wrongs.split():
            n += 1
            w = correct(wrong)
            if w != target:
                bad += 1
                unknown += (target not in NWORDS)
                if verbose:
                    print('%r => %r (%d); expected %r (%d)'
                          % (wrong, w, NWORDS[w], target, NWORDS[target]))
    # Guard the percentage against an empty test set (n == 0).
    pct = int(100. - 100. * bad / n) if n else 100
    return dict(n=n, pct="{}%".format(pct), bad=bad, unknown=unknown,
                bias=bias, secs=int(timer() - start))
Пример #8
0
def signup():
    """Run a spell-corrected Solr search and render the result page.

    Reads the query (``autocomplete``) and the ranking choice
    (``algorithm``) from the submitted form.  Each word of the lower-cased
    query is passed through ``correct``; if any word changes, the whole
    corrected query replaces the original.  The first ten Solr documents are
    turned into title / url / snippet entries, where the snippet is the
    first sentence of the document file containing the query or one of its
    words (cut to 160 characters), falling back to the document's
    ``description``.

    Returns:
        The rendered ``search.html`` template.
    """
    query = request.form['autocomplete']
    radio = request.form["algorithm"]
    print("radio %s" % radio)
    print("Query Term '" + query + "'")

    # Correct every word, keeping the ones that were already right so the
    # corrected query does not lose them.
    terms = query.lower().split(" ")
    corrected_terms = [correct(term) for term in terms]
    isCorrected = corrected_terms != terms
    new_query = []
    if isCorrected:
        new_query = " ".join(corrected_terms)
        query = new_query
    print(new_query)

    # Let requests build the query string so the user's text is escaped.
    params = {'q': query}
    if radio == "pagerank":
        params['sort'] = 'pageRankFile desc'
    response = requests.get("http://localhost:8983/solr/myexample/select",
                            params=params)
    r = json.loads(response.content)['response']
    print(r)

    q_terms = [query] + query.split()
    result = []
    for doc in r['docs'][:10]:
        # The document id is used as the path of the stored HTML file.
        with open(doc["id"]) as f:
            text = text_from_html(f.read())
        print(text)

        text = " ".join(re.split(r"\s+", text))
        sentences = nltk.sent_tokenize(text)

        # First sentence mentioning the full query, else any single term.
        snippet = next(
            (s for d in q_terms for s in sentences if d.lower() in s.lower()),
            "")
        snippet = snippet[0:160]
        if snippet == "" and 'description' in doc:
            snippet = doc['description'][0]

        result.append({
            'title': doc['title'][0],
            'url': doc['og_url'][0] if "og_url" in doc else "",
            'snippet': snippet,
        })

    return render_template('search.html',
                           new_query=new_query,
                           results=result,
                           display=True,
                           query=query,
                           display_new=isCorrected)
Пример #9
0
def chat():
  """Run the interactive weather chat loop on the console.

  Loads the word lists (cities, keywords, time, conditions, numbers,
  countries) through readfile.readfile, prints a greeting and then reads one
  line at a time with raw_input until an exit word is entered.  Each line is
  optionally spell-corrected and scanned for a country, 'next', 'weather',
  a keyword from keywords.txt, a time expression, a city and 'temperature'.
  Lines with none of these are answered through
  predefined_responses.respond; the others are answered from the data
  returned by weather.get_weather.  On exit the responses learnt during the
  session are appended to predefined_responses.txt and the transcript is
  handed to log.log.
  """
  # keyword conditions
  condnext = False
  condweather = False
  condtime = False
  condlocation = False
  condtemp = False
  condkey = False
  condresponse = False
  foundinfo = False
  condtrain = False
  condcountry = False
  condspellcheck = True

  # global variables
  conversation = []
  location = ''
  prevlocation = location 
  time = 'today'
  key = ''
  keytemplate = []
  fulltime = ''
  numdays = ''
  logstr = ''
  printstr = ''
  responsedict = {} 	# Dictionary to hold all inputs without predefined responses. This dictionary will be written into predefined_responses.txt before exiting the program.


  # read data files
  citylist = readfile.readfile('cities.txt')
  keylist = readfile.readfile('keywords.txt')
  timelist = readfile.readfile('time.txt')
  condlist = readfile.readfile('conditions.txt')
  numlist = readfile.readfile('numbers.txt')
  countrylist = readfile.readfile('countries.txt')
  exitlist = ['exit', 'quit', 'bye', 'ok']

  # Greeting message
  printstr =  'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
  print printstr
  logstr += '\n\n' + printstr

  # Start main loop
  while True :
    foundinfo = False
    condtrain = False
    condcountry = False
    # read input from user
    input = raw_input('\nMe > ')
    logstr += '\nMe > ' + input + '\nBot > '
    # 'ok' is treated as a possible exit word and asks for confirmation;
    # the other exit words leave the loop immediately.
    if input in exitlist:
      if input == 'ok':
	exitans = raw_input("Do you want to quit? (y/n)")
	if exitans in ('y','Y','Yes','YES','yes'):
	  break
	else:
	  continue
      break
    
    if input == 'disable spellcheck':
      condspellcheck = False
      continue
    
    if input == 'enable spellcheck':
      condspellcheck = True
      continue
    
    condcorrected = False
    if condspellcheck:
      corrected_input = ''
      # NOTE(review): the loop below binds the local name 'str', which is
      # later called as str(numdays) in the 'next x days' branch - confirm
      # that call still works once a word has been spell-checked.
      for i in input.split():
	str = spellcheck.correct(i)
	if str != i:
	  condcorrected = True
	corrected_input += str + ' '
      if condcorrected:
	print 'did you mean: \"' + corrected_input + '\"?'
	input = corrected_input
    
    currentstring = input.split()
    conversation.append(currentstring)
    
    # Start searching input for each of the keywords
    
    if input == 'train':
      condtrain = True
      printstr =  'Entering training mode. Enter input and response seperated by a "|": input|response. Type "exit" to quit training mode'
      print printstr
      logstr += '\n' + printstr + '\n'
      
      while True:
	traininput = raw_input('>')
	if traininput == 'exit':
	  break
	if traininput.find('|') < 0:
	  printstr =  'Format error: use input|response'
	  print printstr
	  logstr += '\n' + printstr + '\n'
	  continue
	traininput = traininput.split('|')
	responsedict[traininput[0]] = traininput[1]
    
    if condtrain:
      continue
    


    # A country name without a city: ask which city is meant.
    for i in countrylist:
      for j in currentstring:
	if lower(i[0]) == lower(j):
	  printstr = 'Which city in ' + i[0] + '?'
	  condcountry = True
	  foundinfo = True
	  break
      
    if condcountry:
      print printstr
      logstr += printstr
      continue
    

    # "next <n>": the word following 'next' is read as a number of days,
    # translated through numlist when it matches an entry there.
    if 'next' in input:
      foundinfo = True
      condnext = True
      condtime = False
      numdays = currentstring[currentstring.index('next') + 1]
      for i in numlist:
	if numdays == i[0]:
	  numdays = i[1]
	  break
      # NOTE(review): the pattern '[0-9]*$' also matches an empty string,
      # for which int() raises ValueError - confirm numdays cannot be ''.
      if re.match('[0-9]*$',numdays):
	numdays = int(numdays)
      else:
	numdays = ''
    
    if 'weather' in input:
      foundinfo = True
      condweather = True
      condkey = False
      condtemp = False
      key = ''
      keytemplate = []

    # get key from input
    for i in keylist:
      if i[0] in input:
	if 'sunday' in lower(input) and i[0] == 'sun':
	  break
	else:
	  foundinfo = True
	  condkey = True
	  condweather = False
	  condtemp = False
	  key = i[0]
	  keytemplate = i
	  break

    # get time from input
    # NOTE(review): the timelist loop below is indented inside the keylist
    # loop above and rebinds its loop variable i - confirm this nesting is
    # intended.
      for i in timelist:
	if lower(i[0]) in input:
	  foundinfo = True
	  condtime = True
	  numdays = ''
	  if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
	    time = i[1]
	    fulltime = i[0]
	    break
	  else:
	    time = i[0]
	    fulltime = time
	    break
    if fulltime == '':
      fulltime = time

    if numdays != '':
      condtime = True
      if numdays > 4:
	printstr =  'Forecast is available only for the next 4 days.'
	print printstr
	logstr += '\n' + printstr + '\n'
      else:
	time = ''
	fulltime = ''
	count = numdays
    
    # get location from input
    for i in citylist:
      if lower(i[0]) in input:
	foundinfo = True
	condlocation = True
	location = i[0]
	break
    
    # find if a new location has been mentioned. if not, don't fetch data again
    if location != prevlocation:
      newlocation = True
      condlocation = True
      prevlocation = location
    else:
      newlocation = False
    
    # NOTE(review): 'is' compares identity, not equality; these two checks
    # rely on '' being a shared object - confirm '==' was meant.
    if location is '':
      if prevlocation is '':
	condlocation = False
      else:
	location = prevlocation
	newlocation = False
    
    location = location.replace(' ','-') #Google requires a '-' in 2-word city names
    result = False
    
    # get temperature from input
    if 'temperature' in input:
      foundinfo = True
      condtemp = True

    # User gave no infomation about weather. Switching to general predefined response based chat
    if not foundinfo:
      response = predefined_responses.respond(input, responsedict)
      if response == '':
	printstr =  "I don't know what that means. If I asked you the same question, what would you reply?"
	print printstr
	logstr += printstr
	responseinput = raw_input('Me > ')
	logstr += '\nMe > ' + responseinput
	if not responseinput in ('exit', 'quit'):
	  responsedict[input] = responseinput
	  print 'response learnt'
      else:
	printstr =  response
	print printstr
	logstr += printstr
      continue
    
    # NOTE(review): google_result is only assigned inside the newlocation
    # branch; the branches further down read it on every path - confirm it
    # is always set before use.
    if condlocation:
      if newlocation:	#If location hasn't changed, don't fetch data again. It's already available
	printstr =  'Fetching weather information from Google...'
	print printstr
	logstr += printstr
	# Call Google weather to get current weather conditions
	google_result = weather.get_weather(location)
	if google_result == {}:
	  print 'Could not get data from google.'
	  continue
      
      
  # We have a valid location. Get further information

  # User has asked about temperature. Return temperature information and continue
      if condtemp:
	printstr =  temperature.temperature(google_result, time)
	print printstr
	logstr += printstr
	continue
      
  # User has asked about a specific weather condition. Print information. There are 2 possibilities:
  #    1. Find the condition in the next n days
  #    2. Find the condition in a specified day

      if condkey:

  # 1. User has asked about a specific condition in the 'next x days'. Return appropriate response
	printstr = ''
	timecounter = 0

	day_of_week = ''
	condition = ''
	if numdays != '':
	  for i in google_result['forecasts']:
	    count -= 1
	    if count < 0:
	      break
	    if key in lower(i['condition']):
	      result = True
	      day_of_week = i['day_of_week']
	      condition = i['condition']
	      break

	  for i in timelist:
	    if i[0] != 'today' and i[0] != 'tomorrow':
	      if i[1] == day_of_week:
		fulltime = i[0]
		break
	  if result:
	    printstr = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
	  else:
	    printstr = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'

	  print printstr
	  logstr += printstr
	  continue

  # 2. User has asked about a particular condition on a particular day. Return appropriate response
	if time != 'today' and time != 'tomorrow':
	  for i in google_result['forecasts']:
	    if i['day_of_week'] == time:
	      if key in lower(i['condition']):
		printstr = keytemplate[3] + keytemplate[0] + ' on'
	      else:
		printstr = keytemplate[4] + keytemplate[0] + ' on'
	elif time == 'today':
	  fulltime = time
	  if key in lower(google_result['current_conditions']['condition']):
	    printstr = keytemplate[1] + keytemplate[0]
	  else:
	    printstr = keytemplate[2] + keytemplate[0]
	elif time == 'tomorrow':
	  fulltime = time
	  if key in lower(google_result['forecasts'][1]['condition']):
	    printstr = keytemplate[3] + keytemplate[0]
	  else:
	    printstr = keytemplate[4] + keytemplate[0]

	printstr =  printstr + ' ' + fulltime
	print printstr
	logstr += printstr
	continue

  # User is asking about today's weather. Print details
      elif time == '' or time == 'today' :
	printstr = sentence.sentence(google_result['current_conditions']['condition'], time)
	printstr += ' ' + fulltime + '. ' + google_result['current_conditions']['humidity'] + ' '
	if google_result['current_conditions'].has_key('wind_condition'):
	  printstr += google_result['current_conditions']['wind_condition']
	print printstr
	logstr += printstr
	continue

  # User is asking about weather of a particular day. Print details
      elif time == 'tomorrow':
	printstr = sentence.sentence(google_result['forecasts'][1]['condition'], time)
	printstr += ' ' + fulltime
	print printstr
	logstr += printstr
      else:
	found = False
	for i in range(4):
	  if google_result['forecasts'][i]['day_of_week'] == time:
	    printstr = sentence.sentence(google_result['forecasts'][i]['condition'], time)
	    printstr +=   " on" + ' ' +  fulltime
	    print printstr
	    logstr += printstr
	    found = True
	if not found:
	  printstr =  "Forecast for " + time + " is not available currently."
	  print printstr
	  logstr += printstr
	continue
      
    else:
      printstr =  'What\'s the location?'
      print printstr
      logstr += printstr
  # End of outermost while loop.

  # Print message before exiting program
  dictcount = 0
  for i in responsedict:
    dictcount += 1
  if dictcount > 0:
    printstr =  'Writing new entries to database...'
    print printstr
    logstr += printstr
  # Append every learnt input|response pair; the input is stripped of
  # everything except letters, digits and spaces before being written.
  # NOTE(review): datafile is never closed explicitly in this function.
  datafile = file('predefined_responses.txt', 'a')
  for i in responsedict.keys():
    trimmedi = re.sub('[^a-zA-Z0-9 ]+','', i)
    string = trimmedi + '|' + responsedict[i] + '\n'
    datafile.write(string)
  log.log(logstr)
  print 'Ending the program...'
  print 'Bye!'
  
# End of function chat()
Пример #10
0
def querySubmit(input, math=None):
    """Render the first page of links to pages that contain the input string.

    Every space-separated word is spell-corrected and looked up with
    ``get_site_info``.  Sites are ordered by page rank and then, stably, by
    relevance (the number of distinct search words that matched the site's
    URL).  The full ordering is stored in the beaker session under
    ``site_relevance``; only the first 5 entries are passed to the template,
    the rest is left for pagination.

    Args:
        input: the raw search string.
        math: optional value forwarded unchanged to the template.

    Returns:
        The rendered ``templates/results.tpl``, whether or not Google login
        is enabled.
    """
    def _field(info, key):
        # Value stored under key, or "" when it is missing or falsy.
        value = info[key] if key in info else ""
        return value or ""

    search_words = input.split(" ")
    spelled_words = []
    sites_matched = []
    url_to_word = {}

    for word in search_words:
        spelled_words.append(correct(word) if word != "" else "")
        sites_info = get_site_info(word)
        # NOTE(review): a site returned for several words is added once per
        # word here, so it can appear more than once in the final list -
        # confirm whether de-duplication is wanted.
        sites_matched += sites_info
        for site in sites_info:
            matched_words = url_to_word.setdefault(site['Url'], [])
            if word not in matched_words:
                matched_words.append(word)

    isSpelledCorrect = spelled_words == search_words

    # Rank the sites: (url, page rank, title, description)
    site_rank = [(_field(s_info, 'Url'), _field(s_info, 'PageRank'),
                  _field(s_info, 'Title'), _field(s_info, 'Description'))
                 for s_info in sites_matched]

    # Sort urls based on rank
    site_rank.sort(key=lambda tup: tup[1], reverse=True)

    # Calculate the relevance of sites based on searched words; the sort is
    # stable, so page-rank order is kept among equally relevant sites.
    site_relevance = [(site[0], len(url_to_word[site[0]]), site[2], site[3])
                      for site in site_rank]
    site_relevance.sort(key=lambda tup: tup[1], reverse=True)

    pg_tot = len(site_relevance)
    u_ = min(5, pg_tot)

    # place urls-rank in a session variable for quick access
    session = request.environ.get('beaker.session')
    session['site_relevance'] = site_relevance
    session.save()

    user = None
    signin_state = "Sign in with Google+"
    link = "sign-in"
    if use_google_login:
        user = session.get('user', None)
        if user is not None:
            signin_state = "Sign Out"
            link = "sign-out"

    # The page is rendered regardless of the login setting.
    return template('templates/results.tpl',
                    spellcheck=isSpelledCorrect,
                    corrected_search=" ".join(spelled_words),
                    correct_link="?keywords=" + "+".join(spelled_words),
                    queryInput=input,
                    user=user,
                    link=link,
                    signin_state=signin_state,
                    siteList=site_relevance[0:u_],
                    page=1,
                    pg_tot=pg_tot,
                    math=math)
Пример #11
0
def spellcheck(name=" "):
    """Return the result of ``correct(name)`` under the key ``"candidates"``."""
    candidates = correct(name)
    return {"candidates": candidates}
Пример #12
0
def localDictSearch(words):
    """First filter step: look every entry of ``words`` up in the local tables.

    Entries are updated in place: index 0 is set to 0 (not a proper noun /
    ignore later) or 1 (proper noun), and index 1 may be replaced by a slang
    translation or a spell-corrected form with the surrounding punctuation
    kept.  Nothing is returned.
    """
    for idx, original in enumerate(words):
        stripped = removePunctuation(original)

        # This is the first filter step, so entries without any letters
        # still have to be handled here.
        if stripped == "":
            # Checks for emoticons
            CURSOR.execute('select data from slang where key=?', (original[1],))
            for row in CURSOR:
                # Take the first match in the slang table
                words[idx][1] = row[0]
                break
            # Letterless entries/emoticons are ignored later except for punctuation
            words[idx][0] = 0
            continue

        if stripped[0] != 2:
            continue

        bare = stripped[1]
        matched = False  # set once the word is found in one of the tables

        CURSOR.execute('select data from dictionary where key=?', (bare,))
        for row in CURSOR:
            matched = True
            # The dictionary also returns proper nouns that match the word:
            # 0 = not a proper noun, 1 = proper noun
            words[idx][0] = 0 if row[0].islower() else 1
            break

        if not matched:
            # First check whether it is an abbreviation/internet slang
            CURSOR.execute('select data from slang where key=?', (bare,))
            for row in CURSOR:
                matched = True
                # Re-attach the punctuation around the translation
                before, _, after = words[idx][1].partition(bare)
                words[idx][1] = before + row[0] + after
                # Ignored later except for punctuation
                words[idx][0] = 0
                break

        if not matched:
            # Second, spell-check the word and look the correction up
            respelled = spellcheck.correct(bare)
            CURSOR.execute('select data from dictionary where key=?', (respelled,))
            for row in CURSOR:
                words[idx][0] = 0 if row[0].islower() else 1
                # Re-attach the punctuation around the corrected word
                before, _, after = words[idx][1].partition(bare)
                words[idx][1] = before + respelled + after
                break
Пример #13
0
from spellcheck import correct
# Demo: spell-correct a phrase word by word and print it next to the original.
lrs = 'speling is cool'

# Correct each word separately; no builtin names (list, dict) are rebound.
corrected_words = [correct(word) for word in lrs.split()]

print(lrs)
print(' '.join(corrected_words))




Пример #14
0
def evaluate(event):
    """Show the spelling suggestion for the text currently in ``entry``.

    ``event`` is accepted but not used.
    """
    suggestion = str(correct(entry.get()))
    res.configure(text="Did you mean:\n" + suggestion)
Пример #15
0
def spellcheck(name=" "):
    """Spell-correct ``name`` and wrap the outcome in a one-key mapping."""
    corrected = correct(name)
    return {"candidates": corrected}