Пример #1
0
    def __preprocess_search_query(self, query):
        """Turn a raw search query into a list of normalized search tokens.

        Each token is spell-corrected (only when it has at least three
        characters and the spellchecker reports it as unknown), replaced by
        the first autocomplete prediction when there is one, and finally
        lemmatized. Stopwords are removed afterwards unless that would leave
        nothing to search for.

        Args:
            query: The raw query text.

        Returns:
            The list of processed tokens.
        """
        # Tokenize the query, passed as a single (text, weight) pair.
        consumed = AnalyzeContent(text_weights=[(query, 1)])
        search_tokens = consumed.content_token_list()

        for i, token in enumerate(search_tokens):
            # Attempt to spellcheck unknown tokens; short ones are left alone.
            if len(token) >= 3 and self._spellcheck.unknown([token]):
                # correction() may return None when it has no candidate (as
                # recent pyspellchecker releases do); keep the original token
                # in that case rather than handing None to the autocompleter.
                token = self._spellcheck.correction(token) or token
                search_tokens[i] = token

            # Use the previous, already processed token as context when the
            # autocompleter knows it; otherwise complete the token on its own.
            if i > 0 and autocomplete.predict_currword(search_tokens[i - 1]):
                pred = autocomplete.predict(search_tokens[i - 1], token)
            else:
                pred = autocomplete.predict_currword(token)

            # First element of the first prediction, or the token itself when
            # nothing was predicted.
            best_pred = pred[0][0] if len(pred) > 0 else token

            search_tokens[i] = self._lemmatizer.lemmatize(best_pred)

        # Remove stopwords if we have non-stopword terms; otherwise search
        # purely by stopwords as a last resort.
        non_stopwords = [x for x in search_tokens if x not in self._stopwords]
        if non_stopwords:
            search_tokens = non_stopwords

        return search_tokens
Пример #2
0
def predict(statement=()):
    """Print and return the two best completions around the word being typed.

    The pieces of ``statement`` are concatenated and split on single spaces.
    When the result has more than one word, the last word is completed in
    the context of the word before it; otherwise it is completed on its own.

    Args:
        statement: Iterable of strings that together form the typed text.

    Returns:
        A three-item list: the first suggestion (or "N/A"), the word being
        typed, and the second suggestion (or "N/A").
    """
    sentence = "".join(statement).split(" ")

    # Query the predictor once and reuse the result instead of calling it
    # again for every element that is read.
    if len(sentence) > 1:
        predictions = autocomplete.predict_currword_given_lastword(
            sentence[-2], sentence[-1])
    else:
        predictions = autocomplete.predict_currword(sentence[-1])

    count = len(predictions)
    one = extract(predictions[0]) if count >= 1 else None
    two = extract(predictions[1]) if count >= 2 else None

    choices = [
        one if one is not None else "N/A",
        sentence[-1],
        two if two is not None else "N/A",
    ]

    # Single-argument call form works under both Python 2 and Python 3.
    print(" - ".join(choices))
    return choices
Пример #3
0
	def __init__(self, word, prev_word, num_choices):
		"""Build the word tree from autocomplete predictions for ``word``.

		Args:
			word: The (partial) word to complete.
			prev_word: The preceding word, or None when there is none.
			num_choices: Maximum number of predictions requested from the
				autocomplete library.
		"""
		if prev_word is None:
			word_list = autocomplete.predict_currword(word, num_choices)
		else:
			# Query both predictors and keep whichever gives more candidates;
			# on a tie the context-aware list wins.
			by_word = autocomplete.predict_currword(word, num_choices)
			by_context = autocomplete.predict_currword_given_lastword(prev_word, word, num_choices)
			if len(by_word) > len(by_context):
				word_list = by_word
			else:
				word_list = by_context
		# Predictions are presumably (word, count) tuples, which do not allow
		# item assignment; copy each entry into a list so the second slot can
		# be overwritten below.
		word_list = [list(entry) for entry in word_list]
		size = len(word_list)
		self.w_size = size
		# Replace the second slot by a key that alternates around the middle
		# of the list. `**` is exponentiation (the sign flips with i), and `//`
		# keeps the key an integer.
		# NOTE(review): the range stops before the last entry, which therefore
		# keeps its original second value - confirm this is intended.
		for i in range(0, size - 1):
			word_list[i][1] = (size // 2 - 1) + ((-1) ** (i + 1)) * i
		self.make_wordTree(word_list)
Пример #4
0
def get_ac_list(str, count):
    """Return up to ``count`` auto-complete suggestions for the typed text.

    Args:
        str: The typed text; it is split on whitespace into words.
        count: Number of suggestions requested from the library.

    Returns:
        A list of suggestion strings. For two-word input every suggestion is
        prefixed with the first word. The list is empty for blank input, for
        input of more than two words, and when the two-word prediction fails.
    """
    words = str.split()

    # Note that the auto-complete library only supports up to two full words.
    if not words or len(words) > 2:
        return []

    if len(words) == 1:
        return [name for name, _ in
                autocomplete.predict_currword(words[0], count)]

    try:
        return [words[0] + ' ' + name for name, _ in
                autocomplete.predict(words[0], words[1], count)]
    except Exception:
        # Due to a bug in the auto-complete library, words that contain a
        # number (e.g. abcde10) are not handled properly. Stay best-effort,
        # but no longer swallow KeyboardInterrupt/SystemExit.
        return []
def display():
    """Predict completions for the current word and hand the result to ``ignite``.

    Up to 1000 predictions for the module-level ``curr_word`` are requested,
    passed through ``calculate_frequency``, and that value is given to
    ``ignite``.
    """
    # curr_word is only read here, so no `global` declaration is required.
    predictions = autocomplete.predict_currword(curr_word, top_n=1000)
    ignite(calculate_frequency(predictions))
Пример #6
0
def autocomplete_word(text):
    """Predict a completion for ``text``.

    ``autocomplete.load()`` is called on every invocation before predicting.

    Args:
        text: The partial word to complete.

    Returns:
        ``(True, word)`` on success, or ``(False, "Can not predict word")``
        when anything in the prediction step fails (including an empty
        prediction list).
    """
    try:
        autocomplete.load()
        autoword = autocomplete.predict_currword(text)
        # The first prediction is stringified as a whole; replacing non-word
        # characters and then digit runs with spaces leaves just its letters.
        autoword = re.sub(r'[^\w]', ' ', str(autoword[0]))
        autoword = re.sub(r'\d+', ' ', autoword)
        return True, autoword.strip()
    except Exception:
        # Best-effort by design, but let KeyboardInterrupt/SystemExit through.
        return False, "Can not predict word"
Пример #7
0
    async def completing(letters):
        """Train the models on all saved user events, then complete ``letters``.

        Every saved record's ``'event'`` value is stringified and appended,
        space-separated, to one training text that is passed to
        ``models.train_models``. The return value is whatever
        ``autocomplete.predict_currword(letters)`` yields.
        """
        events = await Complete.fetch_all_saved_user_events()
        # Training text: one leading space, then " <event>" per saved record.
        text = ' ' + ''.join(' ' + str(record['event']) for record in events)

        models.train_models(text)
        return autocomplete.predict_currword(letters)
def predict_word(text, max_suggestions=10):
    """Return suggested completions for the last word of ``text``.

    The text is lower-cased first. If it contains a space, the last word is
    completed in the context of the word before it; otherwise the whole text
    is completed as a single word.

    Args:
        text: The text typed so far.
        max_suggestions: Passed to the predictor as ``top_n``.

    Returns:
        A list holding the first element of every prediction.
    """
    lowered = text.lower()
    if " " in text:
        # A space guarantees at least two parts after splitting.
        previous, current = lowered.split(" ")[-2:]
        predictions = autocomplete.predict_currword_given_lastword(
            previous, current, top_n=max_suggestions)
    else:
        predictions = autocomplete.predict_currword(
            lowered, top_n=max_suggestions)
    return [prediction[0] for prediction in predictions]
 def predictCity(self, string, threshold, newLocationDir, export=True):
     """Find the known city name that best fuzzy-matches ``string``.

     Each tag from ``self.getFuzzTags(string)`` is cut to its first four
     characters, completed via the autocomplete library (top 10), and the
     city names stored under each predicted word in ``self.cityNames`` are
     scored against ``string`` with ``fuzz.token_sort_ratio``.

     Args:
         string: The raw location text to match.
         threshold: Minimum score for a candidate to be accepted.
         newLocationDir: Directory holding the ``no_match.txt`` log.
         export: When True, rejected inputs are appended to that log.

     Returns:
         None if ``string`` is empty; the matched city name if the best
         score reaches ``threshold``; False if no candidate was found at
         all; True if a candidate was found but scored below ``threshold``.
     """
     if not string:
         return None
     match = None
     score = 0
     autoCompleteCache = {}
     for word in self.getFuzzTags(string):
         searchWord = word[:4].strip().lower()
         # Test membership rather than truthiness so that prefixes with an
         # empty prediction list are not queried again.
         if searchWord not in autoCompleteCache:
             autoCompleteCache[searchWord] = autocomplete.predict_currword(
                 searchWord, top_n=10)
         for predictedWord in autoCompleteCache[searchWord]:
             predictedCityNames = self.cityNames[predictedWord[0]]
             if not predictedCityNames:
                 continue
             for candidate in predictedCityNames:
                 newScore = fuzz.token_sort_ratio(string, candidate)
                 if newScore > score:
                     score = newScore
                     match = candidate

     if match is not None and score >= threshold:
         print('match:' + str(score) + '---' + str(match) + '---' + string)
         return match

     if export is True:
         # Open the log only when there is something to write, and close it
         # on every path; it used to stay open whenever a match was accepted.
         # A missing match is logged unquoted, a weak match quoted.
         if match is None:
             line = '{0},{1},"{2}"\n'.format(str(score), str(match), string)
         else:
             line = '{0},"{1}","{2}"\n'.format(str(score), str(match),
                                                string)
         with open(os.path.join(newLocationDir, 'no_match.txt'), 'a+') as f:
             f.write(line)

     if match is None:
         return False  # No match
     return True  # Not close enough match
	def predictCity(self,string,threshold,newLocationDir,export=True):
		"""Find the known city name that best fuzzy-matches ``string``.

		Each tag from ``self.getFuzzTags(string)`` is cut to its first four
		characters, completed via the autocomplete library (top 10), and the
		city names stored under each predicted word in ``self.cityNames`` are
		scored against ``string`` with ``fuzz.token_sort_ratio``.

		Args:
			string: The raw location text to match.
			threshold: Minimum score for a candidate to be accepted.
			newLocationDir: Directory holding the ``no_match.txt`` log.
			export: When True, rejected inputs are appended to that log.

		Returns:
			None if ``string`` is empty; the matched city name if the best
			score reaches ``threshold``; False if no candidate was found at
			all; True if a candidate was found but scored below ``threshold``.
		"""
		if not string:
			return None
		match = None
		score = 0
		autoCompleteCache = {}
		for word in self.getFuzzTags(string):
			searchWord = word[:4].strip().lower()
			# Test membership rather than truthiness so that prefixes with an
			# empty prediction list are not queried again.
			if searchWord not in autoCompleteCache:
				autoCompleteCache[searchWord] = autocomplete.predict_currword(searchWord,top_n=10)
			for predictedWord in autoCompleteCache[searchWord]:
				predictedCityNames = self.cityNames[predictedWord[0]]
				if not predictedCityNames:
					continue
				for candidate in predictedCityNames:
					newScore = fuzz.token_sort_ratio(string,candidate)
					if newScore > score:
						score = newScore
						match = candidate

		if match is not None and score >= threshold:
			print('match:'+str(score)+'---'+str(match)+'---'+string)
			return match

		if export is True:
			# Open the log only when there is something to write, and close it
			# on every path; it used to stay open whenever a match was accepted.
			# A missing match is logged unquoted, a weak match quoted.
			if match is None:
				line = '{0},{1},"{2}"\n'.format(str(score),str(match),string)
			else:
				line = '{0},"{1}","{2}"\n'.format(str(score),str(match),string)
			with open(os.path.join(newLocationDir,'no_match.txt'),'a+') as f:
				f.write(line)

		if match is None:
			return False #No match
		return True #Not close enough match
def predict_character(text, max_suggestions=27):
    """Rank the characters most likely to be typed next after ``text``.

    Word predictions are fetched for the last word of ``text`` (using the
    word before it as context when ``text`` contains a space). Each
    prediction votes, with its second element as weight, for the character
    that follows the already typed prefix; a prediction with nothing after
    the prefix votes for a space.

    Args:
        text: The text typed so far.
        max_suggestions: Passed to the predictor as ``top_n``.

    Returns:
        The candidate characters, ordered by descending total weight.
    """
    if " " in text:
        words = text.lower().split(" ")
        typed = len(words[-1])
        results = autocomplete.predict_currword_given_lastword(
            words[-2], words[-1], top_n=max_suggestions)
    else:
        typed = len(text)
        results = autocomplete.predict_currword(text.lower(),
                                                top_n=max_suggestions)

    # Accumulate the weight per next character in one pass; keys are inserted
    # in order of first appearance, which decides ties in the stable sort.
    totals = {}
    for entry in results:
        word = entry[0]
        next_char = word[typed] if word[typed:] != "" else " "
        totals[next_char] = totals.get(next_char, 0) + entry[1]
    return sorted(totals, key=totals.get, reverse=True)