def find_definition(tree, query):
    """Return the ``def-para`` element whose defined term best matches *query*.

    Looks up every ``def-term`` element under ``def-para/para/text`` whose
    string value contains *query*, picks the one whose ``text`` has the
    smallest ``levenshtein`` distance to *query* (the first one wins on a
    tie), and returns the first ``def-para`` ancestor that
    ``iterancestors`` yields for it.

    Args:
        tree: Object exposing ``xpath``; the matched nodes must expose
            ``iterancestors`` (assumes an lxml tree/element -- confirm).
        query: Text to look for inside the defined terms.

    Raises:
        CustomException: If no term matches or any step of the lookup fails.
    """
    try:
        # Pass the query as an XPath variable rather than splicing it into
        # the expression, so a query containing a quote cannot break the path.
        nodes = tree.xpath(
            ".//def-para/para/text/def-term[contains(., $query)]",
            query=query,
        )
        # min() raises on an empty match list, which is reported below.
        closest = min(nodes, key=lambda node: levenshtein(query, node.text))
        return next(closest.iterancestors(tag='def-para'))
    except Exception:
        # Every failure mode is deliberately collapsed into one error type.
        raise CustomException("Path for definition not found")
def getCodeNumberForProvince(province):
    """Return the ``code_number`` of the known province closest to *province*.

    Every entry returned by ``getProvinces()`` is scored by the
    ``levenshtein`` distance between its lowercased ``pretty_name`` and the
    lowercased *province*; the code of the lowest-scoring entry is returned
    (the first one wins on a tie).
    """
    def distance_to(candidate_name):
        # Case-insensitive distance from the requested province name.
        return levenshtein(province.lower(), candidate_name.lower())

    # Collect (code, distance) pairs for all known provinces.
    scored_codes = []
    for entry in getProvinces():
        scored_codes.append(
            (entry['code_number'], distance_to(entry['pretty_name']))
        )

    # The smallest distance identifies the most similar province name.
    best_code, _ = min(scored_codes, key=lambda pair: pair[1])
    return best_code
def name_prompt(prompt, people, name):
    """Ask the user to pick a close match for *name* or to type a name.

    The five entries of *people* whose lowercased ``full_name`` is closest
    to *name* (by ``levenshtein`` distance) are listed as numbered choices.
    The prompt repeats until the user enters a valid number or any
    non-numeric text.

    Args:
        prompt: Text printed before the list of choices.
        people: Iterable of objects exposing a ``full_name`` attribute.
        name: Name to match against.
            NOTE(review): compared as given while ``full_name`` is
            lowercased -- confirm callers pass a lowercase name.

    Returns:
        The ``full_name`` of the selected person, or the raw text the user
        typed when it is not a number.
    """
    # The ranking does not depend on user input, so compute it once.
    choices = sorted(
        people,
        key=lambda candidate: levenshtein(candidate.full_name.lower(), name),
    )[:5]

    while True:
        print(prompt)
        for number, choice in enumerate(choices, 1):
            print("{}) {}".format(number, choice.full_name))

        user_choice = input('Choose one or write in: ')
        try:
            selection = int(user_choice)
        except ValueError:
            # Not a number: treat the input as a written-in name.
            # TODO: move looping until valid name into here?
            return user_choice

        # Explicit bounds check: 0 or a negative number would otherwise
        # index from the end of the list and silently pick the wrong person.
        if 1 <= selection <= len(choices):
            return choices[selection - 1].full_name
        print("Invalid selection")
def get_lv_score(self, tweet_text):
    """Score *tweet_text* against the word clusters and return a signed score.

    The text is stripped and split on single spaces. For each cluster in
    ``self.clusters`` the per-word best ``levenshtein`` value (floored at 0)
    is summed over the words and divided by the word count. The cluster
    with the highest such value above 0 wins; its score is negated when
    ``self.sentiments`` marks that cluster as ``self.sents['neg']``.

    NOTE(review): the largest ``levenshtein`` value is treated as the best
    match, so the helper presumably returns a similarity rather than an edit
    distance here -- confirm.

    Args:
        tweet_text: Text to score.

    Returns:
        The signed best cluster score, or 0 when no cluster scores above 0.
    """
    # split(' ') always yields at least one item, so the division is safe.
    words = tweet_text.strip().split(' ')

    best_score = 0
    best_name = None
    for name, cluster in self.clusters.items():
        cluster_score = 0
        for word in words:
            # Leading 0 keeps the floor (and the result) for empty clusters.
            cluster_score += max(
                [0] + [levenshtein(word, sent_word) for sent_word in cluster]
            )
        # NOTE(review): truncates under Python 2 if the values are ints.
        cluster_score /= len(words)
        if cluster_score > best_score:
            best_score = cluster_score
            best_name = name

    # No cluster scored above zero: there is no sentiment to look up, so
    # return the neutral score instead of failing on a missing key.
    if best_name is None:
        return best_score

    if self.sentiments[best_name] == self.sents['neg']:
        best_score *= -1
    return best_score
matching_code = None # Go over all the recorded codes for current_name, current_code in codes.items(): # If there's a direct match if province["pretty_name"].lower() in current_name.lower(): matching_code = current_code break # If no matching code has been found, try to find the most similar province name # using levenshtein distance if not matching_code: min_val = len(current_name) * 2 for current_name, current_code in codes.items(): cur_val = levenshtein(province["pretty_name"].lower(), current_name.lower()) if min_val > cur_val: min_val = cur_val matching_code = current_code province["code_number"] = matching_code # Write the results to the file with io.open(output_file, "w", encoding='utf8') as the_file: the_file.write(json.dumps(provinces, indent=2, ensure_ascii=False)) print "{} provinces written to {}".format(len(provinces), output_file)