def search(self, phrase, subtitle=None, tolerance=None, func=None):
    """
    obj.search(phrase, subtitle=None, tolerance=None) -> result generator

    Generate the results returned by self.command for the given search
    phrase, dropping any whose score is greater than the tolerance.

    phrase    -- primary search phrase, sent with the '-M' option.
    subtitle  -- optional secondary phrase; when it is not None both
                 phrases are sent with the '-N' option instead.
    tolerance -- highest score a result may have and still be yielded.
                 When None, the 'MetadataLookupTolerance' setting is
                 read (default 5) and converted with int().
    func      -- optional callable(phrase, subtitle, result) returning
                 the score. When omitted, the score is the levenshtein
                 value between the result and the search terms.

    The score is stored on each result as 'levenshtein' before the
    tolerance test, so skipped results are annotated as well.
    """
    scorer = func
    if not scorer:
        if subtitle is not None:
            def scorer(p, s, r):
                # A subtitle search is judged on the subtitle alone.
                return levenshtein(r.subtitle, s)
        else:
            def scorer(p, s, r):
                # Compare against 'title : subtitle' when the result has
                # a subtitle, otherwise against the bare title.
                if r.subtitle is not None:
                    return levenshtein('%s : %s' % (r.title, r.subtitle), p)
                return levenshtein(r.title, p)

    if tolerance is None:
        settings = self.db.settings.NULL
        tolerance = int(settings.get('MetadataLookupTolerance', 5))

    if subtitle is not None:
        results = self.command('-N', '"%s" "%s"' % (phrase, subtitle))
    else:
        results = self.command('-M', '"%s"' % phrase)

    for result in results:
        result.levenshtein = scorer(phrase, subtitle, result)
        if result.levenshtein > tolerance:
            continue
        yield result
def search(self, phrase, subtitle=None, tolerance=None, func=None):
    """
    obj.search(phrase, subtitle=None, tolerance=None) -> result generator

    Returns a generator over the results of self.command for the search
    phrase. When 'subtitle' is not None it is sent along as a secondary
    phrase (option '-N' instead of '-M'). Every result is given a
    'levenshtein' attribute holding its score, and results whose score
    is greater than 'tolerance' are skipped.

    'tolerance' falls back to int() of the 'MetadataLookupTolerance'
    setting (default 5) when it is None. 'func', when supplied, is called
    as func(phrase, subtitle, result) to produce the score in place of
    the built-in levenshtein comparison.
    """
    with_subtitle = subtitle is not None

    compare = func
    if not compare:
        def by_subtitle(p, s, r):
            # Only the subtitles are compared when one was searched for.
            return levenshtein(r.subtitle, s)

        def by_title(p, s, r):
            # Results carrying a subtitle are compared as
            # 'title : subtitle'; others by title only.
            if r.subtitle is None:
                return levenshtein(r.title, p)
            return levenshtein('%s : %s' % (r.title, r.subtitle), p)

        compare = by_subtitle if with_subtitle else by_title

    if tolerance is None:
        tolerance = int(
            self.db.settings.NULL.get('MetadataLookupTolerance', 5))

    if with_subtitle:
        option, query = '-N', '"%s" "%s"' % (phrase, subtitle)
    else:
        option, query = '-M', '"%s"' % phrase

    for item in self.command(option, query):
        item.levenshtein = compare(phrase, subtitle, item)
        if item.levenshtein > tolerance:
            continue
        yield item
def calculateZipConfidence(zip1, zip2):
    """Score how closely two zip codes match.

    Returns None when either argument is the empty string. Otherwise
    returns 1 / (d + 1) ** d, where d is the value of
    utility.levenshtein(zip1, zip2).
    """
    if zip1 == "" or zip2 == "":
        # Nothing to compare against.
        return None

    edits = utility.levenshtein(zip1, zip2)
    return 1 / ((edits + 1) ** edits)
def calculateMiddleIConfidence(middle1, middle1AB, middle1DM,
                               middle2, middle2AB, middle2DM):
    """Score how closely two middle names / initials match.

    middle1, middle2     -- the values being compared.
    middle1AB, middle2AB -- compared against the other side's full value
                            (presumably abbreviated forms; confirm with
                            the caller).
    middle1DM, middle2DM -- passed to utility.compareDoubleMetaphones
                            (presumably precomputed double-metaphone
                            codes; confirm with the caller).

    Returns None when either value is the empty string and 1 when the
    two values are equal. Otherwise returns the sum of:
      0.15  if one side's AB form equals the other side's value
      0.15  if utility.compareByContains is truthy
      0.35  if utility.compareDoubleMetaphones is truthy
      0.35 * 1 / (d + 1) ** (0.2 * d), d = utility.levenshtein(...)
    """
    if middle1 == "" or middle2 == "":
        return None

    score = 0
    if middle1 == middle2:
        return 1

    if middle1AB == middle2 or middle2AB == middle1:
        score += 0.15

    # NOTE: the special-character, nickname, visually-similar-character
    # and keyboard-distance comparisons are not applied here.
    if utility.compareByContains(middle1, middle2):
        score += 0.15
    if utility.compareDoubleMetaphones(middle1DM, middle2DM):
        score += 0.35

    edits = utility.levenshtein(middle1, middle2)
    score += 1 / ((edits + 1) ** (0.2 * edits)) * .35
    return score
def calculateCityConfidence(city1, city2):
    """Score how closely two city names match, capped at 1.

    Returns None when either name is the empty string. Otherwise the
    result is min(1, lev + phonetic + abbreviation + shortened) where:
      lev          = 1 / (d + 1) ** (d + 1), d = utility.levenshtein(...)
      phonetic     = 0.5 if utility.compareByDoubleMetaphone is truthy
      abbreviation = (shorter length) / 5 if
                     utility.compareByAbbrevSentence is truthy
      shortened    = (shorter length) / (longer length) if the
                     abbreviation check failed and
                     utility.compareByContains is truthy
    Terms whose check fails contribute 0.
    """
    if city1 == "" or city2 == "":
        return None

    edits = utility.levenshtein(city1, city2)

    phonetic = 0.5 if utility.compareByDoubleMetaphone(city1, city2) else 0

    abbreviated = 0
    shortened = 0
    if utility.compareByAbbrevSentence(city1, city2):
        abbreviated = min(len(city1), len(city2)) / 5
    elif utility.compareByContains(city1, city2):
        # Only tried when the names are not abbreviations of each other.
        shortened = min(len(city1), len(city2)) / max(len(city1), len(city2))

    raw = 1 / ((edits + 1) ** (edits + 1)) + phonetic + abbreviated + shortened
    return min(raw, 1)
def get_query_prediction_value(query_prediction, title):
    """Return the weighted levenshtein total between predictions and a title.

    query_prediction -- JSON text decoding to an object; each key is
                        compared with 'title' and each value is converted
                        with float() and used as that key's weight.
    title            -- string every key is compared against.

    Returns the sum of float(value) * utility.levenshtein(key, title)
    over all entries, or 0.0 for an empty object.
    """
    # Named 'predictions' so the builtin 'dict' is not shadowed.
    predictions = json.loads(s=query_prediction)

    diff = 0.0
    for candidate, weight in predictions.items():
        diff += float(weight) * utility.levenshtein(candidate, title)
    return diff
def calculateCityConfidence(city1, city1AB, city1DM, city2, city2AB, city2DM):
    """Score how closely two city names match.

    city1, city2     -- the names being compared.
    city1AB, city2AB -- compared against the other side's full name
                        (presumably abbreviated forms; confirm with the
                        caller).
    city1DM, city2DM -- passed to utility.compareSentDoubleMetaphones
                        (presumably precomputed double-metaphone codes;
                        confirm with the caller).

    Returns None when either name is the empty string and 1 when the
    names are equal. Otherwise returns the sum of:
      0.3 * 1 / (d + 1) ** (d + 1), d = utility.levenshtein(city1, city2)
      0.5  if utility.compareSentDoubleMetaphones is truthy
      0.1  if one side's AB form equals the other side's name
      0.1  if utility.compareByContains is truthy
    """
    if city1 == "" or city2 == "":
        return None
    if city1 == city2:
        return 1

    distance = utility.levenshtein(city1, city2)
    # The levenshtein share was previously computed but never added to the
    # total, so it could not influence the result; it is the remaining 0.3
    # of the 0.3 + 0.5 + 0.1 + 0.1 weighting.
    total = 1 / pow(distance + 1, distance + 1) * 0.3

    if utility.compareSentDoubleMetaphones(city1DM, city2DM):
        total += 0.5
    if city1 == city2AB or city1AB == city2:
        total += 0.1
    if utility.compareByContains(city1, city2):
        total += 0.1
    return total
def calculateNameConfidence(name1, name1DM, name2, name2DM):
    """Score how closely two names match.

    name1, name2     -- the names being compared.
    name1DM, name2DM -- passed to utility.compareDoubleMetaphones
                        (presumably precomputed double-metaphone codes;
                        confirm with the caller).

    Returns 1 when the names are equal. Otherwise returns the sum of:
      0.2  if utility.compareByContains is truthy
      0.4  if utility.compareDoubleMetaphones is truthy
      0.4 * 1 / (d + 1) ** (0.9 * d), d = utility.levenshtein(...)

    Unlike the other confidence helpers there is no empty-string check.
    """
    score = 0
    if name1 == name2:
        return 1

    # NOTE: the abbreviation, special-character, nickname,
    # visually-similar-character and keyboard-distance comparisons are
    # not applied here.
    if utility.compareByContains(name1, name2):
        score += 0.2
    if utility.compareDoubleMetaphones(name1DM, name2DM):
        score += 0.4

    edits = utility.levenshtein(name1, name2)
    score += 1 / ((edits + 1) ** (0.9 * edits)) * 0.4
    return score
def calculateStreetConfidence(street1, street1DM, street2, street2DM):
    """Score how closely two street strings match.

    street1, street2     -- the street strings being compared.
    street1DM, street2DM -- passed to utility.compareSentDoubleMetaphones
                            (presumably precomputed double-metaphone
                            codes; confirm with the caller).

    Returns None when either street is the empty string and 1 when they
    are equal. Otherwise returns the sum of:
      0.5  if utility.compareSentDoubleMetaphones is truthy
      0.5 * 1 / (d + 1) ** (0.2 * d), d = utility.levenshtein(...)
    """
    if street1 == "" or street2 == "":
        return None

    # NOTE: street-suffix abbreviations are not expanded here; the
    # strings are compared exactly as received.
    if street1 == street2:
        return 1

    score = 0
    if utility.compareSentDoubleMetaphones(street1DM, street2DM):
        score += 0.5

    edits = utility.levenshtein(street1, street2)
    score += 1 / ((edits + 1) ** (0.2 * edits)) * 0.5
    return score
def calculateStreetConfidence(street1, street2):
    """Score how closely two street strings match.

    Returns None when either street is the empty string. Each street is
    split on single spaces and its last word is replaced by
    dictionaries.streets[last word] when that key exists; if the word
    lists are then equal the result is 1. Otherwise the result is

        0.5 * metaphone + 0.5 * lev

    where
      metaphone = (number of position-wise word pairs for which
                   utility.compareByDoubleMetaphone is truthy)
                  / (word count of the longer street)
      lev       = 1 / (d + 1) ** (0.2 * d), with d the value of
                  utility.levenshtein on the re-joined streets.
    """
    if street1 == "" or street2 == "":
        return None

    words1 = street1.split(' ')
    words2 = street2.split(' ')

    # Replace the trailing word with its dictionary entry, if there is one.
    try:
        words1[-1] = dictionaries.streets[words1[-1]]
    except KeyError:
        pass
    try:
        words2[-1] = dictionaries.streets[words2[-1]]
    except KeyError:
        pass

    if words1 == words2:
        return 1

    # Count matching word pairs across the whole street. The counter used
    # to be reset and overwritten on every iteration, so only the final
    # word pair could contribute to the score.
    matches = 0
    for word1, word2 in zip(words1, words2):
        if word1 is None or word2 is None:
            break
        if utility.compareByDoubleMetaphone(word1, word2):
            matches += 1
    # float() keeps this a true division under Python 2 as well.
    metaphoneConfidence = float(matches) / max(len(words1), len(words2))

    joined1 = ' '.join(str(elem) for elem in words1)
    joined2 = ' '.join(str(elem) for elem in words2)

    distance = utility.levenshtein(joined1, joined2)
    levenshteinConfidence = 1 / (pow(distance + 1, 0.2 * distance))

    return metaphoneConfidence * 0.5 + levenshteinConfidence * 0.5
def calculateDOBConfidence(dob1, dob2):
    """Score how closely two date-of-birth strings match.

    Returns None when either value is the empty string and 1 when the
    values are equal. Otherwise returns 1 / (d + 1) ** (0.5 * d), where
    d is the value of utility.levenshtein(dob1, dob2).
    """
    if dob1 == "" or dob2 == "":
        return None
    if dob1 == dob2:
        return 1

    edits = utility.levenshtein(dob1, dob2)
    return 1 / ((edits + 1) ** (0.5 * edits))
def calculateZipConfidence(zip1, zip2):
    """Score how closely two zip codes match.

    Returns None when either argument is the empty string and 1 when the
    two are equal. Otherwise returns 1 / (d + 1) ** d, where d is the
    value of utility.levenshtein(zip1, zip2):

        distance -> confidence
        0        -> 1
        1        -> 0.5
        2        -> 0.11
        3        -> .0156
    """
    if zip1 == "" or zip2 == "":
        return None
    if zip1 == zip2:
        return 1

    edits = utility.levenshtein(zip1, zip2)
    return 1 / ((edits + 1) ** edits)
def calculateSexConfidence(sex1, sex2):
    """Score how closely two sex values match.

    Returns None when either value is the empty string. Each value is
    replaced by dictionaries.sex[value] when that key exists and left
    unchanged otherwise. The result is 1 / (d + 1) ** d, where d is the
    value of utility.levenshtein on the two resulting values.
    """
    if sex1 == "" or sex2 == "":
        return None

    def _lookup(value):
        # Fall back to the raw value when the dictionary has no entry.
        try:
            return dictionaries.sex[value]
        except KeyError:
            return value

    first = _lookup(sex1)
    second = _lookup(sex2)

    edits = utility.levenshtein(first, second)
    return 1 / ((edits + 1) ** edits)
def calculateNameConfidence(name1, name2):
    """Score how closely two names match.

    Returns 1 as soon as utility.compareWordsWithoutSpecialChars is
    truthy. Otherwise returns the sum of:
      0.1  if utility.compareByAbbrevWord is truthy
      0.1  if utility.compareByContains is truthy
      0.4  if utility.compareByDoubleMetaphone is truthy
      0.4 * 1 / (d + 1) ** (0.9 * d), d = utility.levenshtein(...)

    There is no empty-string check in this helper.
    """
    score = 0

    # The abbreviation check runs before the exact-match shortcut.
    if utility.compareByAbbrevWord(name1, name2):
        score += 0.1
    if utility.compareWordsWithoutSpecialChars(name1, name2):
        return 1

    if utility.compareByContains(name1, name2):
        score += 0.1
    if utility.compareByDoubleMetaphone(name1, name2):
        score += 0.4

    edits = utility.levenshtein(name1, name2)
    score += 1 / ((edits + 1) ** (0.9 * edits)) * 0.4
    return score
def calculateStateConfidence(state1, state2):
    """Score how closely two state values match.

    Returns None when either value is the empty string. Each value is
    replaced by dictionaries.states[value] when that key exists (per the
    original comment, to turn abbreviations into full state names) and
    left unchanged otherwise. The result is 1 / (d + 1), where d is the
    value of utility.levenshtein on the two resulting values.
    """
    if state1 == "" or state2 == "":
        return None

    def _expand(value):
        # Fall back to the raw value when the dictionary has no entry.
        try:
            return dictionaries.states[value]
        except KeyError:
            return value

    first = _expand(state1)
    second = _expand(state2)

    edits = utility.levenshtein(first, second)
    return 1 / (edits + 1)
def calculateSexConfidence(sex1, sex2):
    """Score how closely two sex values match.

    Returns None when either value is the empty string, 1 when the
    values are equal and 0 when their lengths differ. Otherwise returns
    1 / (d + 1) ** d, where d is the value of
    utility.levenshtein(sex1, sex2).
    """
    if sex1 == "" or sex2 == "":
        return None

    # NOTE: values are compared as received; no dictionary lookup is
    # applied to them here.
    if sex1 == sex2:
        return 1
    if len(sex1) != len(sex2):
        return 0

    edits = utility.levenshtein(sex1, sex2)
    return 1 / ((edits + 1) ** edits)
def calculateMiddleIConfidence(middle1, middle2):
    """Score how closely two middle names / initials match.

    Returns None when either value is the empty string, and 1 as soon as
    utility.compareWordsWithoutSpecialChars is truthy. Otherwise returns
    the sum of:
      0.1  if utility.compareByAbbrevWord is truthy
      0.1  if utility.compareByContains is truthy
      0.4  if utility.compareByDoubleMetaphone is truthy
      0.4 * 1 / (d + 1) ** (0.2 * d), d = utility.levenshtein(...)
    """
    if middle1 == "" or middle2 == "":
        return None

    score = 0

    # The abbreviation check runs before the exact-match shortcut.
    if utility.compareByAbbrevWord(middle1, middle2):
        score += 0.1
    if utility.compareWordsWithoutSpecialChars(middle1, middle2):
        return 1

    if utility.compareByContains(middle1, middle2):
        score += 0.1
    if utility.compareByDoubleMetaphone(middle1, middle2):
        score += 0.4

    edits = utility.levenshtein(middle1, middle2)
    score += 1 / ((edits + 1) ** (0.2 * edits)) * .4
    return score
def calculatePatientAcctNumConfidence(patientAcctNum1, patientAcctNum2):
    """Score how closely two patient account numbers match.

    Returns 0 (not None) when either value is the empty string.
    Otherwise returns 1 / (d + 1) ** (0.15 * d), where d is the value of
    utility.levenshtein(patientAcctNum1, patientAcctNum2).
    """
    if patientAcctNum1 == "" or patientAcctNum2 == "":
        return 0

    edits = utility.levenshtein(patientAcctNum1, patientAcctNum2)
    return 1 / ((edits + 1) ** (0.15 * edits))