def save(self, *args, **kwargs):
    """Normalise the surname, refresh the derived fields, then save.

    Before delegating to the parent class's ``save``:

    * every RIGHT SINGLE QUOTATION MARK (U+2019) in ``last_name`` is
      replaced by a plain ASCII apostrophe;
    * ``slug`` is recomputed as ``slugify(self.name())``;
    * the four metaphone fields are recomputed from ``dm`` applied to
      ``first_name`` and ``last_name``.

    Args:
        *args: positional arguments, forwarded untouched to the parent
            ``save``.
        **kwargs: keyword arguments, forwarded untouched to the parent
            ``save``.
    """
    # I know, but store it like this anyway.
    self.last_name = self.last_name.replace(u'\u2019', "'")

    # Computed after the apostrophe fix; presumably name() includes
    # last_name, so the slug reflects the normalised form - confirm.
    self.slug = slugify(self.name())

    # dm() results are indexed as (first code, second code).
    first_name_metaphone = dm(self.first_name)
    last_name_metaphone = dm(self.last_name)
    self.first_name_metaphone = first_name_metaphone[0]
    # A falsy second code is stored as an empty string.
    self.first_name_metaphone_alt = first_name_metaphone[1] or ''
    self.last_name_metaphone = last_name_metaphone[0]
    self.last_name_metaphone_alt = last_name_metaphone[1] or ''

    # Forward positional arguments as well as keyword ones so that callers
    # using the parent's positional signature keep working.
    super(Person, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Update the normalised and derived name fields, then save.

    Steps performed before calling the parent class's ``save``:

    1. ``last_name`` has each RIGHT SINGLE QUOTATION MARK (U+2019)
       replaced by an ASCII apostrophe.
    2. ``slug`` is set to ``slugify(self.name())``.
    3. ``first_name_metaphone`` / ``first_name_metaphone_alt`` and
       ``last_name_metaphone`` / ``last_name_metaphone_alt`` are set from
       the two values returned by ``dm`` for each name.

    Args:
        *args: positional arguments passed straight to the parent ``save``.
        **kwargs: keyword arguments passed straight to the parent ``save``.
    """
    # I know, but store it like this anyway.
    self.last_name = self.last_name.replace(u'\u2019', "'")
    self.slug = slugify(self.name())

    first_codes = dm(self.first_name)
    last_codes = dm(self.last_name)

    self.first_name_metaphone = first_codes[0]
    # The second code may be falsy; the field then gets ''.
    self.first_name_metaphone_alt = first_codes[1] or ''
    self.last_name_metaphone = last_codes[0]
    self.last_name_metaphone_alt = last_codes[1] or ''

    # Accepting and forwarding *args keeps positional calls to save()
    # working instead of failing with a TypeError in this override.
    super(Person, self).save(*args, **kwargs)
def compare(s1f, s1l, s2f, s2l):
    """Print a phonetic and similarity comparison of two names.

    Debugging aid: nothing is returned, everything goes to stdout.

    Args:
        s1f: first name that was wanted.
        s1l: last name that was wanted.
        s2f: first name that was entered.
        s2l: last name that was entered.

    All four values are lower-cased and passed through ``unicode`` before
    being compared.  Three variants are compared each time: the full name
    ("first last"), the first name alone and the last name alone.
    """
    def codes_match(codes1, codes2):
        # Two dm() results match when they are equal as a whole, when
        # their first codes agree, or when the first code of one equals
        # the second code of the other.
        return (codes1 == codes2
                or codes1[0] == codes2[0]
                or codes1[0] == codes2[1]
                or codes1[1] == codes2[0])

    s1f = unicode(s1f.lower())
    s1l = unicode(s1l.lower())
    s2f = unicode(s2f.lower())
    s2l = unicode(s2l.lower())
    s1 = "%s %s" % (s1f, s1l)
    s2 = "%s %s" % (s2f, s2l)

    # Soundex values are only needed by the disabled "Soundex" row below;
    # they are kept so that row can be switched back on unchanged.
    soundex1 = soundex(s1)
    soundex2 = soundex(s2)
    soundex1f = soundex(s1f)
    soundex2f = soundex(s2f)
    soundex1l = soundex(s1l)
    soundex2l = soundex(s2l)
    soundexMatch = (soundex1 == soundex2)
    soundexFMatch = (soundex1f == soundex2f)
    soundexLMatch = (soundex1l == soundex2l)

    dm1 = dm(s1)
    dm2 = dm(s2)
    dm1f = dm(s1f)
    dm2f = dm(s2f)
    dm1l = dm(s1l)
    dm2l = dm(s2l)
    # Each flag is derived from its own pair of codes.  Previously the
    # first-name and last-name flags reused the full-name codes in three
    # of their four clauses, so they simply echoed the full-name result.
    dmMatch = codes_match(dm1, dm2)
    dmFMatch = codes_match(dm1f, dm2f)
    dmLMatch = codes_match(dm1l, dm2l)

    jarowN = jarow(s1, s2)
    jarowNf = jarow(s1f, s2f)
    jarowNl = jarow(s1l, s2l)
    damerauN = damerau(s1, s2)
    damerauNf = damerau(s1f, s2f)
    damerauNl = damerau(s1l, s2l)

    # Single-argument print(...) produces identical output whether print
    # is a statement or a function.
    print("\n%s entered, %s wanted" % (s2, s1))
    #print "Soundex\t\tFull:%s/%s %s\tFirst:%s/%s %s\tLast:%s/%s %s" % (soundex1, soundex2, soundexMatch, soundex1f, soundex2f, soundexFMatch, soundex1l, soundex2l, soundexLMatch)
    print("Metaphone\tFull:%s/%s %s\tFirst:%s/%s %s\tLast:%s/%s %s" % (
        dm1, dm2, dmMatch, dm1f, dm2f, dmFMatch, dm1l, dm2l, dmLMatch))
    print("Algorithm\tFull name\tFirst name\tLast name")
    print("Jarow\t\t%.4f\t\t%.4f\t\t%.4f\nDamerau\t\t%.4f\t\t%.4f\t\t%.4f" % (
        jarowN, jarowNf, jarowNl, damerauN, damerauNf, damerauNl))
def search_people(search, force_similar=False, use_distance=True):
    """Find ``Person`` objects matching a free-text name search.

    ``search`` is split on whitespace into at most four parts (any
    further words stay inside the fourth part) and the number of parts
    selects the strategy:

    * one part: case-insensitive substring match on either name, then a
      metaphone (sounds-like) match, then an optional similarity scan;
    * two parts: first/last substring match, then combinations of
      metaphone codes and substring matches, then an optional similarity
      scan;
    * three or four parts: substring match only, trying the middle words
      as part of either the first or the last name.

    Args:
        search: the text to look for.
        force_similar: when true, run the metaphone lookup even if the
            substring match found someone.  For a one-part search the
            substring matches are excluded from the result.
        use_distance: when true and nothing has matched so far, score
            every person with ``distance`` and keep those whose score is
            at least the module-level ``threshold``.

    Returns:
        A ``(people, sounds_people)`` tuple.  ``people`` is a queryset or
        a list of ``Person`` objects (``[]`` for an empty search).
        ``sounds_people`` is 0 when no metaphone lookup ran, 1 when one
        ran, and 2 for a forced one-part lookup.
    """
    people = []
    sounds_people = 0
    names = search.split(None, 3)
    # Letters, whitespace, apostrophes and hyphens only (case-insensitive).
    plain_name = r'(?i)[a-z\s\'-]+$'

    if len(names) == 1:
        names[0] = names[0].replace(u'\u2019', "'")
        if force_similar:
            # Sounds-like results only: drop anything the plain substring
            # search would have returned.
            sounds_people = 2
            dm_ = dm(names[0])[0]
            people = Person.objects.exclude(
                first_name__icontains=names[0]).exclude(
                last_name__icontains=names[0]).filter(
                Q(first_name_metaphone=dm_) | Q(last_name_metaphone=dm_))
        else:
            people = Person.objects.filter(
                Q(first_name__icontains=names[0]) |
                Q(last_name__icontains=names[0]))
            if not people and re.match(plain_name, names[0]):
                sounds_people = 1
                # Only the first metaphone code is compared, and only
                # against the non-alt columns.
                dm_ = dm(names[0])[0]
                people = Person.objects.filter(
                    Q(first_name_metaphone=dm_) | Q(last_name_metaphone=dm_))

        if not people and use_distance:
            wanted = names[0].lower()
            scored = []
            for p in Person.objects.all():
                sim = distance(wanted, p.first_name.lower())
                sim2 = distance(wanted, p.last_name.lower())
                if sim >= threshold or sim2 >= threshold:
                    scored.append((1 - max(sim, sim2), p))
            # Highest score first.  Sorting on the number alone avoids
            # comparing Person objects when two scores are equal.
            scored.sort(key=lambda pair: pair[0])
            people = [person for _, person in scored]

    elif len(names) == 2:
        names[1] = names[1].replace(u'\u2019', "'")
        people = Person.objects.filter(first_name__icontains=names[0],
                                       last_name__icontains=names[1])

        if (not people and re.match(plain_name, search)) or force_similar:
            sounds_people = 1
            dm_first, dm_first_alt = dm(names[0])
            dm_last, dm_last_alt = dm(names[1])
            qs = Q()
            if dm_first:
                # Both names homophones
                if dm_last:
                    qs |= (
                        Q(first_name_metaphone=dm_first,
                          last_name_metaphone=dm_last)
                        | Q(first_name_metaphone=dm_first,
                            last_name_metaphone_alt=dm_last)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone=dm_last)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone_alt=dm_last))
                if dm_last_alt:
                    qs |= (
                        Q(first_name_metaphone=dm_first,
                          last_name_metaphone=dm_last_alt)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone=dm_last_alt))
                # First name homophone, Last name match
                qs |= (
                    Q(first_name_metaphone=dm_first,
                      last_name__icontains=names[1])
                    | Q(first_name_metaphone_alt=dm_first,
                        last_name__icontains=names[1]))
            if dm_first_alt:
                qs |= Q(first_name_metaphone=dm_first_alt,
                        last_name__icontains=names[1])
                if dm_last:
                    qs |= (
                        Q(first_name_metaphone=dm_first_alt,
                          last_name_metaphone=dm_last)
                        | Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone_alt=dm_last))
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone=dm_last_alt)
            if dm_last:
                # First name match, last name homophone
                qs |= (
                    Q(first_name__icontains=names[0],
                      last_name_metaphone=dm_last)
                    | Q(first_name__icontains=names[0],
                        last_name_metaphone_alt=dm_last))
            if dm_last_alt:
                qs |= Q(first_name__icontains=names[0],
                        last_name_metaphone=dm_last_alt)

            if dm_first or dm_first_alt or dm_last or dm_last_alt:
                people = Person.objects.filter(qs)
            else:
                # No metaphone code at all: qs is still the empty Q() and
                # filtering on it would return every person as a match.
                people = Person.objects.none()

        if not people and use_distance:
            first_lower = names[0].lower()
            last_lower = names[1].lower()
            full_lower = ' '.join(names).lower()
            # The typed first name is matched literally; escaping stops
            # characters such as "(" or "." being read as regex syntax.
            first_literal = re.compile(re.escape(names[0]), re.I)
            people = []
            people2 = []
            people3 = []
            for p in Person.objects.all():
                sim = distance(first_lower, p.first_name.lower())
                sim2 = distance(last_lower, p.last_name.lower())
                simB = distance(
                    full_lower,
                    ('%s %s' % (p.first_name, p.last_name)).lower())
                if last_lower == p.last_name.lower() and sim >= threshold:
                    # Same last name, similar first name.
                    people.append((1 - sim, p))
                elif first_literal.search(p.first_name) and sim2 >= threshold:
                    # First name contains the typed text, similar last name.
                    people2.append((1 - sim2, p))
                elif simB >= threshold:
                    people3.append((1 - simB, p))
                elif sim >= threshold and sim2 >= threshold:
                    people3.append((1 - max(sim, sim2), p))
            # Each group is ordered best score first, then the groups are
            # concatenated in priority order.
            by_score = lambda pair: pair[0]
            people.sort(key=by_score)
            people2.sort(key=by_score)
            people3.sort(key=by_score)
            people = people + people2 + people3
            people = [person for _, person in people]

    elif len(names) == 3:
        names[1] = names[1].replace(u'\u2019', "'")
        names[2] = names[2].replace(u'\u2019', "'")
        # "A B" + "C" or "A" + "B C".
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:2]),
              last_name__icontains=names[2]) |
            Q(first_name__icontains=names[0],
              last_name__icontains=' '.join(names[1:3])))

    elif len(names) == 4:
        # NOTE(review): only the last part is apostrophe-normalised here,
        # unlike the three-part case - confirm whether names[1] and
        # names[2] should be normalised as well.
        names[3] = names[3].replace(u'\u2019', "'")
        # "A B C" + "D" or "A" + "B C D".
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:3]),
              last_name__icontains=names[3]) |
            Q(first_name__icontains=names[0],
              last_name__icontains=' '.join(names[1:4])))

    return people, sounds_people
def search_people(search, force_similar=False, use_distance=True):
    """Look up ``Person`` objects from a free-text name search.

    The text is split on whitespace into at most four parts (extra words
    remain in the fourth part).  What happens next depends on how many
    parts there are:

    * 1 part: substring match against first or last name; if that finds
      nothing (or ``force_similar`` is set) a metaphone match; finally an
      optional similarity scan.
    * 2 parts: substring match of first and last name; if that finds
      nothing (or ``force_similar`` is set) a metaphone/substring
      combination query; finally an optional similarity scan.
    * 3 or 4 parts: substring match only, with the middle words tried as
      part of the first name and as part of the last name.

    Args:
        search: the text to look for.
        force_similar: run the metaphone lookup regardless of whether the
            substring match succeeded; for a one-part search, substring
            matches are excluded from what is returned.
        use_distance: if nothing matched, score every person with
            ``distance`` and keep scores of at least the module-level
            ``threshold``.

    Returns:
        ``(people, sounds_people)``.  ``people`` is a queryset or a list
        of ``Person`` objects (``[]`` when ``search`` has no words).
        ``sounds_people`` is 0 if no metaphone lookup ran, 1 if one ran,
        2 for a forced one-part lookup.
    """
    people = []
    sounds_people = 0
    names = search.split(None, 3)
    # Raw string with the inline flag at the start: a trailing "(?i)" is
    # rejected by newer versions of the re module.
    letters_only = r'(?i)[a-z\s\'-]+$'

    if len(names) == 1:
        names[0] = names[0].replace(u'\u2019', "'")
        if force_similar:
            # Keep only sounds-like people, i.e. not plain substring hits.
            sounds_people = 2
            code = dm(names[0])[0]
            people = Person.objects.exclude(
                first_name__icontains=names[0]).exclude(
                last_name__icontains=names[0]).filter(
                Q(first_name_metaphone=code) | Q(last_name_metaphone=code))
        else:
            people = Person.objects.filter(
                Q(first_name__icontains=names[0]) |
                Q(last_name__icontains=names[0]))
            if not people and re.match(letters_only, names[0]):
                sounds_people = 1
                # Only the first metaphone code is used here, against the
                # non-alt columns.
                code = dm(names[0])[0]
                people = Person.objects.filter(
                    Q(first_name_metaphone=code) |
                    Q(last_name_metaphone=code))

        if not people and use_distance:
            needle = names[0].lower()
            ranked = []
            for p in Person.objects.all():
                sim = distance(needle, p.first_name.lower())
                sim2 = distance(needle, p.last_name.lower())
                if sim >= threshold or sim2 >= threshold:
                    ranked.append((1 - max(sim, sim2), p))
            # Best score first; the key keeps Person objects out of the
            # comparison when scores tie.
            ranked.sort(key=lambda item: item[0])
            people = [person for _, person in ranked]

    elif len(names) == 2:
        names[1] = names[1].replace(u'\u2019', "'")
        people = Person.objects.filter(first_name__icontains=names[0],
                                       last_name__icontains=names[1])

        if (not people and re.match(letters_only, search)) or force_similar:
            sounds_people = 1
            dm_first, dm_first_alt = dm(names[0])
            dm_last, dm_last_alt = dm(names[1])
            qs = Q()
            if dm_first:
                # Both names homophones
                if dm_last:
                    qs |= (
                        Q(first_name_metaphone=dm_first,
                          last_name_metaphone=dm_last)
                        | Q(first_name_metaphone=dm_first,
                            last_name_metaphone_alt=dm_last)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone=dm_last)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone_alt=dm_last))
                if dm_last_alt:
                    qs |= (
                        Q(first_name_metaphone=dm_first,
                          last_name_metaphone=dm_last_alt)
                        | Q(first_name_metaphone_alt=dm_first,
                            last_name_metaphone=dm_last_alt))
                # First name homophone, Last name match
                qs |= (
                    Q(first_name_metaphone=dm_first,
                      last_name__icontains=names[1])
                    | Q(first_name_metaphone_alt=dm_first,
                        last_name__icontains=names[1]))
            if dm_first_alt:
                qs |= Q(first_name_metaphone=dm_first_alt,
                        last_name__icontains=names[1])
                if dm_last:
                    qs |= (
                        Q(first_name_metaphone=dm_first_alt,
                          last_name_metaphone=dm_last)
                        | Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone_alt=dm_last))
                if dm_last_alt:
                    qs |= Q(first_name_metaphone=dm_first_alt,
                            last_name_metaphone=dm_last_alt)
            if dm_last:
                # First name match, last name homophone
                qs |= (
                    Q(first_name__icontains=names[0],
                      last_name_metaphone=dm_last)
                    | Q(first_name__icontains=names[0],
                        last_name_metaphone_alt=dm_last))
            if dm_last_alt:
                qs |= Q(first_name__icontains=names[0],
                        last_name_metaphone=dm_last_alt)

            if dm_first or dm_first_alt or dm_last or dm_last_alt:
                people = Person.objects.filter(qs)
            else:
                # Without any metaphone code qs is an empty Q(); filtering
                # on it would hand back every person as a "match".
                people = Person.objects.none()

        if not people and use_distance:
            typed_first = names[0].lower()
            typed_last = names[1].lower()
            typed_full = ' '.join(names).lower()
            # Escape the typed first name so it is searched for literally
            # instead of being interpreted as a regular expression.
            first_pattern = re.compile(re.escape(names[0]), re.I)
            people = []
            people2 = []
            people3 = []
            for p in Person.objects.all():
                sim = distance(typed_first, p.first_name.lower())
                sim2 = distance(typed_last, p.last_name.lower())
                simB = distance(
                    typed_full,
                    ('%s %s' % (p.first_name, p.last_name)).lower())
                if typed_last == p.last_name.lower() and sim >= threshold:
                    # Exact last name, similar first name.
                    people.append((1 - sim, p))
                elif first_pattern.search(p.first_name) and sim2 >= threshold:
                    # Typed first name found in first name, similar last.
                    people2.append((1 - sim2, p))
                elif simB >= threshold:
                    people3.append((1 - simB, p))
                elif sim >= threshold and sim2 >= threshold:
                    people3.append((1 - max(sim, sim2), p))
            # Order each group by score, then join them by priority.
            score_of = lambda item: item[0]
            people.sort(key=score_of)
            people2.sort(key=score_of)
            people3.sort(key=score_of)
            people = people + people2 + people3
            people = [person for _, person in people]

    elif len(names) == 3:
        names[1] = names[1].replace(u'\u2019', "'")
        names[2] = names[2].replace(u'\u2019', "'")
        # Middle word tried as part of the first name, then of the last.
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:2]),
              last_name__icontains=names[2]) |
            Q(first_name__icontains=names[0],
              last_name__icontains=' '.join(names[1:3])))

    elif len(names) == 4:
        # NOTE(review): only names[3] is apostrophe-normalised, whereas
        # the three-part case normalises every part after the first -
        # confirm whether that difference is intended.
        names[3] = names[3].replace(u'\u2019', "'")
        # Middle words tried as part of the first name, then of the last.
        people = Person.objects.filter(
            Q(first_name__icontains=' '.join(names[0:3]),
              last_name__icontains=names[3]) |
            Q(first_name__icontains=names[0],
              last_name__icontains=' '.join(names[1:4])))

    return people, sounds_people