def identify_doubles(doubles=doubles, repo=repo):
    """Identify the persons behind each group of duplicate biographies.

    ``doubles`` is a mapping; only its values are used, and each value is
    a sequence of local ids. For every group the biography of each local
    id is fetched with ``repo.get_biography(local_id=...)``, the person of
    each biography is obtained with ``get_person()``, and every person
    after the first is passed, together with the first, to
    ``repo.identify``.

    Both defaults are bound when the function is defined, to the
    module-level ``doubles`` and ``repo`` objects.

    A progress line is printed for every group and for every pair.
    Groups without any local id are reported and then skipped.
    """
    groups = doubles.values()
    total = len(groups)
    for index, local_ids in enumerate(groups, 1):
        # Single-argument print keeps the same output on Python 2 and 3.
        print('%s of %s : %s' % (index, total, local_ids))

        # Find the biography for each local id in the group.
        bios = [repo.get_biography(local_id=local_id) for local_id in local_ids]
        # Find the person for each biography.
        persons = [bio.get_person() for bio in bios]
        if not persons:
            # Nothing to identify; persons[0] would raise IndexError.
            continue

        # Identify every other person with the first one of the group.
        first = persons[0]
        for other in persons[1:]:
            print('identifying %s %s' % (first, other))
            repo.identify(first, other)
def doubles_in_suggestions_list(doubles=doubles, repo=repo):
    """Add each group of duplicate biographies to the similarity cache.

    ``doubles`` is a mapping; only its values are used, and each value is
    a sequence of local ids. For every group the biography of each local
    id is fetched with ``repo.get_biography(local_id=...)``, the person of
    each biography is obtained with ``get_person()``, and for every person
    after the first, ``repo.db.add_to_similarity_cache`` is called with
    the ``bioport_id`` of the first person, the ``bioport_id`` of the
    other person, and ``1.0`` (presumably the similarity score; confirm
    against ``add_to_similarity_cache``).

    Both defaults are bound when the function is defined, to the
    module-level ``doubles`` and ``repo`` objects.

    A progress line is printed for every group and for every pair.
    Groups without any local id are reported and then skipped.
    """
    groups = doubles.values()
    total = len(groups)
    for index, local_ids in enumerate(groups, 1):
        # Single-argument print keeps the same output on Python 2 and 3.
        print('%s of %s : %s' % (index, total, local_ids))

        # Find the biography for each local id in the group.
        bios = [repo.get_biography(local_id=local_id) for local_id in local_ids]
        # Find the person for each biography.
        persons = [bio.get_person() for bio in bios]
        if not persons:
            # Nothing to pair up; persons[0] would raise IndexError.
            continue

        # Pair every other person with the first one in the similarity cache.
        first = persons[0]
        for other in persons[1:]:
            print('add to similairty cache %s %s' % (first, other))
            repo.db.add_to_similarity_cache(
                first.bioport_id, other.bioport_id, 1.0)