Exemplo n.º 1
0
def cluster_sets_from_marktables():
    '''
    Build one Cluster_set per last-name cluster string from tables 100 and 700.

    Every (table, ref, rec) triple obtained through get_bibrefrec_subset is
    wrapped in its own single-element cluster. The clusters are grouped by
    the string generate_last_name_cluster_str produces for the name attached
    to the (table, ref) pair.

    @return: list of Cluster_set objects with `clusters` and `last_name` set.
    '''
    # Both helpers are iterated as (ref, name) pairs.
    ref100 = get_bib10x()
    ref700 = get_bib70x()

    # { (table, ref) -> last name cluster string }, e.g. key (100, 123)
    bibref_2_name = dict([((100, ref), generate_last_name_cluster_str(name)) for ref, name in ref100] +
                         [((700, ref), generate_last_name_cluster_str(name)) for ref, name in ref700])

    all_recs = get_all_valid_bibrecs()

    # get_bibrefrec_subset yields (rec, ref) pairs; reorder them into
    # (table, ref, rec) triples and drop duplicates within each table.
    all_bibrefrecs = chain(set((100, ref, rec) for rec, ref in get_bibrefrec_subset(100, all_recs, map(itemgetter(0), ref100))),
                           set((700, ref, rec) for rec, ref in get_bibrefrec_subset(700, all_recs, map(itemgetter(0), ref700))))

    # { last name cluster string -> [(table, ref, rec), ...] }
    last_name_2_bibs = {}

    for bibrefrec in all_bibrefrecs:
        # The first two fields, (table, ref), are the key into bibref_2_name.
        name = bibref_2_name[bibrefrec[:2]]
        # Append in place so that grouping stays linear in the number of
        # triples instead of copying the group's list on every insertion.
        last_name_2_bibs.setdefault(name, []).append(bibrefrec)

    cluster_sets = []

    for name, bibrecrefs in last_name_2_bibs.items():
        new_cluster_set = Cluster_set()
        # Each triple starts out alone in its own cluster.
        new_cluster_set.clusters = [Cluster_set.Cluster([bib]) for bib in bibrecrefs]
        new_cluster_set.last_name = name
        cluster_sets.append(new_cluster_set)

    return cluster_sets
Exemplo n.º 2
0
def tortoise_last_name(name, pure=False):
    '''
    Disambiguate the single last-name cluster that `name` falls into.

    The name is normalized with generate_last_name_cluster_str and the
    resulting string is looked up among the clusters produced by
    create_lastname_list_from_personid. If the cluster exists, its person
    ids are handed to disambiguate_last_name; otherwise a message is
    printed and nothing else happens.

    @param name: the name whose last-name cluster should be processed.
    @param pure: forwarded unchanged to disambiguate_last_name.
    '''
    lname = generate_last_name_cluster_str(name)

    # The cluster list is keyed by the normalized cluster string, so the
    # lookup has to compare against lname; comparing against the raw name
    # would miss every name that is not already in normalized form.
    # next() stops at the first match and works whether the clusters come
    # as a list or as a one-shot generator.
    pids = next((entry[1] for entry in create_lastname_list_from_personid()
                 if entry[0] == lname),
                None)

    if pids is not None:
        bibauthor_print("Found %s(%s), %d pids" % (name, lname, len(pids)))
        # NOTE(review): the meaning of the trailing False flag is defined by
        # disambiguate_last_name, which is not visible here.
        disambiguate_last_name(pids, lname, pure, False)
    else:
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters" % (name, lname))
Exemplo n.º 3
0
def create_lastname_list_from_personid():
    '''
    Group person ids by last-name cluster string.

    Each row from get_all_names_from_personid is read as
    (personid, names, Nbibs). The first name yielded by `names` is
    normalized with generate_last_name_cluster_str and the rows are grouped
    by that string. Rows whose `names` collection is empty are skipped.

    @return: a generator (not a dictionary) of
        (last_name, [personid, ...], Nbibs) tuples in ascending last_name
        order, where Nbibs is the sum of the Nbibs of the grouped rows.
        It can be consumed only once.
    '''
    by_last_name = itemgetter(1)

    # [(personid, last_name, Nbibs), ...]
    rows = []
    for row in get_all_names_from_personid():
        # Only the first name yielded by the collection is considered.
        first_name = next(iter(row[1]), None)
        if first_name is None:
            # An empty name collection would otherwise raise StopIteration
            # in the middle of the pipeline, which either silently truncates
            # the result or surfaces as a RuntimeError depending on the
            # Python version. A person without a name has no last name.
            continue
        rows.append((row[0], generate_last_name_cluster_str(first_name), row[2]))

    # groupby only merges adjacent items, so sort by the grouping key first.
    rows.sort(key=by_last_name)

    # Materialize every group before groupby advances to the next key,
    # because each group has to be walked twice below.
    grouped = ((last_name, list(members))
               for last_name, members in groupby(rows, key=by_last_name))

    return ((last_name,
             [member[0] for member in members],
             sum(member[2] for member in members))
            for last_name, members in grouped)