def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Optionally build all matrices, then run the disambiguation.

    Cluster sets are obtained from
    ``delayed_cluster_sets_from_personid(pure, last_run)``.  Unless
    *skip_matrix_creation* is set, ``force_create_matrix`` is scheduled
    for every cluster set first; afterwards ``wedge_and_store`` is
    scheduled for every (evaluated) cluster set.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``;
        a true value also forces the matrix creation.
    :param force_matrix_creation: passed as ``force`` to
        ``force_create_matrix``.
    :param skip_matrix_creation: when true the matrix building phase is
        left out entirely.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: if both *force_matrix_creation* and
        *skip_matrix_creation* are requested.
    """
    # Forcing and skipping the matrix creation exclude each other.
    assert not (force_matrix_creation and skip_matrix_creation)

    # The computation must be forced in case we want
    # to compute pure results
    must_force = force_matrix_creation or pure

    if not skip_matrix_creation:
        logger.log("Preparing cluster sets.")
        delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_personid(
            pure, last_run)

        logger.log("Building all matrices.")
        # Every job is a one-element argument tuple holding the still
        # unevaluated cluster set.
        matrix_jobs = [(delayed, ) for delayed in delayed_sets]

        def build_matrix(cluster_set):
            return force_create_matrix(cluster_set, force=must_force)

        schedule_workers(build_matrix, matrix_jobs)

    # The cluster sets are requested again for the second phase.
    logger.log("Preparing cluster sets.")
    delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_personid(
        pure, last_run)
    # Here each delayed cluster set is called before it is handed over.
    wedge_jobs = [(delayed(), ) for delayed in delayed_sets]

    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_jobs)
def tortoise_tweak_coefficient(lastnames, min_coef, max_coef, stepping, build_matrix=True):
    """Collect statistics for a grid of coefficients over some last names.

    The last names are turned into a set of cluster strings with
    ``generate_last_name_cluster_str``.  Coefficients run from *min_coef*
    (inclusive) to *max_coef* (exclusive) in increments of *stepping*,
    all three taken with a resolution of one hundredth.  If
    *build_matrix* is true, ``_create_matrix`` is scheduled for every
    cluster string first; then ``_collect_statistics_lname_coeff`` is
    scheduled for every (cluster string, coefficient) pair.

    :param lastnames: iterable of last names.
    :param min_coef: first coefficient of the grid.
    :param max_coef: upper bound of the grid (not included).
    :param stepping: distance between two consecutive coefficients.
    :param build_matrix: whether to schedule ``_create_matrix`` before
        collecting the statistics.
    :raises ValueError: if *stepping* rounds to zero hundredths.
    """
    bibauthor_print('Coefficient tweaking!')
    bibauthor_print('Cluster sets from mark...')

    lnames = set(generate_last_name_cluster_str(n) for n in lastnames)

    # Work in integer hundredths.  round() is used instead of a bare
    # int() because a product such as 0.29 * 100 evaluates to
    # 28.999999999999996, which int() would truncate to 28 and thereby
    # shift the grid by one hundredth.
    first = int(round(min_coef * 100))
    stop = int(round(max_coef * 100))
    step = int(round(stepping * 100))
    if step == 0:
        # range() would raise ValueError as well; say why.
        raise ValueError(
            "stepping %r is below the supported resolution of 0.01" % (stepping,))
    coefficients = [x / 100. for x in range(first, stop, step)]

    if build_matrix:
        schedule_workers(_create_matrix, lnames)

    schedule_workers(_collect_statistics_lname_coeff,
                     ((x, y) for x in lnames for y in coefficients))
def tortoise_from_scratch():
    """Rebuild every matrix and rerun the disambiguation.

    Cluster sets come from ``delayed_cluster_sets_from_marktables()``.
    ``force_create_matrix`` is scheduled with ``force=True`` for each of
    them, the tortoise results table is emptied with
    ``empty_tortoise_results_table()``, and finally ``wedge`` is
    scheduled over a freshly obtained list of cluster sets.
    """
    # Phase 1: forced matrix creation.
    bibauthor_print("Preparing cluster sets.")
    matrix_inputs, _lnames, _sizes = delayed_cluster_sets_from_marktables()

    bibauthor_print("Building all matrices.")

    def build_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=True)

    schedule_workers(build_matrix, matrix_inputs)

    # Old results are dropped before the disambiguation is started.
    empty_tortoise_results_table()

    # Phase 2: the cluster sets are requested again and wedged.
    bibauthor_print("Preparing cluster sets.")
    wedge_inputs, _lnames, _sizes = delayed_cluster_sets_from_marktables()

    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge, wedge_inputs)
def tortoise_from_scratch():
    """Rebuild every matrix and rerun the disambiguation.

    Cluster sets come from ``delayed_cluster_sets_from_marktables()``.
    ``force_create_matrix`` is scheduled with ``force=True`` for each
    (still delayed) cluster set, the tortoise results table is emptied
    with ``empty_tortoise_results_table()``, and finally
    ``wedge_and_store`` is scheduled for each evaluated cluster set.
    """
    # Phase 1: forced matrix creation.
    logger.log("Preparing cluster sets.")
    delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_marktables()

    logger.log("Building all matrices.")
    # Every job is a one-element argument tuple holding the still
    # unevaluated cluster set.
    matrix_jobs = [(delayed, ) for delayed in delayed_sets]

    def build_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=True)

    schedule_workers(build_matrix, matrix_jobs)

    # Old results are dropped before the disambiguation is started.
    empty_tortoise_results_table()

    # Phase 2: the cluster sets are requested again; this time each
    # delayed cluster set is called before it is handed over.
    logger.log("Preparing cluster sets.")
    delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    wedge_jobs = [(delayed(), ) for delayed in delayed_sets]

    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_jobs)
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Optionally build all matrices, then run the disambiguation.

    Cluster sets are obtained from
    ``delayed_cluster_sets_from_personid(pure, last_run)``.  Unless
    *skip_matrix_creation* is set, ``force_create_matrix`` is scheduled
    for every cluster set first; afterwards ``wedge_and_store`` is
    scheduled over a freshly obtained list of cluster sets.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``;
        a true value also forces the matrix creation.
    :param force_matrix_creation: passed as ``force`` to
        ``force_create_matrix``.
    :param skip_matrix_creation: when true the matrix building phase is
        left out entirely.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: if both *force_matrix_creation* and
        *skip_matrix_creation* are requested.
    """
    # Forcing and skipping the matrix creation exclude each other.
    assert not (force_matrix_creation and skip_matrix_creation)

    # The computation must be forced in case we want
    # to compute pure results
    must_force = force_matrix_creation or pure

    if not skip_matrix_creation:
        bibauthor_print("Preparing cluster sets.")
        matrix_inputs, _lnames, _sizes = delayed_cluster_sets_from_personid(
            pure, last_run)

        bibauthor_print("Building all matrices.")

        def build_matrix(cluster_set):
            return force_create_matrix(cluster_set, force=must_force)

        schedule_workers(build_matrix, matrix_inputs)

    # The cluster sets are requested again for the second phase.
    bibauthor_print("Preparing cluster sets.")
    wedge_inputs, _lnames, _sizes = delayed_cluster_sets_from_personid(
        pure, last_run)

    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_inputs)
def tortoise_last_names(names_list):
    """Hand *names_list* to ``schedule_workers`` with ``tortoise_last_name``
    as the worker function.

    :param names_list: the work items passed on unchanged to
        ``schedule_workers``.
    """
    worker = tortoise_last_name
    schedule_workers(worker, names_list)
def tortoise_last_names(names_args_list):
    """Hand *names_args_list* to ``schedule_workers`` with
    ``tortoise_last_name`` as the worker function.

    The scheduler is invoked with ``with_kwargs=True``.

    :param names_args_list: the work items passed on unchanged to
        ``schedule_workers``.
    """
    worker = tortoise_last_name
    schedule_workers(worker, names_args_list, with_kwargs=True)