def test_update_self_cites_tables(self):
    # Smoke test: run update_self_cites_tables for record 1 with an empty
    # configuration and the author tags reported by the indexer module.
    # Nothing is asserted; the test only checks that the call completes.
    from invenio.legacy.bibrank.selfcites_indexer import (
        update_self_cites_tables,
        get_authors_tags,
    )

    author_tags = get_authors_tags()
    update_self_cites_tables(1, {}, author_tags)
def fill_self_cites_tables(config):
    """Populate the self-cites tables for every record id in ``bibrec``.

    Meant for an installation on which the self-cites daemon has never
    run, so the tables have to be built from scratch.

    ``config`` must provide an ``'algorithm'`` key.  Its value selects the
    citation function (via ``get_citations_fun``) and, when it equals
    ``'friends'``, enables a preliminary pass that calls
    ``update_self_cites_tables`` on each record.
    """
    algorithm = config['algorithm']
    tags = get_authors_tags()

    record_ids = []
    for row in run_sql('SELECT id FROM bibrec ORDER BY id'):
        record_ids.append(row[0])

    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)
    total = len(record_ids)

    def checkpoint(template, position):
        # Publish progress to the task and the log, then let the
        # scheduler pause the task if it asked for that.
        progress = template % (position, total)
        task_update_progress(progress)
        write_message(progress)
        task_sleep_now_if_required()

    # First pass: intermediary tables, only used by the 'friends'
    # algorithm.
    if algorithm == 'friends':
        for position, recid in enumerate(record_ids):
            if not position % 1000:
                checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)

    # Second pass: the self-cites table itself, for every algorithm.
    for position, recid in enumerate(record_ids):
        if not position % 1000:
            checkpoint('final %d/%d', position)
        compute_and_store_self_citations(recid, tags, citations_fun)
def fill_self_cites_tables(rank_method_code, config):
    """Populate the self-cites tables for every record id in ``bibrec``.

    Meant for an installation on which the self-cites daemon has never
    run.  It is a bulk shortcut for empty tables; the outcome is hoped
    (not guaranteed) to match what compute_and_store_self_citations
    would produce.

    ``config`` must provide an ``'algorithm'`` key.  Its value selects the
    citation function (via ``get_citations_fun``) and, when it equals
    ``'friends'``, enables a preliminary pass that calls
    ``update_self_cites_tables`` on each record.  The per-record results
    are collected in one dictionary which is finally handed to ``intoDB``
    (together with the start timestamp and ``rank_method_code``) and to
    ``store_weights_cache``.
    """
    started_at = datetime.now()
    begin_date = started_at.strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    selfcites_dic = {}
    record_ids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)
    total = len(record_ids)

    def checkpoint(template, position):
        # Publish progress to the task and the log, then let the
        # scheduler pause the task if it asked for that.
        progress = template % (position, total)
        task_update_progress(progress)
        write_message(progress)
        task_sleep_now_if_required()

    # First pass: intermediary tables, only used by the 'friends'
    # algorithm.
    if algorithm == 'friends':
        for position, recid in enumerate(record_ids):
            if not position % 1000:
                checkpoint('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)

    # Second pass: accumulate the self-citation data for every record.
    for position, recid in enumerate(record_ids):
        if not position % 1000:
            checkpoint('final %d/%d', position)
        compute_and_store_self_citations(
            recid, tags, citations_fun, selfcites_dic
        )

    # Persist the accumulated results once both passes are done.
    intoDB(selfcites_dic, begin_date, rank_method_code)
    store_weights_cache(selfcites_dic)