def _get_personids_to_update_extids(papers=None):
    '''
    Collect the personids whose external ids should be recalculated.

    Candidate papers are those modified since the most recent
    'PID_UPDATE' log entry of the 'daemon' user, or all valid papers
    when no such entry exists. A non-empty papers argument restricts
    the candidates to the papers it contains.

    @param papers: papers to restrict the candidates to
    @type papers: set or None

    @return: personids, or None when no candidate paper is left
    @rtype: set or None
    '''
    daemon_logs = get_user_logs(userinfo='daemon', action='PID_UPDATE',
                                only_most_recent=True)
    if daemon_logs:
        # Field 2 of the newest log row is handed over as the point in
        # time from which modifications are looked up.
        candidates = get_modified_papers_since(daemon_logs[0][2])
    else:
        candidates = get_all_valid_papers()

    if papers:
        # In-place intersection on purpose: keeps whatever container
        # type the lookup helpers returned.
        candidates &= set(papers)

    if not candidates:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        claimed_rows = get_claimed_papers_from_papers(candidates)
        candidates = [row[0] for row in claimed_rows]

    personids = set()
    for paper in candidates:
        personids.update(get_authors_of_claimed_paper(paper))
    return personids
def run_tortoise(from_scratch, last_names_thresholds=None, single_threaded=False):
    '''
    Dispatch a tortoise run according to the given options.

    - With last_names_thresholds and single_threaded: tortoise_last_name
      is called once per (last name, threshold) pair.
    - With last_names_thresholds only: one (args, kwargs) pair is built
      per last name and the whole list is handed to tortoise_last_names.
    - Otherwise, with from_scratch: tortoise_from_scratch is called.
    - Otherwise: tortoise runs on the papers modified since the last
      'tortoise' user log entry (on an empty list when there is none)
      and a new log entry stamped with the start time is inserted.

    @param from_scratch: forwarded as from_mark / pure, or selects the
        from-scratch run when no last names are given
    @param last_names_thresholds: mapping of last name to threshold
    @type last_names_thresholds: dict or None
    @param single_threaded: process the last names one after the other
    @type single_threaded: bool
    '''
    _prepare_tortoise_cache()
    from invenio.bibauthorid_tortoise import tortoise, \
        tortoise_from_scratch, tortoise_last_name, tortoise_last_names

    if last_names_thresholds:
        if single_threaded:
            for name, wedge in last_names_thresholds.items():
                tortoise_last_name(name, wedge_threshold=wedge,
                                   from_mark=from_scratch)
            return

        jobs = []
        for name, wedge in last_names_thresholds.items():
            # The flag travels under a different keyword depending on
            # its own truthiness.
            if from_scratch:
                options = {'from_mark': from_scratch}
            else:
                options = {'pure': from_scratch}
            # A falsy threshold is left out of the positional arguments.
            positional = (name, wedge) if wedge else (name, )
            jobs.append((positional, options))
        tortoise_last_names(jobs)
        return

    if from_scratch:
        tortoise_from_scratch()
        return

    # Take the timestamp before the run so the log entry marks its start.
    run_started = get_db_time()
    log_user = 'tortoise'
    previous_run = get_user_logs(userinfo=log_user, only_most_recent=True)
    changed = get_modified_papers_since(previous_run[0][2]) if previous_run else []
    tortoise(changed)
    insert_user_log(log_user, '-1', '', '', '', timestamp=run_started)
def run_tortoise(from_scratch):
    '''
    Run tortoise either from scratch or on recently modified papers.

    When from_scratch is false, the papers modified since the most
    recent 'tortoise' user log entry are processed (an empty list when
    no entry exists) and a new log entry stamped with the start time of
    this run is inserted afterwards.

    @param from_scratch: run tortoise_from_scratch instead of the
        incremental run
    '''
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    # Take the timestamp before the run so the log entry marks its start.
    run_started = get_db_time()
    log_user = 'tortoise'
    previous_run = get_user_logs(userinfo=log_user, only_most_recent=True)
    changed = get_modified_papers_since(previous_run[0][2]) if previous_run else []
    tortoise(changed)
    insert_user_log(log_user, '-1', '', '', '', timestamp=run_started)
def run_rabbit(paperslist, all_records=False):
    '''
    Run rabbit (through rabbit_with_log) on the requested papers.

    - A non-empty paperslist is processed as given, with partial=True.
    - Without papers but with all_records, rabbit_with_log is called
      with None as its paper argument.
    - Without either, the papers modified since the last 'PID_UPDATE'
      log entry of the 'daemon' user are processed; if there is no such
      entry the call is the same as for all_records, and if nothing was
      modified only a message is written.

    @param paperslist: papers to process, may be empty or None
    @param all_records: request a run with None as paper argument when
        no papers are given
    @type all_records: bool
    '''
    full_run_comment = 'bibauthorid_daemon, update_personid on all papers'

    if paperslist:
        rabbit_with_log(paperslist, True,
                        'bibauthorid_daemon, personid_fast_assign_papers on '
                        + str(paperslist),
                        partial=True)
        return

    if all_records:
        rabbit_with_log(None, True, full_run_comment)
        return

    daemon_logs = get_user_logs(userinfo='daemon', action='PID_UPDATE',
                                only_most_recent=True)
    if len(daemon_logs) < 1:
        # No previous update has been logged.
        rabbit_with_log(None, True, full_run_comment)
        return

    # Restrict the run to the papers touched since the last logged update.
    changed = get_modified_papers_since(since=daemon_logs[0][2])
    if not changed:
        bibtask.write_message("update_personID_table_from_paper: "
                              "All person entities up to date.",
                              stream=sys.stdout, verbose=0)
        return

    bibtask.write_message("update_personID_table_from_paper: Running on: "
                          + str(changed),
                          stream=sys.stdout, verbose=0)
    rabbit_with_log(changed, True,
                    'bibauthorid_daemon, run_personid_fast_assign_papers on '
                    + str([paperslist, all_records, changed]))
def run_rabbit(paperslist, all_records=False):
    '''
    Select the papers rabbit should work on and start it via
    rabbit_with_log.

    The selection is, in order of precedence:
      1. the given paperslist, when non-empty (passed with partial=True);
      2. None as paper argument, when all_records is set or when the
         'daemon' user has no 'PID_UPDATE' log entry yet;
      3. the papers modified since the most recent such log entry. When
         that set is empty a message is written and rabbit is not run.

    @param paperslist: papers to process, may be empty or None
    @param all_records: request a run with None as paper argument when
        no papers are given
    @type all_records: bool
    '''
    def _run_without_paper_filter():
        # Shared by the explicit all_records request and the
        # "no previous log entry" fallback.
        rabbit_with_log(
            None, True,
            'bibauthorid_daemon, update_personid on all papers')

    if paperslist:
        partial_comment = ('bibauthorid_daemon, personid_fast_assign_papers on '
                           + str(paperslist))
        rabbit_with_log(paperslist, True, partial_comment, partial=True)
        return

    if all_records:
        _run_without_paper_filter()
        return

    newest_log = get_user_logs(userinfo='daemon', action='PID_UPDATE',
                               only_most_recent=True)
    if not len(newest_log) >= 1:
        _run_without_paper_filter()
        return

    # Only the papers modified since the last logged update are handled.
    modified_since = get_modified_papers_since(since=newest_log[0][2])
    if modified_since:
        bibtask.write_message(
            "update_personID_table_from_paper: Running on: " +
            str(modified_since), stream=sys.stdout, verbose=0)
        run_comment = ('bibauthorid_daemon, run_personid_fast_assign_papers on '
                       + str([paperslist, all_records, modified_since]))
        rabbit_with_log(modified_since, True, run_comment)
    else:
        bibtask.write_message(
            "update_personID_table_from_paper: "
            "All person entities up to date.",
            stream=sys.stdout, verbose=0)