def get_missing_records():
    """Return the IDs for which ``record_exists`` reports no record.

    The candidate IDs come from ``get_all_ids_in_current_system`` (called
    with an empty ID prefix). The number of missing IDs and the list itself
    are printed before being returned.

    :return: list of IDs without a matching record, in the order they were
        returned by ``get_all_ids_in_current_system``.
    """
    candidate_ids = get_all_ids_in_current_system(prepend_id_with="")

    absent = [
        candidate
        for candidate in candidate_ids
        if not record_exists(inspire_id=candidate)
    ]

    print("Missing {} records.".format(len(absent)))
    print(absent)
    return absent
def get_missing_records():
    """
    Finds all records that are missing in the new system (compared to the
    legacy environment) and returns their IDs as a list. The count and the
    IDs themselves are also printed.

    :return: a list of missing IDs
    """
    def _is_missing(candidate):
        # A record counts as missing when the existence check fails for it.
        return not record_exists(inspire_id=candidate)

    known_ids = get_all_ids_in_current_system(prepend_id_with="")
    not_found = list(filter(_is_missing, known_ids))

    print("Missing {} records.".format(len(not_found)))
    print(not_found)
    return not_found
def find_duplicates_and_remove():
    """Find inspire IDs that match more than one record and unload the extras.

    For every ID returned by ``get_all_ids_in_current_system`` the index is
    queried by ``inspire_id``. When more than one hit comes back, the
    ``recid`` of the first hit is queued for removal. The queued records are
    passed to ``do_unload`` and the admin index is then rebuilt.
    """
    to_remove = []

    for current_id in get_all_ids_in_current_system(prepend_id_with=""):
        response = get_records_matching_field(
            'inspire_id', current_id, doc_type=CFG_PUB_TYPE
        )
        hits = response['hits']['hits']
        if len(hits) <= 1:
            continue
        # Only the first hit is queued, even if there are several matches.
        to_remove.append(hits[0]['_source']['recid'])

    print('There are {} duplicates. Going to remove.'.format(len(to_remove)))
    do_unload(to_remove)

    # reindex submissions for dashboard view
    AdminIndexer().reindex(recreate=True)
def migrate(missing, start, end, date=None):
    """Migrates all content from HEPData, optionally limited to a slice.

    :param missing: if truthy, only the IDs returned by
        ``get_missing_records()`` are loaded; otherwise the IDs returned by
        ``get_all_ids_in_current_system(date)`` are loaded.
    :param start: index of the first ID to load, or ``None`` to load every
        ID (``end`` is ignored in that case). Converted with ``int()``.
    :param end: index one past the last ID to load, or ``None`` for no upper
        bound. Converted with ``int()`` when given.
    :param date: passed through to ``get_all_ids_in_current_system``.
    """
    print(missing)

    if missing:
        inspire_ids = get_missing_records()
    else:
        inspire_ids = get_all_ids_in_current_system(date)

    print("Found {} inspire ids to load.".format(len(inspire_ids)))

    if start is not None:
        # ``start`` was already coerced with int(); coerce ``end`` the same
        # way so a non-int value (e.g. a string) does not make the slice
        # raise TypeError. ``None`` keeps meaning "no upper bound".
        stop = int(end) if end is not None else None
        inspire_ids = inspire_ids[int(start):stop]
        print("Sliced, going to load {} records.".format(len(inspire_ids)))
        print(inspire_ids)

    load_files(inspire_ids)
def migrate(start, end, date=None, missing_only=False):
    """
    Migrates all content from HEPData, optionally limited to a slice.

    :param start: index of the first ID to load, or ``None`` to load every
        ID (``end`` is ignored in that case). Converted with ``int()``.
    :param end: index one past the last ID to load, or ``None`` for no upper
        bound. Converted with ``int()`` when given.
    :param date: passed through to ``get_all_ids_in_current_system``.
    :param missing_only: if truthy, only the IDs returned by
        ``get_missing_records()`` are loaded.
    """
    if missing_only:
        inspire_ids = get_missing_records()
    else:
        inspire_ids = get_all_ids_in_current_system(date)

    print("Found {} inspire ids to load.".format(len(inspire_ids)))

    if start is not None:
        # ``start`` was already coerced with int(); coerce ``end`` the same
        # way so a non-int value (e.g. a string) does not make the slice
        # raise TypeError. ``None`` keeps meaning "no upper bound".
        stop = int(end) if end is not None else None
        inspire_ids = inspire_ids[int(start):stop]
        print("Sliced, going to load {} records.".format(len(inspire_ids)))
        print(inspire_ids)

    load_files(inspire_ids)
def find_duplicates_and_remove():
    """
    Will go through the application to find any duplicates then remove them.

    An inspire ID is treated as duplicated when the lookup on the
    ``inspire_id`` field returns more than one hit; the ``recid`` of the
    first hit is the one handed to ``do_unload``. The admin index is
    recreated afterwards.
    """
    def _hits_for(inspire_id):
        # Fetch the raw list of hits for a single inspire ID.
        found = get_records_matching_field('inspire_id', inspire_id,
                                           doc_type=CFG_PUB_TYPE)
        return found['hits']['hits']

    all_ids = get_all_ids_in_current_system(prepend_id_with="")

    duplicate_recids = []
    for inspire_id in all_ids:
        hit_list = _hits_for(inspire_id)
        if len(hit_list) > 1:
            first_hit = hit_list[0]
            duplicate_recids.append(first_hit['_source']['recid'])

    print('There are {} duplicates. Going to remove.'.format(len(duplicate_recids)))
    do_unload(duplicate_recids)

    # reindex submissions for dashboard view
    indexer = AdminIndexer()
    indexer.reindex(recreate=True)
def test_get_ids_in_current_system():
    """Check that ``get_all_ids_in_current_system`` returns something."""
    result = get_all_ids_in_current_system()
    assert result is not None