def _convert_files(obj, eng):
    """Convert extracted files dated inside the requested window.

    Reads the list of files from ``obj.data['extracted_files']``, asks the
    ``WorldScientific`` helper for each file's date and record, and writes
    every non-empty record into the storage folder. The resolved date
    window is written back to ``obj.extra_data['args']`` and the list of
    written paths is stored in ``obj.data['insert']``.

    ``target_folder``, ``cache`` and ``date_key`` are not defined here and
    must be provided by the surrounding scope.

    :param obj: workflow object carrying ``data``, ``extra_data`` and ``log``.
    :param eng: workflow engine; not used by this function.
    """
    from invenio_knowledge.api import get_kb_mappings

    # Build the key -> value lookup from the 'JOURNALS' knowledge base.
    journal_map = {}
    for entry in get_kb_mappings('JOURNALS'):
        journal_map[entry['key']] = entry['value']

    ws = WorldScientific(journal_map)
    target_folder_full = get_storage_path(suffix=target_folder)
    args = obj.extra_data['args']

    # Upper bound: explicit argument, otherwise today.
    to_date = args.get("to_date")
    if not to_date:
        to_date = datetime.now().strftime('%Y-%m-%d')

    # Lower bound: explicit argument, then the cached date, and as a last
    # resort one week before now.
    from_date = args.get("from_date")
    if not from_date:
        from_date = cache.get(date_key)
    if not from_date:
        one_week_ago = datetime.now() - timedelta(days=7)
        from_date = one_week_ago.strftime('%Y-%m-%d')

    obj.extra_data['args']["to_date"] = to_date
    obj.extra_data['args']["from_date"] = from_date

    insert_files = []
    for source_path in obj.data['extracted_files']:
        date = ws.get_date(source_path)
        if not (from_date <= date <= to_date):
            continue

        marc = ws.get_record(source_path)
        if not marc:
            continue

        # Keep only the file name and place it in the storage folder.
        destination = join(target_folder_full, basename(source_path))
        insert_files.append(destination)
        with open(destination, 'w') as outfile:
            outfile.write(marc)

    summary = "Converted {0} articles between {1} to {2}".format(
        len(insert_files), from_date, to_date
    )
    obj.log.info(summary)

    obj.data['insert'] = insert_files
    obj.data["result_path"] = target_folder_full

    obj.log.debug("Saved converted files to {0}".format(target_folder_full))
    pending = len(obj.data["insert"])
    obj.log.debug("{0} files to add".format(pending))
def _convert_files(obj, eng):
    """Convert extracted files dated inside the requested window.

    When ``args['reharvest']`` is truthy the files are taken from
    ``obj.data['all_extracted_files']``, otherwise from
    ``obj.data['newly_extracted_files']``. A file is converted when the
    ``WorldScientific`` helper reports no date for it (``None``) or a date
    inside ``[from_date, to_date]``; other files are logged as filtered
    out. Every non-empty record is written into the storage folder, the
    resolved window is written back to ``obj.extra_data['args']`` and the
    written paths are stored in ``obj.data['insert']``.

    Both window bounds are kept as ``'%Y-%m-%d'`` strings so that they can
    be compared with each other and with the per-file date consistently.

    ``target_folder`` and ``weeks_threshold`` are not defined here and must
    be provided by the surrounding scope.

    :param obj: workflow object carrying ``data``, ``extra_data`` and ``log``.
    :param eng: workflow engine; not used by this function.
    """
    from invenio_knowledge.api import get_kb_mappings

    date_format = '%Y-%m-%d'

    mappings = {
        item['key']: item['value'] for item in get_kb_mappings('JOURNALS')
    }
    ws = WorldScientific(mappings)
    target_folder_full = get_storage_path(suffix=target_folder)

    args = obj.extra_data['args']
    reharvest = args.get("reharvest")

    # By default, the window ends today.
    to_date = args.get("to_date") or datetime.now().strftime(date_format)

    from_date = args.get("from_date")
    if not from_date:
        if reharvest:
            # Since the "beginning" of time when not specified. Kept as a
            # string (not a datetime) so it has the same type as to_date.
            from_date = "1900-01-01"
        else:
            # Dynamic date in the past when not specified and not
            # reharvesting. The subtraction must be parenthesised:
            # timedelta objects have no strftime().
            from_date = (
                datetime.now() - timedelta(weeks=weeks_threshold)
            ).strftime(date_format)

    obj.extra_data['args']["to_date"] = to_date
    obj.extra_data['args']["from_date"] = from_date

    if reharvest:
        filenames = obj.data['all_extracted_files']
    else:
        filenames = obj.data['newly_extracted_files']

    insert_files = []
    for filename in filenames:
        date = ws.get_date(filename)
        if date is None or (from_date <= date <= to_date):
            marc = ws.get_record(filename)
            if marc:
                # Keep only the file name and place it in the storage folder.
                target = join(target_folder_full, basename(filename))
                insert_files.append(target)
                with open(target, 'w') as outfile:
                    outfile.write(marc)
        else:
            obj.log.info("Filtered out {0} ({1})".format(filename, date))

    obj.log.info("Converted {0}/{1} articles between {2} to {3}".format(
        len(insert_files),
        len(filenames),
        from_date,
        to_date
    ))

    obj.data['insert'] = insert_files
    obj.data["result_path"] = target_folder_full

    obj.log.debug("Saved converted files to {0}".format(target_folder_full))
    obj.log.debug("{0} files to add".format(
        len(obj.data["insert"]),
    ))