def create_oaiharvest_log(task_id, oai_src_id, marcxmlfile):
    """Create the harvesting log entry for the content of a MARCXML file.

    The whole file is read and its content is handed, together with the two
    identifiers, to ``create_oaiharvest_log_str``.

    :param task_id: bibupload task id
    :param oai_src_id: identifier passed through unchanged to
        ``create_oaiharvest_log_str`` (presumably the OAI source id --
        confirm against that function)
    :param marcxmlfile: path of the file whose content is logged
    """
    # The context manager guarantees the handle is closed even when
    # reading fails, which the manual open()/close() pair did not.
    with open(marcxmlfile, "r") as file_fd:
        xml_content = file_fd.read()
    create_oaiharvest_log_str(task_id, oai_src_id, xml_content)
def upload_step(obj, eng):
    """Perform the upload step.

    The record built from ``obj.data`` is exported as MARCXML, written to a
    file named after ``obj.id`` inside a per-engine directory below
    ``CFG_TMPSHAREDDIR``, and submitted as a ``bibupload`` task. The
    submission is then recorded with ``create_oaiharvest_log_str``.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :return: always ``None``; the function returns early (skipping the
        final log lines and the bibindex submission) when submitting the
        bibupload task raises.
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str

    # No filtering of the files to upload happens here, so the record is
    # exported as it is.
    new_dict_representation = records_api.Record(obj.data)
    marcxml_value = new_dict_representation.legacy_export_as_marc()
    task_id = None

    # Get a random sequence ID that will allow for the tasks to be
    # run in order, regardless if parallel task execution is activated
    sequence_id = random.randrange(1, 60000)

    bibtask.task_sleep_now_if_required()
    extract_path = plotextractor_getter.make_single_directory(
        cfg['CFG_TMPSHAREDDIR'], eng.uuid)

    # Now we launch BibUpload tasks for the final MARCXML files
    filepath = extract_path + os.sep + str(obj.id)
    # The context manager closes the file even if write() raises.
    with open(filepath, 'w') as file_fd:
        file_fd.write(marcxml_value)

    repository = obj.extra_data["_repository"]
    arguments = repository["arguments"]

    if os.path.exists(filepath):
        try:
            # randrange(1, 60000) never returns 0, so the -I option is
            # always passed.
            args = ["-r", "-i", '-I', str(sequence_id)]
            if arguments.get('u_name', ""):
                args.extend(['-N', arguments.get('u_name', "")])
            if arguments.get('u_priority', 5):
                args.extend(['-P', str(arguments.get('u_priority', 5))])
            args.append(filepath)
            task_id = bibtask.task_low_level_submission(
                "bibupload", "oaiharvest", *args)
            create_oaiharvest_log_str(task_id, repository["id"],
                                      marcxml_value)
        except Exception as msg:
            eng.log.error("An exception during submitting oaiharvest task occured : %s " % (str(msg)))
            return None
    else:
        eng.log.error("marcxmlfile %s does not exist" % (filepath,))

    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository["name"]))
    else:
        eng.log.info("material harvested from source %s was successfully uploaded" %
                     (repository["name"],))

    if cfg['CFG_INSPIRE_SITE']:
        # Launch BibIndex,Webcoll update task to show uploaded content quickly
        bibindex_params = ['-w', 'collection,reportnumber,global',
                           '-P', '6',
                           '-I', str(sequence_id),
                           '--post-process',
                           'bst_run_bibtask[taskname="webcoll", user="******", P="6", c="HEP"]']
        bibtask.task_low_level_submission("bibindex", "oaiharvest",
                                          *bibindex_params)
    eng.log.info("end of upload")
def upload_step_marcxml(obj, eng):
    """Perform the upload step with MARCXML in obj.data().

    Builds the common ``bibupload`` options from the repository settings in
    ``obj.extra_data["repository"]``, decides which files to submit, and
    submits one ``bibupload`` task per file that exists on disk:

    * when ``"f"`` is listed in the repository's ``postprocess`` setting,
      the candidates are ``<path>.insert.xml``, ``<path>.append.xml``,
      ``<path>.correct.xml`` and ``<path>.holdingpen.xml``, each with its
      own mode flag;
    * otherwise ``obj.get_data()`` is written to ``<path>`` and submitted
      with ``-r -i``.

    Exceptions raised while submitting are logged and do not stop the loop.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.base.globals import cfg
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    # Options shared by every submission of this step.
    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(cfg['CFG_TMPSHAREDDIR'], str(eng.uuid))
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record
        # (the context manager closes the file even if write() raises).
        with open(filepath, 'w') as file_fd:
            file_fd.write(obj.get_data())
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if os.path.exists(location):
            try:
                # Submit the file that was just checked for existence.
                # With a filter, ``location`` differs from the base
                # ``filepath``; without one the two are the same.
                args = mode + [location] + default_args
                task_id = task_low_level_submission(
                    "bibupload", "oaiharvest", *args)
                repo_id = repository.get("id")
                if repo_id:
                    create_oaiharvest_log_str(
                        task_id, repo_id, obj.get_data()
                    )
            except Exception as msg:
                eng.log.error(
                    "An exception during submitting OAI harvester task occurred: %s " % (
                        str(msg)))

    # task_id keeps the id of the last successful submission, if any.
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")
def upload_step(obj, eng):
    """Perform the upload step.

    Builds the common ``bibupload`` options from the repository settings in
    ``obj.extra_data["repository"]``, decides which files to submit, and
    submits one ``bibupload`` task per file that exists on disk:

    * when ``"f"`` is listed in the repository's ``postprocess`` setting,
      the candidates are ``<path>.insert.xml``, ``<path>.append.xml``,
      ``<path>.correct.xml`` and ``<path>.holdingpen.xml``, each with its
      own mode flag;
    * otherwise the record in ``obj.data`` is exported as MARCXML, written
      to ``<path>`` and submitted with ``-r -i``.

    Exceptions raised while submitting are logged and do not stop the loop.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    # Options shared by every submission of this step.
    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(cfg['CFG_TMPSHAREDDIR'], str(eng.uuid))
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        # The context manager closes the file even if write() raises.
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if os.path.exists(location):
            try:
                # Submit the file that was just checked for existence.
                # With a filter, ``location`` differs from the base
                # ``filepath``; without one the two are the same.
                args = mode + [location] + default_args
                task_id = task_low_level_submission(
                    "bibupload", "oaiharvest", *args)
                repo_id = repository.get("id")
                if repo_id:
                    create_oaiharvest_log_str(
                        task_id, repo_id, obj.get_data()
                    )
            except Exception as msg:
                eng.log.error(
                    "An exception during submitting oaiharvest task occured : %s " % (
                        str(msg)))

    # task_id keeps the id of the last successful submission, if any.
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")