def do_upgrade():
    """Repair deposition data and rewrite stale task counters.

    Every ``BibWorkflowObject`` whose ``id_user`` is not 0 is wrapped in a
    ``Deposition``.  If wrapping raises ``KeyError`` the object's data is
    replaced with an empty 'upload' skeleton, wrapped again and saved.
    The task counter found in the extra data is then replaced by a
    three-element counter chosen from the state returned by ``_get_state``.
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    from invenio.modules.deposit.models import Deposition

    # state -> {str(extra_data['_task_counter']) -> replacement counter}
    current_fixes = {
        'inprogress-new': {
            "[0, 0, 3, 1]": [0, 3, 1],
            "[0, 0, 3, 2]": [0, 3, 2],
        },
        'inprogress-edit': {
            "[0, 0, 1, 0]": [0, 1, 0],
            "[0, 0, 1, 1]": [0, 1, 1],
            "[0, 4, 3, 0]": [4, 3, 0],
        },
        'error': {
            "[0, 0, 1, 0]": [0, 1, 0],
            "[0, 0, 1, 1]": [0, 1, 1],
            "[0, 0, 3, 1]": [0, 3, 1],
            "[0, 0, 3, 2]": [0, 3, 2],
            "[0, 4, 3, 0]": [4, 3, 0],
            "[0, 4, 3, 1]": [4, 3, 1],
        },
    }
    # Only consulted when '_task_counter' is missing (stringifies to "None"):
    # state -> {str(extra_data['task_counter']) -> replacement counter}
    legacy_fixes = {
        'inprogress-new': {
            "[0, 0, 3, 1]": [0, 3, 1],
        },
        'inprogress-edit': {
            "[0, 0, 1, 1]": [0, 1, 1],
            "[0, 0, 1, 0]": [0, 1, 0],
        },
    }

    candidates = BibWorkflowObject.query.filter(
        BibWorkflowObject.id_user != 0
    ).all()

    for bwo in candidates:
        try:
            deposition = Deposition(bwo)
        except KeyError:
            logger.info("Fixing data in {}".format(bwo.id))
            bwo.set_data(dict(
                type='upload',
                title='Untitled',
                files=[],
                drafts={},
                sips=[],
            ))
            deposition = Deposition(bwo)
            deposition.save()

        state = _get_state(deposition)
        new_key = str(bwo.get_extra_data().get('_task_counter'))
        old_key = str(bwo.get_extra_data().get('task_counter'))

        replacement = current_fixes.get(state, {}).get(new_key)
        if replacement is None and new_key == "None":
            replacement = legacy_fixes.get(state, {}).get(old_key)

        if replacement is not None:
            # Hand out a copy so the lookup tables are never shared.
            bwo.save(task_counter=list(replacement))
        elif state == 'inprogress-edit' and bwo.id == 2076:
            # One hard-coded object also gets its workflow status set to 4.
            bwo.save(task_counter=[4, 3, 0])
            bwo.workflow.save(status=4)
def create(obj, dummy_eng):
    """Seal the latest unsealed SIP and submit a ``bibupload`` task for it.

    The SIP package is written to a temporary ``.xml`` file under
    ``CFG_TMPSHAREDDIR``.  ``bibupload`` is submitted with ``-r`` when the
    SIP metadata contains a ``recid`` and with ``-i`` otherwise.  The
    returned task id is appended to ``sip.task_ids`` and the deposition is
    updated.

    :param obj: workflow object wrapped by ``Deposition``.
    :param dummy_eng: workflow engine (unused).
    """
    # FIXME change share tmp directory
    from invenio.config import CFG_TMPSHAREDDIR
    from invenio.legacy.bibsched.bibtask import task_low_level_submission, \
        bibtask_allocate_sequenceid

    d = Deposition(obj)
    sip = d.get_latest_sip(sealed=False)
    sip.seal()

    tmp_file_fd, tmp_file_path = mkstemp(
        prefix="webdeposit-%s-%s" % (d.id, sip.uuid),
        suffix='.xml',
        dir=CFG_TMPSHAREDDIR,
    )
    # The file object takes ownership of the descriptor: it is closed even
    # if the write fails, and write() emits the whole package, whereas a
    # single os.write() call may write only part of it.
    with os.fdopen(tmp_file_fd, 'wb') as tmp_file:
        tmp_file.write(sip.package)

    # Trick to have access to task_sequence_id in subsequent tasks.
    d.workflow_object.task_sequence_id = bibtask_allocate_sequenceid()

    task_id = task_low_level_submission(
        'bibupload', 'webdeposit',
        '-r' if 'recid' in sip.metadata else '-i',
        tmp_file_path,
        '-I', str(d.workflow_object.task_sequence_id))

    sip.task_ids.append(task_id)
    d.update()
def _finalize_sip(obj, dummy_eng):
    """Store the MARC export of the latest unsealed SIP as its package.

    ``is_dump`` is not defined in this function; it is expected to come
    from the enclosing scope.
    """
    deposition = Deposition(obj)
    open_sip = deposition.get_latest_sip(sealed=False)
    record = make_record(open_sip.metadata, is_dump=is_dump)
    open_sip.package = record.legacy_export_as_marc()
    deposition.update()
def _mint_pid(obj, dummy_eng):
    """Create a persistent identifier and assign it to the SIP's record.

    ``pid_field``, ``pid_creator``, ``existing_pid_checker`` and
    ``pid_store_type`` are not defined here; they are expected to come from
    the enclosing scope.

    :raises Exception: if the SIP metadata has no ``recid``.
    """
    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=False).metadata

    if 'recid' not in metadata:
        raise Exception("'recid' not found in sip metadata.")

    existing = metadata.get(pid_field, None)
    to_register = None
    if existing:
        # A pid is already present - only mint it if the checker agrees.
        if existing_pid_checker and existing_pid_checker(existing, metadata):
            to_register = existing
    else:
        # Nothing in the metadata yet: ask the user supplied function for a
        # new pid and store it back into the metadata.
        to_register = metadata[pid_field] = pid_creator(metadata)

    # Create and assign the pid internally; per the original comment the
    # actual registration happens asynchronously later.
    if to_register:
        current_app.logger.info("Registering pid %s" % to_register)
        pid_obj = PersistentIdentifier.create(pid_store_type, to_register)
        if pid_obj is None:
            pid_obj = PersistentIdentifier.get(pid_store_type, to_register)

        try:
            pid_obj.assign("rec", metadata['recid'])
        except Exception:
            register_exception(alert_admin=True)

    deposition.update()
def _classify_paper_with_deposit(obj, eng):
    """Run ``classify_paper`` on a deposition's PDF or on its title/abstract.

    Unless ``fast_mode`` is set, the first attached file whose name contains
    ".pdf" is classified.  If no such file is found, the title and abstract
    from the latest SIP's metadata are classified as text instead.

    ``fast_mode``, ``taxonomy`` and the other options passed on to
    ``classify_paper`` are not defined here; they are expected to come from
    the enclosing scope.

    :param obj: workflow object wrapped by ``Deposition``.
    :param eng: workflow engine, passed on to ``classify_paper``.
    """
    from invenio.modules.deposit.models import Deposition
    deposition = Deposition(obj)
    data = None
    if not fast_mode:
        for f in deposition.files:
            if f.name and ".pdf" in f.name.lower():
                data = f.get_syspath()
                break
    callback = bibclassify_exhaustive_call
    if not data:
        try:
            metadata = deposition.get_latest_sip().metadata
        except AttributeError as err:
            obj.log.error("Error getting data: {0}".format(err))
            # Previously execution continued and failed on the unbound
            # ``metadata`` name; with nothing to classify, stop here.
            return

        data = [
            metadata.get("title", {}).get("title", ""),
            metadata.get("abstract", {}).get("summary", "")
        ]
        callback = bibclassify_exhaustive_call_text

    classify_paper(obj, eng, callback, data,
                   taxonomy, rebuild_cache,
                   no_cache, output_mode, output_limit,
                   spires, match_mode, with_author_keywords,
                   extract_acronyms, only_core_tags, fast_mode)
def _prefill_draft(obj, eng):
    """Fill a draft from the draft cache for non-API requests.

    Does nothing for API requests or when the cache holds no data.
    ``draft_id`` and ``clear`` are not defined here; they are expected to
    come from the enclosing scope.
    """
    if getattr(request, 'is_api_request', False):
        return

    cache = DepositionDraftCacheManager.get()
    if not cache.has_data():
        return

    deposition = Deposition(obj)
    cache.fill_draft(deposition, draft_id, clear=clear)
    deposition.update()
def _run_tasks(obj, dummy_eng):
    """Submit the upload tasklet and one ``webcoll`` task per community.

    All returned task ids are appended to the latest sealed SIP's
    ``task_ids``.  ``update`` is not defined here; it is expected to come
    from the enclosing scope and selects the tasklet name.
    """
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    deposition = Deposition(obj)
    sealed_sip = deposition.get_latest_sip(sealed=True)

    recid = sealed_sip.metadata['recid']
    communities = sealed_sip.metadata.get('provisional_communities', [])

    # Pass the sequence id on to every task when the object carries one.
    sequenceid = getattr(deposition.workflow_object, 'task_sequence_id', None)
    extra_args = ['-I', str(sequenceid)] if sequenceid else []

    tasklet_name = (
        'bst_openaire_update_upload' if update
        else 'bst_openaire_new_upload'
    )

    sealed_sip.task_ids.append(task_low_level_submission(
        'bibtasklet', 'webdeposit', '-T', tasklet_name,
        '--argument', 'recid=%s' % recid, *extra_args
    ))

    for community in communities:
        sealed_sip.task_ids.append(task_low_level_submission(
            'webcoll', 'webdeposit', '-c',
            'provisional-user-%s' % community, *extra_args
        ))

    deposition.update()
def test_load_workflow(self):
    """Wrap every workflow object with a non-zero ``id_user`` in a Deposition.

    There are no assertions; the test only fails if the constructor raises.
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    from invenio.modules.deposit.models import Deposition

    user_objects = BibWorkflowObject.query.filter(
        BibWorkflowObject.id_user != 0
    )
    for workflow_object in user_objects.all():
        Deposition(workflow_object)
def _reserved_recid(obj, dummy_eng):
    """Copy a pre-reserved record id into the SIP metadata, if there is one.

    The ``recid`` inside the ``prereserve_doi`` metadata entry is used when
    both the entry and its ``recid`` are truthy.  The deposition is updated
    in either case.
    """
    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=False).metadata

    prereserved = metadata.get('prereserve_doi', None)
    if prereserved and prereserved['recid']:
        metadata['recid'] = prereserved['recid']

    deposition.update()
def _create_obj(o):
    """Wrap ``o`` in a Deposition, or return None if it should be skipped.

    None is returned when ``Deposition`` raises ``InvalidDepositionType``
    (the exception is logged) or when the deposition's type differs from
    ``type``.  NOTE(review): ``type`` is presumably a filter bound in the
    enclosing scope rather than the builtin - confirm.
    """
    try:
        deposition = Deposition(o)
    except InvalidDepositionType as err:
        current_app.logger.exception(err)
        return None

    wanted = type is None or deposition.type == type
    return deposition if wanted else None
def _bibdocfile_update(obj, eng):
    """Run ``process`` on the record's ``BibRecDocs`` when a recid exists.

    ``process`` is not defined here; it is expected to come from the
    enclosing scope.  Nothing happens when it is falsy or when the latest
    unsealed SIP has no ``recid``.
    """
    if not process:
        return

    deposition = Deposition(obj)
    recid = deposition.get_latest_sip(sealed=False).metadata.get('recid')
    if not recid:
        return

    process(deposition, BibRecDocs(int(recid)))
    deposition.update()
def get_title(bwo):
    """Return the title of the object, or a placeholder if it has no SIP."""
    sip = Deposition(bwo).get_latest_sip()
    if not sip:
        return "User submission in progress!!"

    # The metadata is described in the original as a SmartJSON object; the
    # title is looked up under the nested "title" key.
    title_field = sip.metadata.get("title", {"title": "No title"})
    return title_field.get("title")
def _create_recid(obj, dummy_eng):
    """Make sure the latest unsealed SIP has a ``recid`` in its metadata.

    A new record is created with ``create_new_record`` only when no
    ``recid`` is present.

    :raises Exception: if the deposition has no unsealed SIP.
    """
    deposition = Deposition(obj)
    open_sip = deposition.get_latest_sip(sealed=False)
    if open_sip is None:
        raise Exception("No submission information package found.")

    metadata = open_sip.metadata
    if 'recid' not in metadata:
        from invenio.legacy.bibupload.engine import create_new_record
        metadata['recid'] = create_new_record()

    deposition.update()
def _process_sip(obj, dummy_eng):
    """Run a metadata processor on the latest unsealed SIP.

    ``processor`` is not defined here; it is expected to come from the
    enclosing scope.  When it is None, the deposition type's
    ``process_sip_metadata`` is used if the type has one.
    """
    deposition = Deposition(obj)
    sip_metadata = deposition.get_latest_sip(sealed=False).metadata

    if processor is not None:
        processor(deposition, sip_metadata)
    elif hasattr(deposition.type, 'process_sip_metadata'):
        # Reaching this branch already implies that processor is None.
        deposition.type.process_sip_metadata(deposition, sip_metadata)

    deposition.update()
def add_files_to_task_results(obj, eng):
    """Register each file attached to the deposition as a task result.

    Each result is stored under the file's name and rendered with the
    "workflows/results/files.html" template.
    """
    from invenio.modules.deposit.models import Deposition

    template = "workflows/results/files.html"
    for attached in Deposition(obj).files:
        result = dict(
            type="file",
            filename=attached.name,
            full_path=attached.get_syspath(),
        )
        obj.add_task_result(attached.name, result, template)
def _merge_record(obj, eng):
    """Merge the user's changes into the current record via ``merge_func``.

    ``draft_id``, ``pre_process_load``, ``post_process_load``,
    ``process_export`` and ``merge_func`` are not defined here; they are
    expected to come from the enclosing scope.  The merged result replaces
    the SIP metadata and its ``modification_date`` is set from the newest
    ``HstRECORD`` row of the record.
    """
    deposition = Deposition(obj)
    open_sip = deposition.get_latest_sip(sealed=False)

    # The full record as it currently exists, with all of its fields.
    full_record = get_record(
        open_sip.metadata.get('recid'), reset_cache=True
    )

    form_class = deposition.get_draft(draft_id).form_class

    # Reduced view of the current record, limited to the fields that
    # concern this deposition.
    current_simple = deposition_record(
        full_record,
        [form_class],
        pre_process_load=pre_process_load,
        post_process_load=post_process_load,
        process_export=partial(process_export, deposition),
    )

    # Reduced record built from the changes the user has made.
    changed_simple = make_record(open_sip.metadata, is_dump=True)

    # Initial patch of the current record: values present only in the
    # simplified record (e.g. form defaults) are copied into the full one.
    for key in current_simple:
        if key not in full_record:
            full_record[key] = current_simple[key]

    # Clean dumps of all three records.
    current_simple_json = current_simple.dumps(clean=True)
    changed_simple_json = changed_simple.dumps(clean=True)
    current_full_json = full_record.dumps(clean=True)

    # Merge the changes into the current record.
    open_sip.metadata = merge_func(
        deposition,
        current_full_json,
        current_simple_json,
        changed_simple_json,
    )

    # Base the upload on the latest version to prevent it being rejected in
    # the bibupload queue.
    history = HstRECORD.query.filter_by(
        id_bibrec=open_sip.metadata.get('recid')
    ).order_by(HstRECORD.job_date.desc())
    latest = history.first()

    open_sip.metadata['modification_date'] = latest.job_date.isoformat()

    deposition.update()
def inform_submitter(obj, eng):
    """Mail the submitter the outcome of the submission.

    The body is 'Accepted: ' followed by the ``url`` from the workflow
    object's extra data when ``was_approved`` is true, else 'Rejected'.
    """
    from invenio.modules.access.control import acc_get_user_email
    from invenio.ext.email import send_email

    workflow_object = Deposition(obj).workflow_object
    recipient = acc_get_user_email(workflow_object.id_user)

    if was_approved(obj, eng):
        approved_url = workflow_object.get_extra_data().get('url', '')
        body = 'Accepted: ' + approved_url
    else:
        body = 'Rejected'

    send_email(CFG_SITE_SUPPORT_EMAIL, recipient, 'Subject', body,
               header='header')
def formatter(bwo, **kwargs):
    """Return the object's latest SIP package, formatted as requested.

    The ``format`` keyword argument defaults to "hd".  For "xm" the package
    is returned as is; any other value is passed to ``format_record``.
    """
    from invenio.modules.formatter.engine import format_record

    marcxml = Deposition(bwo).get_latest_sip().package
    output_format = kwargs.get("format", "hd")

    if output_format == "xm":
        return marcxml
    return format_record(recID=None, of=output_format, xml_record=marcxml)
def get_description(bwo):
    """Return a description of the object.

    With a SIP, the submission record template is rendered with the
    record's ``arxiv_id`` and ``type_of_doc``.  Without one, a
    "Submitted by" line with the submitter's e-mail is returned.
    """
    deposition = Deposition(bwo)
    sip = deposition.get_latest_sip()

    if not sip:
        from invenio.modules.access.control import acc_get_user_email
        submitter_id = deposition.workflow_object.id_user
        return "Submitted by: %s" % str(acc_get_user_email(submitter_id))

    metadata = sip.metadata
    identifiers = [metadata.get("arxiv_id", "")]
    categories = [metadata.get("type_of_doc", "")]
    return render_template(
        'workflows/styles/submission_record.html',
        categories=categories,
        identifiers=identifiers,
    )
def _prepare_sip(obj, dummy_eng):
    """Ensure an unsealed SIP exists and set its creator agent.

    A newly created SIP has its ``files`` metadata entry moved to ``fft``.
    """
    deposition = Deposition(obj)

    current_sip = deposition.get_latest_sip(sealed=False)
    if current_sip is None:
        current_sip = deposition.create_sip()

        # FIXME: Move to somewhere more appropriate
        # create_sip puts files into the 'files' attribute by default.
        sip_metadata = current_sip.metadata
        if 'files' in sip_metadata:
            sip_metadata['fft'] = sip_metadata['files']
            del sip_metadata['files']

    creator = Agent(role='creator', from_request_context=True)
    current_sip.agents = [creator]
    deposition.update()
def _render_form(obj, eng):
    """Render the draft's form, or report validation errors to API clients.

    For API requests the draft is validated; if the form has errors, a 400
    render context listing them is set and the workflow is halted.  For
    other requests a completed draft jumps one task forward, while an
    incomplete one gets a form render context, is saved, and halts the
    workflow to wait for submission.

    ``draft_id`` is not defined here; it is expected to come from the
    enclosing scope.

    :param obj: workflow object wrapped by ``Deposition``.
    :param eng: workflow engine used to halt or jump.
    """
    d = Deposition(obj)
    draft = d.get_or_create_draft(draft_id)

    if getattr(request, 'is_api_request', False):
        form = draft.get_form(validate_draft=True)
        if form.errors:
            error_messages = []
            # form.errors maps field names to message lists, so it must be
            # iterated with .items() to get (field, msgs) pairs.
            for field, msgs in form.errors.items():
                for m in msgs:
                    # list.append() takes one positional argument, so each
                    # error is appended as a dict.
                    error_messages.append(dict(
                        field=field,
                        message=m,
                        code=error_codes['validation_error'],
                    ))

            d.set_render_context(dict(
                response=dict(
                    message="Bad request",
                    status=400,
                    errors=error_messages,
                ),
                status=400,
            ))

            eng.halt("API: Draft did not validate")
    else:
        if draft.is_completed():
            eng.jumpCallForward(1)
        else:
            form = draft.get_form(validate_draft=draft.validate)
            form.validate = True

            d.set_render_context(dict(
                template_name_or_list=form.get_template(),
                deposition=d,
                deposition_type=(
                    None if d.type.is_default() else d.type.get_identifier()
                ),
                uuid=d.id,
                draft=draft,
                form=form,
                my_depositions=list(Deposition.get_depositions(
                    current_user, type=d.type
                )),
            ))
            d.update()
            eng.halt('Wait for form submission.')
def halt_to_render(obj, eng):
    """Set the "pending" render context and halt the workflow.

    The object's ``last_task`` is set to "halt_to_render" before halting.
    """
    deposition = Deposition(obj)
    open_sip = deposition.get_latest_sip(sealed=False)

    if deposition.type.is_default():
        type_identifier = None
    else:
        type_identifier = deposition.type.get_identifier()

    deposition.set_render_context({
        'template_name_or_list': "deposit/pending.html",
        'deposition': deposition,
        'deposition_type': type_identifier,
        'uuid': deposition.id,
        'sip': open_sip,
        'my_depositions': Deposition.get_depositions(
            current_user, type=deposition.type
        ),
        'format_record': format_record,
    })

    obj.last_task = "halt_to_render"
    eng.halt("User submission complete.")
def _send_robotupload(obj, eng):
    """Send the SIP package as a robotupload and halt until the callback.

    The SIP is sealed first if the deposition is not yet submitted.  The
    upload goes to ``url`` when one is given, otherwise to
    ``CFG_ROBOTUPLOAD_SUBMISSION_BASEURL``.  ``url`` is not defined here; it
    is expected to come from the enclosing scope.

    :param obj: workflow object wrapped by ``Deposition``.
    :param eng: workflow engine, halted after a successful submission.
    :raises WorkflowError: if no SIP exists or the response text does not
        contain "[INFO]".
    """
    from invenio.modules.deposit.models import Deposition
    from invenio.modules.workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml
    from invenio.base.globals import cfg

    d = Deposition(obj)

    sip = d.get_latest_sip(d.submitted)
    if not sip:
        raise WorkflowError("No sip found", eng.uuid, obj.id)
    if not d.submitted:
        sip.seal()
        d.update()

    # base_url used to be assigned only when url was None, so an explicit
    # url hit an unbound name below; honour the explicit url instead.
    if url is None:
        base_url = cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL")
    else:
        base_url = url

    callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                "callback/workflows/robotupload")
    obj.log.info("Sending Robotupload to {0} with callback {1}".format(
        base_url,
        callback_url))
    result = make_robotupload_marcxml(url=base_url,
                                      marcxml=sip.package,
                                      callback_url=callback_url,
                                      nonce=obj.id)
    if "[INFO]" not in result.text:
        if "cannot use the service" in result.text:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(result.text)
        txt = "Error while submitting robotupload: {0}".format(result.text)
        raise WorkflowError(txt, eng.uuid, obj.id)
    else:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        eng.halt("Waiting for robotupload: {0}".format(result.text))
def _api_validate_files(obj, eng):
    """For API requests, require at least one file on the deposition.

    With no files, a 400 render context is set and the workflow is halted.
    Otherwise every draft is marked as completed.  Non-API requests are
    left untouched.
    """
    if not getattr(request, 'is_api_request', False):
        return

    deposition = Deposition(obj)
    if len(deposition.files) < 1:
        no_files_error = {
            'message': "Minimum one file must be provided.",
            'code': error_codes['validation_error'],
        }
        deposition.set_render_context({
            'response': {
                'message': "Bad request",
                'status': 400,
                'errors': [no_files_error],
            },
            'status': 400,
        })
        deposition.update()
        eng.halt("API: No files provided")
    else:
        # Mark all drafts as completed
        for each_draft in deposition.drafts.values():
            each_draft.complete()
        deposition.update()
def do_upgrade():
    """Reconcile object version and workflow status for every deposition.

    Each ``BibWorkflowObject`` with a non-zero ``id_user`` is wrapped in a
    ``Deposition`` and handled by deposition state:

    * error: two author metadata keys are renamed with a leading
      underscore and the workflow is re-run headless;
    * done: object version and workflow status are moved to COMPLETED for
      the recognised combinations;
    * in progress: the workflow status (and, in one case, the object
      version) is moved to HALTED for the recognised combinations.

    Unrecognised combinations are only reported through ``warn``.  At the
    end the module-level ``good``/``bad`` values are reported and the
    database session is committed.
    """
    from invenio.modules.workflows.models import BibWorkflowObject
    from invenio.modules.workflows.engine import ObjectVersion, WorkflowStatus
    from invenio.modules.deposit.models import Deposition

    for o in BibWorkflowObject.query.filter(BibWorkflowObject.id_user != 0).all():
        d = Deposition(o)
        if is_error(d):
            warn(o, 'ERROR', "run workflow")
            sip = d.get_latest_sip(sealed=False)
            for k in ['first_author', 'additional_authors']:
                if k in sip.metadata:
                    sip.metadata['_%s' % k] = sip.metadata[k]
                    del sip.metadata[k]
            d.run_workflow(headless=True)
        elif is_done(d):
            if o.version != ObjectVersion.COMPLETED or o.workflow.status != WorkflowStatus.COMPLETED:
                # The workflow status is compared against WorkflowStatus;
                # the original compared it against ObjectVersion.HALTED.
                if o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.HALTED:
                    warn(o, 'DONE', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.COMPLETED))
                    warn(o, 'DONE', "obj version %s -> %s" % (o.version, ObjectVersion.COMPLETED))
                    o.workflow.status = WorkflowStatus.COMPLETED
                    o.version = ObjectVersion.COMPLETED
                # NOTE(review): status 5 has no named constant here -
                # confirm which workflow status it represents.
                elif o.version == ObjectVersion.COMPLETED and o.workflow.status == 5:
                    warn(o, 'DONE', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.COMPLETED))
                    o.workflow.status = WorkflowStatus.COMPLETED
                elif o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.COMPLETED:
                    warn(o, 'DONE', "obj version %s -> %s" % (o.version, ObjectVersion.COMPLETED))
                    o.version = ObjectVersion.COMPLETED
                else:
                    warn(o, 'DONE', "Unmatched version %s status %s" % (o.version, o.workflow.status if o.workflow else None))
            else:
                info_msg(o, 'DONE')
        elif is_inprogress(d):
            if is_submitted(d):
                if o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.HALTED:
                    info_msg(o, 'INPROGRESS/SUBMITTED')
                elif o.version == ObjectVersion.INITIAL and o.workflow.status == WorkflowStatus.NEW:
                    info_msg(o, 'INPROGRESS/SUBMITTED')
                elif o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.COMPLETED:
                    warn(o, 'INPROGRESS/SUBMITTED', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.HALTED))
                    o.workflow.status = WorkflowStatus.HALTED
                else:
                    warn(o, 'INPROGRESS/SUBMITTED', "Unmatched version %s status %s" % (o.version, o.workflow.status if o.workflow else None))
            elif is_unsubmitted(d):
                if o.workflow is None:
                    # Without a workflow only the INITIAL version is
                    # treated as consistent.
                    if o.version != ObjectVersion.INITIAL:
                        warn(o, 'INPROGRESS/UNSUBMITTED', "Unmatched version %s status %s" % (o.version, o.workflow.status if o.workflow else None))
                    else:
                        info_msg(o, 'INPROGRESS/UNSUBMITTED')
                elif o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.HALTED:
                    info_msg(o, 'INPROGRESS/UNSUBMITTED')
                elif o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.RUNNING:
                    warn(o, 'INPROGRESS/UNSUBMITTED', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.HALTED))
                    o.workflow.status = WorkflowStatus.HALTED
                elif o.version == ObjectVersion.RUNNING and o.workflow.status == WorkflowStatus.RUNNING:
                    warn(o, 'INPROGRESS/UNSUBMITTED', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.HALTED))
                    warn(o, 'INPROGRESS/UNSUBMITTED', "obj version %s -> %s" % (o.version, ObjectVersion.HALTED))
                    o.version = ObjectVersion.HALTED
                    o.workflow.status = WorkflowStatus.HALTED
                elif o.version == ObjectVersion.HALTED and o.workflow.status == WorkflowStatus.COMPLETED:
                    warn(o, 'INPROGRESS/UNSUBMITTED', "wf status %s -> %s" % (o.workflow.status, WorkflowStatus.HALTED))
                    o.workflow.status = WorkflowStatus.HALTED
                else:
                    warn(o, 'INPROGRESS/UNSUBMITTED', "Unmatched version %s status %s" % (o.version, o.workflow.status if o.workflow else None))
            else:
                warn(o, 'INPROGRESS/?', "Unmatched version %s status %s" % (o.version, o.workflow.status if o.workflow else None))

    # good/bad are module-level names; presumably counters maintained by
    # info_msg/warn - verify against their definitions.
    global good, bad
    warnings.warn("Good: %s Bad: %s" % (good, bad))
    db.session.commit()
def _hold_for_approval(obj, dummy_eng):
    """Call ``approve_record`` when the deposition type holds for upload."""
    from invenio.modules.workflows.tasks.marcxml_tasks import approve_record

    deposition_type = Deposition(obj).type
    if not deposition_type.hold_for_upload:
        return
    approve_record(obj, dummy_eng)
def _load_record(obj, eng):
    """Load the uploaded record into a draft so that it can be edited.

    If ``is_sip_uploaded`` reports that the latest sealed SIP is not yet
    uploaded, a 409 response (API) or a flashed warning with the
    "completed" page is set and the workflow is halted.  If drafts already
    exist, the engine jumps one task forward.  The draft is then filled
    from the record, and API requests are halted with a 201 response.

    ``draft_id``, ``post_process`` and ``producer`` are not defined here;
    they are expected to come from the enclosing scope.
    """
    deposition = Deposition(obj)
    sealed_sip = deposition.get_latest_sip(sealed=True)
    record = get_record(sealed_sip.metadata.get('recid'), reset_cache=True)

    if not is_sip_uploaded(sealed_sip, record=record):
        if getattr(request, 'is_api_request', False):
            conflict = {
                'message': "Conflict",
                'status': 409,
                'errors': "Upload not yet fully integrated. Please wait"
                          " a few moments.",
            }
            deposition.set_render_context({
                'response': conflict,
                'status': 409,
            })
        else:
            from flask import flash
            flash(
                "Editing is only possible after your upload have been"
                " fully integrated. Please wait a few moments, then try"
                " to reload the page.",
                category='warning'
            )
            if deposition.type.is_default():
                type_identifier = None
            else:
                type_identifier = deposition.type.get_identifier()
            deposition.set_render_context({
                'template_name_or_list': "deposit/completed.html",
                'deposition': deposition,
                'deposition_type': type_identifier,
                'uuid': deposition.id,
                'sip': sealed_sip,
                'my_depositions': Deposition.get_depositions(
                    current_user, type=deposition.type
                ),
                'format_record': format_record,
            })
        deposition.update()
        eng.halt("Wait for record to be uploaded")

    # Skip ahead when the record has already been loaded into a draft.
    if deposition.drafts:
        eng.jumpCallForward(1)

    # Load the draft and fill it with the values from the record.
    target_draft = deposition.get_or_create_draft(draft_id)
    record_to_draft(
        record,
        draft=target_draft,
        post_process=post_process,
        producer=producer,
    )

    deposition.update()

    # API requests stop here with the marshalled deposition.
    if getattr(request, 'is_api_request', False):
        deposition.set_render_context({
            'response': deposition.marshal(),
            'status': 201,
        })
        eng.halt("API request")
def has_submission(obj, eng):
    """Return the result of ``has_sip()`` for the object's deposition."""
    return Deposition(obj).has_sip()