示例#1
0
class SimpleRecordDeposition(DepositionType):
    """Simple record submission - no support for editing nor REST API."""

    workflow = [
        # Pre-fill draft with values passed in from request
        prefill_draft(draft_id='default'),
        # Render form and wait for user to submit
        render_form(draft_id='default'),
        # Create the submission information package by merging form data
        # from all drafts (in this case only one draft exists).
        prepare_sip(),
        # Process metadata to match your JSONAlchemy record model. This will
        # call process_sip_metadata() on your subclass.
        process_sip_metadata(),
        # Reserve a new record id, so that we can provide proper feedback to
        # user before the record has been uploaded.
        create_recid(),
        # Generate MARC based on metadata dictionary.
        finalize_record_sip(is_dump=False),
        # Hold the deposition for admin approval
        hold_for_approval(),
        # Seal the SIP and write MARCXML file and call bibupload on it
        upload_record_sip(),
    ]

    hold_for_upload = False

    @classmethod
    def render_completed(cls, d):
        """Render the page shown once the deposition has completed.

        :param d: the completed deposition.
        :returns: the rendered ``deposit/completed.html`` template.
        """
        ctx = dict(
            deposition=d,
            # The default deposition type is passed to the template as None.
            deposition_type=(None if d.type.is_default() else
                             d.type.get_identifier()),
            uuid=d.id,
            my_depositions=Deposition.get_depositions(current_user,
                                                      type=d.type),
            sip=d.get_latest_sip(),
            format_record=format_record,
        )

        return render_template('deposit/completed.html', **ctx)

    @classmethod
    def process_sip_metadata(cls, deposition, metadata):
        """Process metadata prior to MARC generation.

        Implement this method in your subclass. The default implementation
        does nothing and leaves ``metadata`` untouched.

        :param deposition: the deposition being processed.
        :param metadata: the metadata dictionary of the SIP.
        """
示例#2
0
class literature_simple(literature):
    """Literature deposition variant that is submitted without approval."""

    name = "Literature (No approval)"
    name_plural = "Literature (No approval) submissions"

    workflow = [
        # Seed the 'default' draft with the values supplied in the request.
        prefill_draft(draft_id='default'),
        # Show the form and pause until the user submits it.
        render_form(draft_id='default'),
        # Merge the form data of every draft (a single one here) into the
        # submission information package.
        prepare_sip(),
        # Adapt the metadata to the JSONAlchemy record model; this step
        # calls process_sip_metadata() on the subclass.
        process_sip_metadata(),
        # Build the MARC from the metadata dictionary.
        finalize_record_sip(is_dump=False),
        # Upload the MARCXML locally (handy while debugging).
        upload_record_sip(),
    ]
示例#3
0
class upload(DepositionType):
    """
    Zenodo deposition type.

    Declares the deposition workflow, the class-level configuration flags,
    the draft forms and the field maps used to marshal depositions and
    drafts for the REST API.
    """
    # The workflow runs three stages, each split by has_submission into an
    # "existing deposition" branch and a "new deposition" branch:
    # form handling, SIP processing and upload.
    workflow = [
        p.IF_ELSE(
            has_submission,
            # Existing deposition
            [
                # Load initial record
                load_record(
                    draft_id='_edit',
                    post_process=process_draft
                ),
                # Render the form and wait until it is completed
                render_form(draft_id='_edit'),
            ],
            # New deposition
            [
                # Load pre-filled data from cache
                prefill_draft(draft_id='_default'),
                # Render the form and wait until it is completed
                render_form(draft_id='_default'),
                # Test if all files are available for API
                api_validate_files(),
            ]
        ),
        # Create the submission information package by merging data
        # from all drafts - i.e. generate the recjson.
        prepare_sip(),
        p.IF_ELSE(
            has_submission,
            # Existing deposition
            [
                # Process SIP recjson
                process_sip_metadata(process_recjson_edit),
                # Merge SIP metadata into record and generate MARC
                merge_record(
                    draft_id='_edit',
                    post_process_load=process_draft,
                    process_export=process_recjson_edit,
                    merge_func=merge,
                ),
                # Set file restrictions
                process_bibdocfile(process=process_files),
            ],
            # New deposition
            [
                # Check for reserved recids.
                reserved_recid(),
                # Reserve a new record id
                create_recid(),
                # Register DOI in internal pid store.
                mint_pid(
                    pid_field='doi',
                    pid_store_type='doi',
                    # The DOI is created from the record id of the recjson.
                    pid_creator=lambda recjson: create_doi(
                        recid=recjson['recid']
                    )['doi'],
                    existing_pid_checker=check_existing_pid,
                ),
                # Process SIP metadata
                process_sip_metadata(process_recjson_new),
            ]
        ),
        # Generate MARC based on recjson structure
        finalize_record_sip(),
        p.IF_ELSE(
            has_submission,
            # Existing deposition
            [
                # Seal the SIP and write MARCXML file and call bibupload on it
                upload_record_sip(),
                # Schedule background tasks.
                run_tasks(update=True),
            ],
            # New deposition
            [
                # Note: once upload_record_sip() has run, has_submission
                # returns True for both new and edited depositions, which is
                # why the update flag is passed explicitly to run_tasks().
                upload_record_sip(),
                run_tasks(update=False),
            ]
        ),
    ]
    # Display names and deposition type options. The options are presumably
    # interpreted by DepositionType, which is not visible here - confirm
    # their exact meaning there.
    name = "Upload"
    name_plural = "Uploads"
    editable = True
    stopable = True
    enabled = True
    default = True
    api = True
    # Form class backing each draft id used in the workflow above.
    draft_definitions = {
        '_default': ZenodoForm,
        '_edit': ZenodoEditForm,
    }

    # Field map used to marshal the metadata of a new (unpublished) draft.
    marshal_metadata_fields = dict(
        access_right=fields.String,
        communities=fields.List(fields.Raw),
        conference_acronym=fields.String,
        conference_dates=fields.String,
        conference_place=fields.String,
        conference_title=fields.String,
        conference_url=fields.String,
        conference_session=fields.String,
        conference_session_part=fields.String,
        creators=fields.Raw(default=[]),
        description=fields.String,
        doi=fields.String(default=''),
        embargo_date=ISODate,
        grants=fields.List(fields.Raw),
        image_type=fields.String(default=''),
        imprint_isbn=fields.String,
        imprint_place=fields.String,
        imprint_publisher=fields.String,
        journal_issue=fields.String,
        journal_pages=fields.String,
        journal_title=fields.String,
        journal_volume=fields.String,
        keywords=fields.Raw(default=[]),
        license=fields.String,
        notes=fields.String(default=''),
        partof_pages=fields.String,
        partof_title=fields.String,
        prereserve_doi=fields.Raw,
        publication_date=ISODate,
        publication_type=fields.String(default=''),
        references=fields.List(fields.String, default=[]),
        related_identifiers=fields.Raw(default=[]),
        thesis_supervisors=fields.Raw(default=[]),
        title=fields.String,
        upload_type=fields.String,
    )

    # Field map used when the deposition already has a SIP: identical to
    # the map above except that 'prereserve_doi' is not exposed.
    marshal_metadata_edit_fields = marshal_metadata_fields.copy()
    del marshal_metadata_edit_fields['prereserve_doi']
    # Disabled: recid and version_id are not added to the edit field map.
    #marshal_metadata_edit_fields.update(dict(
    #    recid=fields.Integer,
    #    version_id=UTCISODateTime,
    #))

    # Deposition field map: the base class map without the 'drafts' entry.
    marshal_deposition_fields = DepositionType.marshal_deposition_fields.copy()
    del marshal_deposition_fields['drafts']

    # Draft field map: the base class map with a nested 'metadata' entry
    # read from the draft's 'values' attribute, and without the 'id' and
    # 'completed' entries.
    marshal_draft_fields = DepositionType.marshal_draft_fields.copy()
    marshal_draft_fields['metadata'] = fields.Nested(
        marshal_metadata_fields, attribute='values'
    )
    del marshal_draft_fields['id']
    del marshal_draft_fields['completed']

    @classmethod
    def default_draft_id(cls, deposition):
        """Return the id of the draft to use by default for *deposition*.

        ``'_edit'`` is chosen when the deposition has a SIP and an ``_edit``
        draft exists; in every other case ``'_default'`` is returned.
        """
        editing = deposition.has_sip() and '_edit' in deposition.drafts
        return '_edit' if editing else '_default'

    @classmethod
    def marshal_deposition(cls, deposition):
        """
        Build the REST API (JSON) representation of a deposition.

        The metadata comes from the ``_edit`` draft when the deposition has
        a SIP and such a draft exists, from a draft rebuilt out of the latest
        sealed SIP when it has a SIP but no ``_edit`` draft, and from the
        ``_default`` draft otherwise.

        Note that the selected draft's ``values`` are modified in the
        process (empty elements filtered, disabled fields removed).

        :param deposition: the deposition to marshal.
        :returns: the marshalled deposition including a ``metadata`` entry
            and, when the first sealed SIP provides them, ``record_id``,
            ``record_url``, ``doi`` and ``doi_url``.
        """
        # Select the draft and the matching metadata field map.
        if deposition.has_sip() and '_edit' in deposition.drafts:
            draft = deposition.get_draft('_edit')
            metadata_fields = cls.marshal_metadata_edit_fields
        elif deposition.has_sip():
            # FIXME: Not based on latest available data in record.
            sealed_sip = deposition.get_latest_sip(sealed=True)
            record = Record.create(sealed_sip.package, master_format='marc')
            draft = record_to_draft(record, post_process=process_draft)
            metadata_fields = cls.marshal_metadata_edit_fields
        else:
            draft = deposition.get_or_create_draft('_default')
            metadata_fields = cls.marshal_metadata_fields

        # Iron out known differences in marshalling.
        draft.values = filter_empty_elements(draft.values)
        if 'grants' not in draft.values:
            draft.values['grants'] = []

        # Drop the values of disabled fields from the output.
        for field_name, field_flags in draft.flags.items():
            if 'disabled' not in field_flags or \
               field_name not in draft.values:
                continue
            del draft.values[field_name]

        # Marshal the deposition itself, then attach the metadata.
        obj = marshal(deposition, cls.marshal_deposition_fields)
        obj['metadata'] = marshal(unicodifier(draft.values), metadata_fields)

        # Add record and DOI information from the first sealed SIP found.
        sealed = next(
            (sip for sip in deposition.sips if sip.is_sealed()), None
        )
        if sealed is not None:
            recjson = sealed.metadata

            recid = recjson.get('recid')
            if recid:
                obj['record_id'] = fields.Integer().format(recid)
                record_url = url_for(
                    'record.metadata', recid=recid, _external=True
                )
                obj['record_url'] = fields.String().format(record_url)

            doi = recjson.get('doi')
            # Only DOIs under the configured DataCite prefix are exposed.
            if doi and doi.startswith(CFG_DATACITE_DOI_PREFIX+"/"):
                obj['doi'] = fields.String().format(doi)
                obj['doi_url'] = fields.String().format(
                    "http://dx.doi.org/%s" % obj['doi']
                )

        return obj

    @classmethod
    def marshal_draft(cls, obj):
        """
        Build the REST API (JSON) representation of a DepositionDraft.

        :param obj: the draft to marshal.
        :returns: *obj* marshalled with ``cls.marshal_draft_fields``.
        """
        draft_fields = cls.marshal_draft_fields
        return marshal(obj, draft_fields)

    @classmethod
    def api_action(cls, deposition, action_id):
        """
        Run the REST API action *action_id* on *deposition*.

        Supported actions are ``publish``, ``edit`` and ``discard``.

        :returns: the result of running the workflow headless for
            ``publish`` and ``edit``; a ``(marshalled deposition, 201)``
            tuple for ``discard``.
        :raises InvalidApiAction: for any other action id.
        """
        if action_id == 'publish':
            return deposition.run_workflow(headless=True)

        if action_id == 'edit':
            # Trick: Works in combination with load_record task to provide
            # proper response codes to API clients.
            needs_restart = deposition.state == 'done' or deposition.drafts
            if needs_restart:
                deposition.reinitialize_workflow()
            return deposition.run_workflow(headless=True)

        if action_id == 'discard':
            deposition.stop_workflow()
            deposition.save()
            return deposition.marshal(), 201

        raise InvalidApiAction(action_id)

    @classmethod
    def api_metadata_schema(cls, draft_id):
        """
        Return the metadata schema of the draft *draft_id*.

        The schema is obtained from the parent class; for the ``_edit``
        draft the ``recid`` and ``modification_date`` entries are removed
        from it when present.
        """
        schema = super(upload, cls).api_metadata_schema(draft_id)
        if not schema or draft_id != '_edit':
            return schema

        for hidden_key in ('recid', 'modification_date'):
            if hidden_key in schema['schema']:
                del schema['schema'][hidden_key]
        return schema

    @classmethod
    def render_completed(cls, d):
        """
        Render the page shown once the deposition *d* has completed.

        :returns: the rendered ``deposit/completed.html`` template.
        """
        # The default deposition type is passed to the template as None.
        if d.type.is_default():
            type_identifier = None
        else:
            type_identifier = d.type.get_identifier()

        template_context = {
            'deposition': d,
            'deposition_type': type_identifier,
            'uuid': d.id,
            'my_depositions': Deposition.get_depositions(
                current_user, type=d.type
            ),
            'sip': d.get_latest_sip(),
            'format_record': format_record,
        }
        return render_template('deposit/completed.html', **template_context)
示例#4
0
class literature(SimpleRecordDeposition, WorkflowBase):
    """Literature deposit submission.

    Declares the submission workflow (form handling, SIP creation,
    classification, approval and upload) together with the display names
    and the form used for the ``default`` draft.
    """

    object_type = "submission"

    workflow = [
        # Pre-fill draft with values passed in from request
        prefill_draft(draft_id='default'),
        # Render form and wait for user to submit
        render_form(draft_id='default'),
        # NOTE(review): presumably exposes the uploaded files in the task
        # results of the workflow object - confirm against the task.
        add_files_to_task_results,
        # Create the submission information package by merging form data
        # from all drafts (in this case only one draft exists).
        prepare_sip(),
        # Process metadata to match your JSONAlchemy record model. This will
        # call process_sip_metadata() on your subclass.
        process_sip_metadata(),
        # Generate MARC based on metadata dictionary.
        finalize_record_sip(is_dump=False),
        # NOTE(review): presumably halts the workflow so a page can be
        # rendered to the submitter - confirm against the task.
        halt_to_render,
        # Classify the submission using the HEPont.rdf taxonomy.
        classify_paper_with_deposit(
            taxonomy="HEPont.rdf",
            output_mode="dict",
        ),
        # Halt with the "inspire_approval" action until a decision is taken.
        halt_record_with_action(action="inspire_approval",
                                message="Accept submission?"),
        # NOTE(review): the list following workflow_if() is presumably the
        # branch run only when was_approved holds - confirm against the
        # workflow engine.
        workflow_if(was_approved),
        [send_robotupload()],
        inform_submitter
    ]

    # Display names and grouping of this deposition type.
    name = "Literature"
    name_plural = "Literature submissions"
    group = "Articles & Preprints"
    # Form class backing the 'default' draft used in the workflow above.
    draft_definitions = {
        'default': LiteratureForm,
    }

    @staticmethod
    def get_title(bwo):
        """Return the title to display for the workflow object *bwo*.

        The title is read from the metadata of the latest SIP; a placeholder
        text is returned while no SIP exists yet.
        """
        latest_sip = Deposition(bwo).get_latest_sip()
        if not latest_sip:
            return "User submission in progress!!"

        # The SIP metadata is the SmartJSON record.
        title_field = latest_sip.metadata.get("title", {"title": "No title"})
        return title_field.get("title")

    @staticmethod
    def get_description(bwo):
        """Return the description to display for the workflow object *bwo*.

        With a SIP available, the submission record template is rendered
        with the arXiv id and the document type; otherwise a line naming the
        submitter's email address is returned.
        """
        deposition = Deposition(bwo)
        latest_sip = deposition.get_latest_sip()

        if not latest_sip:
            from invenio.modules.access.control import acc_get_user_email
            submitter_id = deposition.workflow_object.id_user
            return "Submitted by: %s" % str(acc_get_user_email(submitter_id))

        record = latest_sip.metadata
        identifiers = [record.get("arxiv_id", "")]
        categories = [record.get("type_of_doc", "")]
        return render_template(
            'workflows/styles/submission_record.html',
            categories=categories,
            identifiers=identifiers,
        )

    @staticmethod
    def formatter(bwo, **kwargs):
        """Return the formatted data of the workflow object *bwo*.

        :param bwo: the workflow object wrapping the deposition.
        :param kwargs: ``format`` selects the output format (default
            ``"hd"``); ``"xm"`` returns the raw MARCXML of the latest SIP.
        :returns: the MARCXML or the formatted record, or an empty string
            when the deposition has no SIP yet.
        """
        from invenio.modules.formatter.engine import format_record

        sip = Deposition(bwo).get_latest_sip()
        if not sip:
            # get_title() and get_description() show that a deposition may
            # not have a SIP yet; there is nothing to format in that case.
            return ""
        marcxml = sip.package

        of = kwargs.get("format", "hd")
        if of == "xm":
            return marcxml
        return format_record(recID=None, of=of, xml_record=marcxml)

    # TODO: ensure that this regex is correct
    @classmethod
    def match_authors_initials(cls, author_name):
        """Check if author's name contains only its initials.

        :param author_name: the (part of the) name to inspect.
        :returns: True when *author_name* holds nothing but uppercase ASCII
            letters, dots and spaces (which includes the empty string),
            False otherwise.
        """
        return re.search(r'[^A-Z. ]', author_name) is None

    @classmethod
    def _compact_author_initials(cls, author):
        """Remove the spaces from the initials of ``author['full_name']``.

        When the name has a comma and the chunk right after the first comma
        consists only of initials, that chunk is rewritten without spaces
        and the name is re-joined with ``","``.

        :raises AttributeError: when *author* has no ``get`` method or its
            ``full_name`` is missing.
        """
        name_parts = author.get('full_name').split(',')
        if len(name_parts) > 1 and \
                cls.match_authors_initials(name_parts[1]):
            name_parts[1] = name_parts[1].replace(' ', '')
            author['full_name'] = ",".join(name_parts)

    @classmethod
    def process_sip_metadata(cls, deposition, metadata):
        """Map fields to match jsonalchemy configuration.

        *metadata* is modified in place: form fields are moved or wrapped
        into the structures built below and the form-only keys are removed
        at the end.

        :param deposition: the deposition being processed (unused here).
        :param metadata: the dictionary of form values of the SIP.
        """
        delete_keys = []
        field_list = ['abstract', 'title']

        # maps from a form field to the corresponding MarcXML field
        field_map = {
            'abstract': "summary",
            'title': "title",
            'subject_term': "term",
            'defense_date': "date",
            'university': "university",
            'degree_type': "degree_type",
            'journal_title': "journal_title",
            'page_range': "page_artid",
            'article_id': "page_artid",
            'volume': "journal_volume",
            'year': "year",
            'issue': "journal_issue"
        }

        # ============================
        # Abstract, Title and Subjects
        # ============================
        for field in field_list:
            if field in metadata:
                metadata[field] = {field_map[field]: metadata[field]}

        if "subject_term" in metadata:
            tmp_field = metadata["subject_term"]
            metadata["subject_term"] = [{
                "term": t,
                "scheme": "INSPIRE"
            } for t in tmp_field]

        # =======
        # Authors
        # =======
        # Drop empty author entries. The key is only touched when present,
        # so metadata without authors no longer raises KeyError.
        if 'authors' in metadata:
            metadata['authors'] = [
                author for author in (metadata['authors'] or []) if author
            ]
        authors = metadata.get('authors')
        if authors:
            first_author = authors[0]
            try:
                cls._compact_author_initials(first_author)
            except AttributeError:
                # No usable full_name: keep the name as it is, as is done
                # for the additional authors below.
                pass
            first_author['email'] = ''
            metadata['_first_author'] = first_author

            additional_authors = authors[1:]
            if additional_authors:
                metadata['_additional_authors'] = additional_authors
                for author in additional_authors:
                    try:
                        cls._compact_author_initials(author)
                        author['email'] = ''
                    except AttributeError:
                        # Entries without a usable full_name are
                        # deliberately left untouched.
                        pass
            delete_keys.append('authors')

        # ===========
        # Supervisors
        # ===========
        if 'supervisors' in metadata and metadata['supervisors']:
            metadata['thesis_supervisor'] = metadata['supervisors'][0]
            metadata['thesis_supervisor']['email'] = ''
            delete_keys.append('supervisors')

        # ==============
        # Thesis related
        # ==============
        # A real list (not a filter object) so that the truth test and the
        # repeated iteration behave the same on Python 2 and 3.
        thesis_fields = [
            field for field in ('defense_date', 'university', 'degree_type')
            if field in metadata
        ]
        if thesis_fields:
            metadata['thesis'] = {}

            for field in thesis_fields:
                metadata['thesis'][field_map[field]] = metadata[field]

            delete_keys.extend(thesis_fields)

        # ========
        # Category
        # ========
        metadata['collections'] = [{'primary': "HEP"}]

        # ===============
        # Abstract source
        # ===============
        # Only tag the abstract when there is one to tag.
        if 'title_arXiv' in metadata and 'abstract' in metadata:
            metadata['abstract']['source'] = 'arXiv'

        # ========
        # arXiv ID
        # ========
        if 'arxiv_id' in metadata:
            metadata['report_number'] = "$$9arXiv$$aoai:arXiv.org:" + metadata[
                'arxiv_id']

        # ========
        # Language
        # ========
        if 'language' in metadata:
            language_code = metadata['language']
            # Fall back to the submitted code instead of storing the string
            # "None" when the code is not among the form's languages.
            metadata['language'] = unicode(
                dict(LiteratureForm.languages).get(language_code,
                                                   language_code))

        # ==========
        # Experiment
        # ==========
        if 'experiment' in metadata:
            metadata['accelerator_experiment'] = {}
            metadata['accelerator_experiment']['experiment'] = metadata[
                'experiment']
            delete_keys.append('experiment')

        # ===============
        # Conference Info
        # ===============
        if 'conf_name' in metadata:
            if 'nonpublic_note' in metadata:
                field = [metadata['nonpublic_note'], metadata['conf_name']]
                metadata['nonpublic_note'] = field
            else:
                metadata['nonpublic_note'] = [metadata['conf_name']]
            metadata['collections'].extend([{'primary': "ConferencePaper"}])
            delete_keys.append('conf_name')

        # =======
        # License
        # =======
        if 'license_url' in metadata:
            metadata['license'] = {}
            metadata['license']['url'] = metadata['license_url']
            delete_keys.append('license_url')

        # ===========
        # Files (FFT)
        # ===========
        if 'fft' in metadata and metadata['fft']:
            fft = metadata['fft']
            metadata['fft'] = {}
            metadata['fft']['url'] = fft[0]['path']

        # ================
        # Publication Info
        # ================
        publication_fields = [
            field for field in ('journal_title', 'page_range', 'article_id',
                                'volume', 'year', 'issue')
            if field in metadata
        ]
        if publication_fields:
            metadata['publication_info'] = {}

            # NOTE: 'page_range' and 'article_id' share the same target
            # key, so 'article_id' wins when both are given.
            for field in publication_fields:
                metadata['publication_info'][
                    field_map[field]] = metadata[field]

            if 'page_nr' not in metadata and 'page_range' in publication_fields:
                pages = metadata['page_range'].split('-')
                if len(pages) == 2:
                    try:
                        # The range is inclusive: pages 3-5 are 3 pages.
                        metadata['page_nr'] = \
                            int(pages[1]) - int(pages[0]) + 1
                    except ValueError:
                        # Non-numeric page labels: no page count available.
                        pass

            delete_keys.extend(publication_fields)

            # The note is a list only when it was combined with the
            # conference name above; a plain string note cannot (and must
            # not) have its first character deleted.
            note = metadata.get('nonpublic_note')
            if isinstance(note, list) and len(note) > 1:
                del note[0]

            if {'primary': "ConferencePaper"} in metadata['collections']:
                metadata['collections'].remove({'primary': "ConferencePaper"})
            metadata['collections'].append({'primary': "Published"})

        # ===================
        # Delete useless data
        # ===================
        for key in delete_keys:
            del metadata[key]