예제 #1
0
파일: legacyjson.py 프로젝트: stain/zenodo
class LegacyMetadataSchemaV1(common.CommonMetadataSchemaV1):
    """Legacy JSON metadata.

    Extends the common metadata schema with the flat field names used by the
    legacy API (``upload_type``, ``journal_title``, ...). Most flat fields
    are bound to a dotted ``attribute`` such as ``journal.title``; after
    loading, ``merge_keys`` folds those dotted keys into nested dictionaries.
    """

    # Resource type is exposed through separate flat fields: ``upload_type``
    # carries the type, ``publication_type``/``image_type`` carry the subtype
    # and are only dumped when the type matches.
    upload_type = fields.String(
        attribute='resource_type.type',
        required=True,
        validate=validate.OneOf(choices=ObjectType.get_types()),
    )
    publication_type = fields.Method(
        'dump_publication_type',
        attribute='resource_type.subtype',
        validate=validate.OneOf(
            choices=ObjectType.get_subtypes('publication')),
    )
    image_type = fields.Method(
        'dump_image_type',
        attribute='resource_type.subtype',
        validate=validate.OneOf(choices=ObjectType.get_subtypes('image')),
    )
    openaire_type = fields.Method('dump_openaire_type',
                                  attribute='resource_type.openaire_subtype')

    # Fields with custom dump/load methods defined further down.
    license = fields.Method('dump_license', 'load_license')
    communities = fields.Method('dump_communities', 'load_communities')
    grants = fields.Method('dump_grants', 'load_grants')

    prereserve_doi = fields.Method('dump_prereservedoi', 'load_prereservedoi')

    # Flat ``journal_*`` fields map onto the nested ``journal`` object.
    journal_title = SanitizedUnicode(attribute='journal.title')
    journal_volume = SanitizedUnicode(attribute='journal.volume')
    journal_issue = SanitizedUnicode(attribute='journal.issue')
    journal_pages = SanitizedUnicode(attribute='journal.pages')

    # Flat ``conference_*`` fields map onto the nested ``meeting`` object.
    conference_title = SanitizedUnicode(attribute='meeting.title')
    conference_acronym = SanitizedUnicode(attribute='meeting.acronym')
    conference_dates = SanitizedUnicode(attribute='meeting.dates')
    conference_place = SanitizedUnicode(attribute='meeting.place')
    conference_url = SanitizedUrl(attribute='meeting.url')
    conference_session = SanitizedUnicode(attribute='meeting.session')
    conference_session_part = SanitizedUnicode(
        attribute='meeting.session_part')

    # Flat ``imprint_*`` fields map onto the nested ``imprint`` object.
    imprint_isbn = SanitizedUnicode(attribute='imprint.isbn')
    imprint_place = SanitizedUnicode(attribute='imprint.place')
    imprint_publisher = SanitizedUnicode(attribute='imprint.publisher')

    # Flat ``partof_*`` fields map onto the nested ``part_of`` object.
    partof_pages = SanitizedUnicode(attribute='part_of.pages')
    partof_title = SanitizedUnicode(attribute='part_of.title')

    # Flat ``thesis_*`` fields map onto the nested ``thesis`` object.
    thesis_university = SanitizedUnicode(attribute='thesis.university')
    thesis_supervisors = fields.Nested(common.PersonSchemaV1,
                                       many=True,
                                       attribute='thesis.supervisors')

    def _dump_subtype(self, obj, type_):
        """Get subtype.

        :param obj: Mapping being dumped.
        :param type_: Resource type the subtype must belong to.
        :returns: ``resource_type.subtype`` when ``resource_type.type``
            equals ``type_``; ``missing`` otherwise.
        """
        resource_type = obj.get('resource_type', {})
        if resource_type.get('type') == type_:
            return resource_type.get('subtype', missing)
        return missing

    def dump_publication_type(self, obj):
        """Get publication type."""
        return self._dump_subtype(obj, 'publication')

    def dump_image_type(self, obj):
        """Get image type."""
        return self._dump_subtype(obj, 'image')

    def dump_openaire_type(self, obj):
        """Get OpenAIRE type."""
        return obj.get('resource_type', {}).get('openaire_subtype', missing)

    def dump_license(self, obj):
        """Dump license as its identifier (``license.id``)."""
        return obj.get('license', {}).get('id', missing)

    def load_license(self, data):
        """Load license.

        :param data: Either the license identifier as a string, or an object
            carrying the identifier under the ``id`` key.
        :returns: A ``$ref`` object pointing at the license.
        :raises ValidationError: If ``data`` is neither a string nor an
            object with an ``id`` key.
        """
        if isinstance(data, six.string_types):
            license_id = data
        elif isinstance(data, dict) and 'id' in data:
            license_id = data['id']
        else:
            # Reject unsupported shapes explicitly instead of failing later
            # on an unbound local or a missing key.
            raise ValidationError(_('Invalid license.'))
        return {
            '$ref': 'https://dx.zenodo.org/licenses/{0}'.format(license_id)
        }

    def dump_grants(self, obj):
        """Get grants.

        FP7 grants whose funder DOI is ``10.13039/501100000780`` are dumped
        by their ``code``; every other grant is dumped by its
        ``internal_id``.

        :returns: List of ``{'id': ...}`` objects, or ``missing`` when there
            are no grants.
        """
        res = []
        for g in obj.get('grants', []):
            if g.get('program') == 'FP7' and \
                    g.get('funder', {}).get('doi') == '10.13039/501100000780':
                res.append(dict(id=g['code']))
            else:
                res.append(dict(id=g['internal_id']))
        return res or missing

    def load_grants(self, data):
        """Load grants.

        :param data: List of objects with an ``id`` key. Entries with an
            empty or absent ``id`` are skipped.
        :returns: List of ``$ref`` objects (sorted by grant ID, duplicates
            removed), or ``missing`` when no grant remains.
        :raises ValidationError: If ``data`` is not a list, an element is
            not an object, an ``id`` is not a string, or a grant ID has no
            registered ``grant`` persistent identifier.
        """
        if not isinstance(data, list):
            raise ValidationError(_('Not a list.'))
        result = set()
        errors = set()
        for g in data:
            if not isinstance(g, dict):
                raise ValidationError(_('Element not an object.'))
            g = g.get('id')
            if not g:
                continue
            if not isinstance(g, six.string_types):
                # A non-string ID would otherwise crash on ``startswith``.
                raise ValidationError(_('Grant ID must be a string.'))
            # FP7 project grant
            if not g.startswith('10.13039/'):
                g = '10.13039/501100000780::{0}'.format(g)
            # Check that the PID exists
            grant_pid = PersistentIdentifier.query.filter_by(
                pid_type='grant', pid_value=g).one_or_none()
            if not grant_pid or grant_pid.status != PIDStatus.REGISTERED:
                errors.add(g)
                continue
            result.add(g)
        if errors:
            # Sorted so the message does not depend on set iteration order.
            raise ValidationError('Invalid grant ID(s): {0}'.format(
                ', '.join(sorted(errors))),
                                  field_names='grants')
        return [{
            '$ref': 'https://dx.zenodo.org/grants/{0}'.format(grant_id)
        } for grant_id in sorted(result)] or missing

    def dump_communities(self, obj):
        """Dump communities as a list of ``{'identifier': ...}`` objects."""
        return [dict(identifier=x) for x in obj.get('communities', [])] \
            or missing

    def load_communities(self, data):
        """Load communities.

        :param data: List of objects, each with an ``identifier`` key.
            Entries with an empty identifier are dropped.
        :returns: Sorted list of community identifiers, or ``missing`` when
            the list is empty.
        :raises ValidationError: If ``data`` is not a list, an element lacks
            the ``identifier`` key, or ``Community.get`` finds no community
            for an identifier.
        """
        if not isinstance(data, list):
            raise ValidationError(_('Not a list.'))
        invalid_format_comms = [
            c for c in data if not (isinstance(c, dict) and 'identifier' in c)
        ]
        if invalid_format_comms:
            raise ValidationError(
                'Invalid community format: {}.'.format(invalid_format_comms),
                field_names='communities')

        comm_ids = list(
            sorted([x['identifier'] for x in data if x.get('identifier')]))
        errors = {c for c in comm_ids if not Community.get(c)}
        if errors:
            # Sorted so the message does not depend on set iteration order.
            raise ValidationError('Invalid communities: {0}'.format(
                ', '.join(sorted(errors))),
                                  field_names='communities')
        return comm_ids or missing

    def dump_prereservedoi(self, obj):
        """Dump pre-reserved DOI.

        :returns: ``{'recid': ..., 'doi': ...}`` when the object has a
            ``recid``; ``missing`` otherwise.
        """
        recid = obj.get('recid')
        if not recid:
            return missing

        # Use the test prefix when ``current_app`` is falsy (presumably when
        # there is no application context - confirm); otherwise pass ``None``
        # to ``doi_generator``.
        prefix = None if current_app else '10.5072'  # Test prefix

        return dict(
            recid=recid,
            doi=doi_generator(recid, prefix=prefix),
        )

    def load_prereservedoi(self, obj):
        """Load pre-reserved DOI.

        The value is not important as we do not store it, since the deposit
        and record id are now the same.
        """
        return missing

    @pre_dump()
    def predump_related_identifiers(self, data):
        """Merge alternate identifiers into related identifiers.

        This ensures that we can just use the base schema's definitions of
        related/alternate identifiers. Each alternate identifier gets the
        relation ``isAlternateIdentifier`` and is appended after the related
        identifiers.

        Note that ``data`` (and the alternate identifier objects in it) is
        modified in place.
        """
        relids = data.pop('related_identifiers', [])
        alids = data.pop('alternate_identifiers', [])

        for a in alids:
            a['relation'] = 'isAlternateIdentifier'

        if relids or alids:
            data['related_identifiers'] = relids + alids

        return data

    @pre_load()
    def preload_related_identifiers(self, data):
        """Split related/alternate identifiers.

        This ensures that we can just use the base schema's definitions of
        related/alternate identifiers for loading. Entries whose relation
        marks them as alternate identifiers are moved to
        ``alternate_identifiers`` with the relation removed; all others stay
        in ``related_identifiers``.
        """
        # Legacy API does not accept alternate_identifiers, so force delete it.
        data.pop('alternate_identifiers', None)

        for r in data.pop('related_identifiers', []):
            # Problem that API accepted one relation while documentation
            # presented a different relation.
            if r.get('relation') in [
                    'isAlternativeIdentifier', 'isAlternateIdentifier'
            ]:
                k = 'alternate_identifiers'
                r.pop('relation')
            else:
                k = 'related_identifiers'

            data.setdefault(k, []).append(r)

        return data

    @pre_load()
    def preload_resource_type(self, data):
        """Drop subtype fields that do not belong to the upload type.

        ``publication_type`` is kept only for ``publication`` uploads and
        ``image_type`` only for ``image`` uploads.
        """
        if data.get('upload_type') != 'publication':
            data.pop('publication_type', None)
        if data.get('upload_type') != 'image':
            data.pop('image_type', None)

        return data

    @pre_load()
    def preload_license(self, data):
        """Apply the default license.

        For open and embargoed access (open is assumed when ``access_right``
        is absent), a missing ``license`` defaults to ``CC0-1.0`` for
        datasets and ``CC-BY-4.0`` for everything else.
        """
        acc = data.get('access_right', AccessRight.OPEN)
        needs_license = acc in [AccessRight.OPEN, AccessRight.EMBARGOED]
        if needs_license and 'license' not in data:
            if data.get('upload_type') == 'dataset':
                data['license'] = 'CC0-1.0'
            else:
                data['license'] = 'CC-BY-4.0'

        return data

    @post_load()
    def merge_keys(self, data):
        """Merge dot keys.

        Loaded values are keyed by dotted attribute names such as
        ``journal.title``; fold them into nested dictionaries such as
        ``data['journal']['title']``.
        """
        prefixes = [
            'resource_type',
            'journal',
            'meeting',
            'imprint',
            'part_of',
            'thesis',
        ]

        for p in prefixes:
            dotted_prefix = '{0}.'.format(p)
            # Iterate over a snapshot, since keys are removed/added below.
            for k in list(data.keys()):
                if k.startswith(dotted_prefix):
                    # Split on the first dot only, so a sub-key containing
                    # a dot cannot break the unpacking.
                    key, subkey = k.split('.', 1)
                    value = data.pop(k)
                    data.setdefault(key, {})[subkey] = value

        # Pre-reserve DOI is implemented differently now.
        data.pop('prereserve_doi', None)

        return data

    @validates('communities')
    def validate_communities(self, values):
        """Validate that every community identifier is a string."""
        for v in values:
            if not isinstance(v, six.string_types):
                raise ValidationError(_('Invalid community identifier.'),
                                      field_names=['communities'])

    @validates_schema
    def validate_data(self, obj):
        """Validate resource type.

        :raises ValidationError: If ``ObjectType.get_by_dict`` finds no
            object type for the type (plus subtype, for publications and
            images), or if ``is_valid_openaire_type`` rejects the resource
            type for the given communities.
        """
        resource_type = obj.get('resource_type', {})
        type_ = resource_type.get('type')
        if type_ in ['publication', 'image']:
            # Only these two types carry a subtype; report errors on the
            # matching ``publication_type``/``image_type`` field.
            type_dict = {
                'type': type_,
                'subtype': resource_type.get('subtype')
            }
            field_names = ['{0}_type'.format(type_)]
        else:
            type_dict = {'type': type_}
            field_names = ['upload_type']

        if ObjectType.get_by_dict(type_dict) is None:
            raise ValidationError(
                _('Invalid upload, publication or image type.'),
                field_names=field_names,
            )
        if not is_valid_openaire_type(resource_type,
                                      obj.get('communities', [])):
            # NOTE(review): the schema field is named ``openaire_type`` but
            # the error is reported under ``openaire_subtype`` - confirm this
            # is what API clients expect before changing it.
            raise ValidationError(
                _('Invalid OpenAIRE subtype.'),
                field_names=['openaire_subtype'],
            )