def get_type_title(self, obj):
    """Get the English title of the record's upload type.

    :param obj: record dict with an ``'upload_type'`` key.
    :returns: the English title string for the resolved object type.
    :raises Exception: re-raises whatever made the lookup fail, after
        logging it.
    """
    try:
        return ObjectType.get_by_dict(obj['upload_type'])['title']['en']
    except Exception:
        from flask import current_app
        # Use .get() here: if 'upload_type' is the missing key that caused
        # the failure, obj['upload_type'] would raise a second KeyError
        # inside the handler and mask the original exception.
        current_app.logger.exception(
            "Failed object {}".format(obj.get('upload_type')))
        raise
def is_openaire_publication(record):
    """Determine if record is a publication for OpenAIRE.

    :param record: record dict (may lack ``'resource_type'``).
    :returns: ``True`` if the record maps to the OpenAIRE publication type
        and has grants, is in the ``ecfunded`` community, or is open
        access; ``False`` otherwise.
    """
    # get_by_dict can return None for an unknown/missing resource type;
    # guard so we return False instead of raising AttributeError.
    obj_type = ObjectType.get_by_dict(record.get('resource_type'))
    oatype = (obj_type or {}).get('openaire', {})
    if not oatype or oatype['type'] != _OAType.publication:
        return False
    # Has grants, is part of ecfunded community or is open access.
    return bool(
        record.get('grants')
        or 'ecfunded' in record.get('communities', [])
        or record.get('access_right') == 'open'
    )
def test_object_type():
    """Test object type."""
    known_types = ['publication', 'poster', 'presentation', 'software',
                   'dataset', 'image', 'video']

    def _check(node):
        # Every node (and, recursively, every child) carries the full
        # set of metadata keys with English titles.
        for key in ('$schema', 'id', 'internal_id', 'title',
                    'title_plural', 'schema.org'):
            assert key in node
        assert 'en' in node['title']
        assert 'en' in node['title_plural']
        for child in node.get('children', []):
            _check(child)

    for name in known_types:
        _check(ObjectType.get(name))
    # Unknown internal ids resolve to None rather than raising.
    assert ObjectType.get('invalid') is None
def load_resource_type(self, data):
    """Split the resource type string into separate keys.

    :param data: internal id string, either ``'<type>'`` or
        ``'<type>-<subtype>'``.
    :returns: dict with ``'type'`` and, when present, ``'subtype'``.
    :raises ValidationError: if ``data`` is not a string or is not a
        valid internal id.
    """
    if not isinstance(data, string_types):
        raise ValidationError('Not a string.',
                              field_names=['resource_type'])
    if not ObjectType.validate_internal_id(data):
        raise ValidationError('Not a valid type.',
                              field_names=['resource_type'])
    serialized_object = {}
    # Split on the first dash only: previously a value with more than one
    # dash fell through the len == 2 check and silently lost its subtype.
    type_, _sep, subtype = data.partition('-')
    serialized_object['type'] = type_
    if subtype:
        serialized_object['subtype'] = subtype
    return serialized_object
def validate_data(self, obj):
    """Validate resource type."""
    upload_type = obj.get('resource_type.type')
    # Publications and images carry a subtype; everything else is
    # validated on the bare type under the 'upload_type' field.
    if upload_type in ('publication', 'image'):
        lookup = {
            'type': upload_type,
            'subtype': obj.get('resource_type.subtype'),
        }
        field_names = ['{0}_type'.format(upload_type)]
    else:
        lookup = {'type': upload_type}
        field_names = ['upload_type']
    if ObjectType.get_by_dict(lookup) is None:
        raise ValidationError(
            _('Invalid upload, publication or image type.'),
            field_names=field_names,
        )
def test_object_type(app):
    """Test object type."""
    known_types = ["publication", "poster", "presentation", "software",
                   "dataset", "image", "video"]

    def _check(node):
        # All metadata keys must be present, with English titles, on the
        # node itself and recursively on every child.
        for key in ("$schema", "id", "internal_id", "title",
                    "title_plural", "schema.org"):
            assert key in node
        assert "en" in node["title"]
        assert "en" in node["title_plural"]
        for child in node.get("children", []):
            _check(child)

    for name in known_types:
        _check(ObjectType.get(name))
def resource_types(self):
    """Create an object list with the available resource types."""
    entries = []
    for item in itervalues(ObjectType.index_id):
        type_id = item['internal_id']
        if item.get('parent'):
            # Resolve the item through ObjectType.get so the parent
            # reference is the fully resolved object.
            resolved_parent = ObjectType.get(type_id)['parent']
            group_title = resolved_parent['title']['en']
        elif item.get('children'):
            # Pure grouping types (have children, no parent) are skipped.
            continue
        else:
            group_title = ''
        entries.append({
            'type': group_title,
            'title': item['title']['en'],
            'id': type_id,
        })
    entries.sort(key=lambda entry: entry['title'])
    return entries
def validate_data(self, obj):
    """Validate resource type."""
    resource_type = obj.get('resource_type', {})
    upload_type = resource_type.get('type')
    # Publications and images carry a subtype; other types are checked
    # on the bare type under the 'upload_type' field.
    if upload_type in ('publication', 'image'):
        lookup = {
            'type': upload_type,
            'subtype': resource_type.get('subtype'),
        }
        field_names = ['{0}_type'.format(upload_type)]
    else:
        lookup = {'type': upload_type}
        field_names = ['upload_type']
    if ObjectType.get_by_dict(lookup) is None:
        raise ValidationError(
            _('Invalid upload, publication or image type.'),
            field_names=field_names,
        )
    # The OpenAIRE subtype must be consistent with the communities.
    if not is_valid_openaire_type(resource_type,
                                  obj.get('communities', [])):
        raise ValidationError(
            _('Invalid OpenAIRE subtype.'),
            field_names=['openaire_subtype'],
        )
def is_openaire_dataset(record):
    """Determine if record is a dataset for OpenAIRE.

    :param record: record dict (may lack ``'resource_type'``).
    :returns: ``True`` if the record maps to the OpenAIRE dataset type.
    """
    # get_by_dict can return None for an unknown/missing resource type;
    # guard so we return False instead of raising AttributeError.
    # bool(...) also normalizes the previous `{} and ...` truthy-value
    # return into a proper boolean.
    obj_type = ObjectType.get_by_dict(record.get('resource_type'))
    oatype = (obj_type or {}).get('openaire', {})
    return bool(oatype and oatype['type'] == _OAType.dataset)
class LegacyMetadataSchemaV1(common.CommonMetadataSchemaV1):
    """Legacy JSON metadata.

    Serializes/deserializes the flat legacy API representation (dotted
    attribute paths such as ``resource_type.type``) to and from the
    internal record structure.
    """

    # Resource type: flat 'upload_type' + optional 'publication_type' /
    # 'image_type' keys in the legacy API.
    upload_type = fields.String(
        attribute='resource_type.type',
        required=True,
        validate=validate.OneOf(choices=ObjectType.get_types()),
    )
    publication_type = fields.Method(
        'dump_publication_type',
        attribute='resource_type.subtype',
        validate=validate.OneOf(
            choices=ObjectType.get_subtypes('publication')),
    )
    image_type = fields.Method(
        'dump_image_type',
        attribute='resource_type.subtype',
        validate=validate.OneOf(choices=ObjectType.get_subtypes('image')),
    )
    openaire_type = fields.Method('dump_openaire_type',
                                  attribute='resource_type.openaire_subtype')
    # NOTE: 'license' shadows the builtin, but the field name is part of
    # the legacy API and cannot change.
    license = fields.Method('dump_license', 'load_license')
    communities = fields.Method('dump_communities', 'load_communities')
    grants = fields.Method('dump_grants', 'load_grants')
    prereserve_doi = fields.Method('dump_prereservedoi', 'load_prereservedoi')
    # Journal / meeting / imprint / part-of / thesis: flat legacy keys
    # mapped onto nested attributes (merged back in `merge_keys`).
    journal_title = SanitizedUnicode(attribute='journal.title')
    journal_volume = SanitizedUnicode(attribute='journal.volume')
    journal_issue = SanitizedUnicode(attribute='journal.issue')
    journal_pages = SanitizedUnicode(attribute='journal.pages')
    conference_title = SanitizedUnicode(attribute='meeting.title')
    conference_acronym = SanitizedUnicode(attribute='meeting.acronym')
    conference_dates = SanitizedUnicode(attribute='meeting.dates')
    conference_place = SanitizedUnicode(attribute='meeting.place')
    conference_url = SanitizedUrl(attribute='meeting.url')
    conference_session = SanitizedUnicode(attribute='meeting.session')
    conference_session_part = SanitizedUnicode(
        attribute='meeting.session_part')
    imprint_isbn = SanitizedUnicode(attribute='imprint.isbn')
    imprint_place = SanitizedUnicode(attribute='imprint.place')
    imprint_publisher = SanitizedUnicode(attribute='imprint.publisher')
    partof_pages = SanitizedUnicode(attribute='part_of.pages')
    partof_title = SanitizedUnicode(attribute='part_of.title')
    thesis_university = SanitizedUnicode(attribute='thesis.university')
    thesis_supervisors = fields.Nested(common.PersonSchemaV1, many=True,
                                       attribute='thesis.supervisors')

    def _dump_subtype(self, obj, type_):
        """Get subtype, or ``missing`` when the type does not match."""
        if obj.get('resource_type', {}).get('type') == type_:
            return obj.get('resource_type', {}).get('subtype', missing)
        return missing

    def dump_publication_type(self, obj):
        """Get publication type."""
        return self._dump_subtype(obj, 'publication')

    def dump_image_type(self, obj):
        """Get image type."""
        return self._dump_subtype(obj, 'image')

    def dump_openaire_type(self, obj):
        """Get OpenAIRE type."""
        return obj.get('resource_type', {}).get('openaire_subtype', missing)

    def dump_license(self, obj):
        """Dump license id."""
        return obj.get('license', {}).get('id', missing)

    def load_license(self, data):
        """Load license as a JSON reference.

        Accepts either a bare id string or a dict with an ``'id'`` key.
        """
        # NOTE(review): if `data` is neither a string nor a dict,
        # `license` is never bound and the return raises NameError —
        # confirm upstream validation guarantees one of the two shapes.
        if isinstance(data, six.string_types):
            license = data
        if isinstance(data, dict):
            license = data['id']
        return {'$ref': 'https://dx.zenodo.org/licenses/{0}'.format(license)}

    def dump_grants(self, obj):
        """Get grants.

        FP7 EC grants are dumped by bare code; all others by internal id.
        """
        res = []
        for g in obj.get('grants', []):
            if g.get('program', {}) == 'FP7' and \
                    g.get('funder', {}).get('doi') == '10.13039/501100000780':
                res.append(dict(id=g['code']))
            else:
                res.append(dict(id=g['internal_id']))
        return res or missing

    def load_grants(self, data):
        """Load grants as JSON references.

        :raises ValidationError: if the payload is malformed or a grant
            PID does not exist / is not registered.
        """
        if not isinstance(data, list):
            raise ValidationError(_('Not a list.'))
        result = set()
        errors = set()
        for g in data:
            if not isinstance(g, dict):
                raise ValidationError(_('Element not an object.'))
            g = g.get('id')
            if not g:
                continue
            # Bare codes are assumed to be FP7 project grants and are
            # expanded to the full EC-funder-prefixed internal id.
            if not g.startswith('10.13039/'):
                g = '10.13039/501100000780::{0}'.format(g)
            # Check that the PID exists and is registered.
            grant_pid = PersistentIdentifier.query.filter_by(
                pid_type='grant', pid_value=g).one_or_none()
            if not grant_pid or grant_pid.status != PIDStatus.REGISTERED:
                errors.add(g)
                continue
            result.add(g)
        if errors:
            raise ValidationError('Invalid grant ID(s): {0}'.format(
                ', '.join(errors)), field_names='grants')
        return [{
            '$ref': 'https://dx.zenodo.org/grants/{0}'.format(grant_id)
        } for grant_id in result] or missing

    def dump_communities(self, obj):
        """Dump communities as identifier objects."""
        return [dict(identifier=x) for x in obj.get('communities', [])] \
            or missing

    def load_communities(self, data):
        """Load communities as a sorted list of identifiers.

        :raises ValidationError: on malformed entries or unknown
            community identifiers.
        """
        if not isinstance(data, list):
            raise ValidationError(_('Not a list.'))
        invalid_format_comms = [
            c for c in data if not (isinstance(c, dict) and 'identifier' in c)
        ]
        if invalid_format_comms:
            raise ValidationError(
                'Invalid community format: {}.'.format(invalid_format_comms),
                field_names='communities')
        comm_ids = list(
            sorted([x['identifier'] for x in data if x.get('identifier')]))
        # Each identifier must resolve to an existing community.
        errors = {c for c in comm_ids if not Community.get(c)}
        if errors:
            raise ValidationError('Invalid communities: {0}'.format(
                ', '.join(errors)), field_names='communities')
        return comm_ids or missing

    def dump_prereservedoi(self, obj):
        """Dump pre-reserved DOI derived from the record id."""
        recid = obj.get('recid')
        if recid:
            prefix = None
            if not current_app:
                prefix = '10.5072'  # Test prefix
            return dict(
                recid=recid,
                doi=doi_generator(recid, prefix=prefix),
            )
        return missing

    def load_prereservedoi(self, obj):
        """Load pre-reserved DOI.

        The value is not important as we do not store it. Since the
        deposit and record id are now the same.
        """
        return missing

    @pre_dump()
    def predump_related_identifiers(self, data):
        """Merge alternate identifiers into related identifiers.

        This ensures that we can just use the base schemas definitions of
        related/alternate identifiers.
        """
        relids = data.pop('related_identifiers', [])
        alids = data.pop('alternate_identifiers', [])
        for a in alids:
            a['relation'] = 'isAlternateIdentifier'
        if relids or alids:
            data['related_identifiers'] = relids + alids
        return data

    @pre_load()
    def preload_related_identifiers(self, data):
        """Split related/alternate identifiers.

        This ensures that we can just use the base schemas definitions of
        related/alternate identifiers for loading.
        """
        # Legacy API does not accept alternate_identifiers, so force delete it.
        data.pop('alternate_identifiers', None)
        for r in data.pop('related_identifiers', []):
            # Problem that API accepted one relation while documentation
            # presented a different relation.
            if r.get('relation') in [
                    'isAlternativeIdentifier', 'isAlternateIdentifier']:
                k = 'alternate_identifiers'
                r.pop('relation')
            else:
                k = 'related_identifiers'
            data.setdefault(k, [])
            data[k].append(r)

    @pre_load()
    def preload_resource_type(self, data):
        """Prepare data for easier deserialization.

        Subtype keys are only meaningful for the matching upload type.
        """
        if data.get('upload_type') != 'publication':
            data.pop('publication_type', None)
        if data.get('upload_type') != 'image':
            data.pop('image_type', None)

    @pre_load()
    def preload_license(self, data):
        """Default license for open/embargoed records without one."""
        acc = data.get('access_right', AccessRight.OPEN)
        if acc in [AccessRight.OPEN, AccessRight.EMBARGOED]:
            if 'license' not in data:
                if data.get('upload_type') == 'dataset':
                    data['license'] = 'CC0-1.0'
                else:
                    data['license'] = 'CC-BY-4.0'

    @post_load()
    def merge_keys(self, data):
        """Merge dotted keys (e.g. 'journal.title') into nested dicts."""
        prefixes = [
            'resource_type',
            'journal',
            'meeting',
            'imprint',
            'part_of',
            'thesis',
        ]
        for p in prefixes:
            # Iterate over a snapshot since keys are popped while looping.
            for k in list(data.keys()):
                if k.startswith('{0}.'.format(p)):
                    key, subkey = k.split('.')
                    if key not in data:
                        data[key] = dict()
                    data[key][subkey] = data.pop(k)
        # Pre-reserve DOI is implemented differently now.
        data.pop('prereserve_doi', None)

    @validates('communities')
    def validate_communities(self, values):
        """Validate communities."""
        for v in values:
            if not isinstance(v, six.string_types):
                raise ValidationError(_('Invalid community identifier.'),
                                      field_names=['communities'])

    @validates_schema
    def validate_data(self, obj):
        """Validate resource type."""
        type_ = obj.get('resource_type', {}).get('type')
        if type_ in ['publication', 'image']:
            type_dict = {
                'type': type_,
                'subtype': obj.get('resource_type', {}).get('subtype')
            }
            field_names = ['{0}_type'.format(type_)]
        else:
            type_dict = {'type': type_}
            field_names = ['upload_type']
        if ObjectType.get_by_dict(type_dict) is None:
            raise ValidationError(
                _('Invalid upload, publication or image type.'),
                field_names=field_names,
            )
        # The OpenAIRE subtype must be consistent with the communities.
        if not is_valid_openaire_type(obj.get('resource_type', {}),
                                      obj.get('communities', [])):
            raise ValidationError(
                _('Invalid OpenAIRE subtype.'),
                field_names=['openaire_subtype'],
            )
def _openaire_type(self, obj):
    """Return the OpenAIRE type mapping for the record, or ``None``.

    Guards against ``ObjectType.get_by_dict`` returning ``None`` for an
    unknown or missing resource type (previously raised
    ``AttributeError``).
    """
    obj_type = ObjectType.get_by_dict(
        obj.get('metadata', {}).get('resource_type'))
    return (obj_type or {}).get('openaire')
def is_openaire_other(record):
    """Determine if the record has type 'other' for OpenAIRE.

    :param record: record dict (may lack ``'resource_type'``).
    :returns: ``True`` if the record maps to the OpenAIRE 'other' type.
    """
    # get_by_dict can return None for an unknown/missing resource type;
    # guard so we return False instead of raising AttributeError, and
    # normalize the result to a proper boolean.
    obj_type = ObjectType.get_by_dict(record.get('resource_type'))
    oatype = (obj_type or {}).get('openaire', {})
    return bool(oatype and oatype['type'] == _OAType.other)
def get_type(self, obj):
    """Get record CSL type."""
    resolved = ObjectType.get_by_dict(obj['metadata'].get('resource_type'))
    if not resolved:
        # Unknown resource type: omit the field entirely.
        return missing
    return resolved.get('csl', missing)
def _openaire_type(self, obj):
    """Return the OpenAIRE type mapping for the record, or ``None``.

    Guards against ``ObjectType.get_by_dict`` returning ``None`` for an
    unknown or missing resource type (previously raised
    ``AttributeError``).
    """
    obj_type = ObjectType.get_by_dict(
        obj.get('metadata', {}).get('resource_type'))
    return (obj_type or {}).get('openaire')
def is_openaire_software(record):
    """Determine if the record is a software for OpenAIRE.

    :param record: record dict (may lack ``'resource_type'``).
    :returns: ``True`` if the record maps to the OpenAIRE software type.
    """
    # get_by_dict can return None for an unknown/missing resource type;
    # guard so we return False instead of raising AttributeError, and
    # normalize the result to a proper boolean.
    obj_type = ObjectType.get_by_dict(record.get('resource_type'))
    oatype = (obj_type or {}).get('openaire', {})
    return bool(oatype and oatype['type'] == _OAType.software)
def _get_schema_class(self, obj):
    """Resolve the schema.org schema class for the record's resource type.

    The object type's ``'schema.org'`` value is a URL whose trailing path
    segment names the class to look up on ``schemas``.
    """
    data = obj['metadata']
    obj_type = ObjectType.get_by_dict(data['resource_type'])
    # Take the last path segment instead of the previous magic slice
    # `[19:]` (i.e. len('https://schema.org/')) — same result for
    # 'https://schema.org/<Class>' URLs, but self-explanatory.
    class_name = obj_type['schema.org'].rsplit('/', 1)[-1]
    return getattr(schemas, class_name)
def get_type(self, obj):
    """Get record CSL type."""
    resolved = ObjectType.get_by_dict(obj['metadata'].get('resource_type'))
    if not resolved:
        # Unknown resource type: fall back to the generic CSL type.
        return 'article'
    return resolved.get('csl', 'article')