示例#1
0
class FunderRelationSchema(Schema):
    """Funder relation schema.

    A funder is referenced by an ``id`` and/or a free text ``name``; at least
    one of the two must carry a non-empty value.
    """

    name = SanitizedUnicode(
        validate=validate.Length(min=1, error=_('Name cannot be blank.')))
    id = SanitizedUnicode()

    @validates_schema
    def validate_funder(self, data, **kwargs):
        """Validate that either an id or a name is present.

        :param data: the loaded funder data.
        :raises ValidationError: reported under ``funder`` when both ``id``
            and ``name`` are missing or empty.
        """
        # NOTE: This hook only validates. Rebinding the local ``data`` (as a
        # previous version did) never changed what the schema loads, so that
        # dead code was dropped.
        if not data.get("id") and not data.get("name"):
            raise ValidationError(
                _("An existing id or a free text name must be present"),
                "funder")
示例#2
0
class CommunityMetadataSchema(Schema):
    """Community metadata schema.

    Declares the metadata fields of a community. Only ``title`` is marked as
    required; all other fields are optional.
    """

    # Closed list of values accepted by the ``type`` field below.
    COMMUNITY_TYPES = [
        'organization',
        'event',
        'topic',
        'project',
    ]

    # Plain-text fields, validated by ``_not_blank`` with a ``max`` argument.
    title = SanitizedUnicode(required=True, validate=_not_blank(max=250))
    description = SanitizedUnicode(validate=_not_blank(max=2000))

    # HTML fields (sanitized), validated by ``_not_blank`` with ``max=2000``.
    curation_policy = SanitizedHTML(validate=_not_blank(max=2000))
    page = SanitizedHTML(validate=_not_blank(max=2000))

    # TODO: Use general small vocabularies
    type = SanitizedUnicode(validate=validate.OneOf(COMMUNITY_TYPES))
    website = fields.Url(validate=_not_blank())
    # Lists of nested funding and affiliation entries.
    funding = fields.List(fields.Nested(FundingSchema))
    organizations = fields.List(fields.Nested(AffiliationSchema))
示例#3
0
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor.

    An affiliation is given by an ``id`` and/or a free text ``name``; at least
    one of the two must carry a non-empty value.
    """

    id = SanitizedUnicode()
    name = SanitizedUnicode()

    @validates_schema
    def validate_affiliation(self, data, **kwargs):
        """Validate that either an id or a name is present.

        :param data: the loaded affiliation data.
        :raises ValidationError: reported under ``affiliations`` when both
            ``id`` and ``name`` are missing or empty.
        """
        # NOTE: This hook only validates. Rebinding the local ``data`` (as a
        # previous version did) never changed what the schema loads, so that
        # dead code was dropped.
        if not data.get("id") and not data.get("name"):
            raise ValidationError(
                _("An existing id or a free text name must be present"),
                "affiliations"
            )
示例#4
0
class CreatorSchema(Schema):
    """Schema of a record creator.

    Combines the mandatory person/organization with an optional role and an
    optional list of affiliations.
    """

    person_or_org = fields.Nested(PersonOrOrganizationSchema, required=True)
    role = SanitizedUnicode()
    affiliations = fields.List(fields.Nested(AffiliationSchema))

    @validates_schema
    def validate_role(self, data, **kwargs):
        """Check the role through ``validate_entry`` when one was provided."""
        if 'role' not in data:
            # Nothing to check: the role is optional.
            return

        validate_entry('creators.role', data)
示例#5
0
class ReferenceSchema(IdentifierSchema):
    """Schema of a reference: a mandatory text plus an optional identifier."""

    # Identifier schemes handed to the parent schema as ``allowed_schemes``.
    SCHEMES = ["isni", "grid", "crossreffunderid", "other"]

    reference = SanitizedUnicode(required=True)

    def __init__(self, **kwargs):
        """Initialise the parent schema with this class' schemes.

        The identifier itself is declared as not required.
        """
        super().__init__(
            allowed_schemes=self.SCHEMES,
            identifier_required=False,
            **kwargs,
        )
示例#6
0
class AwardRelationSchema(Schema):
    """Schema relating a record to an award.

    The award is given either by ``id`` or by the pair ``number`` + ``title``.
    """

    id = SanitizedUnicode()
    number = SanitizedUnicode()
    title = i18n_strings
    identifiers = IdentifierSet(
        fields.Nested(
            partial(
                IdentifierSchema,
                allowed_schemes=award_schemes,
                identifier_required=False,
            )
        )
    )

    @validates_schema
    def validate_data(self, data, **kwargs):
        """Ensure that an id, or both number and title, were provided."""
        has_id = bool(data.get("id"))
        has_number_and_title = bool(data.get("number") and data.get("title"))

        if has_id or has_number_and_title:
            return

        raise ValidationError(
            _("An existing id or number/title must be present."), "award")
示例#7
0
class FunderSchema(IdentifierSchema):
    """Schema of a funder: a mandatory name plus an optional identifier."""

    name = SanitizedUnicode(
        required=True,
        validate=_not_blank(_('Name cannot be blank.')),
    )

    def __init__(self, **kwargs):
        """Initialise the parent schema.

        Unknown schemes do not fail and the identifier is not required.
        """
        super().__init__(
            fail_on_unknown=False,
            identifier_required=False,
            **kwargs,
        )
class AccessSchema(Schema):
    """Schema of the access information (protection levels and embargo)."""

    metadata = SanitizedUnicode(required=True)
    files = SanitizedUnicode(required=True)
    embargo = NestedAttribute(EmbargoSchema)
    status = SanitizedUnicode(dump_only=False)
    owned_by = List(Nested(Agent))

    def validate_protection_value(self, value, field_name):
        """Raise a ``ValidationError`` unless the value is a known status.

        :param value: the protection value to check.
        :param field_name: name of the checked field, used in the message.
        """
        if value in AccessStatusEnum.list():
            return

        message = _("'{}' must be either '{}', '{}' or '{}'").format(
            field_name,
            *AccessStatusEnum.list(),
        )
        raise ValidationError(message, "record")

    @validates("metadata")
    def validate_record_protection(self, value):
        """Check the protection value of the ``metadata`` field."""
        self.validate_protection_value(value, "metadata")

    @validates_schema
    def validate_embargo(self, data, **kwargs):
        """Require an embargo whenever the metadata is marked as embargoed."""
        is_embargoed = (
            AccessStatusEnum.EMBARGOED.value == data.get("metadata", "")
        )
        has_embargo = bool(data.get("embargo", ""))

        if is_embargoed and not has_embargo:
            raise ValidationError(
                _("Embargo must be set if metadata is Embargoed"),
                field_name="embargo",
            )

    @validates("files")
    def validate_files_protection(self, value):
        """Check the protection value of the ``files`` field."""
        self.validate_protection_value(value, "files")
示例#9
0
class AwardSchema(BaseVocabularySchema):
    """Schema of an award vocabulary entry."""

    identifiers = IdentifierSet(
        fields.Nested(
            partial(
                IdentifierSchema,
                allowed_schemes=award_schemes,
                identifier_required=False,
            )
        )
    )
    number = SanitizedUnicode(
        required=True,
        validate=validate.Length(min=1, error=_('Number cannot be blank.')),
    )
    funder = fields.Nested(FunderRelationSchema)

    acronym = SanitizedUnicode()

    id = SanitizedUnicode(
        validate=validate.Length(min=1, error=_('Pid cannot be blank.')))

    @validates_schema
    def validate_id(self, data, **kwargs):
        """Require an id on creation and discard it otherwise.

        The operation counts as a creation when the schema context holds no
        ``record`` entry.
        """
        if "record" in self.context:
            # Not a creation: any submitted id is dropped.
            data.pop("id", None)
            return

        if "id" not in data:
            raise ValidationError(_("Missing PID."), "id")

    @post_load(pass_many=False)
    def move_id(self, data, **kwargs):
        """Rename the loaded ``id`` key to ``pid``."""
        if "id" not in data:
            return data

        data["pid"] = data.pop("id")
        return data

    @pre_dump(pass_many=False)
    def extract_pid_value(self, data, **kwargs):
        """Expose the PID value of the dumped object under ``id``."""
        pid_value = data.pid.pid_value
        data['id'] = pid_value
        return data
示例#10
0
class SubjectSchema(Schema):
    """Subject schema.

    A subject is given by an ``id`` and/or a free text ``subject``; at least
    one of the two must carry a non-empty value.
    """

    id = SanitizedUnicode()
    subject = SanitizedUnicode()
    scheme = SanitizedUnicode()

    @validates_schema
    def validate_subject(self, data, **kwargs):
        """Validate that either an id or a subject is present.

        :param data: the loaded subject data.
        :raises ValidationError: reported under ``subjects`` when both ``id``
            and ``subject`` are missing or empty.
        """
        # NOTE: This hook only validates. Rebinding the local ``data`` (as a
        # previous version did) never changed what the schema loads, so that
        # dead code was dropped.
        if not data.get("id") and not data.get("subject"):
            raise ValidationError(
                _("An existing id or a free text subject must be present"),
                "subjects"
            )
示例#11
0
class LanguageSchema(Schema):
    """Language schema.

    Only ``id`` can be loaded (and is required); the remaining fields are
    dump-only and read from attributes nested under ``metadata``.
    """
    class Meta:
        """Meta class to discard unknown fields."""

        unknown = EXCLUDE

    id = SanitizedUnicode(required=True)
    # TODO: replace ".en" with UI serialization
    title = fields.Raw(attribute="metadata.title.en", dump_only=True)
    description = fields.Raw(attribute="metadata.description.en",
                             dump_only=True)
    props = fields.Raw(attribute="metadata.props", dump_only=True)
示例#12
0
class SecretLink(Schema):
    """Schema for a secret link.

    ``id``, ``created_at`` and ``token`` are dump-only; ``expires_at`` and
    ``permission`` are optional and may also be loaded.
    """

    id = fields.String(dump_only=True)
    # Both timestamps use UTC and the ISO format.
    created_at = TZDateTime(timezone=timezone.utc,
                            format='iso',
                            required=False,
                            dump_only=True)
    expires_at = TZDateTime(timezone=timezone.utc,
                            format='iso',
                            required=False)
    permission = fields.String(required=False)
    token = SanitizedUnicode(dump_only=True)
示例#13
0
class AccessSchema(Schema):
    """Schema of the access information (record/files protection, embargo)."""

    record = SanitizedUnicode(required=True)
    files = SanitizedUnicode(required=True)
    embargo = fields.Nested(EmbargoSchema)

    def validate_protection_value(self, value, field_name):
        """Raise unless the value is ``public`` or ``restricted``.

        :param value: the protection value to check.
        :param field_name: name of the checked field, used in the message.
        """
        if value == "public" or value == "restricted":
            return

        message = _("'{}' must be either 'public' or 'restricted'").format(
            field_name)
        raise ValidationError(message, "record")

    @validates("record")
    def validate_record_protection(self, value):
        """Check the protection value of the ``record`` field."""
        self.validate_protection_value(value, "record")

    @validates("files")
    def validate_files_protection(self, value):
        """Check the protection value of the ``files`` field."""
        self.validate_protection_value(value, "files")
示例#14
0
class MetadataSchema(Schema):
    """Schema for the record metadata.

    Required fields are ``resource_type``, ``creators`` (at least one entry),
    ``title`` and ``publication_date``; every other field is optional.
    """

    # Metadata fields
    resource_type = fields.Nested(VocabularySchema, required=True)
    # An empty list is rejected with the same message as a missing field.
    creators = fields.List(fields.Nested(CreatorSchema),
                           required=True,
                           validate=validate.Length(
                               min=1,
                               error=_("Missing data for required field.")))
    title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
    additional_titles = fields.List(fields.Nested(TitleSchema))
    publisher = SanitizedUnicode()
    publication_date = EDTFDateString(required=True)
    subjects = fields.List(fields.Nested(SubjectSchema))
    contributors = fields.List(fields.Nested(ContributorSchema))
    dates = fields.List(fields.Nested(DateSchema))
    languages = fields.List(fields.Nested(VocabularySchema))
    # alternate identifiers
    identifiers = IdentifierSet(
        fields.Nested(
            partial(IdentifierSchema,
                    allowed_schemes=record_identifiers_schemes)))
    related_identifiers = fields.List(fields.Nested(RelatedIdentifierSchema))
    # Each size/format entry is validated by ``_not_blank``.
    sizes = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Size cannot be a blank string.'))))
    formats = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Format cannot be a blank string.'))))
    version = SanitizedUnicode()
    rights = fields.List(fields.Nested(RightsSchema))
    description = SanitizedHTML(validate=validate.Length(min=3))
    additional_descriptions = fields.List(fields.Nested(DescriptionSchema))
    # NOTE: a single nested object, not a list.
    locations = fields.Nested(FeatureSchema)
    funding = fields.List(fields.Nested(FundingSchema))
    references = fields.List(fields.Nested(ReferenceSchema))
class DescriptionSchema(Schema):
    """Schema for the additional descriptions.

    Both ``description`` (at least 3 characters) and ``type`` are required;
    ``lang`` is an optional nested language.
    """

    # Closed list of values accepted by the ``type`` field below.
    DESCRIPTION_TYPES = [
        "abstract", "methods", "seriesinformation", "tableofcontents",
        "technicalinfo", "other"
    ]
    description = SanitizedHTML(required=True, validate=validate.Length(min=3))
    type = SanitizedUnicode(
        required=True,
        validate=validate.OneOf(
            choices=DESCRIPTION_TYPES,
            error=_(
                'Invalid description type. {input} not one of {choices}.')))
    lang = fields.Nested(LanguageSchema)
示例#16
0
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor.

    ``name`` is required; ``identifiers`` is an optional dictionary whose keys
    are identifier schemes and whose values are the identifiers themselves.
    """

    name = SanitizedUnicode(required=True)
    identifiers = fields.Dict()

    @validates("identifiers")
    def validate_identifiers(self, value):
        """Validate well-formed identifiers are passed.

        Each value is checked with the ``idutils.is_<scheme>`` function when
        one exists for its key; keys without such a function are accepted.

        :param value: dictionary mapping a scheme to an identifier.
        :raises ValidationError: if the dictionary is empty, a key is an empty
            string, or a value is rejected by its ``idutils`` validator.
        """
        if not value:
            raise ValidationError(_("Invalid identifier."))

        for scheme, identifier in value.items():
            validator = getattr(idutils, 'is_' + scheme, None)
            # NOTE: identifier key cannot be empty string
            if not scheme or (validator and not validator(identifier)):
                # Interpolate after the translation lookup: an f-string inside
                # ``_()`` would be formatted first and never match a catalog
                # entry.
                raise ValidationError(
                    _("Invalid identifier ({identifier}).").format(
                        identifier=scheme))
class AccessSchema(Schema):
    """Schema of the access information (owners, access right, embargo)."""

    metadata = fields.Bool(required=True)
    owned_by = fields.List(fields.Nested(Agent))
    access_right = SanitizedUnicode(required=True)
    embargo_date = ISODateString()
    access_condition = fields.Nested(AccessConditionSchema)

    @validates("embargo_date")
    def validate_embargo_date(self, value):
        """Reject an embargo date that is today (UTC) or earlier."""
        embargo_day = arrow.get(value).date()
        today = arrow.utcnow().date()

        if embargo_day > today:
            return

        raise ValidationError(
            _("Embargo date must be in the future."),
            field_names=["embargo_date"],
        )

    @validates_schema
    def validate_access_right(self, data, **kwargs):
        """Check the access right through ``validate_entry``."""
        validate_entry("access_right", data)
示例#18
0
class FilesOptionsSchema(Schema):
    """Schema of the basic files options."""

    enabled = fields.Bool(missing=True)
    # ``None`` is accepted so that the preview can be unset.
    default_preview = SanitizedUnicode(allow_none=True)

    def get_attribute(self, obj, attr, default):
        """Read a value from the dumped object by attribute access.

        NOTE: Data is loaded from a plain external dict but dumped from a
              data-layer object, whose fields have to be read as attributes.
              Access by key runs into FilesManager key access protection and
              raises.

        A falsy ``default_preview`` is reported as ``default``.
        """
        value = getattr(obj, attr, default)
        preview_is_unset = attr == "default_preview" and not value

        return default if preview_is_unset else value
示例#19
0
class MetadataSchema(Schema):
    """Schema for the record metadata.

    Required fields are ``resource_type``, ``creators``, ``title`` and
    ``publication_date``; every other declared field is optional.
    """

    # Both permission mappings are currently empty placeholders.
    field_load_permissions = {
        # TODO: define "can_admin" action
    }

    field_dump_permissions = {
        # TODO: define "can_admin" action
    }

    class Meta:
        """Meta class to accept unknown fields."""

        unknown = INCLUDE

    # Metadata fields
    resource_type = fields.Nested(ResourceTypeSchema, required=True)
    creators = fields.List(fields.Nested(CreatorSchema), required=True)
    title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
    additional_titles = fields.List(fields.Nested(TitleSchema))
    publisher = SanitizedUnicode()
    publication_date = EDTFDateString(required=True)
    subjects = fields.List(fields.Nested(SubjectSchema))
    contributors = fields.List(fields.Nested(ContributorSchema))
    dates = fields.List(fields.Nested(DateSchema))
    languages = fields.List(fields.Nested(LanguageSchema))
    # alternate identifiers
    identifiers = fields.List(fields.Nested(IdentifierSchema))
    # NOTE(review): ``error`` is passed to the field itself rather than to the
    # ``_no_duplicates`` validator -- confirm this message is actually used.
    related_identifiers = fields.List(
        fields.Nested(RelatedIdentifierSchema),
        validate=_no_duplicates,
        error=_('Invalid related identifiers cannot contain duplicates.'))
    # Each size/format entry is validated by ``_not_blank``.
    sizes = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Size cannot be a blank string.'))))
    formats = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Format cannot be a blank string.'))))
    version = SanitizedUnicode()
    rights = fields.List(fields.Nested(RightsSchema))
    description = SanitizedUnicode(validate=validate.Length(min=3))
    additional_descriptions = fields.List(fields.Nested(DescriptionSchema))
    locations = fields.List(fields.Nested(LocationSchema))
    funding = fields.List(fields.Nested(FundingSchema))
    references = fields.List(fields.Nested(ReferenceSchema))
示例#20
0
class AccessSchema(Schema):
    """Schema of the access information (flags, owners, access right)."""

    metadata = fields.Bool(required=True)
    files = fields.Bool(required=True)
    owned_by = fields.List(
        fields.Integer,
        validate=validate.Length(min=1),
        required=True,
    )
    access_right = SanitizedUnicode(required=True)
    embargo_date = ISODateString()
    access_condition = fields.Nested(AccessConditionSchema)

    @validates('embargo_date')
    def validate_embargo_date(self, value):
        """Reject an embargo date that is today (UTC) or earlier."""
        embargo_day = arrow.get(value).date()
        today = arrow.utcnow().date()

        if embargo_day > today:
            return

        raise ValidationError(
            _('Embargo date must be in the future.'),
            field_names=['embargo_date'],
        )

    @validates_schema
    def validate_access_right(self, data, **kwargs):
        """Check the access right through ``validate_entry``."""
        validate_entry('access_right', data)
示例#21
0
class FileSchema(Schema):
    """Service-level schema describing a file."""

    key = SanitizedUnicode(dump_only=True)
    created = TZDateTime(timezone=timezone.utc, format='iso', dump_only=True)
    updated = TZDateTime(timezone=timezone.utc, format='iso', dump_only=True)

    status = GenMethod('dump_status')

    metadata = Dict(dump_only=True)

    # The fields below are read from the nested ``file`` attribute.
    checksum = Str(dump_only=True, attribute='file.checksum')
    storage_class = Str(dump_only=True, attribute='file.storage_class')
    mimetype = Str(dump_only=True, attribute='file.mimetype')
    size = Number(attribute='file.size')
    version_id = UUID(attribute='file.version_id')
    file_id = UUID(attribute='file.file_id')
    bucket_id = UUID(attribute='file.bucket_id')

    links = Links()

    def dump_status(self, obj):
        """Return ``completed`` when the object has a file, else ``pending``."""
        if obj.file:
            return 'completed'
        return 'pending'
示例#22
0
class DataCite43Schema(Schema):
    """DataCite 4.3 Marshmallow Schema.

    Dump-oriented schema: every field is either read from an attribute nested
    under ``metadata`` or computed by one of the ``get_*`` methods below. The
    dumped object is accessed by key (``obj["metadata"]``, ``obj["pids"]``).
    """

    # PIDS-FIXME: What about versioning links and related ids
    types = fields.Method("get_type")
    titles = fields.Method("get_titles")
    creators = fields.List(fields.Nested(CreatorSchema43),
                           attribute="metadata.creators")
    contributors = fields.List(fields.Nested(ContributorSchema43),
                               attribute="metadata.contributors")
    publisher = fields.Str(attribute="metadata.publisher")
    publicationYear = fields.Method("get_publication_year")
    subjects = fields.List(fields.Nested(SubjectSchema43),
                           attribute="metadata.subjects")
    dates = fields.Method("get_dates")
    language = fields.Method("get_language")
    identifiers = fields.Method("get_identifiers")
    relatedIdentifiers = fields.Method("get_related_identifiers")
    sizes = fields.List(SanitizedUnicode(), attribute="metadata.sizes")
    formats = fields.List(SanitizedUnicode(), attribute="metadata.formats")
    version = SanitizedUnicode(attribute="metadata.version")
    rightsList = fields.List(fields.Nested(RightSchema43),
                             attribute="metadata.rights")
    descriptions = fields.Method("get_descriptions")
    geoLocations = fields.Method("get_locations")
    fundingReferences = fields.List(fields.Nested(FundingSchema43),
                                    attribute="metadata.funding")
    schemaVersion = fields.Constant("http://datacite.org/schema/kernel-4")

    def get_type(self, obj):
        """Get resource type.

        Placeholder implementation: the resource type is looked up but the
        returned values are still hard-coded ``FIXME`` markers.
        """
        # Not used yet; kept so a record without a resource type still fails
        # here, as before.
        resource_type = obj["metadata"]["resource_type"]

        return {
            "resourceTypeGeneral": "FIXME",
            "resourceType": "FIXME",
        }

    def get_titles(self, obj):
        """Get titles list.

        The main title comes first, followed by the additional titles with
        their optional ``titleType`` and ``lang``.
        """
        metadata = obj["metadata"]

        titles = [{"title": metadata.get("title")}]
        additional_titles = metadata.get("additional_titles", [])

        for add_title in additional_titles:
            title = {"title": add_title.get("title")}
            type_ = add_title.get("type")
            if type_:
                title["titleType"] = type_.capitalize()
            lang = add_title.get("lang")
            if lang:
                title["lang"] = lang

            titles.append(title)

        return titles

    def get_publication_year(self, obj):
        """Get publication year from edtf date.

        Not implemented yet: always returns ``missing``.
        """
        # PIDS-FIXME: Make the EDTFDateString somehow access the year?
        return missing

    def get_dates(self, obj):
        """Get dates.

        The publication date is emitted as the ``Issued`` date, followed by
        the entries of ``metadata.dates``.
        """
        dates = [{
            "date": obj["metadata"]["publication_date"],
            "dateType": "Issued"
        }]

        for date in obj["metadata"].get("dates", []):
            to_append = {
                "date": date["date"],
                "dateType": date["type"].capitalize()
            }
            desc = date.get("description")
            if desc:
                to_append["dateInformation"] = desc

            dates.append(to_append)

        return dates or missing

    def get_language(self, obj):
        """Get language.

        Returns the id of the first language, or ``missing`` if there is none.
        """
        metadata = obj["metadata"]
        languages = metadata.get("languages")

        if languages:
            # PIDS-FIXME: How to choose? the first?
            return languages[0]["id"]

        return missing

    def get_identifiers(self, obj):
        """Get identifiers list.

        Combines the alternate identifiers of the metadata with the PIDs of
        the record, in that order.
        """
        serialized_identifiers = []

        # Identifiers field
        metadata = obj["metadata"]
        identifiers = metadata.get("identifiers", [])
        for id_ in identifiers:
            serialized_identifiers.append({
                "identifier": id_["identifier"],
                "identifierType": id_["scheme"]
            })

        # PIDs field
        pids = obj["pids"]
        for scheme, id_ in pids.items():
            serialized_identifiers.append({
                "identifier": id_["identifier"],
                "identifierType": scheme.upper()
            })

        return serialized_identifiers or missing

    def get_related_identifiers(self, obj):
        """Get related identifiers."""
        # PIDS-FIXME: This might get much more complex depending on the id
        serialized_identifiers = []
        metadata = obj["metadata"]
        identifiers = metadata.get("related_identifiers", [])
        for rel_id in identifiers:
            serialized_identifiers.append({
                "relatedIdentifier":
                rel_id["identifier"],
                "relatedIdentifierType":
                rel_id["scheme"].upper(),
                "relationType":
                rel_id["relation_type"].capitalize(),
                "resourceTypeGeneral":
                rel_id["resource_type"]["type"],
            })

        return serialized_identifiers or missing

    def get_descriptions(self, obj):
        """Get descriptions list.

        The main description is emitted as the ``Abstract``, followed by the
        additional descriptions with their type and optional ``lang``.
        """
        metadata = obj["metadata"]
        descriptions = []

        description = metadata.get("description")
        if description:
            descriptions.append({
                "description": description,
                "descriptionType": "Abstract"
            })

        additional_descriptions = metadata.get("additional_descriptions", [])
        for add_desc in additional_descriptions:
            description = {
                "description": add_desc["description"],
                "descriptionType": add_desc["type"].capitalize(),
            }

            lang = add_desc.get("lang")
            if lang:
                description["lang"] = lang

            descriptions.append(description)

        return descriptions or missing

    def get_locations(self, obj):
        """Get locations.

        Each location contributes its ``place`` and, for ``Point``
        geometries, its coordinates. Other geometry types are ignored.
        """
        locations = []

        for location in obj["metadata"].get("locations", []):
            place = location.get("place")
            serialized_location = {}
            if place:
                serialized_location["geoLocationPlace"] = place
            geometry = location.get("geometry")
            if geometry:
                geo_type = geometry["type"]
                # PIDS-FIXME: Scalable enough?
                # PIDS-FIXME: Implement Box and Polygon serialization
                if geo_type == "Point":
                    # GeoJSON positions are ordered [longitude, latitude].
                    longitude, latitude = geometry["coordinates"][:2]
                    serialized_location["geoLocationPoint"] = {
                        "pointLatitude": latitude,
                        "pointLongitude": longitude,
                    }

            locations.append(serialized_location)
        return locations or missing
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor.

    ``name`` is required; ``identifiers`` is an optional set of nested
    identifiers.
    """

    name = SanitizedUnicode(required=True)
    identifiers = IdentifierSet(fields.Nested(IdentifierSchema), )
class LanguageSchema(Schema):
    """Language schema.

    Only ``id`` can be loaded (and is required); ``title`` and
    ``description`` are dump-only.
    """

    id = SanitizedUnicode(required=True)
    title = fields.Raw(dump_only=True)
    description = fields.Raw(dump_only=True)
class ResourceTypeSchema(Schema):
    """Resource type schema.

    Only ``id`` can be loaded (and is required); ``title`` is a dump-only
    dictionary.
    """

    id = SanitizedUnicode(required=True)
    title = fields.Dict(dump_only=True)
示例#26
0
class DataCite43Schema(Schema):
    """DataCite JSON 4.3 Marshmallow Schema."""

    # PIDS-FIXME: What about versioning links and related ids
    types = fields.Method('get_type')
    titles = fields.Method('get_titles')
    creators = fields.List(fields.Nested(CreatorSchema43),
                           attribute='metadata.creators')
    contributors = fields.List(fields.Nested(ContributorSchema43),
                               attribute='metadata.contributors')
    publisher = fields.Str(attribute='metadata.publisher')
    publicationYear = fields.Method("get_publication_year")
    subjects = fields.Method("get_subjects")
    dates = fields.Method('get_dates')
    language = fields.Method('get_language')
    identifiers = fields.Method('get_identifiers')
    relatedIdentifiers = fields.Method('get_related_identifiers')
    sizes = fields.List(SanitizedUnicode(), attribute="metadata.sizes")
    formats = fields.List(SanitizedUnicode(), attribute="metadata.formats")
    version = SanitizedUnicode(attribute="metadata.version")
    rightsList = fields.Method('get_rights')
    descriptions = fields.Method('get_descriptions')
    geoLocations = fields.Method("get_locations")
    fundingReferences = fields.List(fields.Nested(FundingSchema43),
                                    attribute='metadata.funding')
    schemaVersion = fields.Constant("http://datacite.org/schema/kernel-4")

    def get_type(self, obj):
        """Build the DataCite resource type of the record.

        The general type falls back to ``Other`` and the specific type to an
        empty string when the vocabulary entry lacks the property.
        """
        resource_type_id = obj["metadata"]["resource_type"]["id"]
        props = get_vocabulary_props(
            'resourcetypes',
            ['props.datacite_general', 'props.datacite_type'],
            resource_type_id,
        )

        general = props.get("datacite_general", "Other")
        specific = props.get("datacite_type", "")
        return {'resourceTypeGeneral': general, 'resourceType': specific}

    def _merge_main_and_additional(self, obj, field, default_type=None):
        """Serialize the main value of a field together with its additions.

        :param obj: the dumped object; its ``metadata`` entry is read.
        :param field: base field name (``title`` or ``description``).
        :param default_type: type assigned to the main value, if any.
        :returns: list of serialized entries, or ``missing`` when empty.
        """
        metadata = obj["metadata"]
        type_key = f"{field}Type"
        merged = []

        # Main value first, if there is one.
        main_value = metadata.get(field)
        if main_value:
            entry = {field: strip_html(main_value)}
            if default_type:
                entry[type_key] = default_type
            merged.append(entry)

        # Then every additional value with its optional type and language.
        for extra in metadata.get(f"additional_{field}s", []):
            entry = {field: strip_html(extra.get(field))}

            type_id = extra.get("type", {}).get("id")
            if type_id:
                props = get_vocabulary_props(
                    f"{field}types", ["props.datacite"], type_id)
                if "datacite" in props:
                    entry[type_key] = props["datacite"]

            lang_id = extra.get("lang", {}).get("id")
            if lang_id:
                entry["lang"] = lang_id

            merged.append(entry)

        return merged or missing

    def get_titles(self, obj):
        """Return the main title merged with the additional titles."""
        titles = self._merge_main_and_additional(obj, "title")
        return titles

    def get_descriptions(self, obj):
        """Return the main description (as Abstract) plus the additional ones."""
        descriptions = self._merge_main_and_additional(
            obj, "description", default_type="Abstract")
        return descriptions

    def get_publication_year(self, obj):
        """Get publication year from edtf date.

        :param obj: the dumped object; ``metadata.publication_date`` is read.
        :returns: the year of the lower bound of the EDTF date, as a string.
        :raises ValidationError: if the publication date cannot be parsed.
        """
        try:
            publication_date = obj["metadata"]["publication_date"]
            parsed_date = parse_edtf(publication_date)
            return str(parsed_date.lower_strict().tm_year)
        except ParseException:
            # Should not fail since it was validated at service schema
            # NOTE: the trailing space keeps the two literals from being
            # glued together as "forrecord".
            current_app.logger.error("Error parsing publication_date field for "
                                     f"record {obj['metadata']}")
            raise ValidationError(_("Invalid publication date value."))

    def get_dates(self, obj):
        """Return the publication date (as Issued) plus the additional dates.

        The type of an additional date falls back to ``Other`` when its
        vocabulary entry has no ``datacite`` property.
        """
        issued = {
            "date": obj["metadata"]["publication_date"],
            "dateType": "Issued",
        }
        serialized = [issued]

        for entry in obj["metadata"].get("dates", []):
            type_id = entry.get("type", {}).get("id")
            props = get_vocabulary_props(
                'datetypes', ["props.datacite"], type_id)

            item = {
                "date": entry["date"],
                "dateType": props.get("datacite", "Other"),
            }
            information = entry.get("description")
            if information:
                item["dateInformation"] = information

            serialized.append(item)

        return serialized or missing

    def get_language(self, obj):
        """Return the id of the first language, or ``missing`` if none."""
        languages = obj["metadata"].get("languages", [])
        if not languages:
            return missing

        # DataCite support only one language, so we take the first.
        return languages[0]["id"]

    def get_identifiers(self, obj):
        """Serialize the PIDs followed by the alternate identifiers.

        Entries whose scheme resolves to a falsy DataCite scheme are skipped,
        and so are alternate identifiers resolving to ``DOI``.
        """
        scheme_config = "RDM_RECORDS_IDENTIFIERS_SCHEMES"
        serialized = []

        # pids go first so the DOI from the record is included
        for pid_scheme, pid in obj["pids"].items():
            datacite_scheme = get_scheme_datacite(
                pid_scheme, scheme_config, default=pid_scheme)
            if not datacite_scheme:
                continue

            serialized.append({
                "identifier": pid["identifier"],
                "identifierType": datacite_scheme,
            })

        # Identifiers field
        for alternate in obj["metadata"].get("identifiers", []):
            alternate_scheme = alternate["scheme"]
            datacite_scheme = get_scheme_datacite(
                alternate_scheme, scheme_config, default=alternate_scheme)

            # DataCite only accepts a DOI identifier that is the official
            # registered DOI - ones in the alternate identifier field are
            # dropped
            if datacite_scheme and datacite_scheme != 'DOI':
                serialized.append({
                    "identifier": alternate["identifier"],
                    "identifierType": datacite_scheme,
                })

        return serialized or missing

    def get_related_identifiers(self, obj):
        """Serialize the related identifiers of the record.

        Entries whose scheme resolves to a falsy DataCite scheme are skipped.
        """
        serialized = []

        for related in obj["metadata"].get("related_identifiers", []):
            relation_type_id = related.get("relation_type", {}).get("id")
            relation_props = get_vocabulary_props(
                "relationtypes", ["props.datacite"], relation_type_id)

            scheme = related["scheme"]
            datacite_scheme = get_scheme_datacite(
                scheme,
                "RDM_RECORDS_IDENTIFIERS_SCHEMES",
                default=scheme,
            )
            if not datacite_scheme:
                continue

            item = {
                "relatedIdentifier": related["identifier"],
                "relationType": relation_props.get("datacite", ""),
                "relatedIdentifierType": datacite_scheme,
            }

            resource_type_id = related.get("resource_type", {}).get("id")
            if resource_type_id:
                # Cache is on both keys so query datacite_type as well
                # even though it's not accessed.
                resource_props = get_vocabulary_props(
                    "resourcetypes",
                    ["props.datacite_general", "props.datacite_type"],
                    resource_type_id)
                item["resourceTypeGeneral"] = resource_props.get(
                    "datacite_general", "Other")

            serialized.append(item)

        return serialized or missing

    def get_locations(self, obj):
        """Serialize the record's locations as DataCite geoLocations.

        Reads ``obj["metadata"]["locations"]["features"]``. For each feature
        the ``place`` (when set) is emitted as ``geoLocationPlace`` and a
        ``Point`` geometry is emitted as ``geoLocationPoint``. Other geometry
        types are not serialized yet.

        :param obj: record dict with a ``metadata`` key.
        :returns: list with one dict per feature, or ``missing`` when there
            are no features.
        """
        locations = []

        loc_list = obj["metadata"].get("locations", {}).get("features", [])
        for location in loc_list:
            serialized_location = {}

            place = location.get("place")
            if place:
                serialized_location["geoLocationPlace"] = place

            geometry = location.get("geometry")
            if geometry:
                geo_type = geometry["type"]
                # PIDS-FIXME: Scalable enough?
                # PIDS-FIXME: Implement Box and Polygon serialization
                if geo_type == "Point":
                    coordinates = geometry["coordinates"]
                    # GeoJSON positions are ordered (longitude, latitude),
                    # so latitude is the second element, not the first.
                    serialized_location["geoLocationPoint"] = {
                        "pointLatitude": coordinates[1],
                        "pointLongitude": coordinates[0],
                    }

            locations.append(serialized_location)
        return locations or missing

    def get_subjects(self, obj):
        """Serialize the record's subjects for DataCite.

        Subjects with a truthy ``subject`` text are emitted directly. For the
        others, their ``id`` values are looked up in one call to the
        ``subjects`` service and emitted with ``subject`` and
        ``subjectScheme``; ``valueURI`` is added when the id passes URL
        validation.

        :param obj: record dict with a ``metadata`` key.
        :returns: list of serialized subjects, or ``missing`` when there is
            nothing to serialize.
        """
        raw_subjects = obj["metadata"].get("subjects", [])
        if not raw_subjects:
            return missing

        results = []
        vocab_ids = []
        for entry in raw_subjects:
            text = entry.get("subject")
            if text:
                results.append({"subject": text})
                continue
            vocab_ids.append(entry.get("id"))

        if vocab_ids:
            service = current_service_registry.get("subjects")
            resolved = service.read_many(system_identity, vocab_ids)
            is_url = validate.URL()
            for record in resolved:
                item = {
                    "subject": record.get("subject"),
                    "subjectScheme": record.get("scheme"),
                }
                record_id = record.get("id")

                try:
                    is_url(record_id)
                except ValidationError:
                    # Id is not a URL: emit the subject without a valueURI.
                    pass
                else:
                    item["valueURI"] = record_id

                results.append(item)

        return results or missing

    def get_rights(self, obj):
        """Serialize the record's rights for DataCite.

        Rights without an ``id`` are emitted from their own ``title`` (in the
        current default locale) and optional ``link``. Rights with an ``id``
        are fetched in one call from the ``licenses`` vocabulary and emitted
        with their title, identifier, identifier scheme and, when present,
        URL.

        :param obj: record dict with a ``metadata`` key.
        :returns: list of serialized rights, or ``missing`` when there is
            nothing to serialize.
        """
        declared = obj["metadata"].get("rights", [])
        if not declared:
            return missing

        results = []
        license_ids = []
        for entry in declared:
            entry_id = entry.get("id")
            if entry_id:
                # Vocabulary-backed right: resolved in bulk further down.
                license_ids.append(entry_id)
                continue

            item = {
                "rights": entry.get("title").get(current_default_locale()),
            }
            uri = entry.get("link")
            if uri:
                item["rightsUri"] = uri
            results.append(item)

        if license_ids:
            licenses = vocabulary_service.read_many(
                system_identity, "licenses", license_ids)
            for record in licenses:
                title = record.get("title").get(current_default_locale())
                scheme = record.get("props").get("scheme")
                item = {
                    "rights": title,
                    "rightsIdentifierScheme": scheme,
                    "rightsIdentifier": record.get("id"),
                }
                uri = record.get("props").get("url")
                if uri:
                    item["rightsUri"] = uri
                results.append(item)

        return results or missing
示例#27
0
        )
        return "", 204

    @request_search_args
    @request_view_args
    @response_handler(many=True)
    def search(self):
        """Return the secret links of the record addressed by the URL.

        The record is identified by the ``pid_value`` view argument and the
        lookup is performed with the identity stored on ``g``.

        :returns: tuple of the links as a dict and the HTTP status 200.
        """
        read_links = self.service.read_secret_links
        pid_value = resource_requestctx.view_args["pid_value"]
        links = read_links(id_=pid_value, identity=g.identity)
        return links.to_dict(), 200


# Request parser that reads a ``client`` value (a sanitized string) from the
# "args" location of the request.
request_pid_args = request_parser(
    {"client": SanitizedUnicode()},
    location="args",
)


class RDMManagedPIDProviderResource(RecordResource):
    """PID provider resource."""
    def create_url_rules(self):
        """Create the URL rules for the pid provider resource."""
        def p(route):
            """Prefix a route with the URL prefix."""
            return f"{self.config.url_prefix}{route}"

        routes = self.config.routes
        return [
            route("GET", p(routes["item"]), self.create),
            route("DELETE", p(routes["item"]), self.delete),
示例#28
0
class IdentifierSchema(IS):
    """Identifier schema with optional status field.

    Extends the base identifier schema (``IS``) with one extra field.
    """
    # Not declared as required, so the status may be omitted.
    status = SanitizedUnicode()
示例#29
0
def test_extensions(app, minimal_record):
    """Test metadata extensions schema.

    Temporarily installs custom metadata extensions on the app, then checks
    that ``MetadataSchema`` loads valid ``extensions`` unchanged and rejects a
    wrongly typed value. The app's original extensions are restored in a
    ``finally`` clause, so a failing assertion cannot leave the custom
    configuration installed on the app object.
    """
    # Setup metadata extensions
    RDM_RECORDS_METADATA_NAMESPACES = {
        'dwc': {
            '@context': 'https://example.com/dwc/terms'
        },
        'nubiomed': {
            '@context': 'https://example.com/nubiomed/terms'
        }
    }

    RDM_RECORDS_METADATA_EXTENSIONS = {
        'dwc:family': {
            'elasticsearch': 'keyword',
            'marshmallow': SanitizedUnicode(required=True)
        },
        'dwc:behavior': {
            'elasticsearch': 'text',
            'marshmallow': SanitizedUnicode()
        },
        'nubiomed:number_in_sequence': {
            'elasticsearch': 'long',
            'marshmallow': Integer()
        },
        'nubiomed:scientific_sequence': {
            'elasticsearch': 'long',
            'marshmallow': List(Integer())
        },
        'nubiomed:original_presentation_date': {
            'elasticsearch': 'date',
            'marshmallow': ISODateString()
        },
        'nubiomed:right_or_wrong': {
            'elasticsearch': 'boolean',
            'marshmallow': Bool()
        }
    }

    rdm_ext = app.extensions['invenio-rdm-records']
    orig_metadata_extensions = rdm_ext.metadata_extensions

    # Build the replacement first so nothing is swapped if construction fails.
    custom_metadata_extensions = MetadataExtensions(
        RDM_RECORDS_METADATA_NAMESPACES,
        RDM_RECORDS_METADATA_EXTENSIONS
    )
    rdm_ext.metadata_extensions = custom_metadata_extensions

    try:
        # Minimal: only the required extension ('dwc:family') is present
        valid_minimal = {
            'dwc:family': 'Felidae'
        }
        minimal_record['extensions'] = valid_minimal
        data = MetadataSchema().load(minimal_record)
        assert valid_minimal == data.get('extensions')

        # Full
        valid_full = {
            'dwc:family': 'Felidae',
            'dwc:behavior': 'Plays with yarn, sleeps in cardboard box.',
            'nubiomed:number_in_sequence': 3,
            'nubiomed:scientific_sequence': [1, 1, 2, 3, 5, 8],
            'nubiomed:original_presentation_date': '2019-02-14',
            'nubiomed:right_or_wrong': True,
        }
        minimal_record['extensions'] = valid_full
        data = MetadataSchema().load(minimal_record)
        assert valid_full == data.get('extensions')

        # Invalid: a non-integer item in the list of integers
        invalid_scientific_sequence = {
            'dwc:family': 'Felidae',
            'nubiomed:scientific_sequence': [1, 'l', 2, 3, 5, 8],
        }
        minimal_record['extensions'] = invalid_scientific_sequence
        with pytest.raises(ValidationError):
            MetadataSchema().load(minimal_record)
    finally:
        # Always put the original extensions back, pass or fail.
        rdm_ext.metadata_extensions = orig_metadata_extensions
示例#30
0
class PropsSchema(Schema):
    """Schema for URL-related props (``url`` and ``scheme``)."""

    # Sanitized string checked by the validator that ``_valid_url`` returns;
    # 'Not a valid URL.' is the message handed to it (wrapped in ``_``,
    # presumably the translation marker -- confirm against the imports).
    url = SanitizedUnicode(validate=_valid_url(_('Not a valid URL.')))
    # Sanitized string with no additional validation.
    scheme = SanitizedUnicode()