Пример #1
0
class Thing(Schema):
    """Base schema with the attributes shared by every resource.

    ``type`` is accepted on input but stripped from the loaded data; every
    other field declared here is dump-only.
    """

    type = String(description='Only required when it is nested.')
    url = URL(dump_only=True, description='The URL of the resource.')
    same_as = List(URL(dump_only=True), dump_only=True, data_key='sameAs')
    updated = DateTime('iso', dump_only=True, description=m.Thing.updated)
    created = DateTime('iso', dump_only=True, description=m.Thing.created)

    @post_load
    def remove_type(self, data: dict, **kwargs) -> dict:
        """Drop the ``type`` key from the loaded data.

        Args:
            data: The deserialized payload.
            **kwargs: Extra hook arguments (such as ``many``/``partial``)
                that marshmallow 3 passes to processors; ignored here.

        Returns:
            The same dict without the ``type`` key. Returning it matters:
            marshmallow 3 uses the hook's return value as the load result,
            so falling off the end of the method would yield ``None``.
        """
        data.pop('type', None)
        return data
class ResourceDCATSchema(ResourceMixin, Schema):
    """Schema of a resource entry with Polish/English text fields.

    Both hooks are registered with ``pass_many=True`` and iterate over the
    whole collection of resource dicts.
    """

    ext_ident = Str(validate=validate.Length(max=36))
    title_pl = Str()
    title_en = Str(allow_none=True)
    description_pl = Str(allow_none=True)
    description_en = Str(allow_none=True)
    created = DateTime(allow_none=True)
    modified = DateTime(allow_none=True)
    link = URL()
    format = Str()
    file_mimetype = Str(allow_none=True)

    @pre_load(pass_many=True)
    def prepare_multi_data(self, data, **kwargs):
        """Fill in missing ``created`` and ``title_pl`` values before loading.

        For every resource: ``created`` falls back to ``modified`` and
        ``title_pl`` falls back to ``title_en`` when the former is empty and
        the latter is set. The dicts are modified in place.

        Returns:
            The same collection that was passed in.
        """
        for res in data:
            if res.get('modified') and not res.get('created'):
                res['created'] = res['modified']
            if not res.get('title_pl') and res.get('title_en'):
                res['title_pl'] = res['title_en']
        return data

    @post_load(pass_many=True)
    def postprocess_data(self, data, **kwargs):
        """Remove ``created``/``modified`` keys whose loaded value is ``None``.

        Returns:
            The same collection, with the ``None`` timestamps dropped.
        """
        for res in data:
            for key in ('created', 'modified'):
                # Neither field is required, so the key may be missing from
                # the loaded dict altogether; direct indexing would raise
                # KeyError in that case.
                if res.get(key) is None:
                    res.pop(key, None)
        return data

    class Meta:
        ordered = True
        unknown = EXCLUDE
class XMLResourceSchema(ResourceMixin, Schema):
    """Schema of a single resource entry read from an XML-derived dict.

    Input keys use the XML naming (``extIdent``, ``intIdent``, ``@status``,
    ``url``, ...); ``prepare_data`` flattens the nested title/description
    and special-signs structures before field validation.
    """

    ext_ident = Str(
        data_key='extIdent',
        validate=validate.Length(max=36),
        required=True,
    )
    int_ident = Int(data_key='intIdent')
    status = Str(
        data_key='@status',
        validate=validate.OneOf(choices=['draft', 'published']),
    )
    link = URL(data_key='url')
    title_pl = Str()
    title_en = Str()
    description_pl = Str()
    description_en = Str()
    availability = Str(validate=validate.OneOf(choices=['local', 'remote']))
    data_date = Date(data_key='dataDate')
    created = DateTime(data_key='created', allow_none=True)
    modified = DateTime(data_key='lastUpdateDate', allow_none=True)
    special_signs = List(Str())

    class Meta:
        ordered = True
        unknown = EXCLUDE

    @pre_load
    def prepare_data(self, data, **kwargs):
        """Flatten nested input values into the flat keys the fields expect.

        The input dict is modified in place and returned.
        """
        title = data.get('title')
        if isinstance(title, dict):
            data['title_en'] = title.get('english', '')
            data['title_pl'] = title.get('polish', '')

        description = data.get('description')
        if isinstance(description, dict):
            data['description_en'] = description.get('english', '')
            data['description_pl'] = description.get('polish', '')

        # Resources without an explicit availability are treated as local.
        data.setdefault('availability', 'local')

        # The wrapper key is always removed; its inner list is kept if present.
        signs_wrapper = data.pop('specialSigns', {})
        if 'specialSign' in signs_wrapper:
            data['special_signs'] = signs_wrapper['specialSign']
        return data

    @validates_schema
    def validate_int_ident(self, data, **kwargs):
        """Check that ``int_ident`` points at an existing resource.

        Reads ``dataset_int_ident`` (optional) and ``organization``
        (mandatory key) from ``self.context``.

        Raises:
            ValidationError: on field ``int_ident`` when the dataset
                identifier is missing, or when no resource matches the
                resource id, dataset id and organization.
        """
        resource_id = data.get('int_ident')
        dataset_id = self.context.get('dataset_int_ident')
        organization = self.context['organization']

        if not resource_id:
            return
        if not dataset_id:
            raise ValidationError(
                _('intIdent value for related dataset is also required!'),
                field_name='int_ident')
        if not organization:
            return

        resource_exists = Resource.raw.filter(
            id=resource_id,
            dataset_id=dataset_id,
            dataset__organization=organization,
        ).exists()
        if not resource_exists:
            msg = _(
                'Resource with id: %(r_id)s, dataset\'s id: %(d_id)s and institution "%(ins)s" was not found.'
            ) % {
                'r_id': resource_id,
                'd_id': dataset_id,
                "ins": organization.title
            }
            raise ValidationError(msg, field_name='int_ident')
Пример #4
0
 class PostArgs(Schema):
     """Argument schema: tag identifiers plus optional org and provider."""

     # Mandatory list of tag identifiers.
     ids = List(
         String(),
         required=True,
         description='A list of tags identifiers.',
     )
     # Optional organization name.
     org = String(
         description=(
             'The name of an existing organization in the DB. '
             'If not set, the default organization is used.'
         ),
     )
     # Optional base URL of the provider.
     provider = URL(
         description='The Base URL of the provider. By default is this Devicehub.',
     )
class AuthoritySchema(BaseSchema):
    """Schema of an authority record; ``name`` and ``purpose`` are required.

    Free-text fields other than ``purpose`` are capped at 200 characters.
    """

    name = String(
        required=True,
        validate=validate.Length(max=200),
    )
    service = String(validate=validate.Length(max=200))
    email = Email()
    contact_name = String(
        data_key='contactName',
        validate=validate.Length(max=200),
    )
    site_url = URL(data_key='siteURL')
    phone = String(validate=validate.Length(max=200))
    purpose = String(required=True)
    # Checked by the external ``validate_duration`` callable.
    interval_before = String(
        data_key='intervalBefore',
        validate=validate_duration,
    )
class CategorySchema(Schema):
    """Category schema; only the fields listed in ``Meta.fields`` are used."""

    description = Str()
    display_name = Str()
    uuid = UUID()
    # Read from the ``image_display_url`` key of the input.
    image_url = URL(data_key='image_display_url')
    name = Str()
    title = Str()

    class Meta:
        unknown = EXCLUDE
        fields = (
            'title',
            'description',
            'uuid',
            'image_url',
        )
class DataSourceSerializer(Schema):
    """Serializer of a data source, including a per-language frequency label."""

    source_type = Str()
    url = URL()
    title = Str()
    last_import_timestamp = DateTime()
    update_frequency = Method('get_update_frequency')

    def get_update_frequency(self, obj):
        """Return the frequency display text of ``obj`` for each language.

        Returns:
            A dict keyed by every entry of
            ``settings.MODELTRANS_AVAILABLE_LANGUAGES``; each value is the
            string form of ``obj.get_frequency_in_days_display()`` taken
            while that language is activated through ``override``.
        """
        labels = {}
        for language_code in settings.MODELTRANS_AVAILABLE_LANGUAGES:
            with override(language_code):
                # Stringify inside the context so the text is resolved
                # while the language override is still active.
                display = obj.get_frequency_in_days_display()
                labels[language_code] = str(display)
        return labels
Пример #8
0
class RightsMixin:
    """License rights mixin."""

    def __init__(self, **kwargs):
        """Initialise the parent with relaxed unknown/identifier settings.

        ``fail_on_unknown`` and ``identifier_required`` are always passed as
        ``False``; any other keyword arguments are forwarded unchanged.
        """
        super().__init__(
            fail_on_unknown=False,
            identifier_required=False,
            **kwargs,
        )

    class RightsRelated(StrictKeysMixin):
        """Related rights URI."""

        uri = URL()

    # Fields contributed by the mixin.
    icon = URL()
    related = Nested(RightsRelated)
class ResourceSchema(ResourceMixin, Schema):
    """Resource schema; only the fields listed in ``Meta.fields`` are used.

    Several fields are read from differently named input keys (``id``,
    ``last_modified``, ``name``, ``url``).
    """

    mimetype = Str(allow_none=True)
    cache_last_updated = Str(allow_none=True)
    cache_url = Str(allow_none=True)
    created = DateTime()
    description = Str()
    hash = Str()
    ext_ident = Str(
        data_key='id',
        validate=validate.Length(max=36),
    )
    modified = DateTime(data_key='last_modified', allow_none=True)
    mimetype_inner = Str(allow_none=True)
    title = Str(data_key='name')
    format = Str()
    link = URL(data_key='url')
    datastore_active = Bool()
    package_id = UUID()
    position = Int()
    resource_type = Str(allow_none=True)
    revision_id = UUID()
    size = Str(allow_none=True)
    state = Str()
    url_type = Str(allow_none=True)

    class Meta:
        unknown = EXCLUDE
        fields = (
            'created',
            'modified',
            'ext_ident',
            'title',
            'description',
            'link',
            'format',
        )

    def __init__(self, *args, **kwargs):
        """Create the schema with format validation switched off."""
        super().__init__(*args, **kwargs)
        # TODO: Does it makes sense to validate format here? Disabled for now.
        self.format_validation = False

    @validates_schema
    def validate_format(self, data, **kwargs):
        """Reject unsupported formats when ``format_validation`` is enabled.

        Raises:
            ValidationError: on field ``format`` when validation is enabled
                and the non-empty value is not in
                ``SUPPORTED_RESOURCE_FORMATS``.
        """
        fmt = data.get('format')
        if not self.format_validation:
            return
        if fmt and fmt not in SUPPORTED_RESOURCE_FORMATS:
            raise ValidationError(
                _('Unsupported format: %(format)s.') % {'format': fmt},
                field_name='format')

    @pre_load
    def prepare_data(self, data, **kwargs):
        """Lower-case ``format`` and blank it out when it is not supported.

        The input dict is modified in place and returned.
        """
        if 'format' not in data:
            return data
        lowered = data['format'].lower()
        data['format'] = lowered if lowered in SUPPORTED_RESOURCE_FORMATS else ''
        return data
class DatasetSchema(Schema):
    """Dataset schema; many declared fields are switched off via ``Meta.exclude``.

    Declaration order is kept as-is because ``Meta.ordered`` is enabled.
    """

    author = Str()
    author_email = Str()
    creator_user_id = UUID()
    extras = Nested(ExtraSchema, many=True)
    groups = Nested(CategorySchema, many=True)
    license_id = Str()
    license_title = Str()
    license_url = URL()
    maintainer = Str()
    maintainer_email = Str()
    # Timestamps are read from the ``metadata_*`` input keys.
    created = DateTime(data_key='metadata_created')
    modified = DateTime(data_key='metadata_modified', allow_none=True)
    slug = Str(data_key='name')
    notes = Str()
    num_resources = Int()
    num_tags = Int()
    ext_ident = Str(
        data_key='id',
        validate=validate.Length(max=36),
    )
    isopen = Bool()
    organization = Nested(OrganizationSchema, many=False)
    owner_org = UUID()
    private = Bool()
    relationships_as_object = Nested(RelationshipObjectSchema, many=True)
    relationships_as_subject = Nested(RelationshipSubjectSchema, many=True)
    resources = Nested(ResourceSchema, many=True)
    revision_id = UUID()
    status = Str(data_key='state')
    tags = Nested(TagSchema, many=True)
    title = Str()
    type = Str()
    url = Str()
    version = Str()

    class Meta:
        ordered = True
        unknown = EXCLUDE
        # Declared above for completeness, but left out of the schema.
        exclude = [
            'author',
            'author_email',
            'creator_user_id',
            'extras',
            'groups',
            'license_title',
            'license_url',
            'maintainer',
            'maintainer_email',
            'num_resources',
            'num_tags',
            'isopen',
            'owner_org',
            'private',
            'relationships_as_object',
            'relationships_as_subject',
            'revision_id',
            'type',
            'status',
            'url',
            'version',
        ]
Пример #11
0
class RelatedItemSchema(StrictKeysMixin):
    """Schema of a related item: title, identifiers and journal details."""

    itemTitle = SanitizedUnicode(required=True)
    itemDOI = DOI()
    itemISBN = List(ISBN())
    itemISSN = List(ISSN())
    itemURL = URL()
    itemYear = Year()
    itemVolume = SanitizedUnicode()
    itemIssue = SanitizedUnicode()
    itemStartPage = SanitizedUnicode()
    itemEndPage = SanitizedUnicode()
    itemRelationType = TaxonomyField(mixins=[TitledMixin], required=True)

    @validates_schema
    def required_journal(self, data, **kwargs):
        """Require all journal fields as soon as one journal hint is given.

        If an ISSN, volume, issue, start page or end page has a truthy
        value, then volume, issue, start page and end page must all be
        present as keys in ``data``.

        Raises:
            ValidationError: listing the journal keys absent from ``data``.
        """
        journal_keys = [
            "itemVolume", "itemIssue", "itemStartPage", "itemEndPage"
        ]
        # The ISSN field is declared as ``itemISSN``; no field named "ISSN"
        # exists on this schema, so that key is the one to look up.
        trigger_keys = ["itemISSN", *journal_keys]
        if not any(data.get(key) for key in trigger_keys):
            return

        # Presence of the key (not truthiness) decides what is missing.
        missing = [key for key in journal_keys if key not in data]
        if missing:
            raise ValidationError(
                f"Required field(s) is/are missing: {missing}")

    @validates_schema
    def validate_pages(self, data, **kwargs):
        """Check that the start page does not come after the end page.

        The comparison only runs when both pages have truthy values.

        Raises:
            ValidationError: when a page is not an integer string, or when
                the start page is greater than the end page.
        """
        start_page = data.get("itemStartPage")
        end_page = data.get("itemEndPage")
        if not (start_page and end_page):
            return

        try:
            first, last = int(start_page), int(end_page)
        except ValueError as exc:
            # Pages are free-text fields; report bad input as a validation
            # problem instead of letting ValueError escape the validator.
            raise ValidationError(
                f"Page numbers must be integers: {start_page!r}, {end_page!r}"
            ) from exc

        if first > last:
            raise ValidationError(
                f"Start page ({start_page}) must be smaller than end page ({end_page})"
            )
Пример #12
0
class TopicSchema(Schema):
    """Marshmallow schema for topics."""

    topic_id36 = ID36()
    title = SimpleString(max_length=TITLE_MAX_LENGTH)
    topic_type = Enum(dump_only=True)
    markdown = Markdown(allow_none=True)
    rendered_html = String(dump_only=True)
    link = URL(schemes={"http", "https"}, allow_none=True)
    created_time = DateTime(dump_only=True)
    tags = List(String())

    user = Nested(UserSchema, dump_only=True)
    group = Nested(GroupSchema, dump_only=True)

    @pre_load
    def prepare_title(self, data: dict, many: bool, partial: Any) -> dict:
        """Prepare the title before it's validated.

        Returns a copy of ``data`` in which a single-sentence title has its
        trailing periods removed; ``data`` itself is returned unchanged when
        it has no title.
        """
        # pylint: disable=unused-argument
        if "title" not in data:
            return data

        new_data = data.copy()

        split_title = re.split("[.?!]+", new_data["title"])

        # the last string in the list will be empty if it ended with punctuation
        num_sentences = len([piece for piece in split_title if piece])

        # strip trailing periods off single-sentence titles
        if num_sentences == 1:
            new_data["title"] = new_data["title"].rstrip(".")

        return new_data

    @pre_load
    def prepare_tags(self, data: dict, many: bool, partial: Any) -> dict:
        """Prepare the tags before they're validated.

        Returns a copy of ``data`` whose tags are lower-cased, use single
        spaces instead of underscores, have synonyms replaced and contain
        no empty or duplicate entries (first occurrence wins).
        """
        # pylint: disable=unused-argument
        if "tags" not in data:
            return data

        new_data = data.copy()

        tags: list[str] = []

        for tag in new_data["tags"]:
            tag = tag.lower()

            # replace underscores with spaces
            tag = tag.replace("_", " ")

            # remove any consecutive spaces
            tag = re.sub(" {2,}", " ", tag)

            # remove any leading/trailing spaces
            tag = tag.strip(" ")

            # drop any empty tags
            if not tag or tag.isspace():
                continue

            # handle synonyms
            for name, synonyms in TAG_SYNONYMS.items():
                if tag in synonyms:
                    tag = name

            # skip any duplicate tags
            if tag in tags:
                continue

            tags.append(tag)

        new_data["tags"] = tags

        return new_data

    @validates("tags")
    def validate_tags(self, value: list[str]) -> None:
        """Validate the tags field, raising an error if an issue exists.

        Note that tags are validated by ensuring that each tag would be a valid group
        path. This is definitely mixing concerns, but it's deliberate in this case. It
        will allow for some interesting possibilities by ensuring naming "compatibility"
        between groups and tags. For example, a popular tag in a group could be
        converted into a sub-group easily.
        """
        group_schema = GroupSchema(partial=True)
        for tag in value:
            try:
                group_schema.load({"path": tag})
            except ValidationError as exc:
                raise ValidationError("Tag %s is invalid" % tag) from exc

    @pre_load
    def prepare_markdown(self, data: dict, many: bool, partial: Any) -> dict:
        """Prepare the markdown value before it's validated.

        Returns a copy of ``data`` in which an empty or whitespace-only
        markdown value has been replaced with ``None``.
        """
        # pylint: disable=unused-argument
        if "markdown" not in data:
            return data

        new_data = data.copy()

        # if the value is empty, convert it to None
        if not new_data["markdown"] or new_data["markdown"].isspace():
            new_data["markdown"] = None

        return new_data

    @pre_load
    def prepare_link(self, data: dict, many: bool, partial: Any) -> dict:
        """Prepare the link value before it's validated.

        Returns a copy of ``data`` whose link is stripped, given an
        ``http://`` scheme when it has none, and passed through
        ``apply_url_transformations``. Empty values (including an explicit
        ``None``) become ``None``.
        """
        # pylint: disable=unused-argument
        if "link" not in data:
            return data

        new_data = data.copy()

        # the field is declared with allow_none=True, so an explicit None
        # must be accepted here rather than crashing on .strip()
        link = new_data["link"]
        if link is not None:
            # remove leading/trailing whitespace
            link = link.strip()

        # if the value is empty, convert it to None
        if not link:
            new_data["link"] = None
            return new_data

        # prepend http:// to the link if it doesn't have a scheme
        parsed = urlparse(link)
        if not parsed.scheme:
            link = "http://" + link

        # run the link through the url-transformation process
        new_data["link"] = apply_url_transformations(link)

        return new_data

    @validates_schema
    def link_or_markdown(self, data: dict, many: bool, partial: Any) -> None:
        """Fail validation unless at least one of link or markdown were set.

        Nothing is checked when neither key is present in ``data``.
        """
        # pylint: disable=unused-argument
        if "link" not in data and "markdown" not in data:
            return

        link = data.get("link")
        markdown = data.get("markdown")

        if not (markdown or link):
            raise ValidationError("Topics must have either markdown or a link.")
Пример #13
0
class RightsRelated(StrictKeysMixin):
    """Related rights URI: a schema with a single ``uri`` URL field."""
    uri = URL()
class XMLDatasetSchema(Schema):
    """Schema of a dataset entry read from an XML-derived dict.

    ``prepare_data`` flattens the nested title, description, conditions,
    tags, categories and resources structures before field validation and
    stores the dataset ``intIdent`` in ``self.context``.
    """

    ext_ident = Str(
        data_key='extIdent',
        validate=validate.Length(max=36),
        required=True,
    )
    int_ident = Int(data_key='intIdent')
    status = Str(
        data_key='@status',
        validate=validate.OneOf(choices=['draft', 'published']),
    )
    title_pl = Str()
    title_en = Str()
    notes_pl = Str()
    notes_en = Str()
    url = URL(allow_none=True)
    update_frequency = Str(data_key='updateFrequency')
    license = Str()
    license_chosen = Int(allow_none=True)
    license_condition_db_or_copyrighted = Str(allow_none=True)
    license_condition_modification = Bool(allow_none=True)
    license_condition_personal_data = Str(allow_none=True)
    license_condition_responsibilities = Str(allow_none=True)
    license_condition_source = Bool(allow_none=True)
    created = DateTime(data_key='created', allow_none=True)
    modified = DateTime(data_key='lastUpdateDate', allow_none=True)
    categories = List(Str())
    resources = Nested(XMLResourceSchema, many=True)
    tags = Nested(XMLTagSchema, many=True)

    class Meta:
        ordered = True
        unknown = EXCLUDE

    @pre_load
    def prepare_data(self, data, **kwargs):
        """Flatten nested input values into the flat keys the fields expect.

        The input dict is modified in place and returned. A truthy
        ``intIdent`` is also stored as ``self.context['dataset_int_ident']``.
        """
        title = data.get('title')
        if isinstance(title, dict):
            data['title_en'] = title.get('english', '')
            data['title_pl'] = title.get('polish', '')

        if 'conditions' in data:
            conditions = data['conditions']
            data['license_condition_source'] = conditions.get('source', False)
            data['license_condition_modification'] = conditions.get(
                'modification', False)
            data['license_condition_responsibilities'] = conditions.get(
                'responsibilities')
            data['license_condition_db_or_copyrighted'] = conditions.get(
                'dbOrCopyrighted')
            # Reverse lookup over Dataset.LICENSES: second item -> first item.
            license_by_text = {row[1]: row[0] for row in Dataset.LICENSES}
            chosen_text = conditions.get('dbOrCopyrightedLicenseChosen')
            data['license_chosen'] = license_by_text.get(chosen_text)
            data['license_condition_personal_data'] = conditions.get(
                'personalData')

        description = data.get('description')
        if isinstance(description, dict):
            data['notes_en'] = description.get('english', '')
            data['notes_pl'] = description.get('polish', '')

        if 'tags' in data:
            data['tags'] = data['tags'].get('tag', [])

        if 'categories' in data:
            categories = data['categories']
            if 'category' in categories:  # XSD SCHEMA >= 1.1
                data['categories'] = categories['category']
            else:
                data['categories'] = [str(row) for row in categories]

        if 'resources' in data:
            data['resources'] = data['resources'].get('resource', [])

        dataset_int_ident = data.get('intIdent')
        if dataset_int_ident:
            self.context['dataset_int_ident'] = dataset_int_ident
        return data

    @validates_schema
    def validate_int_ident(self, data, **kwargs):
        """Check that ``int_ident`` points at a dataset of the organization.

        Reads the mandatory ``organization`` key from ``self.context``.

        Raises:
            ValidationError: on field ``int_ident`` when no dataset with
                that id exists for the organization.
        """
        dataset_id = data.get('int_ident')
        organization = self.context['organization']
        if not (dataset_id and organization):
            return
        if Dataset.raw.filter(
                id=dataset_id, organization=organization).exists():
            return
        msg = _(
            'Dataset with id: %(d_id)s and institution: "%(ins)s" was not found.'
        ) % {
            'd_id': dataset_id,
            'ins': organization.title
        }
        raise ValidationError(msg, field_name='int_ident')

    @validates_schema
    def validate_license_condition_personal_data(self, data, **kwargs):
        """Reject any truthy ``license_condition_personal_data`` value."""
        field_name = 'license_condition_personal_data'
        if not data.get(field_name):
            return
        # NOTE(review): the address in this message looks like a placeholder
        # left by e-mail obfuscation - confirm the intended contact address.
        raise ValidationError(
            message=_(
                'Chosen conditions for re-use mean that they contain personal data. '
                'Please contact the administrator at [email protected].'),
            field_name=field_name,
        )

    @validates_schema
    def validate_license_condition_db_or_copyrighted(self, data, **kwargs):
        """Require ``license_chosen`` when the db/copyrighted condition is set."""
        field_name = 'license_condition_db_or_copyrighted'
        if not data.get(field_name) or data.get('license_chosen'):
            return
        raise ValidationError(
            message=_(
                "Field 'dbOrCopyrightedLicenseChosen' is required if field 'dbOrCopyrighted' is provided."
            ),
            field_name=field_name,
        )

    @validates_schema
    def validate_license_chosen(self, data, **kwargs):
        """Require the db/copyrighted condition when ``license_chosen`` is set."""
        field_name = 'license_chosen'
        if not data.get(field_name):
            return
        if data.get('license_condition_db_or_copyrighted'):
            return
        raise ValidationError(
            message=_(
                "Field 'dbOrCopyrighted' is required if field 'dbOrCopyrightedLicenseChosen' is provided."
            ),
            field_name=field_name,
        )
Пример #15
0
class TopicSchema(Schema):
    """Marshmallow schema for topics."""

    topic_id36 = ID36()
    title = SimpleString(max_length=TITLE_MAX_LENGTH)
    topic_type = Enum(dump_only=True)
    markdown = Markdown(allow_none=True)
    rendered_html = String(dump_only=True)
    link = URL(schemes={'http', 'https'}, allow_none=True)
    created_time = DateTime(dump_only=True)
    tags = List(Ltree())

    user = Nested(UserSchema, dump_only=True)
    group = Nested(GroupSchema, dump_only=True)

    @pre_load
    def prepare_tags(self, data: dict) -> dict:
        """Normalize the submitted tags in place before validation.

        Tags are lower-cased, spaces become single underscores, surrounding
        underscores are removed, and empty or repeated tags are dropped
        (the first occurrence is kept).
        """
        if 'tags' not in data:
            return data

        normalized: typing.List[str] = []

        for raw_tag in data['tags']:
            # lower-case, then turn spaces into underscores
            cleaned = raw_tag.lower().replace(' ', '_')

            # collapse runs of underscores and trim them from both ends
            cleaned = re.sub('_{2,}', '_', cleaned).strip('_')

            # nothing left after cleaning - ignore the tag
            if not cleaned or cleaned.isspace():
                continue

            # only keep the first occurrence of each tag
            if cleaned not in normalized:
                normalized.append(cleaned)

        data['tags'] = normalized

        return data

    @validates('tags')
    def validate_tags(
        self,
        value: typing.List[sqlalchemy_utils.Ltree],
    ) -> None:
        """Raise a validation error if any tag is not a valid group path.

        Checking tags against the group-path rules mixes concerns on
        purpose: it keeps tag and group names compatible, so that, for
        example, a popular tag could later be turned into a sub-group.
        """
        path_checker = GroupSchema(partial=True)
        for tag in value:
            try:
                path_checker.validate({'path': tag})
            except ValidationError:
                raise ValidationError('Tag %s is invalid' % tag)

    @pre_load
    def prepare_markdown(self, data: dict) -> dict:
        """Turn an empty or whitespace-only markdown value into None."""
        if 'markdown' in data:
            text = data['markdown']
            if not text or text.isspace():
                data['markdown'] = None

        return data

    @pre_load
    def prepare_link(self, data: dict) -> dict:
        """Normalize the link in place before validation.

        An empty or whitespace-only link becomes None; a link without a
        scheme gets ``http://`` prepended.
        """
        if 'link' not in data:
            return data

        link = data['link']

        # blank values are stored as None
        if not link or link.isspace():
            data['link'] = None
            return data

        # default to http:// when the link has no scheme
        if not urlparse(link).scheme:
            data['link'] = 'http://' + link

        return data

    @validates_schema
    def link_or_markdown(self, data: dict) -> None:
        """Require a link or markdown whenever either key was supplied."""
        if not ('link' in data or 'markdown' in data):
            return

        if data.get('markdown') or data.get('link'):
            return

        raise ValidationError(
            'Topics must have either markdown or a link.')

    class Meta:
        """Always use strict checking so error handlers are invoked."""

        strict = True
Пример #16
0
    class RightsRelated(StrictKeysMixin):
        """Related rights URI: a schema with a single ``uri`` field."""

        # URL-validated address of the related rights entry.
        uri = URL()
Пример #17
0
class TopicSchema(Schema):
    """Marshmallow schema for topics."""

    topic_id36 = ID36()
    title = SimpleString(max_length=TITLE_MAX_LENGTH)
    topic_type = Enum(dump_only=True)
    markdown = Markdown(allow_none=True)
    rendered_html = String(dump_only=True)
    link = URL(schemes={"http", "https"}, allow_none=True)
    created_time = DateTime(dump_only=True)
    tags = List(Ltree())

    user = Nested(UserSchema, dump_only=True)
    group = Nested(GroupSchema, dump_only=True)

    @pre_load
    def prepare_tags(self, data: dict) -> dict:
        """Normalize the submitted tags in place before validation.

        Tags are lower-cased, spaces become single underscores, surrounding
        underscores are removed, synonyms are replaced, and empty or
        repeated tags are dropped (the first occurrence is kept).
        """
        if "tags" not in data:
            return data

        normalized: typing.List[str] = []

        for raw_tag in data["tags"]:
            # lower-case, then turn spaces into underscores
            cleaned = raw_tag.lower().replace(" ", "_")

            # collapse runs of underscores and trim them from both ends
            cleaned = re.sub("_{2,}", "_", cleaned).strip("_")

            # nothing left after cleaning - ignore the tag
            if not cleaned or cleaned.isspace():
                continue

            # swap a synonym for its canonical name; the scan runs over every
            # entry, so a replaced name is itself checked against later ones
            for canonical, aliases in TAG_SYNONYMS.items():
                if cleaned in aliases:
                    cleaned = canonical

            # only keep the first occurrence of each tag
            if cleaned not in normalized:
                normalized.append(cleaned)

        data["tags"] = normalized

        return data

    @validates("tags")
    def validate_tags(self, value: typing.List[sqlalchemy_utils.Ltree]) -> None:
        """Raise a validation error if any tag is not a valid group path.

        Checking tags against the group-path rules mixes concerns on purpose: it
        keeps tag and group names compatible, so that, for example, a popular tag
        could later be turned into a sub-group.
        """
        path_checker = GroupSchema(partial=True)
        for tag in value:
            try:
                path_checker.validate({"path": str(tag)})
            except ValidationError:
                raise ValidationError("Tag %s is invalid" % tag)

    @pre_load
    def prepare_markdown(self, data: dict) -> dict:
        """Turn an empty or whitespace-only markdown value into None."""
        if "markdown" in data:
            text = data["markdown"]
            if not text or text.isspace():
                data["markdown"] = None

        return data

    @pre_load
    def prepare_link(self, data: dict) -> dict:
        """Normalize the link in place before validation.

        An empty or whitespace-only link becomes None. Otherwise a missing
        scheme is filled in with ``http://`` and the result is passed through
        ``apply_url_transformations``.
        """
        if "link" not in data:
            return data

        link = data["link"]

        # blank values are stored as None
        if not link or link.isspace():
            data["link"] = None
            return data

        # default to http:// when the link has no scheme
        if not urlparse(link).scheme:
            link = "http://" + link
            data["link"] = link

        # run the link through the url-transformation process
        data["link"] = apply_url_transformations(link)

        return data

    @validates_schema
    def link_or_markdown(self, data: dict) -> None:
        """Require a link or markdown whenever either key was supplied."""
        if not ("link" in data or "markdown" in data):
            return

        if data.get("markdown") or data.get("link"):
            return

        raise ValidationError("Topics must have either markdown or a link.")

    class Meta:
        """Always use strict checking so error handlers are invoked."""

        strict = True