Пример #1
0
class PeriodicTask(BasePeriodicTask):
    """Periodic task able to describe its own schedule as text.

    Adds the identifier of the last run to ``BasePeriodicTask`` and
    overrides the ``Interval`` and ``Crontab`` embedded documents so that
    ``str()`` on them yields a displayable schedule.
    """
    last_run_id = db.StringField()

    class Interval(BasePeriodicTask.Interval):
        """Interval schedule with a translated string form."""

        def __str__(self):
            # A singular wording is used when the task runs every 1 period.
            if self.every == 1:
                return _('every {0.period_singular}').format(self)
            return _('every {0.every} {0.period}').format(self)

    class Crontab(BasePeriodicTask.Crontab):
        """Crontab schedule rendered through the ``CRON`` format string."""

        def __str__(self):
            return CRON.format(**self._data)

        @classmethod
        def parse(cls, cron):
            """Build a ``Crontab`` from a five-field cron expression.

            :param cron: whitespace-separated string holding, in order,
                minute, hour, day of month, month of year and day of week.
            :returns: a new ``Crontab`` instance (not saved).
            :raises ValueError: if the expression does not contain exactly
                five fields.
            """
            fields = cron.split()
            if len(fields) != 5:
                # Same exception type as the bare tuple unpacking raised,
                # but with a message that names the offending expression.
                raise ValueError(
                    'Expected 5 fields in cron expression, '
                    'got {0}: {1!r}'.format(len(fields), cron))
            minute, hour, day_of_month, month_of_year, day_of_week = fields
            return cls(
                minute=minute,
                hour=hour,
                day_of_month=day_of_month,
                month_of_year=month_of_year,
                day_of_week=day_of_week,
            )

    @property
    def schedule_display(self):
        """Return the interval or, failing that, the crontab as a string.

        :raises Exception: if neither ``interval`` nor ``crontab`` is set.
        """
        if self.interval:
            return str(self.interval)
        elif self.crontab:
            return str(self.crontab)
        else:
            raise Exception("must define interval or crontab schedule")

    # Declared after the nested classes so the fields use the overridden
    # embedded documents defined above.
    interval = db.EmbeddedDocumentField(Interval)
    crontab = db.EmbeddedDocumentField(Crontab)
Пример #2
0
class HarvestJob(db.Document):
    '''Keep track of a harvesting run: timestamps, status, errors and items.'''
    # Required creation timestamp; `datetime.now` is passed uncalled as the
    # default.
    created = db.DateTimeField(default=datetime.now, required=True)
    # Optional timestamps, no default.
    started = db.DateTimeField()
    ended = db.DateTimeField()
    # Required status restricted to the keys of HARVEST_JOB_STATUS.
    status = db.StringField(choices=HARVEST_JOB_STATUS.keys(),
                            default=DEFAULT_HARVEST_JOB_STATUS, required=True)
    # Embedded errors and items attached to this job.
    errors = db.ListField(db.EmbeddedDocumentField(HarvestError))
    items = db.ListField(db.EmbeddedDocumentField(HarvestItem))
    # Reference to the harvested source, declared with the NULLIFY reverse
    # delete rule.
    source = db.ReferenceField(HarvestSource, reverse_delete_rule=db.NULLIFY)
Пример #3
0
class Fake(db.Document):
    """Document mixing scalar, list, embedded and dict fields."""
    title = db.StringField()
    description = db.StringField()
    # Two independent lists of strings.
    tags = db.ListField(db.StringField())
    other = db.ListField(db.StringField())
    # `NestedFake` is used both as a list item and as a single embedded value.
    nested = db.ListField(db.EmbeddedDocumentField(NestedFake))
    sub = db.EmbeddedDocumentField(NestedFake)
    metrics = db.DictField()

    def __str__(self):
        """Return the constant string 'fake' whatever the field values."""
        return 'fake'
Пример #4
0
class Discussion(db.Document):
    """Thread of messages attached to a generically referenced subject."""
    user = db.ReferenceField('User')
    subject = db.GenericReferenceField()
    title = db.StringField(required=True)
    discussion = db.ListField(db.EmbeddedDocumentField(Message))
    created = db.DateTimeField(default=datetime.now, required=True)
    closed = db.DateTimeField()
    closed_by = db.ReferenceField('User')
    extras = db.ExtrasField()

    meta = {
        'indexes': ['user', 'subject', '-created'],
        'ordering': ['-created'],
    }

    def person_involved(self, person):
        """Return True if `person` posted at least one message of the
        discussion, False otherwise.
        """
        for message in self.discussion:
            if message.posted_by == person:
                return True
        return False

    @property
    def external_url(self):
        """External URL of the subject, anchored on this discussion."""
        anchor = 'discussion-{id}'.format(id=self.id)
        return self.subject.url_for(_anchor=anchor, _external=True)
Пример #5
0
class Resource(WithMetrics, db.EmbeddedDocument):
    """Embedded resource description with a hash of its URL."""
    id = db.AutoUUIDField()
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    type = db.StringField(
        choices=RESOURCE_TYPES.keys(), default='file', required=True)
    url = db.StringField()
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    size = db.IntField()
    owner = db.ReferenceField('User')

    created_at = db.DateTimeField(
        default=datetime.datetime.now, required=True)
    modified = db.DateTimeField(
        default=datetime.datetime.now, required=True)
    published = db.DateTimeField(
        default=datetime.datetime.now, required=True)
    deleted = db.DateTimeField()

    on_added = Signal()
    on_deleted = Signal()

    def clean(self):
        """Recompute `urlhash` when it is missing or when `url` changed."""
        super(Resource, self).clean()
        # Nothing to do when a hash exists and the URL was left untouched.
        if self.urlhash and 'url' not in self._get_changed_fields():
            return
        self.urlhash = hash_url(self.url)
Пример #6
0
class Site(WithMetrics, db.Document):
    """Site document keyed by a string identifier."""
    # String primary key (no auto-generated id).
    id = db.StringField(primary_key=True)
    title = db.StringField(required=True)
    keywords = db.ListField(db.StringField())
    # Required; falls back to DEFAULT_FEED_SIZE.
    feed_size = db.IntField(required=True, default=DEFAULT_FEED_SIZE)
    configs = db.DictField()
    themes = db.DictField()
    # The `SiteSettings` class itself is given as the default.
    settings = db.EmbeddedDocumentField(SiteSettings, default=SiteSettings)
Пример #7
0
class HarvestJob(db.Document):
    '''Keep track of a harvesting run: timestamps, status, errors and items.'''
    # Required creation timestamp; `datetime.now` is passed uncalled as the
    # default.
    created = db.DateTimeField(default=datetime.now, required=True)
    # Optional timestamps, no default.
    started = db.DateTimeField()
    ended = db.DateTimeField()
    # Required status restricted to the keys of HARVEST_JOB_STATUS.
    status = db.StringField(choices=HARVEST_JOB_STATUS.keys(),
                            default=DEFAULT_HARVEST_JOB_STATUS,
                            required=True)
    # Embedded errors and items attached to this job.
    errors = db.ListField(db.EmbeddedDocumentField(HarvestError))
    items = db.ListField(db.EmbeddedDocumentField(HarvestItem))
    # Reference to the harvested source, declared with the CASCADE reverse
    # delete rule.
    source = db.ReferenceField(HarvestSource, reverse_delete_rule=db.CASCADE)
    # Free-form dictionary; its schema is not visible from this class.
    data = db.DictField()

    meta = {
        # Single-field indexes plus a compound (source, -created) one.
        'indexes': ['-created', 'source', ('source', '-created')],
        # Default ordering on `-created`.
        'ordering': ['-created'],
    }
Пример #8
0
class HarvestSource(db.Owned, db.Document):
    """Remote source to harvest, with its backend, schedule and validation."""
    name = db.StringField(max_length=255)
    # Unique slug populated from `name` and kept in sync (`update=True`).
    slug = db.SlugField(max_length=255,
                        required=True,
                        unique=True,
                        populate_from='name',
                        update=True)
    description = db.StringField()
    url = db.StringField(required=True)
    backend = db.StringField()
    config = db.DictField()
    periodic_task = db.ReferenceField('PeriodicTask',
                                      reverse_delete_rule=db.NULLIFY)
    created_at = db.DateTimeField(default=datetime.now, required=True)
    frequency = db.StringField(choices=HARVEST_FREQUENCIES.keys(),
                               default=DEFAULT_HARVEST_FREQUENCY,
                               required=True)
    active = db.BooleanField(default=True)
    validation = db.EmbeddedDocumentField(HarvestSourceValidation,
                                          default=HarvestSourceValidation)

    deleted = db.DateTimeField()

    @property
    def domain(self):
        """Host name of `url`, without credentials, port or IPv6 brackets.

        Uses the parsed `hostname` rather than splitting `netloc` on ':',
        which returned the user name for `user:pw@host` URLs and '[' for
        IPv6 literals. The host is lowercased by `urlparse`; an empty
        string is returned when the URL has no host.
        """
        return urlparse(self.url).hostname or ''

    @classmethod
    def get(cls, ident):
        """Look a source up by slug, falling back to a primary key lookup."""
        return cls.objects(slug=ident).first() or cls.objects.get(pk=ident)

    def get_last_job(self):
        """Return the first job of this source ordered by `-created`."""
        return HarvestJob.objects(source=self).order_by('-created').first()

    @cached_property
    def last_job(self):
        """Cached result of `get_last_job()`."""
        return self.get_last_job()

    @property
    def schedule(self):
        """Displayable schedule of the periodic task, or None without one."""
        if not self.periodic_task:
            return
        return self.periodic_task.schedule_display

    meta = {
        'indexes': [
            '-created_at',
            'slug',
            ('deleted', '-created_at'),
        ] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class': HarvestSourceQuerySet,
    }

    def __unicode__(self):
        return self.name or ''
Пример #9
0
class Fake(db.Document):
    """Document mixing scalar, list, embedded and dict fields, with metrics."""
    title = db.StringField()
    description = db.StringField()
    # Two independent lists of strings.
    tags = db.ListField(db.StringField())
    other = db.ListField(db.StringField())
    # `NestedFake` is used both as a list item and as a single embedded value.
    nested = db.ListField(db.EmbeddedDocumentField(NestedFake))
    sub = db.EmbeddedDocumentField(NestedFake)
    metrics = db.DictField()

    # Metric keys declared for this document; the consumer of this attribute
    # is not visible from here.
    __metrics_keys__ = [
        'fake-metric-int',
        'fake-metric-float',
    ]

    def __str__(self):
        """Return the constant string 'fake' whatever the field values."""
        return 'fake'

    def get_metrics(self):
        """Return the `metrics` dictionary as stored on the document."""
        return self.metrics
Пример #10
0
class HarvestItem(db.EmbeddedDocument):
    """Embedded record of one harvested item and its processing state."""
    remote_id = db.StringField()
    dataset = db.ReferenceField(Dataset)
    # Required status restricted to the keys of HARVEST_ITEM_STATUS.
    status = db.StringField(choices=HARVEST_ITEM_STATUS.keys(),
                            default=DEFAULT_HARVEST_ITEM_STATUS, required=True)
    # Required creation timestamp; `datetime.now` is passed uncalled as the
    # default.
    created = db.DateTimeField(default=datetime.now, required=True)
    started = db.DateTimeField()
    ended = db.DateTimeField()
    errors = db.ListField(db.EmbeddedDocumentField(HarvestError))
    # Positional (strings) and keyword values stored with the item; how they
    # are consumed is not visible from this class.
    args = db.ListField(db.StringField())
    kwargs = db.DictField()
Пример #11
0
class Issue(db.Document):
    """Issue raised by a user about a generically referenced subject."""
    user = db.ReferenceField('User')
    # Generic reference: no target document class is declared here.
    subject = db.GenericReferenceField()
    title = db.StringField(required=True)
    # Messages embedded in the issue.
    discussion = db.ListField(db.EmbeddedDocumentField(Message))
    # Required creation timestamp; `datetime.now` is passed uncalled as the
    # default.
    created = db.DateTimeField(default=datetime.now, required=True)
    closed = db.DateTimeField()
    closed_by = db.ReferenceField('User')

    meta = {
        'indexes': ['user', 'subject', '-created'],
        'ordering': ['-created'],
    }
Пример #12
0
class Discussion(db.Document):
    """Thread of messages attached to a `db.DomainModel` subject."""
    user = db.ReferenceField('User')
    subject = db.ReferenceField(db.DomainModel)
    title = db.StringField(required=True)
    # Messages embedded in the discussion.
    discussion = db.ListField(db.EmbeddedDocumentField(Message))
    # Required creation timestamp; `datetime.now` is passed uncalled as the
    # default.
    created = db.DateTimeField(default=datetime.now, required=True)
    closed = db.DateTimeField()
    closed_by = db.ReferenceField('User')

    meta = {
        'indexes': ['user', 'subject', 'created'],
        # Subclasses of this document are allowed.
        'allow_inheritance': True,
        'ordering': ['created'],
    }
Пример #13
0
class PeriodicTask(BasePeriodicTask):
    """Periodic task able to describe its own schedule as text.

    Adds the identifier of the last run to ``BasePeriodicTask`` and
    overrides the ``Interval`` and ``Crontab`` embedded documents with a
    displayable ``__unicode__``.
    """
    last_run_id = db.StringField()

    class Interval(BasePeriodicTask.Interval):
        """Interval schedule with a translated text form."""

        def __unicode__(self):
            # A singular wording is used when the task runs every 1 period.
            if self.every == 1:
                return _('every {0.period_singular}').format(self)
            return _('every {0.every} {0.period}').format(self)

    class Crontab(BasePeriodicTask.Crontab):
        """Crontab schedule rendered through the ``CRON`` format string."""

        def __unicode__(self):
            return CRON.format(**self._data)

    @property
    def schedule_display(self):
        """Return the interval or, failing that, the crontab as a string.

        :raises Exception: if neither ``interval`` nor ``crontab`` is set.
        """
        if self.interval:
            return str(self.interval)
        elif self.crontab:
            return str(self.crontab)
        else:
            raise Exception("must define interval or crontab schedule")

    # Declared after the nested classes so the fields use the overridden
    # embedded documents defined above.
    interval = db.EmbeddedDocumentField(Interval)
    crontab = db.EmbeddedDocumentField(Crontab)
Пример #14
0
class ResourceMixin(object):
    """Fields and helpers shared by resource documents."""
    id = db.AutoUUIDField(primary_key=True)
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    filetype = db.StringField(choices=RESOURCE_TYPES.keys(),
                              default='file',
                              required=True)
    url = db.StringField()
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    filesize = db.IntField()  # `size` is a reserved keyword for mongoengine.

    created_at = db.DateTimeField(default=datetime.now, required=True)
    modified = db.DateTimeField(default=datetime.now, required=True)
    published = db.DateTimeField(default=datetime.now, required=True)
    deleted = db.DateTimeField()

    def clean(self):
        """Recompute `urlhash` when it is missing or when `url` changed."""
        super(ResourceMixin, self).clean()
        if not self.urlhash or 'url' in self._get_changed_fields():
            self.urlhash = hash_url(self.url)

    @property
    def closed_format(self):
        """Return True if the specified format is in CLOSED_FORMATS.

        `format` is optional: a resource without a format is reported as
        not closed instead of failing on `None.lower()`.
        """
        return bool(self.format) and self.format.lower() in CLOSED_FORMATS

    def check_availability(self, group):
        """Check if a resource is reachable against a Croquemort server.

        Only resources whose `filetype` is 'remote' are checked; every
        other file type is considered available.

        :param group: value forwarded as-is to `check_url_from_cache`.
        :returns: a boolean.
        """
        if self.filetype != 'remote':
            return True  # We consider that API cases (types) are OK.
        # We perform a quick check for performance reasons.
        error, response = check_url_from_cache(self.url, group)
        # `response` is only read when no error was reported; a missing
        # status counts as a 500.
        return not (error or int(response.get('status', 500)) >= 500)

    @property
    def is_available(self):
        """Availability of the resource, checked with `group=None`."""
        return self.check_availability(group=None)
Пример #15
0
class Discussion(db.Document):
    """Thread of messages attached to a `db.DomainModel` subject."""
    user = db.ReferenceField('User')
    subject = db.ReferenceField(db.DomainModel)
    title = db.StringField(required=True)
    discussion = db.ListField(db.EmbeddedDocumentField(Message))
    created = db.DateTimeField(default=datetime.now, required=True)
    closed = db.DateTimeField()
    closed_by = db.ReferenceField('User')

    meta = {
        'indexes': [
            'user',
            'subject',
            'created',
        ],
        'allow_inheritance': True,
        'ordering': ['created'],
    }

    def person_involved(self, person):
        """Return True if `person` posted at least one message of the
        discussion, False otherwise.
        """
        for message in self.discussion:
            if message.posted_by == person:
                return True
        return False
Пример #16
0
class SpatialCoverage(db.EmbeddedDocument):
    '''Spatial coverage expressed as territories and/or a geometry.'''
    geom = db.MultiPolygonField()
    territories = db.ListField(db.EmbeddedDocumentField(TerritoryReference))
    granularity = db.StringField(
        choices=SPATIAL_GRANULARITIES.keys(), default='other')

    @property
    def granularity_label(self):
        '''Label of the granularity, 'other' being used when it is unset.'''
        key = self.granularity or 'other'
        return SPATIAL_GRANULARITIES[key]

    @property
    def top_label(self):
        '''Name of the territory whose level has the lowest position.

        Returns None without territories; the first territory wins a tie.
        '''
        if not self.territories:
            return None
        highest = None
        for candidate in self.territories:
            # The first territory is taken as-is; the following ones only
            # replace it when their level position is strictly lower.
            if (not highest or
                    LEVELS[candidate.level]['position'] <
                    LEVELS[highest.level]['position']):
                highest = candidate
        return highest.name
Пример #17
0
class Issue(db.Document):
    """Typed issue raised by a user about a `db.DomainModel` subject."""
    user = db.ReferenceField('User')
    subject = db.ReferenceField(db.DomainModel)
    type = db.StringField(choices=ISSUE_TYPES.keys())

    discussion = db.ListField(db.EmbeddedDocumentField(Message))

    created = db.DateTimeField(default=datetime.now, required=True)
    closed = db.DateTimeField()
    closed_by = db.ReferenceField('User')

    meta = {
        'indexes': [
            'user',
            'subject',
            'created',
        ],
        'allow_inheritance': True,
    }

    @property
    def type_label(self):
        """Entry of ISSUE_TYPES matching the issue type."""
        issue_type = self.type
        return ISSUE_TYPES[issue_type]

    @property
    def description(self):
        """Content of the first message of the discussion."""
        opening_message = self.discussion[0]
        return opening_message.content
Пример #18
0
class ResourceMixin(object):
    """Fields and helpers shared by resource documents.

    Availability information is read from the `check:*` keys of `extras`.
    """
    id = db.AutoUUIDField(primary_key=True)
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    filetype = db.StringField(choices=RESOURCE_FILETYPES.keys(),
                              default='file',
                              required=True)
    type = db.StringField(choices=RESOURCE_TYPES.keys(),
                          default='main',
                          required=True)
    url = db.URLField(required=True)
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    filesize = db.IntField()  # `size` is a reserved keyword for mongoengine.
    extras = db.ExtrasField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    modified = db.DateTimeField(default=datetime.now, required=True)
    published = db.DateTimeField(default=datetime.now, required=True)
    deleted = db.DateTimeField()

    def clean(self):
        '''Recompute `urlhash` when it is missing or when `url` changed.'''
        super(ResourceMixin, self).clean()
        # Nothing to do when a hash exists and the URL was left untouched.
        if self.urlhash and 'url' not in self._get_changed_fields():
            return
        self.urlhash = hash_url(self.url)

    @cached_property  # Accessed at least 2 times in front rendering
    def preview_url(self):
        '''Preview URL as computed by `get_preview_url`.'''
        return get_preview_url(self)

    @property
    def closed_or_no_format(self):
        '''
        Return True if no format has been specified or if the specified
        format is in CLOSED_FORMATS.
        '''
        if not self.format:
            return True
        return self.format.lower() in CLOSED_FORMATS

    def check_availability(self):
        '''
        Return the check status stored in extras, 'unknown' without one.

        NB: `unknown` will evaluate to True in the aggregate checks using
        `all([])` (dataset, organization, user).
        '''
        status = self.extras.get('check:available', 'unknown')
        return status

    def need_check(self):
        '''Does the resource need to be checked by its linkchecker?

        Unavailable resources are checked often, unless they go over the
        threshold. Available resources are checked less and less
        frequently based on their historical availability.
        '''
        settings = current_app.config
        min_cache_duration = settings.get('LINKCHECKING_MIN_CACHE_DURATION')
        max_cache_duration = settings.get('LINKCHECKING_MAX_CACHE_DURATION')
        ko_threshold = settings.get('LINKCHECKING_UNAVAILABLE_THRESHOLD')

        count_availability = self.extras.get('check:count-availability', 1)
        availability = self.check_availability()
        if availability == 'unknown':
            return True

        if availability or count_availability > ko_threshold:
            # The cache duration grows with the availability count, up to
            # the configured maximum.
            delta = min(min_cache_duration * count_availability,
                        max_cache_duration)
        else:
            delta = min_cache_duration

        last_check = self.extras.get('check:date')
        if not last_check:
            # Never checked so far.
            return True

        limit_date = datetime.now() - timedelta(minutes=delta)
        if not isinstance(last_check, datetime):
            try:
                last_check = parse_dt(last_check)
            except (ValueError, TypeError):
                # An unreadable date is treated as "check again".
                return True
        return not last_check >= limit_date

    @property
    def latest(self):
        '''
        Permanent link to the latest version of this resource.

        If this resource is updated and `url` changes, this property won't.
        '''
        permalink = url_for('datasets.resource', id=self.id, _external=True)
        return permalink

    @cached_property
    def json_ld(self):
        '''JSON-LD `DataDownload` description of the resource, as a dict.'''
        result = {
            '@type': 'DataDownload',
            '@id': str(self.id),
            'url': self.latest,
            'name': self.title or _('Nameless resource'),
            'contentUrl': self.url,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.modified.isoformat(),
            'datePublished': self.published.isoformat(),
            'extras': [get_json_ld_extra(key, value)
                       for key, value in self.extras.items()],
        }

        if 'views' in self.metrics:
            download_counter = {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': self.metrics['views']
            }
            result['interactionStatistic'] = download_counter

        # Optional attributes are only exported when they hold a value.
        optional_attributes = (
            ('encodingFormat', self.format),
            ('contentSize', self.filesize),
            ('fileFormat', self.mime),
        )
        for key, value in optional_attributes:
            if value:
                result[key] = value

        if self.description:
            result['description'] = mdstrip(self.description)

        return result
Пример #19
0
class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
    """Dataset document with its resources, signals and quality metrics."""
    created_at = DateTimeField(
        verbose_name=_('Creation date'), default=datetime.now, required=True)
    last_modified = DateTimeField(
        verbose_name=_('Last modification date'), default=datetime.now,
        required=True)
    title = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='title', update=True,
        follow=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.TagListField()
    resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField()
    frequency = db.StringField(choices=UPDATE_FREQUENCIES.keys())
    frequency_date = db.DateTimeField(verbose_name=_('Future date of update'))
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __unicode__(self):
        return self.title or ''

    __badges__ = {
        PIVOTAL_DATA: _('Pivotal data'),
    }

    meta = {
        'indexes': [
            '-created_at',
            'slug',
            'resources.id',
            'resources.urlhash',
        ] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class': DatasetQuerySet,
    }

    # Signals sent around the life cycle of a dataset.
    before_save = signal('Dataset.before_save')
    after_save = signal('Dataset.after_save')
    on_create = signal('Dataset.on_create')
    on_update = signal('Dataset.on_update')
    before_delete = signal('Dataset.before_delete')
    after_delete = signal('Dataset.after_delete')
    on_delete = signal('Dataset.on_delete')
    on_resource_added = signal('Dataset.on_resource_added')

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Send `before_save` for the document."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send the signals matching what just happened to the document.

        Nothing is sent when 'post_save' is listed in the `ignores` keyword
        argument.
        """
        if 'post_save' in kwargs.get('ignores', []):
            return
        cls.after_save.send(document)
        # Exactly one of on_create / on_update is sent.
        lifecycle_signal = (
            cls.on_create if kwargs.get('created') else cls.on_update)
        lifecycle_signal.send(document)
        if document.deleted:
            cls.on_delete.send(document)
        added_resource_id = kwargs.get('resource_added')
        if added_resource_id:
            cls.on_resource_added.send(document,
                                       resource_id=added_resource_id)

    def clean(self):
        """Replace a legacy frequency by its LEGACY_FREQUENCIES mapping."""
        super(Dataset, self).clean()
        current = self.frequency
        if current in LEGACY_FREQUENCIES:
            self.frequency = LEGACY_FREQUENCIES[current]

    def url_for(self, *args, **kwargs):
        """URL of the 'datasets.show' endpoint for this dataset."""
        return url_for('datasets.show', dataset=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def is_visible(self):
        """Negation of `is_hidden`."""
        return not self.is_hidden

    @property
    def is_hidden(self):
        """Truthy without resources, or when private or deleted."""
        if len(self.resources) == 0:
            return True
        return self.private or self.deleted

    @property
    def full_title(self):
        """Title, followed by the acronym between parentheses if any."""
        if self.acronym:
            return '{title} ({acronym})'.format(**self._data)
        return self.title

    @property
    def external_url(self):
        """External form of the dataset URL."""
        return self.url_for(_external=True)

    @property
    def image_url(self):
        """Organization logo URL, else owner avatar URL, else None."""
        if self.organization:
            return self.organization.logo.url
        if self.owner:
            return self.owner.avatar.url
        return None

    @property
    def frequency_label(self):
        """Label of the frequency, 'unknown' being the fallback."""
        key = self.frequency or 'unknown'
        return UPDATE_FREQUENCIES.get(key, UPDATE_FREQUENCIES['unknown'])

    def check_availability(self):
        """Check if resources from that dataset are available.

        Only remote resources are checked.

        Return a list of (boolean or 'unknown'), empty without any remote
        resource.
        """
        remote_resources = [candidate
                            for candidate in self.resources
                            if candidate.filetype == 'remote']
        return [remote.check_availability() for remote in remote_resources]

    @property
    def last_update(self):
        """Latest resource publication date, else `last_modified`."""
        if not self.resources:
            return self.last_modified
        return max(resource.published for resource in self.resources)

    @property
    def next_update(self):
        """Compute the next expected update date, given the frequency and
        last_update.

        Return None if the frequency is not handled.
        """
        # Time span between two updates for each handled frequency.
        deltas = {
            'daily': timedelta(days=1),
            'weekly': timedelta(weeks=1),
            'fortnighly': timedelta(weeks=2),
            'monthly': timedelta(weeks=4),
            'bimonthly': timedelta(weeks=4 * 2),
            'quarterly': timedelta(weeks=52 / 4),
            'biannual': timedelta(weeks=52 / 2),
            'annual': timedelta(weeks=52),
            'biennial': timedelta(weeks=52 * 2),
            'triennial': timedelta(weeks=52 * 3),
            'quinquennial': timedelta(weeks=52 * 5),
        }
        delta = deltas.get(self.frequency)
        if delta is None:
            return None
        return self.last_update + delta

    @cached_property
    def quality(self):
        """Return a dict filled with metrics related to the inner quality
        of the dataset:

            * number of tags
            * description length
            * and so on

        An empty dict is returned for a dataset without id.
        """
        from udata.models import Discussion  # noqa: Prevent circular imports
        result = {}
        if not self.id:
            # Quality is only relevant on saved Datasets
            return result

        next_update = self.next_update
        if next_update:
            result['frequency'] = self.frequency
            result['update_in'] = -(next_update - datetime.now()).days

        if self.tags:
            result['tags_count'] = len(self.tags)

        if self.description:
            result['description_length'] = len(self.description)

        if self.resources:
            result['has_resources'] = True
            result['has_only_closed_or_no_formats'] = all(
                resource.closed_or_no_format for resource in self.resources)
            result['has_unavailable_resources'] = any(
                not status for status in self.check_availability())

        discussions = Discussion.objects(subject=self)
        if discussions:
            result['discussions'] = len(discussions)
            result['has_untreated_discussions'] = any(
                not discussion.person_involved(self.owner)
                for discussion in discussions)

        result['score'] = self.compute_quality_score(result)
        return result

    def compute_quality_score(self, quality):
        """Compute the score related to the quality of that dataset.

        Each criterion present in `quality` adds or removes one UNIT; the
        returned score is never negative.
        """
        UNIT = 2
        score = 0
        if 'frequency' in quality:
            # TODO: should be related to frequency.
            score += UNIT if quality['update_in'] < 0 else -UNIT
        if 'tags_count' in quality and quality['tags_count'] > 3:
            score += UNIT
        if ('description_length' in quality and
                quality['description_length'] > 100):
            score += UNIT
        if 'has_resources' in quality:
            score += (-UNIT if quality['has_only_closed_or_no_formats']
                      else UNIT)
            score += -UNIT if quality['has_unavailable_resources'] else UNIT
        if 'discussions' in quality:
            score += -UNIT if quality['has_untreated_discussions'] else UNIT
        return max(score, 0)

    @classmethod
    def get(cls, id_or_slug):
        """Look a dataset up by slug, then by id through `get_or_404`."""
        by_slug = cls.objects(slug=id_or_slug).first()
        if by_slug:
            return by_slug
        return cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        resource.validate()
        # `$position: 0` inserts the resource at the head of the list.
        prepend = {
            '$each': [resource.to_mongo()],
            '$position': 0
        }
        self.update(__raw__={'$push': {'resources': prepend}})
        self.reload()
        post_save.send(self.__class__, document=self,
                       resource_added=resource.id)

    def update_resource(self, resource):
        '''Perform an atomic update for an existing resource'''
        position = self.resources.index(resource)
        field = 'resources__{index}'.format(index=position)
        self.update(**{field: resource})
        self.reload()
        post_save.send(self.__class__, document=self)

    @property
    def community_resources(self):
        """Community resources of the dataset, [] when there are none or
        when the dataset has no id.
        """
        if not self.id:
            return []
        return CommunityResource.objects.filter(dataset=self) or []

    @cached_property
    def json_ld(self):
        """JSON-LD `Dataset` description of the dataset, as a dict."""
        result = {
            '@context': 'http://schema.org',
            '@type': 'Dataset',
            '@id': str(self.id),
            'alternateName': self.slug,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat(),
            'url': url_for('datasets.show', dataset=self, _external=True),
            'name': self.title,
            'keywords': ','.join(self.tags),
            'distribution': [resource.json_ld for resource in self.resources],
            # These values are not standard
            'contributedDistribution': [
                resource.json_ld for resource in self.community_resources
            ],
            'extras': [get_json_ld_extra(key, value)
                       for key, value in self.extras.items()],
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        if self.license and self.license.url:
            result['license'] = self.license.url

        # The organization takes precedence over the owner as author.
        publisher = self.organization or self.owner
        author = publisher.json_ld if publisher else None
        if author:
            result['author'] = author

        return result
Пример #20
0
class GeoZone(db.Document):
    '''
    Geographical zone stored under a string primary key.

    Besides its identity (``slug``, ``name``, ``level``, ``code``) a zone
    carries its geometry, its relations to other zones (``parents``,
    ``ancestors``, ``successors``), an optional validity range and some
    descriptive data (population, area, wikipedia/dbpedia references and
    images).
    '''
    id = db.StringField(primary_key=True)
    slug = db.StringField(required=True)
    name = db.StringField(required=True)
    level = db.StringField(required=True)
    code = db.StringField(required=True)
    geom = db.MultiPolygonField()
    parents = db.ListField()
    keys = db.DictField()
    validity = db.EmbeddedDocumentField(db.DateRange)
    ancestors = db.ListField()
    successors = db.ListField()
    population = db.IntField()
    area = db.FloatField()
    wikipedia = db.StringField()
    dbpedia = db.StringField()
    flag = db.ImageField(fs=logos)
    blazon = db.ImageField(fs=logos)
    logo = db.ImageField(fs=logos)

    meta = {
        'indexes': [
            'name',
            'parents',
            ('level', 'code'),
        ],
        'queryset_class': GeoZoneQuerySet
    }

    def __unicode__(self):
        return self.id

    __str__ = __unicode__

    def __html__(self):
        """In use within the admin."""
        return '{name} <i>({code})</i>'.format(name=gettext(self.name),
                                               code=self.code)

    def logo_url(self, external=False):
        """
        Return the URL of the zone image, or ``''`` when none is stored.

        The flag is preferred, the blazon is the fallback; each is only
        used when its file actually exists in its storage. The ``logo``
        field itself is not consulted here.
        """
        flag_filename = self.flag.filename
        blazon_filename = self.blazon.filename
        if flag_filename and self.flag.fs.exists(flag_filename):
            return self.flag.fs.url(flag_filename, external=external)
        elif blazon_filename and self.blazon.fs.exists(blazon_filename):
            return self.blazon.fs.url(blazon_filename, external=external)
        else:
            return ''

    @property
    def keys_values(self):
        """Key values might be a list or not, always return a list."""
        keys_values = []
        for value in self.keys.values():
            if isinstance(value, list):
                keys_values += value
            elif not str(value).startswith('-'):  # Avoid -99.
                keys_values.append(value)
        return keys_values

    @cached_property
    def level_code(self):
        """Truncated level code for the sake of readability."""
        # Either 'region', 'departement' or 'commune',
        # useful to match TERRITORY_DATASETS keys.
        return self.id.split(':')[1]

    @cached_property
    def level_name(self):
        """Truncated level name for the sake of readability."""
        if self.level.startswith('fr:'):
            return self.level[3:]
        # Keep the whole level name as a fallback (e.g. `country:fr`)
        return self.level

    @cached_property
    def level_i18n_name(self):
        """In use within templates for dynamic translations."""
        for level, name in spatial_granularities:
            if self.level == level:
                return name
        return self.level_name  # Fallback that should never happen.

    @cached_property
    def ancestors_objects(self):
        """Ancestors objects sorted by name.

        Ancestor ids that no longer match a stored zone are skipped.
        """
        ancestors_objects = []
        for ancestor in self.ancestors:
            try:
                ancestor_object = GeoZone.objects.get(id=ancestor)
            except GeoZone.DoesNotExist:
                continue
            ancestors_objects.append(ancestor_object)
        ancestors_objects.sort(key=lambda a: a.name)
        return ancestors_objects

    @cached_property
    def child_level(self):
        """Return the child level given handled levels.

        The child is the entry preceding ``self.level`` in the
        ``HANDLED_LEVELS`` setting. ``None`` is returned when the setting
        is missing, when the level is not handled, or when the level is
        already the first entry.
        """
        handled_levels = current_app.config.get('HANDLED_LEVELS') or ()
        try:
            position = handled_levels.index(self.level)
        except ValueError:
            return None
        # ``position - 1`` on the first entry would be ``-1`` and silently
        # wrap around to the *last* handled level instead of failing.
        if position == 0:
            return None
        return handled_levels[position - 1]

    @cached_property
    def parent_level(self):
        """Return the parent level given handled levels.

        The parent is the entry following ``self.level`` in the
        ``HANDLED_LEVELS`` setting. ``None`` is returned when the setting
        is missing, when the level is not handled, or when the level is
        already the last entry.
        """
        handled_levels = current_app.config.get('HANDLED_LEVELS') or ()
        try:
            return handled_levels[handled_levels.index(self.level) + 1]
        except (IndexError, ValueError):
            return None

    @property
    def url(self):
        return url_for('territories.territory', territory=self)

    @property
    def external_url(self):
        return url_for('territories.territory', territory=self, _external=True)

    @cached_property
    def wikipedia_url(self):
        """Computed wikipedia URL from the DBpedia one.

        Returns ``None`` when the zone has no DBpedia reference.
        """
        if not self.dbpedia:
            return None
        return (self.dbpedia.replace('dbpedia',
                                     'wikipedia').replace('resource', 'wiki'))

    @cached_property
    def postal_string(self):
        """Return a list of postal codes separated by commas."""
        return ', '.join(self.keys.get('postal', []))

    @property
    def parents_objects(self):
        """Yield the parent zones belonging to the parent level.

        Nothing is yielded when there is no parent level. Unlike
        ``ancestors_objects``, a parent id without a stored zone is not
        skipped: the lookup error propagates to the caller.
        """
        if self.parent_level:
            for parent in self.parents:
                if parent.startswith(self.parent_level):
                    yield GeoZone.objects.get(id=parent,
                                              level=self.parent_level)

    @cached_property
    def current_parent(self):
        """Return the first parent valid today, or ``None`` if there is none."""
        today = date.today()
        for parent in self.parents_objects:
            if parent.valid_at(today):
                return parent

    @property
    def children(self):
        """Zones of the child level listing this zone as parent, by name."""
        return (GeoZone.objects(level=self.child_level,
                                parents__in=[self.id]).order_by('name'))

    @property
    def biggest_children(self):
        """The ten first children by decreasing population then area."""
        return self.children.order_by('-population', '-area')[:10]

    @property
    def handled_level(self):
        """Whether this zone level is listed in ``HANDLED_LEVELS``."""
        # A missing setting means no level is handled.
        return self.level in (current_app.config.get('HANDLED_LEVELS') or ())

    def valid_at(self, valid_date):
        """Tell whether the zone is valid on ``valid_date``.

        A zone without validity range is always considered valid; the
        range bounds are inclusive.
        """
        if not self.validity:
            return True
        return self.validity.start <= valid_date <= self.validity.end

    def toGeoJSON(self):
        """Return the zone as a GeoJSON ``Feature`` mapping."""
        return {
            'id': self.id,
            'type': 'Feature',
            'geometry': self.geom,
            'properties': {
                'slug': self.slug,
                'name': gettext(self.name),
                'level': self.level,
                'code': self.code,
                'validity': self.validity,
                'parents': self.parents,
                'keys': self.keys,
                'population': self.population,
                'area': self.area,
                'logo': self.logo_url(external=True)
            }
        }
 class Fake(db.Document):
     # Only field: an optional embedded ``db.DateRange``.
     # NOTE(review): presumably a fixture for exercising the date range
     # field — confirm against the module this snippet comes from.
     daterange = db.EmbeddedDocumentField(db.DateRange)
Пример #22
0
class RequiredDateRangeTester(db.Document):
    # Embedded ``db.DateRange`` declared with ``required=True``.
    # NOTE(review): presumably a test-only document — confirm.
    temporal = db.EmbeddedDocumentField(db.DateRange, required=True)
Пример #23
0
class Dataset(WithMetrics, db.Datetimed, db.Document):
    '''
    Dataset document.

    Holds descriptive metadata, ownership references, two lists of
    embedded resources (regular and community ones) and a set of
    class-level signals relayed from the ``pre_save`` / ``post_save``
    hooks.
    '''
    # -- description
    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='title', update=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    # -- content
    tags = db.ListField(db.StringField())
    resources = db.ListField(db.EmbeddedDocumentField(Resource))
    community_resources = db.ListField(db.EmbeddedDocumentField(Resource))

    # -- visibility and ownership
    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField(
        'Organization', reverse_delete_rule=db.NULLIFY)
    supplier = db.ReferenceField(
        'Organization', reverse_delete_rule=db.NULLIFY)

    # -- coverage
    frequency = db.StringField(choices=UPDATE_FREQUENCIES.keys())
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    # -- free-form extensions
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __str__(self):
        return self.title or ''

    __unicode__ = __str__

    meta = {
        'allow_inheritance': True,
        'indexes': [
            '-created_at',
            'slug',
            'organization',
            'supplier',
            'resources.id',
            'resources.urlhash',
        ],
        'ordering': ['-created_at'],
        'queryset_class': DatasetQuerySet,
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Relay the pre-save hook to the ``before_save`` signal.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Relay the post-save hook to the class signals.

        ``after_save`` is always sent, followed by ``on_create`` when the
        ``created`` keyword is truthy and by ``on_update`` otherwise.
        '''
        cls.after_save.send(document)
        followup = cls.on_create if kwargs.get('created') else cls.on_update
        followup.send(document)

    @property
    def display_url(self):
        '''URL of the ``datasets.show`` endpoint for this dataset.'''
        return url_for('datasets.show', dataset=self)

    @property
    def external_url(self):
        '''Same as ``display_url`` but built with ``_external=True``.'''
        return url_for('datasets.show', dataset=self, _external=True)

    @property
    def image_url(self):
        '''
        Organization logo URL if any, owner avatar URL otherwise.

        ``None`` when the dataset has neither organization nor owner.
        '''
        if self.organization:
            return self.organization.logo.url
        if self.owner:
            return self.owner.avatar.url
        return None

    @property
    def frequency_label(self):
        '''Label of the update frequency, the ``unknown`` one by default.'''
        fallback = UPDATE_FREQUENCIES['unknown']
        key = self.frequency or 'unknown'
        return UPDATE_FREQUENCIES.get(key, fallback)

    @classmethod
    def get(cls, id_or_slug):
        '''Fetch a dataset by slug first, then by id (404 when missing).'''
        by_slug = cls.objects(slug=id_or_slug).first()
        if by_slug:
            return by_slug
        return cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        prepend = {'$each': [resource.to_mongo()], '$position': 0}
        self.update(__raw__={'$push': {'resources': prepend}})
        self.reload()

    def add_community_resource(self, resource):
        '''Perform an atomic prepend for a new community resource'''
        prepend = {'$each': [resource.to_mongo()], '$position': 0}
        self.update(__raw__={'$push': {'community_resources': prepend}})
        self.reload()
Пример #24
0
class Reuse(db.Datetimed, WithMetrics, BadgeMixin, db.Document):
    '''
    Reuse document.

    A reuse is described by a title, a typed URL (with its hash kept in
    ``urlhash``), an image and the list of datasets it references. It
    exposes class-level signals relayed from the ``pre_save`` /
    ``post_save`` hooks.
    '''
    # -- description
    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='title', update=True)
    description = db.StringField(required=True)
    type = db.StringField(required=True, choices=REUSE_TYPES.keys())
    url = db.StringField(required=True)
    urlhash = db.StringField(required=True, unique=True)
    image_url = db.StringField()
    image = db.ImageField(
        fs=images,
        basename=default_image_basename,
        max_size=IMAGE_MAX_SIZE,
        thumbnails=IMAGE_SIZES)
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    tags = db.ListField(db.StringField())
    badges = db.ListField(db.EmbeddedDocumentField(ReuseBadge))

    # -- visibility and ownership
    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField(
        'Organization', reverse_delete_rule=db.NULLIFY)

    # -- free-form extensions
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField()
    deleted = db.DateTimeField()

    def __str__(self):
        return self.title or ''

    __unicode__ = __str__

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'owner', 'urlhash'],
        'ordering': ['-created_at'],
        'queryset_class': ReuseQuerySet,
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('reuse')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Relay the pre-save hook to the ``before_save`` signal.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Relay the post-save hook to the class signals.

        ``after_save`` is always sent, followed by ``on_create`` when the
        ``created`` keyword is truthy and by ``on_update`` otherwise.
        '''
        cls.after_save.send(document)
        followup = cls.on_create if kwargs.get('created') else cls.on_update
        followup.send(document)

    @property
    def display_url(self):
        '''URL of the ``reuses.show`` endpoint for this reuse.'''
        return url_for('reuses.show', reuse=self)

    @property
    def external_url(self):
        '''Same as ``display_url`` but built with ``_external=True``.'''
        return url_for('reuses.show', reuse=self, _external=True)

    @property
    def type_label(self):
        '''Label registered in ``REUSE_TYPES`` for this reuse type.'''
        return REUSE_TYPES[self.type]

    def clean(self):
        '''Auto populate urlhash from url'''
        # Recompute when there is no hash yet or when the URL changed;
        # the changed-fields lookup is skipped in the first case.
        stale = not self.urlhash or 'url' in self._get_changed_fields()
        if stale:
            self.urlhash = hash_url(self.url)
        super(Reuse, self).clean()

    @classmethod
    def get(cls, id_or_slug):
        '''Fetch a reuse by slug first, then by id (404 when missing).'''
        by_slug = cls.objects(slug=id_or_slug).first()
        if by_slug:
            return by_slug
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def url_exists(cls, url):
        '''Tell whether a reuse is already registered for this URL hash.'''
        matches = cls.objects(urlhash=hash_url(url))
        return matches.count() > 0
Пример #25
0
class DateRangeTester(db.Document):
    # Optional embedded ``db.DateRange`` (no ``required`` flag).
    # NOTE(review): presumably a test-only document — confirm.
    temporal = db.EmbeddedDocumentField(db.DateRange)
Пример #26
0
class Nested(db.EmbeddedDocument):
    '''Embedded document with an ``AutoUUIDField`` id, a name and a sub-document.'''
    id = db.AutoUUIDField()
    name = db.StringField()
    # One level deeper: a single embedded ``SubNested``.
    sub = db.EmbeddedDocumentField(SubNested)
Пример #27
0
class Fake(db.Document):
    '''Document with a name and a list of embedded ``Nested`` documents.'''
    name = db.StringField()
    nested = db.ListField(db.EmbeddedDocumentField(Nested))
Пример #28
0
class Site(WithMetrics, db.Document):
    '''
    Site document, stored under a string primary key.

    Carries the site-wide configuration and a ``metrics`` mapping that
    the ``count_*`` methods fill in; each of them saves the document
    after setting its metric.
    '''
    id = db.StringField(primary_key=True)
    title = db.StringField(required=True)
    keywords = db.ListField(db.StringField())
    feed_size = db.IntField(required=True, default=DEFAULT_FEED_SIZE)
    configs = db.DictField()
    themes = db.DictField()
    settings = db.EmbeddedDocumentField(SiteSettings, default=SiteSettings)

    __metrics_keys__ = [
        'max_dataset_followers',
        'max_dataset_reuses',
        'max_reuse_datasets',
        'max_reuse_followers',
        'max_org_followers',
        'max_org_reuses',
        'max_org_datasets',
        'datasets',
        'discussions',
        'followers',
        'organizations',
        'public-service',
        'resources',
        'reuses',
        'users',
    ]

    def __str__(self):
        return self.title or ''

    @staticmethod
    def _highest_metric(model, metric):
        '''
        Return the highest ``metric`` value among visible ``model`` objects.

        Only objects whose metric is greater than zero are considered;
        ``0`` is returned when there is none.
        '''
        criteria = {'metrics__{0}__gt'.format(metric): 0}
        ranking = '-metrics.{0}'.format(metric)
        leader = model.objects(**criteria).visible().order_by(ranking).first()
        return leader.metrics[metric] if leader else 0

    def count_users(self):
        '''Store the number of confirmed and not deleted users.'''
        from udata.models import User
        confirmed = User.objects(confirmed_at__ne=None, deleted=None)
        self.metrics['users'] = confirmed.count()
        self.save()

    def count_org(self):
        '''Store the number of visible organizations.'''
        from udata.models import Organization
        self.metrics['organizations'] = Organization.objects.visible().count()
        self.save()

    def count_org_for_badge(self, badge_kind):
        '''Store, under ``badge_kind``, how many organizations hold it.'''
        from udata.models import Organization
        holders = Organization.objects(badges__kind=badge_kind)
        self.metrics[badge_kind] = holders.count()
        self.save()

    def count_datasets(self):
        '''Store the number of visible datasets.'''
        from udata.models import Dataset
        self.metrics['datasets'] = Dataset.objects.visible().count()
        self.save()

    def count_resources(self):
        '''Store the number of resources embedded in visible datasets.'''
        # One output row per embedded resource, then a single group
        # summing them all.
        project = {'$project': {'resources': 1}}
        unwind = {'$unwind': '$resources'}
        group = {'$group': {'_id': 'result', 'count': {'$sum': 1}}}
        rows = Dataset.objects.visible().aggregate(project, unwind, group)
        # No row at all means there is nothing to count.
        self.metrics['resources'] = next(rows, {}).get('count', 0)
        self.save()

    def count_reuses(self):
        '''Store the number of visible reuses.'''
        self.metrics['reuses'] = Reuse.objects.visible().count()
        self.save()

    def count_followers(self):
        '''Store the number of follows without an ``until`` value.'''
        from udata.models import Follow
        self.metrics['followers'] = Follow.objects(until=None).count()
        self.save()

    def count_discussions(self):
        '''Store the total number of discussions.'''
        from udata.models import Discussion
        self.metrics['discussions'] = Discussion.objects.count()
        self.save()

    def count_max_dataset_followers(self):
        '''Store the highest followers count among visible datasets.'''
        self.metrics['max_dataset_followers'] = self._highest_metric(
            Dataset, 'followers')
        self.save()

    def count_max_dataset_reuses(self):
        '''Store the highest reuses count among visible datasets.'''
        self.metrics['max_dataset_reuses'] = self._highest_metric(
            Dataset, 'reuses')
        self.save()

    def count_max_reuse_datasets(self):
        '''Store the highest datasets count among visible reuses.'''
        self.metrics['max_reuse_datasets'] = self._highest_metric(
            Reuse, 'datasets')
        self.save()

    def count_max_reuse_followers(self):
        '''Store the highest followers count among visible reuses.'''
        self.metrics['max_reuse_followers'] = self._highest_metric(
            Reuse, 'followers')
        self.save()

    def count_max_org_followers(self):
        '''Store the highest followers count among visible organizations.'''
        self.metrics['max_org_followers'] = self._highest_metric(
            Organization, 'followers')
        self.save()

    def count_max_org_reuses(self):
        '''Store the highest reuses count among visible organizations.'''
        self.metrics['max_org_reuses'] = self._highest_metric(
            Organization, 'reuses')
        self.save()

    def count_max_org_datasets(self):
        '''Store the highest datasets count among visible organizations.'''
        self.metrics['max_org_datasets'] = self._highest_metric(
            Organization, 'datasets')
        self.save()
Пример #29
0
 class Fake(db.Document):
     # Only field: an optional embedded ``SpatialCoverage``.
     # NOTE(review): presumably a fixture for exercising spatial
     # coverage — confirm against the module this snippet comes from.
     spatial = db.EmbeddedDocumentField(SpatialCoverage)
Пример #30
0
class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    '''
    Organization document.

    Holds the organization identity, its members, teams and membership
    requests, and a ``metrics`` mapping filled in by the ``count_*``
    methods (each of them saves the document). Class-level signals are
    relayed from the ``pre_save`` / ``post_save`` hooks.
    '''
    name = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='name',
                        update=True,
                        follow=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(fs=avatars,
                         basename=default_image_basename,
                         max_size=LOGO_MAX_SIZE,
                         thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    zone = db.StringField()
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    meta = {
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __str__(self):
        return self.name or ''

    __badges__ = {
        PUBLIC_SERVICE: _('Public Service'),
        CERTIFIED: _('Certified'),
    }

    __search_metrics__ = Object(
        properties={
            'datasets': Integer(),
            'reuses': Integer(),
            'followers': Integer(),
            'views': Integer(),
        })

    __metrics_keys__ = [
        'datasets',
        'members',
        'reuses',
        'followers',
        'views',
    ]

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Relay the pre-save hook to the ``before_save`` signal.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Relay the post-save hook to the class signals.

        ``after_save`` is always sent, followed by ``on_create`` when the
        ``created`` keyword is truthy and by ``on_update`` otherwise.
        '''
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    def url_for(self, *args, **kwargs):
        '''Build the ``organizations.show`` URL for this organization.'''
        # Inside the method body ``url_for`` resolves to the module-level
        # function, not to this method (class scope is not searched).
        return url_for('organizations.show', org=self, *args, **kwargs)

    # At class level ``url_for`` is the method defined just above.
    display_url = property(url_for)

    @property
    def external_url(self):
        '''Same as ``display_url`` but built with ``_external=True``.'''
        return self.url_for(_external=True)

    @property
    def pending_requests(self):
        '''Membership requests whose status is ``pending``.'''
        return [r for r in self.requests if r.status == 'pending']

    @property
    def refused_requests(self):
        '''Membership requests whose status is ``refused``.'''
        return [r for r in self.requests if r.status == 'refused']

    @property
    def accepted_requests(self):
        '''Membership requests whose status is ``accepted``.'''
        return [r for r in self.requests if r.status == 'accepted']

    @property
    def certified(self):
        '''Whether the organization holds the ``CERTIFIED`` badge.'''
        return any(b.kind == CERTIFIED for b in self.badges)

    @property
    def public_service(self):
        '''Whether the organization is both certified and a public service.'''
        is_public_service = any(b.kind == PUBLIC_SERVICE for b in self.badges)
        return self.certified and is_public_service

    def member(self, user):
        '''Return the membership of ``user``, or ``None`` if not a member.'''
        for member in self.members:
            if member.user == user:
                return member
        return None

    def is_member(self, user):
        '''Tell whether ``user`` is a member of the organization.'''
        return self.member(user) is not None

    def is_admin(self, user):
        '''Tell whether ``user`` is a member with the ``admin`` role.'''
        member = self.member(user)
        return member is not None and member.role == 'admin'

    def pending_request(self, user):
        '''Return the pending membership request of ``user``, if any.'''
        for request in self.requests:
            if request.user == user and request.status == 'pending':
                return request
        return None

    @classmethod
    def get(cls, id_or_slug):
        '''Fetch an organization by slug first, then by id (404 if missing).'''
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        '''
        Return the list of members having the given ``role``.

        A real list is returned rather than a ``filter`` object: on
        Python 3 the latter is a one-shot iterator, which cannot be
        measured with ``len()`` nor iterated twice.
        '''
        return [member for member in self.members if member.role == role]

    def check_availability(self):
        '''Chain the availability checks of the organization datasets.'''
        from udata.models import Dataset  # Circular imports.
        # Performances: only check the first 20 datasets for now.
        return chain(*[
            dataset.check_availability()
            for dataset in Dataset.objects(organization=self).visible()[:20]
        ])

    @cached_property
    def json_ld(self):
        '''
        Build the schema.org JSON-LD mapping for this organization.

        The type is ``GovernmentOrganization`` for public services and
        ``Organization`` otherwise; ``description`` and ``logo`` are only
        present when a value is available.
        '''
        type_ = 'GovernmentOrganization' if self.public_service \
                else 'Organization'

        result = {
            '@context': 'http://schema.org',
            '@type': type_,
            '@id': str(self.id),
            'alternateName': self.slug,
            'url': url_for('organizations.show', org=self, _external=True),
            'name': self.name,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat()
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        logo = self.logo(external=True)
        if logo:
            result['logo'] = logo

        return result

    @property
    def views_count(self):
        '''Value of the ``views`` metric, ``0`` when it is not set.'''
        return self.metrics.get('views', 0)

    def count_members(self):
        '''Store the number of members.'''
        self.metrics['members'] = len(self.members)
        self.save()

    def count_datasets(self):
        '''Store the number of visible datasets of the organization.'''
        from udata.models import Dataset
        self.metrics['datasets'] = Dataset.objects(
            organization=self).visible().count()
        self.save()

    def count_reuses(self):
        '''Store the number of reuses of the organization.'''
        from udata.models import Reuse
        # NOTE(review): unlike ``count_datasets`` this does not restrict
        # the count with ``.visible()`` — confirm whether that is intended.
        self.metrics['reuses'] = Reuse.objects(organization=self).count()
        self.save()

    def count_followers(self):
        '''Store the number of current followers of the organization.'''
        from udata.models import Follow
        self.metrics['followers'] = Follow.objects(
            until=None).followers(self).count()
        self.save()