def serialize(cls, reuse):
    """Serialize a reuse into an indexable document.

    By default use the ``to_dict`` method and exclude ``_id``, ``_cls``
    and ``owner`` fields.
    """
    # Fetch only the fields we index, without dereferencing relations.
    datasets = Dataset.objects(id__in=[r.id for r in reuse.datasets])
    datasets = list(datasets.only('id', 'title').no_dereference())
    organization = None
    owner = None
    if reuse.organization:
        organization = Organization.objects(
            id=reuse.organization.id).first()
    elif reuse.owner:
        owner = User.objects(id=reuse.owner.id).first()
    return {
        'title': reuse.title,
        'description': reuse.description,
        'url': reuse.url,
        'organization': str(organization.id) if organization else None,
        'owner': str(owner.id) if owner else None,
        'type': reuse.type,
        'topic': reuse.topic,
        'tags': reuse.tags,
        'tag_suggest': reuse.tags,
        'badges': [badge.kind for badge in reuse.badges],
        'created': to_iso_datetime(reuse.created_at),
        'last_modified': to_iso_datetime(reuse.last_modified),
        'dataset': [{
            'id': str(d.id),
            'title': d.title
        } for d in datasets],
        'metrics': reuse.metrics,
        'featured': reuse.featured,
        'extras': reuse.extras,
        'reuse_suggest': {
            # Cast the id to str for consistency with ``output`` below
            # and with the dataset serializer (a raw ObjectId is not
            # JSON-serializable).
            'input': cls.completer_tokenize(reuse.title) + [str(reuse.id)],
            'output': str(reuse.id),
            'payload': {
                'title': reuse.title,
                'slug': reuse.slug,
                'image_url': reuse.image(500, external=True),
            },
        },
    }
def serialize(cls, organization):
    """Flatten an organization into a plain, indexable dict."""
    # Datetime extras are serialized to ISO strings; any other value is
    # passed through untouched.
    extras = {
        key: to_iso_datetime(value)
        if isinstance(value, datetime.datetime) else value
        for key, value in organization.extras.items()
    }
    metrics = organization.metrics
    return {
        'id': str(organization.id),
        'name': organization.name,
        'acronym': organization.acronym or None,
        'description': organization.description,
        'url': organization.url,
        'badges': [badge.kind for badge in organization.badges],
        'created_at': to_iso_datetime(organization.created_at),
        'orga_sp': 1 if organization.public_service else 0,
        'followers': metrics.get('followers', 0),
        'datasets': metrics.get('datasets', 0),
        'reuses': metrics.get('reuses', 0),
        'views': metrics.get('views', 0),
        'extras': extras
    }
def serialize_resource_for_event(resource):
    """Build the event payload describing a single resource."""
    checksum = resource.checksum
    # Datetime extras are serialized to ISO strings; any other value is
    # passed through untouched.
    extras = {
        key: to_iso_datetime(value)
        if isinstance(value, datetime.datetime) else value
        for key, value in resource.extras.items()
    }
    return {
        'id': str(resource.id),
        'url': resource.url,
        'format': resource.format,
        'title': resource.title,
        'schema': resource.schema,
        'description': resource.description,
        'filetype': resource.filetype,
        'type': resource.type,
        'mime': resource.mime,
        'filesize': resource.filesize,
        'checksum_type': checksum.type if checksum else None,
        'checksum_value': checksum.value if checksum else None,
        'created_at': to_iso_datetime(resource.created_at),
        'modified': to_iso_datetime(resource.modified),
        'published': to_iso_datetime(resource.published),
        'extras': extras,
    }
def serialize(cls, user):
    """Serialize a user into an indexable document."""
    return {
        'first_name': user.first_name,
        'last_name': user.last_name,
        'about': user.about,
        'organizations': [str(o.id) for o in user.organizations],
        'metrics': user.metrics,
        'created': to_iso_datetime(user.created_at),
        'user_suggest': {
            # Cast the id to str for consistency with ``output`` below
            # (a raw ObjectId is not JSON-serializable).
            'input': cls.completer_tokenize(user.fullname) + [str(user.id)],
            'output': str(user.id),
            'payload': {
                'avatar_url': user.avatar(40, external=True),
                'first_name': user.first_name,
                'last_name': user.last_name,
                'slug': user.slug,
            },
        },
        'visible': user.visible,
    }
def serialize(cls, organization):
    """Serialize an organization into an indexable document."""
    # Suggestions match on the name tokens, the id and the acronym.
    completions = cls.completer_tokenize(organization.name)
    # Cast the id to str for consistency with ``output`` below
    # (a raw ObjectId is not JSON-serializable).
    completions.append(str(organization.id))
    if organization.acronym:
        completions.append(organization.acronym)
    return {
        'name': organization.name,
        'acronym': organization.acronym,
        'description': organization.description,
        'url': organization.url,
        'metrics': organization.metrics,
        'badges': [badge.kind for badge in organization.badges],
        'created': to_iso_datetime(organization.created_at),
        'org_suggest': {
            'input': completions,
            'output': str(organization.id),
            'payload': {
                'name': organization.name,
                'acronym': organization.acronym,
                'image_url': organization.logo(40, external=True),
                'slug': organization.slug,
            },
        }
    }
def test_to_iso_datetime_with_datetime(self):
    """A full datetime keeps its time component in the ISO output."""
    value = datetime(1984, 2, 29, 1, 2, 3)
    self.assertEqual(to_iso_datetime(value), '1984-02-29T01:02:03')
def test_to_iso_datetime_emtpy(self):
    """``None`` passes through unchanged."""
    # NOTE(review): "emtpy" is a typo for "empty" in the method name;
    # kept to avoid renaming a discoverable test.
    self.assertIsNone(to_iso_datetime(None))
def serialize(cls, dataset):
    """Serialize a dataset into a flat, indexable document."""
    organization = None
    owner = None
    if dataset.organization:
        org = Organization.objects(id=dataset.organization.id).first()
        # Only index the organization fields used for display/filtering.
        organization = {
            'id': str(org.id),
            'name': org.name,
            'public_service': 1 if org.public_service else 0,
            'followers': org.metrics.get('followers', 0)
        }
    elif dataset.owner:
        owner = User.objects(id=dataset.owner.id).first()
    document = {
        'id': str(dataset.id),
        'title': dataset.title,
        'description': dataset.description,
        'acronym': dataset.acronym or None,
        'url': dataset.display_url,
        'tags': dataset.tags,
        'license': getattr(dataset.license, 'id', None),
        'badges': [badge.kind for badge in dataset.badges],
        'frequency': dataset.frequency,
        'created_at': to_iso_datetime(dataset.created_at),
        'views': dataset.metrics.get('views', 0),
        'followers': dataset.metrics.get('followers', 0),
        'reuses': dataset.metrics.get('reuses', 0),
        'featured': 1 if dataset.featured else 0,
        'resources_count': len(dataset.resources),
        'organization': organization,
        'owner': str(owner.id) if owner else None,
        'format': [r.format.lower() for r in dataset.resources if r.format],
        'schema': [r.schema.get('name') for r in dataset.resources if r.schema]
    }
    # Datetime extras are serialized to ISO strings; any other value is
    # passed through untouched.
    extras = {}
    for key, value in dataset.extras.items():
        extras[key] = to_iso_datetime(value) if isinstance(
            value, datetime.datetime) else value
    document.update({'extras': extras})
    # Only index temporal coverage when both bounds are set.
    if (dataset.temporal_coverage is not None
            and dataset.temporal_coverage.start
            and dataset.temporal_coverage.end):
        start = to_iso_datetime(dataset.temporal_coverage.start)
        end = to_iso_datetime(dataset.temporal_coverage.end)
        document.update({
            'temporal_coverage_start': start,
            'temporal_coverage_end': end,
        })
    if dataset.spatial is not None:
        # Index precise zone labels and parents zone identifiers
        # to allow fast filtering.
        zone_ids = [z.id for z in dataset.spatial.zones]
        zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
        parents = set()
        geozones = []
        # NOTE: the unused ``coverage_level`` accumulator present in the
        # search-index variant of this serializer was dead code here and
        # has been removed.
        for zone in zones:
            geozones.append({
                'id': zone.id,
                'name': zone.name,
                'keys': zone.keys_values
            })
            parents |= set(zone.parents)
        # Parents are indexed by id only: they are used for filtering,
        # not for display.
        geozones.extend([{'id': p} for p in parents])
        document.update({
            'geozones': geozones,
            'granularity': dataset.spatial.granularity,
        })
    return document
def test_to_iso_datetime_with_date(self):
    """A plain date serializes to midnight of that day."""
    result = to_iso_datetime(date(1984, 2, 29))
    assert result == '1984-02-29T00:00:00'
def test_to_iso_datetime_emtpy(self):
    """``None`` passes through unchanged."""
    # NOTE(review): "emtpy" is a typo for "empty" in the method name;
    # kept to avoid renaming a discoverable test.
    result = to_iso_datetime(None)
    assert result is None
def test_to_iso_datetime_before_1900(self):
    """Pre-1900 dates serialize like any other date."""
    result = to_iso_datetime(date(1884, 2, 29))
    assert result == '1884-02-29T00:00:00'
def test_to_iso_datetime_with_datetime(self):
    """A full datetime keeps its time component in the ISO output."""
    value = datetime(1984, 2, 29, 1, 2, 3)
    assert to_iso_datetime(value) == '1984-02-29T01:02:03'
def test_to_iso_datetime_with_date(self):
    """A plain date serializes to midnight of that day."""
    result = to_iso_datetime(date(1984, 2, 29))
    self.assertEqual(result, '1984-02-29T00:00:00')
def test_to_iso_datetime_before_1900(self):
    """Pre-1900 dates serialize like any other date."""
    result = to_iso_datetime(date(1884, 2, 29))
    self.assertEqual(result, '1884-02-29T00:00:00')
def serialize(cls, dataset):
    """Serialize a dataset into its search-index document.

    Computes suggestion weights from the temporal and spatial coverage
    and builds completion inputs for titles and mime types.
    """
    organization = None
    owner = None
    image_url = None
    spatial_weight = DEFAULT_SPATIAL_WEIGHT
    temporal_weight = DEFAULT_TEMPORAL_WEIGHT
    if dataset.organization:
        organization = Organization.objects(
            id=dataset.organization.id).first()
        image_url = organization.logo(40, external=True)
    elif dataset.owner:
        owner = User.objects(id=dataset.owner.id).first()
        image_url = owner.avatar(40, external=True)
    # Normalize to a plain boolean: the chained ``and`` expression would
    # otherwise yield None when there is no organization.
    certified = bool(organization and organization.certified)
    document = {
        'title': dataset.title,
        'description': dataset.description,
        'license': getattr(dataset.license, 'id', None),
        'tags': dataset.tags,
        'badges': [badge.kind for badge in dataset.badges],
        'tag_suggest': dataset.tags,
        'resources': [{
            'title': r.title,
            'description': r.description,
            'format': r.format,
            'type': r.type,
            'schema': r.schema,
        } for r in dataset.resources],
        'format_suggest': [r.format.lower()
                           for r in dataset.resources
                           if r.format],
        'mime_suggest': [],  # Need a custom loop below
        'frequency': dataset.frequency,
        'organization': str(organization.id) if organization else None,
        'owner': str(owner.id) if owner else None,
        'dataset_suggest': {
            'input': cls.completer_tokenize(dataset.title) + [str(dataset.id)],
            'output': dataset.title,
            'payload': {
                'id': str(dataset.id),
                'slug': dataset.slug,
                'acronym': dataset.acronym,
                'image_url': image_url,
            },
        },
        'created': to_iso_datetime(dataset.created_at),
        'last_modified': to_iso_datetime(dataset.last_modified),
        'metrics': dataset.metrics,
        'featured': dataset.featured,
        'from_certified': certified,
    }
    if (dataset.temporal_coverage is not None
            and dataset.temporal_coverage.start
            and dataset.temporal_coverage.end):
        # Coverage bounds are stored as day ordinals; the weight grows
        # with the covered span, capped by MAX_TEMPORAL_WEIGHT.
        start = dataset.temporal_coverage.start.toordinal()
        end = dataset.temporal_coverage.end.toordinal()
        temporal_weight = min(
            abs(end - start) / 365, cls.from_config('MAX_TEMPORAL_WEIGHT'))
        document.update({
            'temporal_coverage': {
                'start': start,
                'end': end
            },
            'temporal_weight': temporal_weight,
        })
    if dataset.spatial is not None:
        # Index precise zone labels and parents zone identifiers
        # to allow fast filtering.
        zone_ids = [z.id for z in dataset.spatial.zones]
        zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
        parents = set()
        geozones = []
        coverage_level = ADMIN_LEVEL_MAX
        for zone in zones:
            geozones.append({
                'id': zone.id,
                'name': zone.name,
                'keys': zone.keys_values
            })
            parents |= set(zone.parents)
            coverage_level = min(coverage_level, admin_levels[zone.level])
        # Parents are indexed by id only (filtering, not display).
        geozones.extend([{'id': p} for p in parents])
        # The finer the administrative coverage, the higher the weight.
        spatial_weight = ADMIN_LEVEL_MAX / coverage_level
        document.update({
            'geozones': geozones,
            'granularity': dataset.spatial.granularity,
            'spatial_weight': spatial_weight,
        })
    document['dataset_suggest']['weight'] = cls.get_suggest_weight(
        temporal_weight, spatial_weight, dataset.featured)
    if dataset.acronym:
        document['dataset_suggest']['input'].append(dataset.acronym)
    # mime Completion: index each mime segment plus the full mime type.
    mimes = {r.mime.lower() for r in dataset.resources if r.mime}
    for mime in mimes:
        document['mime_suggest'].append({
            'input': mime.replace('+', '/').split('/') + [mime],
            'output': mime,
        })
    return document
def test_to_iso_datetime_before_1000(self):
    """Years below 1000 are zero-padded to four digits."""
    result = to_iso_datetime(date(908, 2, 29))
    assert result == '0908-02-29T00:00:00'