def test_prioritize_title_over_alternate_title(self):
    '''A title match should be preferred over an alternate-title match.'''
    shared_title = faker.sentence()
    expected = LicenseFactory(title=shared_title)
    # Competing license carrying the same string only as an alternate title.
    LicenseFactory(alternate_titles=[shared_title])

    guessed = License.guess(shared_title)

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def test_oembed_region_territory_api_get(self, api):
    '''Each region territory dataset should be served as a rich oembed.'''
    paca, bdr, arles = create_geozones_fixtures()
    licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
    LicenseFactory(id='notspecified', title='Not Specified')

    for dataset_cls in TERRITORY_DATASETS['region'].values():
        organization = OrganizationFactory(id=dataset_cls.organization_id)
        territory = dataset_cls(paca)
        reference = 'territory-{id}'.format(id=territory.slug)

        response = api.get(url_for('api.oembeds', references=reference))
        assert200(response)
        data = json.loads(response.data)[0]

        # Keys every oembed entry is expected to expose.
        for key in ('html', 'width', 'maxwidth', 'height', 'maxheight'):
            assert key in data
        assert data['type'] == 'rich'
        assert data['version'] == '1.0'

        html = data['html']
        assert territory.title in html
        assert escape(territory.url) in html
        assert 'alt="{name}"'.format(name=organization.name) in html
        assert md(territory.description, source_tooltip=True) in html
        assert 'Download from local.test' in html
        assert 'Add to your own website' in html

        # 'zonages_reg' is exempt from the license and source checks below.
        if dataset_cls in (TERRITORY_DATASETS['region']['zonages_reg'], ):
            continue
        assert 'License: {0}'.format(licence_ouverte.title) in html
        assert '© {0}'.format(licence_ouverte.id) in html
        assert (
            '<a data-tooltip="Source" href="http://local.test/datasets'
            in html)
def test_oembed_county_territory_api_get(self):
    '''It should fetch a county territory in the oembed format.

    Every dataset class registered under TERRITORY_DATASETS['county'] is
    requested through the oembeds API and its payload is checked.
    '''
    midi_pyrenees = GeoZoneFactory(id='fr/region/73', level='fr/region',
                                   name='Midi-Pyrénées', code='73')
    aveyron = GeoZoneFactory(id='fr/county/12', level='fr/county',
                             name='Aveyron', code='12',
                             parents=[midi_pyrenees.id])
    licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
    LicenseFactory(id='notspecified', title='Not Specified')
    for territory_dataset_class in TERRITORY_DATASETS['county'].values():
        organization = OrganizationFactory(
            id=territory_dataset_class.organization_id)
        territory = territory_dataset_class(aveyron)
        reference = 'territory-{id}'.format(id=territory.slug)
        response = self.get(url_for('api.oembeds', references=reference))
        self.assert200(response)
        data = json.loads(response.data)[0]
        self.assertIn('html', data)
        self.assertIn('width', data)
        self.assertIn('maxwidth', data)
        self.assertIn('height', data)
        self.assertIn('maxheight', data)
        # assertEqual, not assertTrue: assertTrue(value, 'rich') uses 'rich'
        # as the failure message and passes for any truthy value.
        self.assertEqual(data['type'], 'rich')
        self.assertEqual(data['version'], '1.0')
        self.assertIn(territory.title, data['html'])
        # NOTE(review): cgi.escape was removed in Python 3.8; consider
        # html.escape(..., quote=False) when porting.
        self.assertIn(cgi.escape(territory.url), data['html'])
        self.assertIn('alt="{name}"'.format(name=organization.name),
                      data['html'])
        self.assertIn(md(territory.description, source_tooltip=True),
                      data['html'])
        self.assertIn('Download from localhost', data['html'])
        self.assertIn('Add to your own website', data['html'])
        # These two dataset classes are exempt from the license checks.
        if territory_dataset_class not in (
                TERRITORY_DATASETS['county']['comptes_c'],
                TERRITORY_DATASETS['county']['zonages_c']):
            self.assertIn(
                'License: {title}'.format(title=licence_ouverte.title),
                data['html'])
            self.assertIn(
                '© {license_id}'.format(license_id=licence_ouverte.id),
                data['html'])
            self.assertIn(
                '<a data-tooltip="Source" href="http://localhost/datasets',
                data['html'])
def test_dataset_licenses_list(self):
    '''The licenses endpoint should return one entry per created license.'''
    expected_count = len(LicenseFactory.create_batch(4))

    response = self.get(url_for('api.licenses'))

    self.assert200(response)
    self.assertEqual(len(response.json), expected_count)
def test_match_by_alternate_title_with_multiple_candidates_from_one_licence(
        self):
    '''Two close alternate titles on one license should still match it.'''
    candidates = ['Licence Ouverte v2', 'Licence Ouverte v2.0']
    expected = LicenseFactory(alternate_titles=candidates)

    guessed = License.guess('Licence Ouverte v2.0')

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def test_all_resource_fields(self):
    '''Check each field of the RDF produced for a fully populated resource.'''
    resource_license = LicenseFactory()
    resource = ResourceFactory(format='csv')
    dataset = DatasetFactory(resources=[resource], license=resource_license)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    node = resource_to_rdf(resource, dataset)

    # Descriptive metadata.
    assert node.value(DCT.title) == Literal(resource.title)
    assert node.value(DCT.description) == Literal(resource.description)
    assert node.value(DCT.issued) == Literal(resource.published)
    assert node.value(DCT.modified) == Literal(resource.modified)

    # License information comes from the owning dataset's license.
    assert node.value(DCT.license).identifier == URIRef(resource_license.url)
    assert node.value(DCT.rights) == Literal(resource_license.title)

    # Download and access URLs, size and formats.
    assert node.value(DCAT.downloadURL).identifier == URIRef(resource.url)
    assert node.value(DCAT.accessURL).identifier == URIRef(permalink)
    assert node.value(DCAT.bytesSize) == Literal(resource.filesize)
    assert node.value(DCAT.mediaType) == Literal(resource.mime)
    assert node.value(DCT.term('format')) == Literal(resource.format)

    # Checksum is a separate SPDX node.
    checksum = node.value(SPDX.checksum)
    checksum_id = checksum.identifier
    assert node.graph.value(checksum_id, RDF.type) == SPDX.Checksum
    assert (node.graph.value(checksum.identifier, SPDX.algorithm)
            == SPDX.checksumAlgorithm_sha1)
    assert (checksum.value(SPDX.checksumValue)
            == Literal(resource.checksum.value))
def test_xml_catalog(self, rmock):
    '''Harvest the mocked XML catalog and check datasets 3 and 1.'''
    LicenseFactory(id='lov2', title='Licence Ouverte Version 2.0')
    catalog_url = mock_dcat(rmock, 'catalog.xml', path='catalog.xml')
    org = OrganizationFactory()
    source = HarvestSourceFactory(backend='dcat',
                                  url=catalog_url,
                                  organization=org)

    actions.run(source.slug)

    # test dct:license support
    third = Dataset.objects.get(**{'extras__dct:identifier': '3'})
    assert third.license.id == 'lov2'
    assert third.extras['remote_url'] == 'http://data.test.org/datasets/3'
    assert third.created_at.date() == date(2016, 12, 14)
    assert third.last_modified.date() == date(2016, 12, 14)
    assert third.frequency == 'daily'
    assert third.description == 'Dataset 3 description'

    first = Dataset.objects.get(**{'extras__dct:identifier': '1'})
    # test abstract description support
    assert first.description == 'Dataset 1 description'
def test_oembed_town_territory_api_get(self):
    '''It should fetch a town territory in the oembed format.

    Every dataset class registered under TERRITORY_DATASETS['town'] is
    requested through the oembeds API and its payload is checked.
    '''
    arles = GeoZoneFactory(id='fr/town/13004', level='fr/town',
                           name='Arles', code='13004', population=52439)
    licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
    odbl_license = LicenseFactory(id='odc-odbl', title='ODbL')
    LicenseFactory(id='notspecified', title='Not Specified')
    town_datasets = TERRITORY_DATASETS['town']
    for territory_dataset_class in town_datasets.values():
        organization = OrganizationFactory(
            id=territory_dataset_class.organization_id)
        territory = territory_dataset_class(arles)
        reference = 'territory-{id}'.format(id=territory.slug)
        response = self.get(url_for('api.oembeds', references=reference))
        self.assert200(response)
        data = json.loads(response.data)[0]
        self.assertIn('html', data)
        self.assertIn('width', data)
        self.assertIn('maxwidth', data)
        self.assertIn('height', data)
        self.assertIn('maxheight', data)
        # assertEqual, not assertTrue: assertTrue(value, 'rich') uses 'rich'
        # as the failure message and passes for any truthy value.
        self.assertEqual(data['type'], 'rich')
        self.assertEqual(data['version'], '1.0')
        self.assertIn(territory.title, data['html'])
        # NOTE(review): cgi.escape was removed in Python 3.8; consider
        # html.escape(..., quote=False) when porting.
        self.assertIn(cgi.escape(territory.url), data['html'])
        self.assertIn('alt="{name}"'.format(name=organization.name),
                      data['html'])
        self.assertIn(md(territory.description, source_tooltip=True),
                      data['html'])
        self.assertIn('Download from localhost', data['html'])
        self.assertIn('Add to your own website', data['html'])
        # 'comptes_t' is exempt from the license checks.
        if territory_dataset_class not in (town_datasets['comptes_t'], ):
            # 'ban_odbl_t' is expected under ODbL, the others under
            # Licence Ouverte.
            if territory_dataset_class == town_datasets['ban_odbl_t']:
                license = odbl_license
            else:
                license = licence_ouverte
            self.assertIn('License: {title}'.format(title=license.title),
                          data['html'])
            self.assertIn('© {license_id}'.format(license_id=license.id),
                          data['html'])
            self.assertIn(
                '<a data-tooltip="Source" href="http://localhost/datasets',
                data['html'])
def test_oembed_town_territory_api_get(self, api):
    '''Each town ('commune') territory dataset should be a rich oembed.'''
    paca, bdr, arles = create_geozones_fixtures()
    licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
    odbl_license = LicenseFactory(id='odc-odbl', title='ODbL')
    LicenseFactory(id='notspecified', title='Not Specified')
    town_datasets = TERRITORY_DATASETS['commune']

    for dataset_cls in town_datasets.values():
        organization = OrganizationFactory(id=dataset_cls.organization_id)
        territory = dataset_cls(arles)
        reference = 'territory-{id}'.format(id=territory.slug)

        response = api.get(url_for('api.oembeds', references=reference))
        assert200(response)
        data = json.loads(response.data)[0]

        # Keys every oembed entry is expected to expose.
        for key in ('html', 'width', 'maxwidth', 'height', 'maxheight'):
            assert key in data
        assert data['type'] == 'rich'
        assert data['version'] == '1.0'

        html = data['html']
        assert territory.title in html
        # NOTE(review): cgi.escape was removed in Python 3.8; consider the
        # html module's escape(..., quote=False) when porting.
        assert cgi.escape(territory.url) in html
        assert 'alt="{name}"'.format(name=organization.name) in html
        assert md(territory.description, source_tooltip=True) in html
        assert 'Download from localhost' in html
        assert 'Add to your own website' in html

        # 'comptes_com' is exempt from the license and source checks.
        if dataset_cls in (town_datasets['comptes_com'], ):
            continue
        # 'ban_odbl_com' is expected under ODbL, everything else under
        # Licence Ouverte.
        license = (odbl_license
                   if dataset_cls == town_datasets['ban_odbl_com']
                   else licence_ouverte)
        assert 'License: {title}'.format(title=license.title) in html
        assert '© {license_id}'.format(license_id=license.id) in html
        assert (
            '<a data-tooltip="Source" href="http://localhost/datasets'
            in html)
def test_oembed_region_territory_api_get(self):
    '''It should fetch a region territory in the oembed format.

    Every dataset class registered under TERRITORY_DATASETS['region'] is
    requested through the oembeds API and its payload is checked.
    '''
    paca, bdr, arles = create_geozones_fixtures()
    licence_ouverte = LicenseFactory(id='fr-lo', title='Licence Ouverte')
    LicenseFactory(id='notspecified', title='Not Specified')
    for territory_dataset_class in TERRITORY_DATASETS['region'].values():
        organization = OrganizationFactory(
            id=territory_dataset_class.organization_id)
        territory = territory_dataset_class(paca)
        reference = 'territory-{id}'.format(id=territory.slug)
        response = self.get(url_for('api.oembeds', references=reference))
        self.assert200(response)
        data = json.loads(response.data)[0]
        self.assertIn('html', data)
        self.assertIn('width', data)
        self.assertIn('maxwidth', data)
        self.assertIn('height', data)
        self.assertIn('maxheight', data)
        # assertEqual, not assertTrue: assertTrue(value, 'rich') uses 'rich'
        # as the failure message and passes for any truthy value.
        self.assertEqual(data['type'], 'rich')
        self.assertEqual(data['version'], '1.0')
        self.assertIn(territory.title, data['html'])
        # NOTE(review): cgi.escape was removed in Python 3.8; consider
        # html.escape(..., quote=False) when porting.
        self.assertIn(cgi.escape(territory.url), data['html'])
        self.assertIn('alt="{name}"'.format(name=organization.name),
                      data['html'])
        self.assertIn(md(territory.description, source_tooltip=True),
                      data['html'])
        self.assertIn('Download from localhost', data['html'])
        self.assertIn('Add to your own website', data['html'])
        # These two dataset classes are exempt from the license checks.
        if territory_dataset_class not in (
                TERRITORY_DATASETS['region']['comptes_reg'],
                TERRITORY_DATASETS['region']['zonages_reg']):
            self.assertIn(
                'License: {title}'.format(title=licence_ouverte.title),
                data['html'])
            self.assertIn(
                '© {license_id}'.format(license_id=licence_ouverte.id),
                data['html'])
            self.assertIn(
                '<a data-tooltip="Source" href="http://localhost/datasets',
                data['html'])
def test_xml_catalog(self, rmock):
    '''Harvest the mocked XML catalog and check dataset 3's license.'''
    LicenseFactory(id='lov2', title='Licence Ouverte Version 2.0')
    catalog_url = mock_dcat(rmock, 'catalog.xml', path='catalog.xml')
    org = OrganizationFactory()
    source = HarvestSourceFactory(backend='dcat',
                                  url=catalog_url,
                                  organization=org)

    actions.run(source.slug)

    # test dct:license support
    harvested = Dataset.objects.get(**{'extras__dct:identifier': '3'})
    assert harvested.license.id == 'lov2'
def test_sigoreme_xml_catalog(self, rmock):
    '''Harvest the mocked 'sig.oreme.rdf' catalog and check the dataset.'''
    LicenseFactory(id='fr-lo', title='Licence ouverte / Open Licence')
    catalog_url = mock_dcat(rmock, 'sig.oreme.rdf')
    org = OrganizationFactory()
    source = HarvestSourceFactory(backend='dcat',
                                  url=catalog_url,
                                  organization=org)

    actions.run(source.slug)

    harvested = Dataset.objects.filter(organization=org).first()
    assert harvested is not None
    assert harvested.frequency == 'irregular'
    assert 'gravi' in harvested.tags  # support dcat:keyword
    assert 'geodesy' in harvested.tags  # support dcat:theme
    assert harvested.license.id == 'fr-lo'
    assert len(harvested.resources) == 1
    assert harvested.description.startswith('Data from the \'National network')
def test_match_license_from_rights_uri(self):
    '''A distribution's dct:rights URI should resolve the dataset license.'''
    expected = LicenseFactory()
    dataset_node = BNode()
    graph = Graph()

    # Minimal dataset description.
    graph.set((dataset_node, RDF.type, DCAT.Dataset))
    graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

    # Single distribution whose rights point at the license URL.
    distribution_node = BNode()
    graph.set((distribution_node, RDF.type, DCAT.Distribution))
    graph.set((distribution_node, DCAT.downloadURL, URIRef(faker.uri())))
    graph.set((distribution_node, DCT.rights, URIRef(expected.url)))
    graph.add((dataset_node, DCAT.distribution, distribution_node))

    dataset = dataset_from_rdf(graph)

    assert isinstance(dataset.license, License)
    assert dataset.license == expected
def test_match_license_from_license_title(self):
    '''A dct:license literal holding the title should resolve the license.'''
    expected = LicenseFactory()
    dataset_node = BNode()
    graph = Graph()

    # Minimal dataset description.
    graph.set((dataset_node, RDF.type, DCAT.Dataset))
    graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

    # Single distribution whose license is given by title only.
    distribution_node = BNode()
    graph.set((distribution_node, RDF.type, DCAT.Distribution))
    graph.set((distribution_node, DCAT.downloadURL, URIRef(faker.uri())))
    graph.set((distribution_node, DCT.license, Literal(expected.title)))
    graph.add((dataset_node, DCAT.distribution, distribution_node))

    dataset = dataset_from_rdf(graph)
    dataset.validate()

    self.assertIsInstance(dataset.license, License)
    self.assertEqual(dataset.license, expected)
def test_json_ld(self):
    '''It should render a json-ld markup into the dataset page

    Checks the dataset-level fields, the single resource listed under
    'distribution', the single community resource listed under
    'contributedDistribution', then extras, license and author.
    '''
    resource = ResourceFactory(format='png',
                               description='* Title 1\n* Title 2',
                               metrics={'views': 10})
    license = LicenseFactory(url='http://www.datagouv.fr/licence')
    dataset = DatasetFactory(license=license,
                             tags=['foo', 'bar'],
                             resources=[resource],
                             description='a&éèëù$£',
                             owner=UserFactory(),
                             extras={'foo': 'bar'})
    community_resource = CommunityResourceFactory(
        dataset=dataset,
        format='csv',
        description='* Title 1\n* Title 2',
        metrics={'views': 42})

    url = url_for('datasets.show', dataset=dataset)
    response = self.get(url)
    self.assert200(response)
    json_ld = self.get_json_ld(response)

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and was removed in Python 3.12.
    self.assertEqual(json_ld['@context'], 'http://schema.org')
    self.assertEqual(json_ld['@type'], 'Dataset')
    self.assertEqual(json_ld['@id'], str(dataset.id))
    self.assertEqual(json_ld['description'], 'a&éèëù$£')
    self.assertEqual(json_ld['alternateName'], dataset.slug)
    # Dates are compared on their first 16 characters only.
    self.assertEqual(json_ld['dateCreated'][:16],
                     dataset.created_at.isoformat()[:16])
    self.assertEqual(json_ld['dateModified'][:16],
                     dataset.last_modified.isoformat()[:16])
    self.assertEqual(json_ld['url'], 'http://localhost{}'.format(url))
    self.assertEqual(json_ld['name'], dataset.title)
    self.assertEqual(json_ld['keywords'], 'bar,foo')
    self.assertEqual(len(json_ld['distribution']), 1)

    json_ld_resource = json_ld['distribution'][0]
    self.assertEqual(json_ld_resource['@type'], 'DataDownload')
    self.assertEqual(json_ld_resource['@id'], str(resource.id))
    self.assertEqual(json_ld_resource['url'], resource.latest)
    self.assertEqual(json_ld_resource['name'], resource.title)
    self.assertEqual(json_ld_resource['contentUrl'], resource.url)
    self.assertEqual(json_ld_resource['dateCreated'][:16],
                     resource.created_at.isoformat()[:16])
    self.assertEqual(json_ld_resource['dateModified'][:16],
                     resource.modified.isoformat()[:16])
    self.assertEqual(json_ld_resource['datePublished'][:16],
                     resource.published.isoformat()[:16])
    self.assertEqual(json_ld_resource['encodingFormat'], 'png')
    self.assertEqual(json_ld_resource['contentSize'], resource.filesize)
    self.assertEqual(json_ld_resource['fileFormat'], resource.mime)
    self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
    self.assertEqual(json_ld_resource['interactionStatistic'], {
        '@type': 'InteractionCounter',
        'interactionType': {
            '@type': 'DownloadAction',
        },
        'userInteractionCount': 10,
    })

    self.assertEqual(len(json_ld['contributedDistribution']), 1)

    json_ld_resource = json_ld['contributedDistribution'][0]
    self.assertEqual(json_ld_resource['@type'], 'DataDownload')
    self.assertEqual(json_ld_resource['@id'], str(community_resource.id))
    self.assertEqual(json_ld_resource['url'], community_resource.latest)
    self.assertEqual(json_ld_resource['name'], community_resource.title)
    self.assertEqual(json_ld_resource['contentUrl'],
                     community_resource.url)
    self.assertEqual(json_ld_resource['dateCreated'][:16],
                     community_resource.created_at.isoformat()[:16])
    self.assertEqual(json_ld_resource['dateModified'][:16],
                     community_resource.modified.isoformat()[:16])
    self.assertEqual(json_ld_resource['datePublished'][:16],
                     community_resource.published.isoformat()[:16])
    self.assertEqual(json_ld_resource['encodingFormat'],
                     community_resource.format)
    self.assertEqual(json_ld_resource['contentSize'],
                     community_resource.filesize)
    self.assertEqual(json_ld_resource['fileFormat'],
                     community_resource.mime)
    self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
    self.assertEqual(json_ld_resource['interactionStatistic'], {
        '@type': 'InteractionCounter',
        'interactionType': {
            '@type': 'DownloadAction',
        },
        'userInteractionCount': 42,
    })

    self.assertEqual(json_ld['extras'], [{
        '@type': 'http://schema.org/PropertyValue',
        'name': 'foo',
        'value': 'bar',
    }])
    self.assertEqual(json_ld['license'], 'http://www.datagouv.fr/licence')
    self.assertEqual(json_ld['author']['@type'], 'Person')
def test_multiple_strings(self):
    '''With several candidate strings, a later one may yield the match.'''
    expected = LicenseFactory()
    candidates = ('should not match', expected.id)

    guessed = License.guess(*candidates)

    self.assertIsInstance(guessed, License)
    self.assertEqual(expected.id, guessed.id)
def test_match_by_title_with_extra_inner_space(self):
    '''A title with a doubled inner space should still match the license.'''
    license = LicenseFactory(title='License ODBl')
    # The guessed string must really contain 2 spaces instead of 1,
    # otherwise this only repeats the exact-match test.
    found = License.guess('License  ODBl')
    self.assertIsInstance(found, License)
    self.assertEqual(license.id, found.id)
def test_match_by_title_with_mismatching_case(self):
    '''A title differing only by letter case should still match.'''
    expected = LicenseFactory(title='License ODBl')

    guessed = License.guess('License ODBL')

    self.assertIsInstance(guessed, License)
    self.assertEqual(expected.id, guessed.id)
def test_match_by_alternate_title_with_extra_inner_space(self):
    '''An alternate title with a doubled inner space should still match.'''
    license = LicenseFactory(alternate_titles=['License ODBl'])
    # The guessed string must really contain 2 spaces instead of 1,
    # otherwise this only repeats the exact-match test.
    found = License.guess('License  ODBl')
    assert isinstance(found, License)
    assert license.id == found.id
def test_match_by_title_with_low_edit_distance(self):
    '''A title one character away ('Licence' vs 'License') should match.'''
    expected = LicenseFactory(title='License')

    guessed = License.guess('Licence')

    self.assertIsInstance(guessed, License)
    self.assertEqual(expected.id, guessed.id)
def test_not_found_with_default(self):
    '''When nothing matches, the supplied default should be returned.'''
    fallback = LicenseFactory()

    guessed = License.guess('should not be found', default=fallback)

    self.assertEqual(guessed.id, fallback.id)
def test_exact_match_by_title(self):
    '''Guessing with the exact title should return that license.'''
    expected = LicenseFactory()

    guessed = License.guess(expected.title)

    self.assertIsInstance(guessed, License)
    self.assertEqual(expected.id, guessed.id)
def generate_licenses(count):
    '''Call `LicenseFactory` `count` times, discarding the results.'''
    remaining = count
    while remaining > 0:
        LicenseFactory()
        remaining -= 1
def setUp(self):
    '''Create a batch of 3 licenses before each test.'''
    # Feed the DB with random data to ensure true matching
    noise_count = 3
    LicenseFactory.create_batch(noise_count)
def test_multiple_strings(self):
    '''With several candidate strings, a later one may yield the match.'''
    expected = LicenseFactory()
    candidates = ('should not match', expected.id)

    guessed = License.guess(*candidates)

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def test_exact_match_by_title_with_spaces(self):
    '''A title padded with one space on each side should still match.'''
    expected = LicenseFactory()
    padded_title = ' {0} '.format(expected.title)

    guessed = License.guess(padded_title)

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def test_exact_match_by_alternate_title_with_spaces(self):
    '''A padded alternate title should still match its license.'''
    alternate_title = faker.sentence()
    expected = LicenseFactory(alternate_titles=[alternate_title])
    padded_title = ' {0} '.format(alternate_title)

    guessed = License.guess(padded_title)

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def setUp(self):
    '''Create a batch of 3 licenses before each test.'''
    # Feed the DB with random data to ensure true matching
    batch_size = 3
    LicenseFactory.create_batch(batch_size)
def test_exact_match_by_title_with_spaces(self):
    '''A title padded with one space on each side should still match.'''
    expected = LicenseFactory()
    padded_title = ' {0} '.format(expected.title)

    guessed = License.guess(padded_title)

    self.assertIsInstance(guessed, License)
    self.assertEqual(expected.id, guessed.id)
def test_match_by_alternate_title_with_mismatching_case(self):
    '''An alternate title differing only by letter case should match.'''
    expected = LicenseFactory(alternate_titles=['License ODBl'])

    guessed = License.guess('License ODBL')

    assert isinstance(guessed, License)
    assert expected.id == guessed.id
def test_match_by_alternate_title_with_low_edit_distance(self):
    '''An alternate title one character away should still match.'''
    expected = LicenseFactory(alternate_titles=['License'])

    guessed = License.guess('Licence')

    assert isinstance(guessed, License)
    assert expected.id == guessed.id