def test_catalog_modified_date(self):
    """Catalog filtered by modified_since only returns the newer dataset."""
    factories.Dataset(title='First dataset')
    # Ensure the second dataset gets a strictly later modification time.
    time.sleep(1)
    dataset2 = factories.Dataset(title='Second dataset')

    url = url_for('dcat_catalog', _format='ttl',
                  modified_since=dataset2['metadata_modified'])

    app = self._get_test_app()
    content = app.get(url).body

    parser = RDFParser()
    parser.parse(content, _format='turtle')
    found = list(parser.datasets())

    eq_(len(found), 1)
    eq_(found[0]['title'], dataset2['title'])
def test_catalog_fq_filter(self, app):
    """Only datasets matching the fq tag filter appear in the catalog."""
    first = factories.Dataset(
        title='First dataset',
        tags=[{'name': 'economy'}, {'name': 'statistics'}],
    )
    second = factories.Dataset(
        title='Second dataset',
        tags=[{'name': 'economy'}],
    )
    # Not tagged 'economy', so it must be filtered out.
    factories.Dataset(
        title='Third dataset',
        tags=[{'name': 'statistics'}],
    )

    url = url_for('dcat.read_catalog', _format='ttl', fq='tags:economy')
    content = app.get(url).body

    parser = RDFParser()
    parser.parse(content, _format='turtle')
    found = list(parser.datasets())

    expected_titles = [first['title'], second['title']]
    assert len(found) == 2
    assert found[0]['title'] in expected_titles
    assert found[1]['title'] in expected_titles
def _create_datasets(self):
    """Create the orgs, groups and parent/child datasets used by the
    collection tests.

    Idempotent: if the fixtures were already created on this instance
    (detected via the ``org1`` attribute) it returns immediately.
    """
    log.info('Creating datasets for testing collections')

    # Create the fixtures only once per instance.
    if hasattr(self, 'org1'):
        # NOTE(review): this used to `return reset_db` — returning the
        # function object without calling it, with a TODO saying it did
        # not work. A plain early return keeps the effective behavior.
        return

    self.org1 = ckan_factories.Organization()
    log.info('Org1 created {}'.format(self.org1['id']))
    self.org2 = ckan_factories.Organization()
    log.info('Org2 created {}'.format(self.org2['id']))

    self.group1 = ckan_factories.Group()
    log.info('Group1 created {}'.format(self.group1['id']))
    self.group2 = ckan_factories.Group()
    log.info('Group2 created {}'.format(self.group2['id']))

    # Parent collection dataset, member of both groups.
    self.parent = ckan_factories.Dataset(
        owner_org=self.org1['id'],
        extras=[{'key': 'collection_metadata', 'value': 'true'}],
        title='The Father test_collections_unique',
        groups=[{'name': self.group1['name']},
                {'name': self.group2['name']}])
    log.info('Parent created {}'.format(self.parent['id']))

    # BUG FIX: child1's title previously duplicated child2's
    # ('The Child 2 test_collections_unique'); it is now distinct.
    self.child1 = ckan_factories.Dataset(
        owner_org=self.org1['id'],
        extras=[{'key': 'collection_package_id',
                 'value': self.parent['id']}],
        title='The Child 1 test_collections_unique',
        groups=[{'name': self.group1['name']}])
    log.info('Child 1 created {}'.format(self.child1['id']))

    self.child2 = ckan_factories.Dataset(
        owner_org=self.org1['id'],
        extras=[{'key': 'collection_package_id',
                 'value': self.parent['id']}],
        title='The Child 2 test_collections_unique',
        groups=[{'name': self.group2['name']}])
    log.info('Child 2 created {}'.format(self.child2['id']))
def test_update_dataset_but_with_same_name(self):
    """A dataset keeps its own name even when that name is 'taken' by itself.

    This can happen if e.g. a trailing space is removed from the title:
    the harvester sees the title changed and considers a new name, but
    it should simply reuse the existing one.
    """
    factories.Dataset(name='trees')
    factories.Dataset(name='trees1')

    result = _ensure_name_is_unique('trees', existing_name='trees')
    assert result == 'trees'
def test_harvest_sources_job_history_clear(self):
    """Clearing job history for all sources removes jobs and harvest
    objects but keeps the sources themselves and the harvested datasets.
    """
    # prepare: two sources, each with one job, harvest object and dataset
    data_dict = SOURCE_DICT.copy()
    source_1 = factories.HarvestSourceObj(**data_dict)
    data_dict['name'] = 'another-source'
    data_dict['url'] = 'http://another-url'
    source_2 = factories.HarvestSourceObj(**data_dict)

    job_1 = factories.HarvestJobObj(source=source_1)
    dataset_1 = ckan_factories.Dataset()
    object_1_ = factories.HarvestObjectObj(job=job_1, source=source_1,
                                           package_id=dataset_1['id'])

    job_2 = factories.HarvestJobObj(source=source_2)
    dataset_2 = ckan_factories.Dataset()
    object_2_ = factories.HarvestObjectObj(job=job_2, source=source_2,
                                           package_id=dataset_2['id'])

    # execute
    context = {
        'model': model,
        'session': model.Session,
        'ignore_auth': True,
        'user': ''
    }
    result = toolkit.get_action('harvest_sources_job_history_clear')(
        context, {})

    # verify: both sources are reported as cleared
    assert_equal(sorted(result), sorted([{
        'id': source_1.id
    }, {
        'id': source_2.id
    }]))

    source_1 = harvest_model.HarvestSource.get(source_1.id)
    assert source_1
    assert_equal(harvest_model.HarvestJob.get(job_1.id), None)
    assert_equal(harvest_model.HarvestObject.get(object_1_.id), None)
    dataset_from_db_1 = model.Package.get(dataset_1['id'])
    assert dataset_from_db_1, 'is None'
    assert_equal(dataset_from_db_1.id, dataset_1['id'])

    # BUG FIX: this previously looked up source_1.id again, so source_2
    # was never actually checked.
    source_2 = harvest_model.HarvestSource.get(source_2.id)
    assert source_2
    assert_equal(harvest_model.HarvestJob.get(job_2.id), None)
    assert_equal(harvest_model.HarvestObject.get(object_2_.id), None)
    dataset_from_db_2 = model.Package.get(dataset_2['id'])
    assert dataset_from_db_2, 'is None'
    assert_equal(dataset_from_db_2.id, dataset_2['id'])
def test_catalog_q_search(self, app):
    """Free-text q search restricts the catalog to matching datasets."""
    matching = factories.Dataset(title='First dataset')
    factories.Dataset(title='Second dataset')

    url = url_for('dcat.read_catalog', _format='ttl', q='First')
    content = app.get(url).body

    parser = RDFParser()
    parser.parse(content, _format='turtle')
    found = list(parser.datasets())

    assert len(found) == 1
    assert found[0]['title'] == matching['title']
def test_dataset_ttl(self, app):
    """The ttl endpoint serves a parseable Turtle serialization."""
    dataset = factories.Dataset(notes='Test dataset')

    url = url_for('dcat.read_dataset', _id=dataset['name'], _format='ttl')
    response = app.get(url)

    assert response.headers['Content-Type'] == 'text/turtle'

    # Round-trip the body through the parser to prove it is valid Turtle.
    parser = RDFParser()
    parser.parse(response.body, _format='turtle')
    found = list(parser.datasets())

    assert len(found) == 1
    serialized = found[0]
    assert serialized['title'] == dataset['title']
    assert serialized['notes'] == dataset['notes']
def test_catalog_pagination_parameters(self, app):
    """Extra query params are carried through into pagination links."""
    for _ in range(12):
        factories.Dataset()

    url = url_for('dcat.read_catalog', _format='rdf',
                  modified_since='2018-03-22', extra_param='test')
    content = app.get(url).body

    graph = Graph()
    graph.parse(data=content, format='xml')

    pagination = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

    assert self._object_value(graph, pagination, HYDRA.itemsPerPage) == '10'

    first_page = self._object_value(graph, pagination, HYDRA.firstPage)
    expected = url_for('dcat.read_catalog', _format='rdf', page=1,
                       _external=True, modified_since='2018-03-22')
    assert _sort_query_params(first_page) == _sort_query_params(expected)
def test_harvest_source_job_history_clear(self):
    """Clearing one source's job history removes its jobs and harvest
    objects but keeps the source and the harvested dataset.
    """
    # prepare
    source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
    job = factories.HarvestJobObj(source=source)
    dataset = ckan_factories.Dataset()
    object_ = factories.HarvestObjectObj(job=job, source=source,
                                         package_id=dataset['id'])

    # execute
    context = {'model': model, 'session': model.Session,
               'ignore_auth': True, 'user': ''}
    result = toolkit.get_action('harvest_source_job_history_clear')(
        context, {'id': source.id})

    # verify: jobs and objects are gone, source and dataset remain
    assert_equal(result, {'id': source.id})
    source = harvest_model.HarvestSource.get(source.id)
    assert source
    assert_equal(harvest_model.HarvestJob.get(job.id), None)
    assert_equal(harvest_model.HarvestObject.get(object_.id), None)
    dataset_from_db = model.Package.get(dataset['id'])
    assert dataset_from_db, 'is None'
    assert_equal(dataset_from_db.id, dataset['id'])
def test_dataset_ttl(self):
    """The legacy dcat_dataset route serves valid Turtle."""
    dataset = factories.Dataset(notes='Test dataset')

    url = url_for('dcat_dataset', _id=dataset['name'], _format='ttl')
    response = self._get_test_app().get(url)

    eq_(response.headers['Content-Type'], 'text/turtle')

    # Parse the body to prove it is a real Turtle serialization.
    parser = RDFParser()
    parser.parse(response.body, _format='turtle')
    found = list(parser.datasets())

    eq_(len(found), 1)
    eq_(found[0]['title'], dataset['title'])
    eq_(found[0]['notes'], dataset['notes'])
def test_catalog_pagination(self):
    """Catalog pages are capped at 10 items with hydra paging links."""
    for _ in xrange(12):
        factories.Dataset()

    app = self._get_test_app()
    response = app.get(url_for('dcat_catalog', _format='rdf'))

    graph = Graph()
    graph.parse(data=response.body, format='xml')

    eq_(len(list(graph.subjects(RDF.type, DCAT.Dataset))), 10)

    pagination = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

    eq_(self._object_value(graph, pagination, HYDRA.totalItems), '12')
    eq_(self._object_value(graph, pagination, HYDRA.itemsPerPage), '10')

    def page_url(page):
        # All paging links point back at the catalog on the test host.
        return url_for('dcat_catalog', _format='rdf', page=page,
                       host='test.ckan.net')

    eq_(self._object_value(graph, pagination, HYDRA.firstPage), page_url(1))
    eq_(self._object_value(graph, pagination, HYDRA.nextPage), page_url(2))
    eq_(self._object_value(graph, pagination, HYDRA.lastPage), page_url(2))
def test_catalog_pagination(self, app):
    """Catalog pages are capped at 10 items with hydra paging links."""
    for _ in range(12):
        factories.Dataset()

    content = app.get(url_for('dcat.read_catalog', _format='rdf')).body

    graph = Graph()
    graph.parse(data=content, format='xml')

    assert len(list(graph.subjects(RDF.type, DCAT.Dataset))) == 10

    pagination = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

    assert self._object_value(graph, pagination, HYDRA.totalItems) == '12'
    assert self._object_value(graph, pagination, HYDRA.itemsPerPage) == '10'

    def expected(page):
        # Query-param order is not guaranteed, so compare sorted params.
        return _sort_query_params(
            url_for('dcat.read_catalog', _format='rdf', page=page,
                    _external=True))

    def actual(link):
        return _sort_query_params(
            self._object_value(graph, pagination, link))

    assert actual(HYDRA.firstPage) == expected(1)
    assert actual(HYDRA.nextPage) == expected(2)
    assert actual(HYDRA.lastPage) == expected(2)
def test_catalog_pagination_parameters(self):
    """Extra query params are carried through into pagination links."""
    for _ in range(12):
        factories.Dataset()

    app = self._get_test_app()
    url = url_for('dcat_catalog', _format='rdf',
                  modified_since='2018-03-22', extra_param='test')
    content = app.get(url).body

    graph = Graph()
    graph.parse(data=content, format='xml')

    pagination = list(graph.subjects(RDF.type, HYDRA.PagedCollection))[0]

    eq_(self._object_value(graph, pagination, HYDRA.itemsPerPage), '10')
    eq_(self._object_value(graph, pagination, HYDRA.firstPage),
        url_for('dcat_catalog', _format='rdf', page=1,
                host='test.ckan.net', modified_since='2018-03-22'))
def test_catalog_q_search(self):
    """Free-text q search restricts the catalog to matching datasets."""
    matching = factories.Dataset(title='First dataset')
    factories.Dataset(title='Second dataset')

    url = url_for('dcat_catalog', _format='ttl', q='First')
    content = self._get_test_app().get(url).body

    parser = RDFParser()
    parser.parse(content, _format='turtle')
    found = list(parser.datasets())

    eq_(len(found), 1)
    eq_(found[0]['title'], matching['title'])
def test_config_override(self):
    '''Tests if a parameter has precedence over a config value.'''
    factories.Dataset(name='trees')
    new_name = HarvesterBase._gen_new_name(
        'Trees', append_type='number-sequence')
    assert_equal(new_name, 'trees1')
def test_update_dataset_to_available_shorter_name(self):
    """A dataset may reclaim a shorter name that became available.

    Handy when reharvesting: after purging a duplicate set, a minor
    title change lets the dataset drop its appended number — users
    dislike unnecessary numbers.
    """
    factories.Dataset(name='trees1')
    result = _ensure_name_is_unique('trees', existing_name='trees1')
    assert result == 'trees'
def test_dataset_endpoint_disabled(self):
    """When the dataset endpoint is disabled, its route is not registered."""
    p.unload('dcat')
    p.load('dcat')
    dataset = factories.Dataset(notes='Test dataset')

    # With no registered route, url_for just echoes its arguments back
    # instead of building a path.
    url = url_for('dcat_dataset', _id=dataset['name'], _format='xml')
    assert not url.startswith('/')
    assert url.startswith('dcat_dataset')
def test_structured_data_not_generated(self, app):
    """No JSON-LD structured-data block is embedded in the dataset page."""
    dataset = factories.Dataset(notes='test description')
    page = app.get(url_for('dataset.read', id=dataset['name'])).body
    assert '<script type="application/ld+json">' not in page
def _create_packages_and_tracking(self):
    """Create a dataset and record 12 page visits from distinct IPs."""
    self.package = ckan_factories.Dataset()
    url = url_for(controller='package', action='read',
                  id=self.package['name'])
    app = self._get_test_app()
    # add 12 visit to the dataset page
    for visit in range(12):
        self._post_to_tracking(url=url, app=app,
                               ip='199.200.100.{}'.format(visit))
def test_dataset_no_header_returns_html(self, app):
    """Without an Accept header the dataset page is served as HTML."""
    dataset = factories.Dataset()
    response = app.get(url_for('dataset.read', id=dataset['name']))
    assert response.headers['Content-Type'] == 'text/html; charset=utf-8'
def test_selected_group_with_no_groups(self):
    """A dataset created with no groups yields an empty selected group."""
    dataset = factories.Dataset(
        tag_string='geography',
        accessLevel='public',
        contact_name='John Smith',
        contact_email='*****@*****.**',
        rights='No restrictions on public use',
        accrualPeriodicity='R/P1W')
    assert get_selected_group(dataset) == ''
def test_datagovtheme_html_loads(self, app):
    """The theme renders the dataset notes inside its description markup."""
    notes = 'Notes for a test dataset'
    # Timestamp-based name so repeated runs do not collide.
    name = 'random_test' + str(int(time.time()))
    dataset = factories.Dataset(notes=notes, name=name)

    body = app.get('/dataset/{}'.format(dataset['name'])).body

    assert '<div itemprop="description" class="notes embedded-content">' in body
    assert notes in body
def test_package_create_without_any_group_modifier(self):
    """Creating a dataset with no group info leaves its groups empty."""
    dataset = factories.Dataset(
        name='test-dataset-1',
        tag_string='geography',
        accessLevel='public',
        contact_name='John Smith',
        contact_email='*****@*****.**',
        rights='No restrictions on public use',
        accrualPeriodicity='R/P1W')
    assert dataset['groups'] == []
def test_dataset_no_header_returns_html(self):
    """Without an Accept header the dataset page is served as HTML."""
    dataset = factories.Dataset()
    url = url_for('dataset_read', id=dataset['name'])
    response = self._get_test_app().get(url)
    eq_(response.headers['Content-Type'], 'text/html; charset=utf-8')
def test_dataset_profiles_not_found(self, app):
    """Requesting an unknown RDF profile returns a 409 with an error."""
    dataset = factories.Dataset(notes='Test dataset')
    url = url_for('dcat.read_dataset', _id=dataset['name'],
                  _format='jsonld', profiles='nope')
    response = app.get(url, status=409)
    assert 'Unknown RDF profiles: nope' in response.body
def test_labels_enable_by_config(self, app):
    """With translation enabled, the Catalan label replaces the English one."""
    dataset = factories.Dataset(extras=[
        {'key': 'version_notes', 'value': 'bla'}
    ])
    url = url_for('dataset.read', id=dataset['name'], locale='ca')
    body = app.get(url).body
    assert 'Notes de la versió' in body
    assert 'Version notes' not in body
def test_labels_default(self, app):
    """Without a locale, the default English label is shown."""
    dataset = factories.Dataset(extras=[
        {'key': 'version_notes', 'value': 'bla'}
    ])
    body = app.get(url_for('dataset.read', id=dataset['name'])).body
    assert 'Version notes' in body
def test_dataset_not_supported_returns_html(self, app):
    """An unsupported Accept type falls back to the HTML page."""
    dataset = factories.Dataset()
    response = app.get(url_for('dataset.read', id=dataset['name']),
                       headers={'Accept': 'image/gif'})
    assert response.headers['Content-Type'] == 'text/html; charset=utf-8'
def test_dataset_multiple(self, app):
    """With several Accept types, the highest supported q value wins."""
    dataset = factories.Dataset()
    accept = ('text/csv; q=1.0, text/turtle; q=0.6, '
              'application/ld+json; q=0.3')
    response = app.get(url_for('dataset.read', id=dataset['name']),
                       headers={'Accept': accept})
    # text/csv is unsupported, so turtle (q=0.6) is the best match.
    assert response.headers['Content-Type'] == 'text/turtle'
def test_dataset_basic(self, app):
    """An application/ld+json Accept header yields JSON-LD output."""
    dataset = factories.Dataset()
    response = app.get(url_for('dataset.read', id=dataset['name']),
                       headers={'Accept': 'application/ld+json'})
    assert response.headers['Content-Type'] == 'application/ld+json'