def test_datasets_none_found(self):
    """An empty graph yields no datasets."""
    parser = RDFParser()
    parser.g = Graph()

    eq_(len(list(parser.datasets())), 0)
def test__datasets(self):
    """The internal _datasets() generator finds every dataset node in the graph."""
    parser = RDFParser()
    parser.g = _default_graph()

    found = list(parser._datasets())
    eq_(len(found), 3)
def test_distribution_format_IMT_field(self):
    """A dct:IMT node maps rdfs:label to format and rdf:value to mimetype."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    imt = BNode()
    g.add((imt, RDF.type, DCT.IMT))
    g.add((imt, RDF.value, Literal('text/turtle')))
    g.add((imt, RDFS.label, Literal('Turtle')))
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCT['format'], imt))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'Turtle')
    eq_(resource['mimetype'], u'text/turtle')
def test_dataset_turtle_1(self):
    """A Turtle fixture parses into one dataset with a single CSV resource."""
    contents = self._get_file_contents('dataset_deri.ttl')

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.parse(contents, _format='n3')

    datasets = list(parser.datasets())
    eq_(len(datasets), 1)

    dataset = datasets[0]
    eq_(dataset['title'], 'Abandoned Vehicles')
    eq_(len(dataset['resources']), 1)

    resource = dataset['resources'][0]
    eq_(resource['name'], u'CSV distribution of: Abandoned Vehicles')
    eq_(resource['url'],
        u'http://data.london.gov.uk/datafiles/environment/abandoned-vehicles-borough.csv')
    eq_(resource['uri'],
        u'http://data.london.gov.uk/dataset/Abandoned_Vehicles/csv')
def test_dataset_json_ld_1(self):
    """A POD-style JSON-LD catalog parses into one dataset with four resources."""
    contents = self._get_file_contents('catalog_pod.jsonld')

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.parse(contents, _format='json-ld')

    datasets = list(parser.datasets())
    eq_(len(datasets), 1)

    dataset = datasets[0]
    extras = dict((e['key'], e['value']) for e in dataset['extras'])

    eq_(dataset['title'], 'U.S. Widget Manufacturing Statistics')

    eq_(extras['contact_name'], 'Jane Doe')
    eq_(extras['contact_email'], 'mailto:[email protected]')
    eq_(extras['publisher_name'], 'Widget Services')
    # NOTE(review): this expected value looks redacted/masked — confirm
    # against the catalog_pod.jsonld fixture.
    eq_(extras['publisher_email'], '*****@*****.**')

    eq_(len(dataset['resources']), 4)

    resource = [r for r in dataset['resources']
                if r['name'] == 'widgets.csv'][0]
    eq_(resource['name'], u'widgets.csv')
    eq_(resource['url'],
        u'https://data.agency.gov/datasets/widgets-statistics/widgets.csv')
    eq_(resource['download_url'],
        u'https://data.agency.gov/datasets/widgets-statistics/widgets.csv')
def test_dataset_compatibility_mode(self):
    """Compatibility mode exposes legacy ckanext-dcat extras (dcat_* keys)."""
    contents = self._get_file_contents('dataset.rdf')

    parser = RDFParser(profiles=['euro_dcat_ap'], compatibility_mode=True)
    parser.parse(contents)

    datasets = list(parser.datasets())
    eq_(len(datasets), 1)
    dataset = datasets[0]

    def _get_extra_value(key):
        # First matching extra's value, or None when the key is absent.
        matches = [extra['value'] for extra in dataset['extras']
                   if extra['key'] == key]
        return matches[0] if matches else None

    eq_(_get_extra_value('dcat_issued'), u'2012-05-10')
    eq_(_get_extra_value('dcat_modified'), u'2012-05-10T21:04:00')
    eq_(_get_extra_value('dcat_publisher_name'),
        'Publishing Organization for dataset 1')
    # NOTE(review): this expected value looks redacted/masked — confirm
    # against the dataset.rdf fixture.
    eq_(_get_extra_value('dcat_publisher_email'), '*****@*****.**')
    eq_(_get_extra_value('language'), 'ca,en,es')
def test_distribution_format_format_normalized(self):
    """On CKAN >= 2.3 the format is normalized from the mediaType; otherwise
    the literal dct:format value is kept as-is."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCAT.mediaType, Literal('text/csv')))
    g.add((distribution_ref, DCT['format'], Literal('Comma Separated Values')))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    resource = datasets[0]['resources'][0]

    if toolkit.check_ckan_version(min_version='2.3'):
        eq_(resource['format'], u'CSV')
        eq_(resource['mimetype'], u'text/csv')
    else:
        eq_(resource['format'], u'Comma Separated Values')
def test_spatial_one_dct_spatial_instance_no_uri(self):
    """A blank-node dct:Location yields spatial_text and spatial extras,
    but no spatial_uri."""
    g = Graph()

    dataset_ref = URIRef('http://example.org/datasets/1')
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    location_ref = BNode()
    g.add((dataset_ref, DCT.spatial, location_ref))
    g.add((location_ref, RDF.type, DCT.Location))
    g.add((location_ref,
           LOCN.geometry,
           Literal('{"type": "Point", "coordinates": [23, 45]}',
                   datatype=GEOJSON_IMT)))
    g.add((location_ref, SKOS.prefLabel, Literal('Newark')))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    extras = self._extras(datasets[0])

    assert_true('spatial_uri' not in extras)
    eq_(extras['spatial_text'], 'Newark')
    eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
def test_profiles_are_called_on_datasets(self):
    """Every registered profile gets a chance to mutate each dataset dict."""
    parser = RDFParser()
    parser._profiles = [MockRDFProfile1, MockRDFProfile2]
    parser.g = _default_graph()

    for dataset_dict in parser.datasets():
        # Each mock profile stamps its own marker key on the dict.
        assert dataset_dict['profile_1']
        assert dataset_dict['profile_2']
def test_profiles_via_config_option(self):
    """Unknown profile names in the config option raise RDFProfileException.

    Bug fix: the previous version silently passed when RDFParser() did not
    raise at all (no assertion ran inside the bare try/except), and it did
    not restore the global config if the eq_ assertion failed.  The exception
    is now required via try/except/else, and the config is restored in a
    finally block so other tests are never polluted.
    """
    original_config = config.copy()
    config[RDF_PROFILES_CONFIG_OPTION] = 'profile_conf_1 profile_conf_2'
    try:
        try:
            RDFParser()
        except RDFProfileException as e:
            eq_(str(e), 'Unknown RDF profiles: profile_conf_1, profile_conf_2')
        else:
            raise AssertionError('RDFProfileException was not raised')
    finally:
        # Always restore the global config, even on assertion failure.
        config.clear()
        config.update(original_config)
def test_datasets(self):
    """datasets() yields one dict per dataset node, each carrying a title."""
    parser = RDFParser()
    parser.g = _default_graph()

    collected = []
    for dataset_dict in parser.datasets():
        assert 'title' in dataset_dict
        collected.append(dataset_dict)

    eq_(len(collected), 3)
def test_dataset_version_adms(self):
    """adms:version is mapped onto the dataset's version field."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))
    g.add((dataset_ref, ADMS.version, Literal('2.3a')))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    dataset = list(parser.datasets())[0]
    eq_(dataset['version'], u'2.3a')
def test_parse_data_raises_on_parse_error(self):
    """parse() wraps underlying serialization errors in RDFParserException."""
    parser = RDFParser()
    bad_data = 'Wrong data'

    # Empty input, garbage input, and garbage with an explicit format
    # must all raise the parser's own exception type.
    nose.tools.assert_raises(RDFParserException, parser.parse, '')
    nose.tools.assert_raises(RDFParserException, parser.parse, bad_data)
    nose.tools.assert_raises(RDFParserException, parser.parse, bad_data,
                             _format='n3')
def test_parse_data_different_format(self):
    """Non-default serializations (here Turtle/n3) parse when the format
    is given explicitly."""
    data = '''
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

    <http://example.org> a rdfs:SomeClass ;
        rdfs:label "Some label" .
    '''

    parser = RDFParser()
    eq_(len(parser.g), 0)

    parser.parse(data, _format='n3')
    # The snippet defines exactly two triples (rdf:type and rdfs:label).
    eq_(len(parser.g), 2)
def test_parse_without_pagination(self):
    """Without hydra pagination metadata, next_page() returns None."""
    data = '''<?xml version="1.0" encoding="utf-8" ?>
    <rdf:RDF
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
     <rdfs:SomeClass rdf:about="http://example.org">
      <rdfs:label>Some label</rdfs:label>
     </rdfs:SomeClass>
    </rdf:RDF>
    '''

    parser = RDFParser()
    parser.parse(data)

    eq_(parser.next_page(), None)
def test_catalog_xml_rdf(self):
    """An RDF/XML catalog fixture yields two datasets; spot-check the first."""
    contents = self._get_file_contents('catalog.rdf')

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.parse(contents)

    datasets = list(parser.datasets())
    eq_(len(datasets), 2)

    # datasets() gives no ordering guarantee, so pick by title.
    if datasets[0]['title'] == 'Example dataset 1':
        dataset = datasets[0]
    else:
        dataset = datasets[1]

    eq_(dataset['title'], 'Example dataset 1')
    eq_(len(dataset['resources']), 3)
    eq_(len(dataset['tags']), 2)
def test_dataset_license_from_distribution_by_uri(self):
    """license_id is derived from the URI of the distribution's dcat:license."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((dataset_ref, DCAT.distribution, distribution_ref))
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCT.license,
           URIRef("http://www.opendefinition.org/licenses/cc-by")))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    dataset = list(parser.datasets())[0]
    eq_(dataset['license_id'], 'cc-by')
def test_dataset_license_from_distribution_by_title(self):
    """license_id is derived from the dct:title of the dcat:license node."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    license_ref = BNode()
    g.add((distribution_ref, DCT.license, license_ref))
    g.add((license_ref, DCT.title, Literal("Creative Commons Attribution")))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    dataset = list(parser.datasets())[0]
    eq_(dataset['license_id'], 'cc-by')
def test_distribution_format_format_only(self):
    """A plain literal dct:format is used directly as the resource format."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCT['format'], Literal('CSV')))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'CSV')
def test_distribution_format_imt_normalized(self):
    """An unrecognized mediaType is kept verbatim for both format and mimetype."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCAT.mediaType, Literal('text/unknown-imt')))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'text/unknown-imt')
    eq_(resource['mimetype'], u'text/unknown-imt')
def test_parse_pagination_last_page(self):
    """On the last hydra page (no hydra:nextPage), next_page() returns None."""
    data = '''<?xml version="1.0" encoding="utf-8" ?>
    <rdf:RDF
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
     xmlns:hydra="http://www.w3.org/ns/hydra/core#">
     <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=3">
      <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
      <hydra:lastPage>http://example.com/catalog.xml?page=3</hydra:lastPage>
      <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">100</hydra:itemsPerPage>
      <hydra:firstPage>http://example.com/catalog.xml?page=1</hydra:firstPage>
      <hydra:previousPage>http://example.com/catalog.xml?page=2</hydra:previousPage>
     </hydra:PagedCollection>
    </rdf:RDF>
    '''

    parser = RDFParser()
    parser.parse(data)

    eq_(parser.next_page(), None)
def test_distribution_download_url(self):
    """dcat:downloadURL populates both the resource url and download_url."""
    g = Graph()

    dataset_ref = URIRef("http://example.org/datasets/1")
    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution_ref, RDF.type, DCAT.Distribution))
    g.add((distribution_ref, DCAT.downloadURL,
           Literal('http://download.url.org')))
    g.add((dataset_ref, DCAT.distribution, distribution_ref))

    parser = RDFParser(profiles=['euro_dcat_ap'])
    parser.g = g

    datasets = list(parser.datasets())
    resource = datasets[0]['resources'][0]

    eq_(resource['url'], u'http://download.url.org')
    eq_(resource['download_url'], u'http://download.url.org')