Пример #1
0
    def test_datasets_none_found(self):
        """A freshly created graph yields no datasets from ``datasets()``."""
        parser = RDFParser()
        parser.g = Graph()

        found = list(parser.datasets())

        eq_(len(found), 0)
Пример #2
0
    def test__datasets(self):
        """``_datasets()`` yields three items for the default graph."""
        parser = RDFParser()
        parser.g = _default_graph()

        found = list(parser._datasets())

        eq_(len(found), 3)
    def test_distribution_format_IMT_field(self):
        """Check a ``dct:format`` that points at a ``dct:IMT`` node.

        The parsed resource is expected to take its ``format`` from the
        node's ``rdfs:label`` and its ``mimetype`` from the ``rdf:value``.
        """
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
        imt_node = BNode()

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            # The IMT node carries both the media type and a readable label.
            (imt_node, RDF.type, DCT.IMT),
            (imt_node, RDF.value, Literal('text/turtle')),
            (imt_node, RDFS.label, Literal('Turtle')),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCT['format'], imt_node),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        eq_(first_resource['format'], u'Turtle')
        eq_(first_resource['mimetype'], u'text/turtle')
    def test_dataset_turtle_1(self):
        """Parse the ``dataset_deri.ttl`` fixture as n3 and check the result.

        Expects exactly one dataset with a single resource, and verifies the
        resource's name, URL and URI.
        """
        fixture = self._get_file_contents('dataset_deri.ttl')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(fixture, _format='n3')

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        abandoned_vehicles = parsed[0]
        eq_(abandoned_vehicles['title'], 'Abandoned Vehicles')
        eq_(len(abandoned_vehicles['resources']), 1)

        csv_resource = abandoned_vehicles['resources'][0]
        expected_url = (
            u'http://data.london.gov.uk/datafiles/environment/abandoned-vehicles-borough.csv'
        )
        expected_uri = u'http://data.london.gov.uk/dataset/Abandoned_Vehicles/csv'

        eq_(csv_resource['name'], u'CSV distribution of: Abandoned Vehicles')
        eq_(csv_resource['url'], expected_url)
        eq_(csv_resource['uri'], expected_uri)
    def test_dataset_json_ld_1(self):
        """Parse the ``catalog_pod.jsonld`` fixture and check dataset fields.

        Covers the title, the contact/publisher extras, the number of
        resources and the URLs of the ``widgets.csv`` resource.
        """
        fixture = self._get_file_contents('catalog_pod.jsonld')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(fixture, _format='json-ld')

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        widget_stats = parsed[0]
        extras_by_key = {
            extra['key']: extra['value'] for extra in widget_stats['extras']
        }

        eq_(widget_stats['title'], 'U.S. Widget Manufacturing Statistics')

        # NOTE(review): the two e-mail literals below look like they were
        # masked by whatever tool extracted this file -- confirm them against
        # the fixture before relying on these assertions.
        eq_(extras_by_key['contact_name'], 'Jane Doe')
        eq_(extras_by_key['contact_email'], 'mailto:[email protected]')
        eq_(extras_by_key['publisher_name'], 'Widget Services')
        eq_(extras_by_key['publisher_email'], '*****@*****.**')

        eq_(len(widget_stats['resources']), 4)

        # Pick the resource by name rather than relying on its position.
        matching = [
            candidate for candidate in widget_stats['resources']
            if candidate['name'] == 'widgets.csv'
        ]
        widgets_csv = matching[0]
        expected_location = (
            u'https://data.agency.gov/datasets/widgets-statistics/widgets.csv')

        eq_(widgets_csv['name'], u'widgets.csv')
        eq_(widgets_csv['url'], expected_location)
        eq_(widgets_csv['download_url'], expected_location)
    def test_dataset_compatibility_mode(self):
        """Parse ``dataset.rdf`` with ``compatibility_mode=True``.

        Checks the ``dcat_``-prefixed extras and the ``language`` extra of the
        single dataset that is expected to be returned.
        """
        fixture = self._get_file_contents('dataset.rdf')

        parser = RDFParser(profiles=['euro_dcat_ap'], compatibility_mode=True)
        parser.parse(fixture)

        parsed = list(parser.datasets())
        eq_(len(parsed), 1)

        dataset = parsed[0]

        def _get_extra_value(key):
            # Return the value of the first extra stored under `key`, or None
            # when no extra uses that key.
            for extra in dataset['extras']:
                if extra['key'] == key:
                    return extra['value']
            return None

        eq_(_get_extra_value('dcat_issued'), u'2012-05-10')
        eq_(_get_extra_value('dcat_modified'), u'2012-05-10T21:04:00')
        eq_(_get_extra_value('dcat_publisher_name'),
            'Publishing Organization for dataset 1')
        # NOTE(review): this e-mail literal looks like it was masked by the
        # tool that extracted this file -- confirm it against the fixture.
        eq_(_get_extra_value('dcat_publisher_email'), '*****@*****.**')
        eq_(_get_extra_value('language'), 'ca,en,es')
    def test_distribution_format_format_normalized(self):
        """Check format handling when media type and ``dct:format`` are set.

        When ``toolkit.check_ckan_version(min_version='2.3')`` is true the
        resource format is expected to be ``CSV`` with a ``text/csv``
        mimetype; otherwise the ``dct:format`` literal is expected unchanged.
        """
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCAT.mediaType, Literal('text/csv')),
            (distribution_ref, DCT['format'],
             Literal('Comma Separated Values')),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        if not toolkit.check_ckan_version(min_version='2.3'):
            eq_(first_resource['format'], u'Comma Separated Values')
            return

        eq_(first_resource['format'], u'CSV')
        eq_(first_resource['mimetype'], u'text/csv')
    def test_spatial_one_dct_spatial_instance_no_uri(self):
        """Check spatial extras for a ``dct:spatial`` given as a blank node.

        The location has a geometry and a label but no URI, so no
        ``spatial_uri`` extra is expected, while ``spatial_text`` and
        ``spatial`` are.
        """
        dataset_ref = URIRef('http://example.org/datasets/1')
        location_node = BNode()
        geometry = Literal('{"type": "Point", "coordinates": [23, 45]}',
                           datatype=GEOJSON_IMT)

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (dataset_ref, DCT.spatial, location_node),
            (location_node, RDF.type, DCT.Location),
            (location_node, LOCN.geometry, geometry),
            (location_node, SKOS.prefLabel, Literal('Newark')),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        extras = self._extras(parsed[0])

        assert_true('spatial_uri' not in extras)
        eq_(extras['spatial_text'], 'Newark')
        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
Пример #9
0
    def test_profiles_are_called_on_datasets(self):
        """Every dataset from the parser has truthy ``profile_1``/``profile_2``.

        The parser's ``_profiles`` are replaced with the two mock profile
        classes, which are presumably what set those keys (their definitions
        are outside this test -- confirm there).
        """
        p = RDFParser()

        p._profiles = [MockRDFProfile1, MockRDFProfile2]

        p.g = _default_graph()

        datasets = list(p.datasets())

        # Guard against a vacuous pass: if no datasets were yielded, the loop
        # below would not execute a single assertion.
        assert datasets, 'Expected the default graph to yield datasets'

        for dataset in datasets:
            assert dataset['profile_1']
            assert dataset['profile_2']
Пример #10
0
    def test_profiles_via_config_option(self):
        """Unknown profiles named in the config make ``RDFParser()`` raise.

        The config option is set to two profile names and the resulting
        ``RDFProfileException`` message is checked. The test fails if no
        exception is raised, and the original config is restored even when an
        assertion fails so the change cannot leak into other tests.
        """
        original_config = config.copy()

        config[RDF_PROFILES_CONFIG_OPTION] = 'profile_conf_1 profile_conf_2'
        try:
            RDFParser()
        except RDFProfileException as e:
            eq_(str(e), 'Unknown RDF profiles: profile_conf_1, profile_conf_2')
        else:
            # Without this branch the test would pass silently when the
            # parser accepts the unknown profile names.
            raise AssertionError(
                'RDFParser() did not raise RDFProfileException')
        finally:
            # Always undo the config change, whatever the outcome above.
            config.clear()
            config.update(original_config)
Пример #11
0
    def test_datasets(self):
        """``datasets()`` yields three dicts with a ``title`` key each."""
        parser = RDFParser()
        parser.g = _default_graph()

        collected = list(parser.datasets())

        for item in collected:
            assert 'title' in item

        eq_(len(collected), 3)
    def test_dataset_version_adms(self):
        """An ``adms:version`` literal ends up as the dataset ``version``."""
        dataset_ref = URIRef("http://example.org/datasets/1")

        graph = Graph()
        graph.add((dataset_ref, RDF.type, DCAT.Dataset))
        graph.add((dataset_ref, ADMS.version, Literal('2.3a')))

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())

        eq_(parsed[0]['version'], u'2.3a')
Пример #13
0
    def test_parse_data_raises_on_parse_error(self):
        """``parse`` raises ``RDFParserException`` for unparseable input.

        Tried with an empty string, with plain text in the default format and
        with the same text parsed as n3.
        """
        parser = RDFParser()
        bad_input = 'Wrong data'

        # Each entry is the (positional, keyword) arguments for one call.
        attempts = (
            (('',), {}),
            ((bad_input,), {}),
            ((bad_input,), {'_format': 'n3'}),
        )

        for args, kwargs in attempts:
            nose.tools.assert_raises(
                RDFParserException, parser.parse, *args, **kwargs)
Пример #14
0
    def test_parse_data_different_format(self):
        """Parsing an n3 document adds its two triples to an empty graph."""
        n3_document = '''
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

        <http://example.org> a rdfs:SomeClass ;
            rdfs:label "Some label" .
        '''

        parser = RDFParser()

        # The graph starts out empty ...
        eq_(len(parser.g), 0)

        parser.parse(n3_document, _format='n3')

        # ... and holds one type triple plus one label triple afterwards.
        eq_(len(parser.g), 2)
Пример #15
0
    def test_parse_without_pagination(self):
        """``next_page()`` is ``None`` for a document with no hydra paging."""
        rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
        <rdfs:SomeClass rdf:about="http://example.org">
            <rdfs:label>Some label</rdfs:label>
        </rdfs:SomeClass>
        </rdf:RDF>
        '''

        parser = RDFParser()
        parser.parse(rdf_xml)

        following_page = parser.next_page()

        eq_(following_page, None)
    def test_catalog_xml_rdf(self):
        """Parse the ``catalog.rdf`` fixture and inspect 'Example dataset 1'.

        Two datasets are expected; the one titled 'Example dataset 1' should
        have three resources and two tags.
        """
        fixture = self._get_file_contents('catalog.rdf')

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.parse(fixture)

        parsed = list(parser.datasets())
        eq_(len(parsed), 2)

        # The order of the parsed datasets is not relied upon, so select the
        # wanted one by title.
        if parsed[0]['title'] == 'Example dataset 1':
            example_1 = parsed[0]
        else:
            example_1 = parsed[1]

        eq_(example_1['title'], 'Example dataset 1')
        eq_(len(example_1['resources']), 3)
        eq_(len(example_1['tags']), 2)
    def test_dataset_license_from_distribution_by_uri(self):
        """``license_id`` comes from the URI of a distribution's license.

        The distribution's ``dct:license`` object is a URI and the parsed
        dataset is expected to get ``cc-by`` as its ``license_id``.
        """
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
        license_ref = URIRef("http://www.opendefinition.org/licenses/cc-by")

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (dataset_ref, DCAT.distribution, distribution_ref),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCT.license, license_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())

        eq_(parsed[0]['license_id'], 'cc-by')
    def test_dataset_license_from_distribution_by_title(self):
        """``license_id`` comes from the title of a distribution's license.

        The distribution's ``dct:license`` object is a blank node with only a
        ``dct:title``; the parsed dataset is expected to get ``cc-by`` as its
        ``license_id``.
        """
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")
        license_node = BNode()

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (dataset_ref, DCAT.distribution, distribution_ref),
            (distribution_ref, DCT.license, license_node),
            (license_node, DCT.title, Literal("Creative Commons Attribution")),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())

        eq_(parsed[0]['license_id'], 'cc-by')
    def test_distribution_format_format_only(self):
        """A lone ``dct:format`` literal is used as the resource format."""
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCT['format'], Literal('CSV')),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        eq_(first_resource['format'], u'CSV')
    def test_distribution_format_imt_normalized(self):
        """A lone ``dcat:mediaType`` fills both format and mimetype.

        The media type used here is ``text/unknown-imt`` and the same string
        is expected back in both resource fields.
        """
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCAT.mediaType, Literal('text/unknown-imt')),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        eq_(first_resource['format'], u'text/unknown-imt')
        eq_(first_resource['mimetype'], u'text/unknown-imt')
Пример #21
0
    def test_parse_pagination_last_page(self):
        """``next_page()`` is ``None`` on a paged collection's last page.

        The document describes page 3, which is also its ``hydra:lastPage``,
        and contains no ``hydra:nextPage`` element.
        """
        last_page_xml = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         xmlns:hydra="http://www.w3.org/ns/hydra/core#">
         <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=3">
            <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
            <hydra:lastPage>http://example.com/catalog.xml?page=3</hydra:lastPage>
            <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">100</hydra:itemsPerPage>
            <hydra:firstPage>http://example.com/catalog.xml?page=1</hydra:firstPage>
            <hydra:previousPage>http://example.com/catalog.xml?page=2</hydra:previousPage>
        </hydra:PagedCollection>
        </rdf:RDF>
        '''

        parser = RDFParser()
        parser.parse(last_page_xml)

        following_page = parser.next_page()

        eq_(following_page, None)
    def test_distribution_download_url(self):
        """A ``dcat:downloadURL`` fills both ``url`` and ``download_url``."""
        dataset_ref = URIRef("http://example.org/datasets/1")
        distribution_ref = URIRef("http://example.org/datasets/1/ds/1")

        graph = Graph()
        for triple in (
            (dataset_ref, RDF.type, DCAT.Dataset),
            (distribution_ref, RDF.type, DCAT.Distribution),
            (distribution_ref, DCAT.downloadURL,
             Literal('http://download.url.org')),
            (dataset_ref, DCAT.distribution, distribution_ref),
        ):
            graph.add(triple)

        parser = RDFParser(profiles=['euro_dcat_ap'])
        parser.g = graph

        parsed = list(parser.datasets())
        first_resource = parsed[0]['resources'][0]

        eq_(first_resource['url'], u'http://download.url.org')
        eq_(first_resource['download_url'], u'http://download.url.org')