예제 #1
0
    def test_unicode(self):
        # Non-ASCII titles and descriptions must survive the RDF
        # round-trip untouched, on the dataset and on its resource.
        title = 'ééé'
        description = 'éééé'
        graph = Graph()

        dataset_node = BNode()
        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(title)))
        graph.add((dataset_node, DCT.description, Literal(description)))

        dist_node = BNode()
        graph.add((dist_node, RDF.type, DCAT.Distribution))
        graph.add((dist_node, DCT.title, Literal(title)))
        graph.add((dist_node, DCT.description, Literal(description)))
        graph.add((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
        graph.add((dataset_node, DCAT.distribution, dist_node))

        dataset = dataset_from_rdf(graph)
        dataset.validate()
        assert dataset.title == title
        assert dataset.description == description

        resource = dataset.resources[0]
        assert resource.title == title
        assert resource.description == description
예제 #2
0
File: dcat.py — Project: opendatateam/udata
def parse_url(url, quiet=False, rid=''):
    '''Parse the datasets in a DCAT format located at URL (debug)'''
    # Debug helper: fetch and parse a remote DCAT catalog, then pretty-print
    # every dataset found (optionally filtered by remote id substring `rid`).
    # `quiet` silences the chatty rdflib/udata loggers.
    if quiet:
        # Plain loop instead of a list comprehension used only for its
        # side effects (and `l` shadowed nothing readable).
        for logger_name in ('rdflib', 'udata.core.dataset'):
            logging.getLogger(logger_name).setLevel(logging.ERROR)

    class MockSource:
        # Minimal stand-in for a harvest source: only `url` is read.
        url = ''

    class MockJob:
        # Minimal stand-in for a harvest job: collects parsed items.
        items = []

    class MockDatasetFactory(DatasetFactory):
        '''Use DatasetFactory without .save()'''
        @classmethod
        def _create(cls, model_class, *args, **kwargs):
            # Build the model instance but never persist it.
            return model_class(*args, **kwargs)

    echo(cyan('Parsing url {}'.format(url)))
    source = MockSource()
    source.url = url
    backend = DcatBackend(source, dryrun=True)
    backend.job = MockJob()
    # `rdf_format` instead of `format` to avoid shadowing the builtin.
    rdf_format = backend.get_format()
    echo(yellow('Detected format: {}'.format(rdf_format)))
    graph = backend.parse_graph(url, rdf_format)

    # serialize/unserialize graph like in the job mechanism
    serialized = graph.serialize(format=rdf_format, indent=None)
    graph = Graph(namespace_manager=namespace_manager)
    graph.parse(data=serialized, format=rdf_format)

    for item in backend.job.items:
        # Empty `rid` matches everything; otherwise substring match.
        if not rid or rid in item.remote_id:
            echo(magenta('Processing item {}'.format(item.remote_id)))
            echo('Item kwargs: {}'.format(yellow(item.kwargs)))
            node = backend.get_node_from_item(item)
            dataset = MockDatasetFactory()
            dataset = dataset_from_rdf(graph, dataset, node=node)
            echo('')
            echo(green('Dataset found!'))
            echo('Title: {}'.format(yellow(dataset)))
            echo('License: {}'.format(yellow(dataset.license)))
            echo('Description: {}'.format(yellow(dataset.description)))
            echo('Tags: {}'.format(yellow(dataset.tags)))
            echo('Resources: {}'.format(yellow([(r.title, r.format, r.url) for r in dataset.resources])))

            try:
                dataset.validate()
            except mongoengine.errors.ValidationError as e:
                log.error(e, exc_info=True)
            else:
                echo(green('Dataset is valid ✅'))
            echo('')
예제 #3
0
File: dcat.py — Project: opendatateam/udata
    def process(self, item):
        # Rebuild the graph that the job serialized, then map the item's
        # node onto a (possibly existing) dataset.
        payload = self.job.data['graph']
        rdf_format = self.job.data['format']
        node = self.get_node_from_item(item)

        graph = Graph(namespace_manager=namespace_manager)
        graph.parse(data=bytes(payload, encoding='utf8'), format=rdf_format)

        dataset = self.get_dataset(item.remote_id)
        return dataset_from_rdf(graph, dataset, node=node)
예제 #4
0
    def test_minimal(self):
        # A type and a title are enough to extract a valid dataset.
        graph = Graph()
        dataset_node = BNode()
        expected_title = faker.sentence()

        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(expected_title)))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.title == expected_title
예제 #5
0
    def test_html_description(self):
        # HTML markup in the description is stripped to plain text.
        graph = Graph()
        dataset_node = BNode()

        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.identifier, Literal(faker.uuid4())))
        graph.add((dataset_node, DCT.title, Literal(faker.sentence())))
        graph.add((dataset_node, DCT.description,
                   Literal('<div>a description</div>')))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.description == 'a description'
예제 #6
0
    def process(self, item):
        # Prefer a per-item serialized graph, falling back to the
        # job-level one (handles legacy graphs).
        payload = item.kwargs.get('graph', self.job.data['graph'])

        graph = Graph(namespace_manager=namespace_manager)
        graph.parse(data=payload, format='json-ld')

        # Recreate the original node reference when its id and kind
        # were recorded on the item.
        node = None
        if 'nid' in item.kwargs and 'type' in item.kwargs:
            nid = item.kwargs['nid']
            if item.kwargs['type'] == 'uriref':
                node = URIRef(nid)
            else:
                node = BNode(nid)

        dataset = self.get_dataset(item.remote_id)
        return dataset_from_rdf(graph, dataset, node=node)
예제 #7
0
    def test_update(self):
        # Parsing with an existing dataset updates it in place
        # (same id, new title).
        original = DatasetFactory()
        graph = Graph()
        dataset_node = URIRef('https://test.org/dataset')
        expected_title = faker.sentence()

        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(expected_title)))

        dataset = dataset_from_rdf(graph, dataset=original)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.id == original.id
        assert dataset.title == expected_title
예제 #8
0
    def test_match_license_from_rights_uri(self):
        # A dct:rights URI matching a known license URL resolves to
        # that license.
        known_license = LicenseFactory()
        graph = Graph()

        dataset_node = BNode()
        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

        dist_node = BNode()
        graph.set((dist_node, RDF.type, DCAT.Distribution))
        graph.set((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
        graph.set((dist_node, DCT.rights, URIRef(known_license.url)))
        graph.add((dataset_node, DCAT.distribution, dist_node))

        dataset = dataset_from_rdf(graph)

        assert isinstance(dataset.license, License)
        assert dataset.license == known_license
예제 #9
0
    def test_dataset_has_resources_from_literal_instead_of_uriref(self):
        # Some producers express the download URL as a Literal instead
        # of a URIRef; a resource must still be extracted.
        graph = Graph()
        dataset_node = BNode()
        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(faker.sentence())))

        dist_node = BNode()
        graph.set((dist_node, RDF.type, DCAT.Distribution))
        graph.set((dist_node, DCAT.downloadURL, Literal(faker.uri())))
        graph.add((dataset_node, DCAT.distribution, dist_node))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert len(dataset.resources) == 1
예제 #10
0
    def test_dataset_has_resources_from_buggy_plural_distribution(self):
        '''Try to extract resources from the wrong distributions attribute'''
        graph = Graph()
        dataset_node = BNode()
        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(faker.sentence())))

        dist_node = BNode()
        graph.set((dist_node, RDF.type, DCAT.Distribution))
        graph.set((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
        # Deliberately use the incorrect plural predicate name.
        graph.add((dataset_node, DCAT.distributions, dist_node))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert len(dataset.resources) == 1
예제 #11
0
    def test_dataset_has_resources(self):
        # Every dcat:distribution node becomes one resource.
        graph = Graph()
        dataset_node = BNode()
        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(faker.sentence())))

        for _ in range(3):
            dist_node = BNode()
            graph.set((dist_node, RDF.type, DCAT.Distribution))
            graph.set((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
            graph.add((dataset_node, DCAT.distribution, dist_node))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert len(dataset.resources) == 3
예제 #12
0
    def test_theme_and_tags(self):
        # Both dcat:keyword and dcat:theme values end up in tags.
        graph = Graph()
        dataset_node = BNode()
        keywords = faker.words(nb=3)
        themes = faker.words(nb=3)

        graph.add((dataset_node, RDF.type, DCAT.Dataset))
        graph.add((dataset_node, DCT.title, Literal(faker.sentence())))
        for keyword in keywords:
            graph.add((dataset_node, DCAT.keyword, Literal(keyword)))
        for theme in themes:
            graph.add((dataset_node, DCAT.theme, Literal(theme)))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert set(dataset.tags) == set(keywords + themes)
예제 #13
0
    def test_minimal_from_multiple(self):
        # With several datasets in the graph, the explicitly passed
        # node is the one extracted.
        graph = Graph()
        target_node = BNode()
        expected_title = faker.sentence()

        graph.add((target_node, RDF.type, DCAT.Dataset))
        graph.add((target_node, DCT.title, Literal(expected_title)))

        for _ in range(3):
            decoy = BNode()
            graph.add((decoy, RDF.type, DCAT.Dataset))
            graph.add((decoy, DCT.title, Literal(faker.sentence())))

        dataset = dataset_from_rdf(graph, node=target_node)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.title == expected_title
예제 #14
0
    def test_minimal_from_multiple(self):
        # Several datasets share the graph; only the node passed in
        # should be parsed.
        graph = Graph()
        wanted = BNode()
        wanted_title = faker.sentence()

        graph.add((wanted, RDF.type, DCAT.Dataset))
        graph.add((wanted, DCT.title, Literal(wanted_title)))

        for _ in range(3):
            other_node = BNode()
            graph.add((other_node, RDF.type, DCAT.Dataset))
            graph.add((other_node, DCT.title, Literal(faker.sentence())))

        dataset = dataset_from_rdf(graph, node=wanted)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.title == wanted_title
예제 #15
0
    def test_match_license_from_license_title(self):
        # A dct:license Literal matching a known license title resolves
        # to that license.
        known_license = LicenseFactory()
        graph = Graph()

        dataset_node = BNode()
        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.title, Literal(faker.sentence())))

        dist_node = BNode()
        graph.set((dist_node, RDF.type, DCAT.Distribution))
        graph.set((dist_node, DCAT.downloadURL, URIRef(faker.uri())))
        graph.set((dist_node, DCT.license, Literal(known_license.title)))
        graph.add((dataset_node, DCAT.distribution, dist_node))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        self.assertIsInstance(dataset.license, License)
        self.assertEqual(dataset.license, known_license)
예제 #16
0
    def test_all_fields(self):
        # Exercise every mapped DCAT property at once: identifier,
        # title, acronym, description, frequency, temporal coverage,
        # keywords and the uri/identifier extras.
        uri = 'https://test.org/dataset'
        dataset_node = URIRef(uri)
        graph = Graph()

        identifier = faker.uuid4()
        title = faker.sentence()
        acronym = faker.word()
        description = faker.paragraph()
        tags = faker.words(nb=3)
        start = faker.past_date(start_date='-30d')
        end = faker.future_date(end_date='+30d')

        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.identifier, Literal(identifier)))
        graph.set((dataset_node, DCT.title, Literal(title)))
        graph.set((dataset_node, SKOS.altLabel, Literal(acronym)))
        graph.set((dataset_node, DCT.description, Literal(description)))
        graph.set((dataset_node, DCT.accrualPeriodicity, FREQ.daily))

        period = BNode()
        graph.add((dataset_node, DCT.temporal, period))
        graph.set((period, RDF.type, DCT.PeriodOfTime))
        graph.set((period, SCHEMA.startDate, Literal(start)))
        graph.set((period, SCHEMA.endDate, Literal(end)))

        for tag in tags:
            graph.add((dataset_node, DCAT.keyword, Literal(tag)))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        self.assertIsInstance(dataset, Dataset)
        self.assertEqual(dataset.title, title)
        self.assertEqual(dataset.acronym, acronym)
        self.assertEqual(dataset.description, description)
        self.assertEqual(dataset.frequency, 'daily')
        self.assertEqual(set(dataset.tags), set(tags))
        self.assertIsInstance(dataset.temporal_coverage, db.DateRange)
        self.assertEqual(dataset.temporal_coverage.start, start)
        self.assertEqual(dataset.temporal_coverage.end, end)

        extras = dataset.extras
        self.assertIn('dct:identifier', extras)
        self.assertEqual(extras['dct:identifier'], identifier)
        self.assertIn('uri', extras)
        self.assertEqual(extras['uri'], uri)
예제 #17
0
    def test_all_fields(self):
        # Same full-coverage scenario, pytest-style assertions.
        uri = 'https://test.org/dataset'
        dataset_node = URIRef(uri)
        graph = Graph()

        identifier = faker.uuid4()
        title = faker.sentence()
        acronym = faker.word()
        description = faker.paragraph()
        tags = faker.words(nb=3)
        start = faker.past_date(start_date='-30d')
        end = faker.future_date(end_date='+30d')

        graph.set((dataset_node, RDF.type, DCAT.Dataset))
        graph.set((dataset_node, DCT.identifier, Literal(identifier)))
        graph.set((dataset_node, DCT.title, Literal(title)))
        graph.set((dataset_node, SKOS.altLabel, Literal(acronym)))
        graph.set((dataset_node, DCT.description, Literal(description)))
        graph.set((dataset_node, DCT.accrualPeriodicity, FREQ.daily))

        period = BNode()
        graph.add((dataset_node, DCT.temporal, period))
        graph.set((period, RDF.type, DCT.PeriodOfTime))
        graph.set((period, SCHEMA.startDate, Literal(start)))
        graph.set((period, SCHEMA.endDate, Literal(end)))

        for tag in tags:
            graph.add((dataset_node, DCAT.keyword, Literal(tag)))

        dataset = dataset_from_rdf(graph)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.title == title
        assert dataset.acronym == acronym
        assert dataset.description == description
        assert dataset.frequency == 'daily'
        assert set(dataset.tags) == set(tags)
        assert isinstance(dataset.temporal_coverage, db.DateRange)
        assert dataset.temporal_coverage.start == start
        assert dataset.temporal_coverage.end == end

        extras = dataset.extras
        assert 'dct:identifier' in extras
        assert extras['dct:identifier'] == identifier
        assert 'uri' in extras
        assert extras['uri'] == uri
예제 #18
0
 def process(self, item):
     # Parse the item's JSON-LD graph and map it onto the
     # (possibly pre-existing) dataset for this remote id.
     graph = Graph(namespace_manager=namespace_manager)
     graph.parse(data=item.kwargs['graph'], format='json-ld')
     target = self.get_dataset(item.remote_id)
     return dataset_from_rdf(graph, target)