Example #1
    def test_all_dataset_fields(self):
        resources = ResourceFactory.build_batch(3)
        dataset = DatasetFactory(tags=faker.words(nb=3), resources=resources,
                                 frequency='daily', acronym='acro')
        d = dataset_to_rdf(dataset)
        g = d.graph

        assert isinstance(d, RdfResource)
        assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

        assert g.value(d.identifier, RDF.type) == DCAT.Dataset

        assert isinstance(d.identifier, URIRef)
        uri = url_for('datasets.show_redirect',
                      dataset=dataset.id, _external=True)
        assert str(d.identifier) == uri
        assert d.value(DCT.identifier) == Literal(dataset.id)
        assert d.value(DCT.title) == Literal(dataset.title)
        assert d.value(SKOS.altLabel) == Literal(dataset.acronym)
        assert d.value(DCT.description) == Literal(dataset.description)
        assert d.value(DCT.issued) == Literal(dataset.created_at)
        assert d.value(DCT.modified) == Literal(dataset.last_modified)
        assert d.value(DCT.accrualPeriodicity).identifier == FREQ.daily
        expected_tags = set(Literal(t) for t in dataset.tags)
        assert set(d.objects(DCAT.keyword)) == expected_tags
        assert len(list(d.objects(DCAT.distribution))) == len(resources)
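
As a quick follow-up to Example #1: the `RdfResource` returned by `dataset_to_rdf` wraps a plain rdflib graph, so it can be serialized directly for inspection. This is only a usage sketch; the import path is an assumption and is not shown in the test above.

    # Usage sketch only -- the import path below is assumed, not taken from the test.
    from udata.core.dataset.rdf import dataset_to_rdf

    d = dataset_to_rdf(dataset)
    # d.graph is a regular rdflib Graph, so any rdflib serialization works.
    print(d.graph.serialize(format='turtle'))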
Example #2
    def process(self, item):
        '''Generate a random dataset from a fake identifier'''
        # Get or create a harvested dataset with this identifier.
        # Harvest metadata are already filled on creation.
        dataset = self.get_dataset(item.remote_id)

        # Here comes your implementation. You should:
        # - fetch the remote dataset (if necessary)
        # - validate the fetched payload
        # - map its content to the dataset fields
        # - store extra significant data in the `extra` attribute
        # - map resources data

        dataset.title = faker.sentence()
        dataset.description = faker.text()
        dataset.tags = list(set(faker.words(nb=faker.pyint())))

        # Resources
        for _ in range(faker.pyint()):
            dataset.resources.append(
                Resource(title=faker.sentence(),
                         description=faker.text(),
                         url=faker.url(),
                         filetype='remote',
                         mime=faker.mime_type(category='text'),
                         format=faker.file_extension(category='text'),
                         filesize=faker.pyint()))

        return dataset
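
For context, a `process()` method like the one above normally lives in a harvest backend class that also declares how items are queued. The sketch below follows udata's documented backend pattern, but the class name, `display_name`, and the `initialize()` body are illustrative assumptions.

    # Illustrative surrounding class -- names and identifier generation are assumptions.
    from udata.harvest.backends.base import BaseBackend
    from udata.utils import faker

    class RandomBackend(BaseBackend):
        display_name = 'Random'

        def initialize(self):
            '''Queue a batch of fake identifiers; process() is called once per item'''
            for _ in range(faker.pyint()):
                self.add_item(faker.uuid4())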
Example #3
    def test_all_dataset_fields(self):
        resources = ResourceFactory.build_batch(3)
        dataset = DatasetFactory(tags=faker.words(nb=3), resources=resources,
                                 frequency='daily')
        d = dataset_to_rdf(dataset)
        g = d.graph

        self.assertIsInstance(d, RdfResource)
        self.assertEqual(len(list(g.subjects(RDF.type, DCAT.Dataset))), 1)

        self.assertEqual(g.value(d.identifier, RDF.type), DCAT.Dataset)

        self.assertIsInstance(d.identifier, URIRef)
        uri = url_for('datasets.show_redirect',
                      dataset=dataset.id, _external=True)
        self.assertEqual(str(d.identifier), uri)
        self.assertEqual(d.value(DCT.identifier), Literal(dataset.id))
        self.assertEqual(d.value(DCT.title), Literal(dataset.title))
        self.assertEqual(d.value(DCT.description),
                         Literal(dataset.description))
        self.assertEqual(d.value(DCT.issued), Literal(dataset.created_at))
        self.assertEqual(d.value(DCT.modified),
                         Literal(dataset.last_modified))
        self.assertEqual(d.value(DCT.accrualPeriodicity).identifier,
                         FREQ.daily)
        expected_tags = set(Literal(t) for t in dataset.tags)
        self.assertEqual(set(d.objects(DCAT.keyword)), expected_tags)

        self.assertEqual(len(list(d.objects(DCAT.distribution))),
                         len(resources))
Example #4
    def test_theme_and_tags(self):
        node = BNode()
        g = Graph()

        tags = faker.words(nb=3)
        themes = faker.words(nb=3)
        g.add((node, RDF.type, DCAT.Dataset))
        g.add((node, DCT.title, Literal(faker.sentence())))
        for tag in tags:
            g.add((node, DCAT.keyword, Literal(tag)))
        for theme in themes:
            g.add((node, DCAT.theme, Literal(theme)))

        dataset = dataset_from_rdf(g)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert set(dataset.tags) == set(tags + themes)
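
The graph in this test is built triple by triple; the same structure can be obtained by parsing a small Turtle document with rdflib's standard `parse` API. The snippet below is only an illustration with placeholder literals, showing that both `dcat:keyword` and `dcat:theme` values end up in `dataset.tags`, as asserted above.

    # Equivalent graph parsed from Turtle (placeholder values).
    from rdflib import Graph

    TURTLE = '''
    @prefix dcat: <http://www.w3.org/ns/dcat#> .
    @prefix dct: <http://purl.org/dc/terms/> .

    [] a dcat:Dataset ;
        dct:title "A sample dataset" ;
        dcat:keyword "tag-1", "tag-2" ;
        dcat:theme "theme-1" .
    '''

    g = Graph().parse(data=TURTLE, format='turtle')
    dataset = dataset_from_rdf(g)
    # Keywords and themes are merged into tags, mirroring the assertion in the test.
    assert set(dataset.tags) == {'tag-1', 'tag-2', 'theme-1'}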
Example #5
    def test_all_fields(self):
        uri = 'https://test.org/dataset'
        node = URIRef(uri)
        g = Graph()

        id = faker.uuid4()
        title = faker.sentence()
        acronym = faker.word()
        description = faker.paragraph()
        tags = faker.words(nb=3)
        start = faker.past_date(start_date='-30d')
        end = faker.future_date(end_date='+30d')
        g.set((node, RDF.type, DCAT.Dataset))
        g.set((node, DCT.identifier, Literal(id)))
        g.set((node, DCT.title, Literal(title)))
        g.set((node, SKOS.altLabel, Literal(acronym)))
        g.set((node, DCT.description, Literal(description)))
        g.set((node, DCT.accrualPeriodicity, FREQ.daily))
        pot = BNode()
        g.add((node, DCT.temporal, pot))
        g.set((pot, RDF.type, DCT.PeriodOfTime))
        g.set((pot, SCHEMA.startDate, Literal(start)))
        g.set((pot, SCHEMA.endDate, Literal(end)))
        for tag in tags:
            g.add((node, DCAT.keyword, Literal(tag)))

        dataset = dataset_from_rdf(g)
        dataset.validate()

        assert isinstance(dataset, Dataset)
        assert dataset.title == title
        assert dataset.acronym == acronym
        assert dataset.description == description
        assert dataset.frequency == 'daily'
        assert set(dataset.tags) == set(tags)
        assert isinstance(dataset.temporal_coverage, db.DateRange)
        assert dataset.temporal_coverage.start == start
        assert dataset.temporal_coverage.end == end

        extras = dataset.extras
        assert 'dct:identifier' in extras
        assert extras['dct:identifier'] == id
        assert 'uri' in extras
        assert extras['uri'] == uri
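
The input graph does not have to be assembled by hand as in the test above; `dataset_from_rdf` only needs an rdflib graph containing a `dcat:Dataset` node, so the graph can just as well be parsed from an existing DCAT document. The URL and format below are placeholders, not part of the test.

    # Illustrative only -- URL and format are placeholders.
    from rdflib import Graph

    g = Graph()
    g.parse('https://example.org/dataset.rdf', format='xml')  # hypothetical DCAT document
    dataset = dataset_from_rdf(g)
    dataset.validate()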
Example #6
    def test_all_fields(self):
        uri = 'https://test.org/dataset'
        node = URIRef(uri)
        g = Graph()

        id = faker.uuid4()
        title = faker.sentence()
        acronym = faker.word()
        description = faker.paragraph()
        tags = faker.words(nb=3)
        start = faker.past_date(start_date='-30d')
        end = faker.future_date(end_date='+30d')
        g.set((node, RDF.type, DCAT.Dataset))
        g.set((node, DCT.identifier, Literal(id)))
        g.set((node, DCT.title, Literal(title)))
        g.set((node, SKOS.altLabel, Literal(acronym)))
        g.set((node, DCT.description, Literal(description)))
        g.set((node, DCT.accrualPeriodicity, FREQ.daily))
        pot = BNode()
        g.add((node, DCT.temporal, pot))
        g.set((pot, RDF.type, DCT.PeriodOfTime))
        g.set((pot, SCHEMA.startDate, Literal(start)))
        g.set((pot, SCHEMA.endDate, Literal(end)))
        for tag in tags:
            g.add((node, DCAT.keyword, Literal(tag)))

        dataset = dataset_from_rdf(g)
        dataset.validate()

        self.assertIsInstance(dataset, Dataset)
        self.assertEqual(dataset.title, title)
        self.assertEqual(dataset.acronym, acronym)
        self.assertEqual(dataset.description, description)
        self.assertEqual(dataset.frequency, 'daily')
        self.assertEqual(set(dataset.tags), set(tags))
        self.assertIsInstance(dataset.temporal_coverage, db.DateRange)
        self.assertEqual(dataset.temporal_coverage.start, start)
        self.assertEqual(dataset.temporal_coverage.end, end)

        extras = dataset.extras
        self.assertIn('dct:identifier', extras)
        self.assertEqual(extras['dct:identifier'], id)
        self.assertIn('uri', extras)
        self.assertEqual(extras['uri'], uri)