def test_all_dataset_fields(self):
    resources = ResourceFactory.build_batch(3)
    dataset = DatasetFactory(tags=faker.words(nb=3), resources=resources,
                             frequency='daily', acronym='acro')

    d = dataset_to_rdf(dataset)
    g = d.graph

    assert isinstance(d, RdfResource)
    # Exactly one subject in the graph should be typed as a dcat:Dataset.
    assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

    assert g.value(d.identifier, RDF.type) == DCAT.Dataset

    assert isinstance(d.identifier, URIRef)
    uri = url_for('datasets.show_redirect', dataset=dataset.id, _external=True)
    assert str(d.identifier) == uri
    assert d.value(DCT.identifier) == Literal(dataset.id)
    assert d.value(DCT.title) == Literal(dataset.title)
    assert d.value(SKOS.altLabel) == Literal(dataset.acronym)
    assert d.value(DCT.description) == Literal(dataset.description)
    assert d.value(DCT.issued) == Literal(dataset.created_at)
    assert d.value(DCT.modified) == Literal(dataset.last_modified)
    assert d.value(DCT.accrualPeriodicity).identifier == FREQ.daily
    expected_tags = set(Literal(t) for t in dataset.tags)
    assert set(d.objects(DCAT.keyword)) == expected_tags

    assert len(list(d.objects(DCAT.distribution))) == len(resources)
def process(self, item):
    '''Generate a random dataset from a fake identifier'''
    # Get or create a harvested dataset with this identifier.
    # Harvest metadata are already filled on creation.
    dataset = self.get_dataset(item.remote_id)

    # Here comes your implementation. You should:
    # - fetch the remote dataset (if necessary)
    # - validate the fetched payload
    # - map its content to the dataset fields
    # - store extra significant data in the `extra` attribute
    # - map resources data
    dataset.title = faker.sentence()
    dataset.description = faker.text()
    dataset.tags = list(set(faker.words(nb=faker.pyint())))

    # Resources
    for i in range(faker.pyint()):
        dataset.resources.append(Resource(
            title=faker.sentence(),
            description=faker.text(),
            url=faker.url(),
            filetype='remote',
            mime=faker.mime_type(category='text'),
            format=faker.file_extension(category='text'),
            filesize=faker.pyint()))

    return dataset
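# The `process()` method above is only half of a harvest backend: a companion
# `initialize()` enqueues the remote identifiers that `process()` later receives
# as items. Below is a minimal sketch of the surrounding class, assuming udata's
# BaseBackend provides `add_item()` and `get_dataset()` as in its bundled
# backends; the class name, display name, and hard-coded item count are
# illustrative assumptions, not taken from the snippet above.
from udata.harvest.backends.base import BaseBackend
from udata.utils import faker


class RandomBackend(BaseBackend):
    display_name = 'Random'

    def initialize(self):
        '''Enqueue a few fake identifiers; each becomes an item for process()'''
        for _ in range(3):
            self.add_item(faker.uuid4())

    # def process(self, item): ...  # as defined above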
def test_all_dataset_fields(self):
    resources = ResourceFactory.build_batch(3)
    dataset = DatasetFactory(tags=faker.words(nb=3), resources=resources,
                             frequency='daily')

    d = dataset_to_rdf(dataset)
    g = d.graph

    self.assertIsInstance(d, RdfResource)
    self.assertEqual(len(list(g.subjects(RDF.type, DCAT.Dataset))), 1)

    self.assertEqual(g.value(d.identifier, RDF.type), DCAT.Dataset)

    self.assertIsInstance(d.identifier, URIRef)
    uri = url_for('datasets.show_redirect', dataset=dataset.id, _external=True)
    self.assertEqual(str(d.identifier), uri)
    self.assertEqual(d.value(DCT.identifier), Literal(dataset.id))
    self.assertEqual(d.value(DCT.title), Literal(dataset.title))
    self.assertEqual(d.value(DCT.description), Literal(dataset.description))
    self.assertEqual(d.value(DCT.issued), Literal(dataset.created_at))
    self.assertEqual(d.value(DCT.modified), Literal(dataset.last_modified))
    self.assertEqual(d.value(DCT.accrualPeriodicity).identifier, FREQ.daily)
    expected_tags = set(Literal(t) for t in dataset.tags)
    self.assertEqual(set(d.objects(DCAT.keyword)), expected_tags)

    self.assertEqual(len(list(d.objects(DCAT.distribution))), len(resources))
def test_theme_and_tags(self):
    node = BNode()
    g = Graph()
    tags = faker.words(nb=3)
    themes = faker.words(nb=3)

    g.add((node, RDF.type, DCAT.Dataset))
    g.add((node, DCT.title, Literal(faker.sentence())))
    for tag in tags:
        g.add((node, DCAT.keyword, Literal(tag)))
    for theme in themes:
        g.add((node, DCAT.theme, Literal(theme)))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert isinstance(dataset, Dataset)
    # Keywords and themes are both folded into the dataset tags.
    assert set(dataset.tags) == set(tags + themes)
def test_all_fields(self):
    uri = 'https://test.org/dataset'
    node = URIRef(uri)
    g = Graph()

    id = faker.uuid4()
    title = faker.sentence()
    acronym = faker.word()
    description = faker.paragraph()
    tags = faker.words(nb=3)
    start = faker.past_date(start_date='-30d')
    end = faker.future_date(end_date='+30d')

    g.set((node, RDF.type, DCAT.Dataset))
    g.set((node, DCT.identifier, Literal(id)))
    g.set((node, DCT.title, Literal(title)))
    g.set((node, SKOS.altLabel, Literal(acronym)))
    g.set((node, DCT.description, Literal(description)))
    g.set((node, DCT.accrualPeriodicity, FREQ.daily))
    pot = BNode()
    g.add((node, DCT.temporal, pot))
    g.set((pot, RDF.type, DCT.PeriodOfTime))
    g.set((pot, SCHEMA.startDate, Literal(start)))
    g.set((pot, SCHEMA.endDate, Literal(end)))
    for tag in tags:
        g.add((node, DCAT.keyword, Literal(tag)))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    assert isinstance(dataset, Dataset)
    assert dataset.title == title
    assert dataset.acronym == acronym
    assert dataset.description == description
    assert dataset.frequency == 'daily'
    assert set(dataset.tags) == set(tags)
    assert isinstance(dataset.temporal_coverage, db.DateRange)
    assert dataset.temporal_coverage.start == start
    assert dataset.temporal_coverage.end == end

    extras = dataset.extras
    assert 'dct:identifier' in extras
    assert extras['dct:identifier'] == id
    assert 'uri' in extras
    assert extras['uri'] == uri
def test_all_fields(self):
    uri = 'https://test.org/dataset'
    node = URIRef(uri)
    g = Graph()

    id = faker.uuid4()
    title = faker.sentence()
    acronym = faker.word()
    description = faker.paragraph()
    tags = faker.words(nb=3)
    start = faker.past_date(start_date='-30d')
    end = faker.future_date(end_date='+30d')

    g.set((node, RDF.type, DCAT.Dataset))
    g.set((node, DCT.identifier, Literal(id)))
    g.set((node, DCT.title, Literal(title)))
    g.set((node, SKOS.altLabel, Literal(acronym)))
    g.set((node, DCT.description, Literal(description)))
    g.set((node, DCT.accrualPeriodicity, FREQ.daily))
    pot = BNode()
    g.add((node, DCT.temporal, pot))
    g.set((pot, RDF.type, DCT.PeriodOfTime))
    g.set((pot, SCHEMA.startDate, Literal(start)))
    g.set((pot, SCHEMA.endDate, Literal(end)))
    for tag in tags:
        g.add((node, DCAT.keyword, Literal(tag)))

    dataset = dataset_from_rdf(g)
    dataset.validate()

    self.assertIsInstance(dataset, Dataset)
    self.assertEqual(dataset.title, title)
    self.assertEqual(dataset.acronym, acronym)
    self.assertEqual(dataset.description, description)
    self.assertEqual(dataset.frequency, 'daily')
    self.assertEqual(set(dataset.tags), set(tags))
    self.assertIsInstance(dataset.temporal_coverage, db.DateRange)
    self.assertEqual(dataset.temporal_coverage.start, start)
    self.assertEqual(dataset.temporal_coverage.end, end)

    extras = dataset.extras
    self.assertIn('dct:identifier', extras)
    self.assertEqual(extras['dct:identifier'], id)
    self.assertIn('uri', extras)
    self.assertEqual(extras['uri'], uri)