def process(self, item):
        """Populate a harvested dataset with randomly generated content.

        A real harvester backend would fetch the remote payload, validate
        it and map it onto the dataset fields and resources; this fake
        implementation fills everything with faker data instead.
        """
        # Look up (or create) the dataset for this remote identifier;
        # harvest metadata are already filled in on creation.
        dataset = self.get_dataset(item.remote_id)

        # Core descriptive fields, all randomly generated.
        dataset.title = faker.sentence()
        dataset.description = faker.text()
        # De-duplicate the random word list before storing it as tags.
        dataset.tags = list(set(faker.words(nb=faker.pyint())))

        # Attach a random number of fake remote resources.
        for _ in range(faker.pyint()):
            resource = Resource(
                title=faker.sentence(),
                description=faker.text(),
                url=faker.url(),
                filetype='remote',
                mime=faker.mime_type(category='text'),
                format=faker.file_extension(category='text'),
                filesize=faker.pyint(),
            )
            dataset.resources.append(resource)

        return dataset
Exemplo n.º 2
0
def metadata_factory(url, data=None):
    """Serialize fake URL-check metadata for *url* as a JSON string.

    Entries in *data*, when provided, override the generated defaults.
    """
    response = {
        'etag': '',
        'url': url,
        'content-length': faker.pyint(),
        'content-disposition': '',
        'content-md5': faker.md5(),
        'content-location': '',
        'expires': faker.iso8601(),
        'status': 200,
        'updated': faker.iso8601(),
        'last-modified': faker.iso8601(),
        'content-encoding': 'gzip',
        'content-type': faker.mime_type(),
    }
    # Merging preserves the insertion order of existing keys, so the
    # serialized key order matches the defaults above.
    merged = {**response, **(data or {})}
    return json.dumps(merged)
Exemplo n.º 3
0
def all_metadata():
    """Build fake dataset metadata carrying a single resource.

    Returns a ``(data, extras)`` tuple where ``extras`` exposes the
    generated resource dict under the ``resource_data`` key.
    """
    resource = {
        'name': faker.sentence(),
        'description': faker.paragraph(),
        'url': faker.unique_url(),
        'mimetype': faker.mime_type(),
        'format': faker.file_extension(),
    }
    dataset = {
        'name': faker.unique_string(),
        'title': faker.sentence(),
        'notes': faker.paragraph(),
        # Three uniquely named tags.
        'tags': [{'name': faker.unique_string()} for _ in range(3)],
        'resources': [resource],
    }
    return dataset, {'resource_data': resource}
Exemplo n.º 4
0
    def test_all_resource_fields(self):
        """A DCAT distribution with every supported field maps fully onto a Resource."""
        distribution = BNode()
        graph = Graph()

        # Expected values, generated up front so they can be asserted later.
        expected_title = faker.sentence()
        expected_url = faker.uri()
        expected_description = faker.paragraph()
        expected_filesize = faker.pyint()
        expected_issued = faker.date_time_between(start_date='-60d',
                                                  end_date='-30d')
        expected_modified = faker.past_datetime(start_date='-30d')
        expected_mime = faker.mime_type()
        expected_sha1 = faker.sha1()

        # Describe the distribution node.
        graph.add((distribution, RDF.type, DCAT.Distribution))
        graph.add((distribution, DCT.title, Literal(expected_title)))
        graph.add((distribution, DCT.description,
                   Literal(expected_description)))
        graph.add((distribution, DCAT.downloadURL, Literal(expected_url)))
        graph.add((distribution, DCT.issued, Literal(expected_issued)))
        graph.add((distribution, DCT.modified, Literal(expected_modified)))
        graph.add((distribution, DCAT.bytesSize, Literal(expected_filesize)))
        graph.add((distribution, DCAT.mediaType, Literal(expected_mime)))
        graph.add((distribution, DCT.term('format'), Literal('CSV')))

        # Attach an SPDX sha1 checksum node.
        checksum_node = BNode()
        graph.add((distribution, SPDX.checksum, checksum_node))
        graph.add((checksum_node, RDF.type, SPDX.Checksum))
        graph.add((checksum_node, SPDX.algorithm,
                   SPDX.checksumAlgorithm_sha1))
        graph.add((checksum_node, SPDX.checksumValue,
                   Literal(expected_sha1)))

        resource = resource_from_rdf(graph)
        resource.validate()

        assert isinstance(resource, Resource)
        assert resource.title == expected_title
        assert resource.url == expected_url
        assert resource.description == expected_description
        assert resource.filesize == expected_filesize
        assert resource.mime == expected_mime
        assert isinstance(resource.checksum, Checksum)
        assert resource.checksum.type == 'sha1'
        assert resource.checksum.value == expected_sha1
        assert resource.published == expected_issued
        assert resource.modified == expected_modified
        # Formats are expected to be lowercased on mapping.
        assert resource.format == 'csv'
Exemplo n.º 5
0
def metadata_factory(url, data=None):
    """Base for a mocked Croquemort HTTP response.

    Returns the metadata for *url* as a JSON string; entries in *data*,
    when provided, override the generated defaults.
    """
    response = {
        'etag': '',
        'checked-url': url,
        'content-length': faker.pyint(),
        'content-disposition': '',
        'content-md5': faker.md5(),
        'content-location': '',
        'expires': faker.iso8601(),
        'final-status-code': 200,
        'updated': faker.iso8601(),
        'last-modified': faker.iso8601(),
        'content-encoding': 'gzip',
        'content-type': faker.mime_type(),
    }
    # Merging preserves the insertion order of existing keys, so the
    # serialized key order matches the defaults above.
    merged = {**response, **(data or {})}
    return json.dumps(merged)