Exemplo n.º 1
0
    def process_resource(self, resource: ResourceWrapper):
        """Write a resource's rows to its configured DB table and re-emit them.

        A resource whose name has no entry in ``self.converted_resources`` is
        returned unchanged. Otherwise its entry supplies ``'table-name'`` and
        an optional ``'mode'`` (``'rewrite'`` when absent):

        * in ``'rewrite'`` mode an existing table is deleted first;
        * a missing table is created from the engine-normalized schema,
          while an existing one only has that schema registered through
          ``storage.describe``;
        * in ``'update'`` mode the update keys come from the entry's
          ``'update_keys'``, falling back to the schema's ``primaryKey``
          (or an empty list).

        Returns:
            The original ``resource`` when it is not configured, otherwise a
            lazy ``map`` of ``self.get_output_row`` over the results yielded
            by ``storage.write``.

        Raises:
            ValidationError: re-raised after logging when table creation
                rejects the schema.
        """
        name = resource.res.name
        if name not in self.converted_resources:
            return resource

        config = self.converted_resources[name]
        mode = config.get('mode', 'rewrite')
        table_name = config['table-name']

        # The table name is used as the storage prefix, so the single bucket
        # handled here is always addressed by the empty string.
        storage = Storage(self.engine, prefix=table_name)
        if mode == 'rewrite' and '' in storage.buckets:
            storage.delete('')

        schema_descriptor = resource.res.descriptor['schema']
        schema = self.normalize_schema_for_engine(
            self.engine.dialect.name, schema_descriptor)

        if '' in storage.buckets:
            # Table already exists: only register the schema with the storage.
            storage.describe('', schema)
        else:
            logging.info('Creating DB table %s', table_name)
            try:
                storage.create('', schema)
            except ValidationError as exc:
                logging.error('Error validating schema %r', schema_descriptor)
                for problem in exc.errors:
                    logging.error('Error validating schema: %s', problem)
                raise

        if mode == 'update':
            update_keys = config.get('update_keys')
            if update_keys is None:
                update_keys = schema_descriptor.get('primaryKey', [])
        else:
            update_keys = None
        logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                     name, table_name, mode, update_keys)

        to_output_row = self.get_output_row
        engine_rows = self.normalize_for_engine(
            self.engine.dialect.name, resource, schema_descriptor)
        written = storage.write(
            '',
            engine_rows,
            keyed=True,
            as_generator=True,
            update_keys=update_keys,
            buffer_size=self.batch_size,
            use_bloom_filter=self.use_bloom_filter,
        )
        return map(to_output_row, written)
Exemplo n.º 2
0
    def handle_resource(self, resource, spec, parameters, datapackage):
        """Write a resource's rows to its configured DB table and re-emit them.

        ``spec['name']`` selects the entry in ``self.converted_resources``;
        when there is none, ``resource`` is returned unchanged. Otherwise the
        entry supplies ``'table-name'`` and an optional ``'mode'``
        (``'rewrite'`` when absent). Rewrite mode deletes an existing table,
        a missing table is created from the engine-normalised
        ``spec['schema']``, and update mode takes its keys from the entry's
        ``'update_keys'`` or, failing that, the schema's ``primaryKey``.

        ``parameters`` and ``datapackage`` are accepted but not used here.

        Returns:
            The original ``resource`` when it is not configured, otherwise a
            lazy ``map`` of ``self.get_output_row`` over the results yielded
            by ``storage.write``.

        Raises:
            ValidationError: re-raised after logging when table creation
                rejects the schema.
        """
        name = spec['name']
        if name not in self.converted_resources:
            return resource

        config = self.converted_resources[name]
        mode = config.get('mode', 'rewrite')
        table_name = config['table-name']

        # The table name is used as the storage prefix, so the single bucket
        # handled here is always addressed by the empty string.
        storage = Storage(self.engine, prefix=table_name)
        if mode == 'rewrite' and '' in storage.buckets:
            storage.delete('')

        schema = self.normalise_schema_for_engine(
            self.engine.dialect.name, spec['schema'])

        if '' in storage.buckets:
            # Table already exists: only register the schema with the storage.
            storage.describe('', schema)
        else:
            logging.info('Creating DB table %s', table_name)
            try:
                storage.create('', schema)
            except ValidationError as exc:
                logging.error('Error validating schema %r', spec['schema'])
                for problem in exc.errors:
                    logging.error('Error validating schema: %s', problem)
                raise

        if mode == 'update':
            update_keys = config.get('update_keys')
            if update_keys is None:
                update_keys = spec['schema'].get('primaryKey', [])
        else:
            update_keys = None
        logging.info('Writing to DB %s -> %s (mode=%s, keys=%s)',
                     name, table_name, mode, update_keys)

        to_output_row = self.get_output_row
        engine_rows = self.normalise_for_engine(
            self.engine.dialect.name, resource, spec)
        written = storage.write(
            '',
            engine_rows,
            keyed=True,
            as_generator=True,
            update_keys=update_keys,
        )
        return map(to_output_row, written)
def test_storage_limited_databases(dialect, database_url):
    """Round-trip the fixture datasets through ``Storage`` on ``database_url``.

    The ARTICLES and COMMENTS schemas are passed through ``remove_fk`` before
    their buckets are created; the expected descriptors below carry no
    ``foreignKeys``. After writing the fixture rows, a second ``Storage``
    instance on the same engine is used to check bucket names, reflected
    schemas and stored data.

    ``dialect`` only influences the expected type of the ``current`` field:
    ``'boolean'`` for ``'sqlite'`` and ``'integer'`` otherwise.
    """
    prefix = 'test_storage_'
    engine = create_engine(database_url)
    storage = Storage(engine=engine, prefix=prefix)

    # Remove every bucket under the prefix before starting
    storage.delete()

    # Create buckets; 'comments' is created a second time with force=True
    storage.create(
        ['articles', 'comments'],
        [remove_fk(ARTICLES['schema']), remove_fk(COMMENTS['schema'])],
        indexes_fields=[[['rating'], ['name']], []],
    )
    storage.create('comments', remove_fk(COMMENTS['schema']), force=True)
    for bucket, fixture in (
        ('temporal', TEMPORAL),
        ('location', LOCATION),
        ('compound', COMPOUND),
    ):
        storage.create(bucket, fixture['schema'])

    # Write data
    for bucket, fixture in (
        ('articles', ARTICLES),
        ('comments', COMMENTS),
        ('temporal', TEMPORAL),
        ('location', LOCATION),
        ('compound', COMPOUND),
    ):
        storage.write(bucket, fixture['data'])

    # Fresh storage object, so everything below relies on reflection only
    storage = Storage(engine=engine, prefix=prefix)

    # Creating a bucket that already exists must fail
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.create('articles', ARTICLES['schema'])

    # Assert buckets
    expected_buckets = [
        'articles', 'comments', 'compound', 'location', 'temporal',
    ]
    assert storage.buckets == expected_buckets

    # Assert schemas (foreignKeys not supported)
    current_type = 'boolean' if dialect == 'sqlite' else 'integer'
    expected_articles = {
        'fields': [
            {'name': 'id', 'type': 'integer',
             'constraints': {'required': True}},
            {'name': 'parent', 'type': 'integer'},
            {'name': 'name', 'type': 'string'},
            {'name': 'current', 'type': current_type},
            {'name': 'rating', 'type': 'number'},
        ],
        'primaryKey': 'id',
    }
    assert storage.describe('articles') == expected_articles

    expected_comments = {
        'fields': [
            {'name': 'entry_id', 'type': 'integer',
             'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'string'},  # type downgrade
        ],
        'primaryKey': 'entry_id',
    }
    assert storage.describe('comments') == expected_comments

    expected_temporal = {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date'},  # format removal
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'string'},  # type fallback
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'integer'},  # type downgrade
            {'name': 'yearmonth', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('temporal') == expected_temporal

    expected_location = {
        'fields': [
            {'name': 'location', 'type': 'string'},  # type fallback
            {'name': 'geopoint', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('location') == expected_location

    expected_compound = {
        'fields': [
            {'name': 'stats', 'type': 'string'},  # type fallback
            {'name': 'persons', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('compound') == expected_compound

    # Assert data
    assert storage.read('articles') == cast(ARTICLES)['data']
    assert storage.read('comments') == cast(COMMENTS)['data']
    assert storage.read('temporal') == cast(
        TEMPORAL, skip=['duration', 'yearmonth'])['data']
    assert storage.read('location') == cast(
        LOCATION, skip=['geojson', 'geopoint'])['data']
    assert storage.read('compound') == cast(
        COMPOUND, skip=['array', 'object'])['data']

    # Assert data with forced schema
    storage.describe('compound', COMPOUND['schema'])
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Deleting a bucket that does not exist must fail
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('non_existent')

    # Clean up all buckets under the prefix
    storage.delete()
# Example script: load two CSV fixtures, store them as tables through
# `Storage`, then list, describe and read the tables back.
# `articles_schema` and `comments_schema` are not defined in this excerpt;
# presumably they are built earlier in the script (confirm).

# Read the fixture data with `topen`, passing `with_headers=True`
articles_data = topen('data/articles.csv', with_headers=True).read()
comments_data = topen('data/comments.csv', with_headers=True).read()

# Engine: the connection URL is taken from the POSTGRES_URL environment
# variable (a KeyError is raised when it is not set)
engine = create_engine(os.environ['POSTGRES_URL'])

# Storage: every call below goes through this object, created with the
# 'prefix_' prefix
storage = Storage(engine=engine, prefix='prefix_')

# Delete tables, walking the listing in reverse order
# NOTE(review): presumably reversed so dependent tables are dropped first;
# confirm
for table in reversed(storage.tables):
    storage.delete(table)

# Create both tables in a single call, one schema per table name
storage.create(['articles', 'comments'], [articles_schema, comments_schema])

# Write data to tables
storage.write('articles', articles_data)
storage.write('comments', comments_data)

# List tables
print(storage.tables)

# Describe tables
print(storage.describe('articles'))
print(storage.describe('comments'))

# Read data from tables; the result is wrapped in list() before printing
print(list(storage.read('articles')))
print(list(storage.read('comments')))
Exemplo n.º 5
0
            "name": "id",
            "type": "integer",
            "constraints": {
                "required": true
            }
        },
        {
            "name": "name",
            "type": "string"
        },
        {
            "name": "ssn",
            "type": "string",
            "protected": true
        }
    ]
}
"""

# Create the 'records' table from the JSON schema text parsed here.
# NOTE(review): `records_schema`, `storage` and `encryptedDefintion` (name
# spelled as in the code) are defined outside this excerpt; presumably
# `encrypted_definitions` configures handling of fields flagged "protected",
# confirm against the Storage implementation in use.
storage.create(['records'], [json.loads(records_schema)], encrypted_definitions=encryptedDefintion)

# A single sample row; values presumably follow the schema's field order
# (id, name, ssn), confirm
records_data = [
    [ 1, "John", "123456789"]
]

# Write the sample row to the table
storage.write('records', records_data)

# Print the table descriptor as reported by the storage
print(storage.describe('records'))

# Print the stored rows; the result is wrapped in list() before printing
print(list(storage.read('records')))
Exemplo n.º 6
0
def test_storage():
    """Round-trip the fixture datasets through ``Storage`` on PostgreSQL.

    The engine URL is read from the ``POSTGRES_URL`` environment variable.
    Buckets are created from the fixture schemas and filled with the fixture
    rows; a second ``Storage`` instance on the same engine is then used to
    check bucket names, reflected schemas (including the ``comments`` foreign
    key to ``articles``) and stored data.
    """
    prefix = 'test_storage_'
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix=prefix)

    # Remove every bucket under the prefix before starting
    storage.delete()

    # Create buckets; 'comments' is created a second time with force=True
    storage.create(
        ['articles', 'comments'],
        [ARTICLES['schema'], COMMENTS['schema']],
        indexes_fields=[[['rating'], ['name']], []],
    )
    storage.create('comments', COMMENTS['schema'], force=True)
    for bucket, fixture in (
        ('temporal', TEMPORAL),
        ('location', LOCATION),
        ('compound', COMPOUND),
    ):
        storage.create(bucket, fixture['schema'])

    # Write data
    for bucket, fixture in (
        ('articles', ARTICLES),
        ('comments', COMMENTS),
        ('temporal', TEMPORAL),
        ('location', LOCATION),
        ('compound', COMPOUND),
    ):
        storage.write(bucket, fixture['data'])

    # Fresh storage object, so everything below relies on reflection only
    storage = Storage(engine=engine, prefix=prefix)

    # Creating a bucket that already exists must fail
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.create('articles', ARTICLES['schema'])

    # Assert buckets
    expected_buckets = [
        'articles', 'compound', 'location', 'temporal', 'comments',
    ]
    assert storage.buckets == expected_buckets

    # Assert schemas
    assert storage.describe('articles') == ARTICLES['schema']

    expected_comments = {
        'fields': [
            {'name': 'entry_id', 'type': 'integer',
             'constraints': {'required': True}},
            {'name': 'comment', 'type': 'string'},
            {'name': 'note', 'type': 'string'},  # type downgrade
        ],
        'primaryKey': 'entry_id',
        'foreignKeys': [
            {
                'fields': 'entry_id',
                'reference': {'resource': 'articles', 'fields': 'id'},
            },
        ],
    }
    assert storage.describe('comments') == expected_comments

    expected_temporal = {
        'fields': [
            {'name': 'date', 'type': 'date'},
            {'name': 'date_year', 'type': 'date'},  # format removal
            {'name': 'datetime', 'type': 'datetime'},
            {'name': 'duration', 'type': 'string'},  # type fallback
            {'name': 'time', 'type': 'time'},
            {'name': 'year', 'type': 'integer'},  # type downgrade
            {'name': 'yearmonth', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('temporal') == expected_temporal

    expected_location = {
        'fields': [
            {'name': 'location', 'type': 'object'},  # type downgrade
            {'name': 'geopoint', 'type': 'string'},  # type fallback
        ],
    }
    assert storage.describe('location') == expected_location

    expected_compound = {
        'fields': [
            {'name': 'stats', 'type': 'object'},
            {'name': 'persons', 'type': 'object'},  # type downgrade
        ],
    }
    assert storage.describe('compound') == expected_compound

    # Assert data
    assert storage.read('articles') == cast(ARTICLES)['data']
    assert storage.read('comments') == cast(COMMENTS)['data']
    assert storage.read('temporal') == cast(
        TEMPORAL, skip=['duration', 'yearmonth'])['data']
    assert storage.read('location') == cast(
        LOCATION, skip=['geopoint'])['data']
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Assert data with forced schema
    storage.describe('compound', COMPOUND['schema'])
    assert storage.read('compound') == cast(COMPOUND)['data']

    # Deleting a bucket that does not exist must fail
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('non_existent')

    # Clean up all buckets under the prefix
    storage.delete()