def setUp(self): self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)')) self.database = Database(psycopg2.connect(CONNECTION)) self.database.open() table = TargetTable(self.schema, self.database) table.create() self.database.commit()
def __init__(self, metadata, source, schema, bucket, db_connection): self.metadata = metadata self.source = source self.schema = schema self.bucket = bucket if isinstance(db_connection, Database): self.database = db_connection else: self.database = Database(db_connection) self.file_name = '{0}_manifest.json'.format(schema.table) self.journal_file_name = '{0}_journal.db'.format(schema.table)
def test_stage_update_when_column_name_defined(self): with patch.object(Database, 'execute') as execute: schema = JsonObject( TABLE_NAME, Property('property1', 'VARCHAR(10)', 'someColumn'), Property('property2', 'TIMESTAMP', 'anotherColumn')) table = TargetTable(schema, Database(Mock())) table.stage_update() expected_sql = 'CREATE TABLE {0}_update (someColumn VARCHAR(10), ' \ 'anotherColumn TIMESTAMP)'.format(TABLE_NAME) execute.assert_called_once_with(expected_sql)
def test_create_when_column_name_not_defined_for_nested_property(self): with patch.object(Database, 'execute') as execute: schema = JsonObject( TABLE_NAME, Property('property1', 'VARCHAR(10)'), Property('property2', Property('timestamp', 'TIMESTAMP'))) table = TargetTable(schema, Database(Mock())) table.create() expected_sql = 'CREATE TABLE {0} (property1 VARCHAR(10), ' \ 'property2_timestamp TIMESTAMP)'.format(TABLE_NAME) execute.assert_called_once_with(expected_sql)
def __init__(self, aws_access_key_id, aws_secret_access_key, bucket, db_connection): super(S3CopyPipeline, self).__init__() self.aws_access_key_id = aws_access_key_id self.aws_secret_access_key = aws_secret_access_key if isinstance(bucket, basestring): self.bucket = Bucket(aws_access_key_id, aws_secret_access_key, bucket) else: self.bucket = bucket if isinstance(db_connection, Database): self.database = db_connection else: self.database = Database(db_connection)
def sql_manifest_copy(self, metadata, source, schema, max_error_count=1): sql_manifest_copy_step = ManifestCopyFromS3JsonStep(metadata=metadata, source=source, schema=schema, aws_access_key_id=self.aws_access_key_id, aws_secret_access_key=self.aws_secret_access_key, bucket=self.bucket, table=TargetTable( schema, self.database)) sql_manifest = SqlManifest(metadata, source, schema, self.bucket, self.database) sql_manifest.database = Database( sqlite3.connect(sql_manifest.journal_file_name)) sql_manifest_copy_step.manifest = sql_manifest self.__add_copy_step(sql_manifest_copy_step, max_error_count) return self
class SqlStepShould(unittest.TestCase): def setUp(self): self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)')) self.database = Database(psycopg2.connect(CONNECTION)) self.database.open() table = TargetTable(self.schema, self.database) table.create() self.database.commit() def tearDown(self): self.database.open() self.database.execute('DROP TABLE IF EXISTS %s', (AsIs(TABLE_NAME), )) self.database.commit() def test_run(self): SqlStep(self.database, ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1), ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2), ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))).run() self.database.open() self.database.execute("SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME), )) count = int(self.database.cursor.fetchone()[0]) self.assertEqual(3, count) self.database.execute("SELECT * FROM %s", (AsIs(TABLE_NAME), )) values = list(self.database.cursor.fetchall()) self.assertEqual([('1', ), ('2', ), ('3', )], values) def test_execute(self): self.database.open() sql = SqlStep(self.database) sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1)) sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2)) sql.execute(("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))) self.database.commit() self.database.open() self.database.execute("SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME), )) count = int(self.database.cursor.fetchone()[0]) self.assertEqual(3, count) self.database.execute("SELECT * FROM %s", (AsIs(TABLE_NAME), )) values = list(self.database.cursor.fetchall()) self.assertEqual([('1', ), ('2', ), ('3', )], values)
def test_query_table_with_psycopg2_connection(self): db = Database(psycopg2.connect(CONNECTION)) db.open() db.execute('CREATE TABLE SomeTable (id VARCHAR(255))') db.execute('INSERT INTO SomeTable VALUES (1)') db.execute('INSERT INTO SomeTable VALUES (2)') db.execute('SELECT * FROM SomeTable') rows = db.fetchall() self.assertEqual(True, isinstance(rows, types.GeneratorType)) self.assertEqual([('1',), ('2',)], list(rows)) db.rollback()
def test_query_table_with_sqlite3_connection(self): database_file = 'sometable.db' db = Database(sqlite3.connect(database_file)) db.open() db.execute('CREATE TABLE IF NOT EXISTS SomeTable (id VARCHAR(255))') db.execute('INSERT INTO SomeTable VALUES (1)') db.execute('INSERT INTO SomeTable VALUES (?)', (2,)) db.execute('SELECT * FROM SomeTable') rows = db.fetchall() self.assertEqual(True, isinstance(rows, types.GeneratorType)) self.assertEqual(['1', '2'], [row[0] for row in rows]) db.rollback() db.close() os.remove(database_file)
def setUp(self): self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)')) self.database = Database(psycopg2.connect(CONNECTION)) self.database.open()
class TargetTableShould(unittest.TestCase): def setUp(self): self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)')) self.database = Database(psycopg2.connect(CONNECTION)) self.database.open() def tearDown(self): self.database.rollback() def assertColumns(self, table_name, schema): self.database.execute( "SELECT column_name, udt_name FROM information_schema.columns " "WHERE table_name = '%s'", (AsIs(table_name),)) columns = self.database.cursor.fetchall() self.assertEqual(len(schema.schema), len(columns)) for i, column in enumerate(columns): self.assertEqual(schema.schema[i].column_name.lower(), column[0]) self.assertEqual(True, column[1] in schema.schema[i].type.lower()) def test_not_exist(self): table = TargetTable(self.schema, self.database) self.assertEqual(False, table.exists()) def test_exist(self): table = TargetTable(self.schema, self.database) table.create() self.assertEqual(True, table.exists()) def test_create_when_column_name_not_defined(self): schema = JsonObject(TABLE_NAME, Property('property1', 'VARCHAR(10)'), Property('property2', 'TIMESTAMP')) table = TargetTable(schema, self.database) table.create() self.assertColumns(schema.table, schema) def test_create_when_column_name_defined(self): schema = JsonObject(TABLE_NAME, Property('property1', 'VARCHAR(10)', 'someColumn'), Property('property2', 'TIMESTAMP', 'anotherColumn')) table = TargetTable(schema, self.database) table.create() self.assertColumns(schema.table, schema) def test_stage_update_when_column_name_not_defined(self): schema = JsonObject(TABLE_NAME, Property('property1', 'VARCHAR(10)'), Property('property2', 'TIMESTAMP')) table = TargetTable(schema, self.database) table.stage_update() self.assertColumns(schema.update_table, schema) def test_stage_update_when_column_name_defined(self): schema = JsonObject(TABLE_NAME, Property('property1', 'VARCHAR(10)', 'someColumn'), Property('property2', 'TIMESTAMP', 'anotherColumn')) table = TargetTable(schema, self.database) table.stage_update() self.assertColumns(schema.update_table, schema) def test_promote_update(self): table = TargetTable(self.schema, self.database) table.stage_update() table.promote_update() self.assertColumns(self.schema.table, self.schema) def test_insert_update(self): table = TargetTable(self.schema, self.database) table.create() table.stage_update() self.database.execute("INSERT INTO %s VALUES('%s')", (AsIs(TABLE_NAME), AsIs(1),)) self.database.execute("INSERT INTO %s VALUES('%s')", (AsIs(self.schema.update_table), AsIs(2),)) self.database.execute("INSERT INTO %s VALUES('%s')", (AsIs(self.schema.update_table), AsIs(3),)) table.insert_update() self.database.execute( "SELECT * FROM %s", (AsIs(TABLE_NAME),)) values = list(self.database.cursor.fetchall()) self.assertEqual([('1',), ('2',), ('3',)], values) def test_drop(self): table = TargetTable(self.schema, self.database) table.create() table.drop() self.assertEqual(False, table.exists())
class SqlManifest(object): def __init__(self, metadata, source, schema, bucket, db_connection): self.metadata = metadata self.source = source self.schema = schema self.bucket = bucket if isinstance(db_connection, Database): self.database = db_connection else: self.database = Database(db_connection) self.file_name = '{0}_manifest.json'.format(schema.table) self.journal_file_name = '{0}_journal.db'.format(schema.table) @property def all_keys(self): return (k.name for k in self.bucket.list(self.source) if not k.name.endswith('/')) @property def manifest_key(self): return normalize_path('{0}/{1}'.format(self.metadata, self.file_name)) @property def journal_key(self): return normalize_path('{0}/{1}'.format(self.metadata, self.journal_file_name)) @property def manifest_url(self): return 's3://{0}{1}'.format(self.bucket.name, self.manifest_key) def journal(self): journal = self.bucket.get(self.journal_key) if journal.exists(): journal.get_contents_to_filename(self.journal_file_name) self.database.open() self.database.execute('SELECT key FROM journal') return (row[0] for row in self.database.fetchall()) else: self.database.open() self.database.execute( 'CREATE TABLE IF NOT EXISTS journal (key TEXT)') self.database.commit() self.database.close() return [] def get(self): updated_journal = self.all_keys journal = self.journal() keys = list(set(updated_journal) - set(journal)) return { 'manifest': { 'entries': ({ 'url': 's3://{0}/{1}'.format(self.bucket.name, key), 'mandatory': True } for key in keys) }, 'updated_journal': updated_journal } def save(self): entries = self.get()['manifest']['entries'] offset = 0 offsets = [] last_entry = None with open(self.file_name, 'wb') as f: f.seek(0) f.truncate() offset += self.__write(f, '{\n') offsets.append(offset) offset += self.__write(f, '"entries": [\n') offsets.append(offset) for entry in entries: line = json.dumps(entry) + ',\n' offset += self.__write(f, line) offsets.append(offset) last_entry = line f.seek(offsets[-2]) f.truncate() if last_entry is not None: line = last_entry[0:-2] + '\n' f.write(line) f.write(']}') self.bucket.get(self.manifest_key).set_contents_from_filename( self.file_name) def commit(self, saved_keys): self.database.open() self.database.execute('DELETE FROM journal') for key in saved_keys: self.database.execute('INSERT INTO journal VALUES (?)', (key, )) self.database.commit() self.database.close() self.bucket.get(self.journal_key).set_contents_from_filename( self.journal_file_name) def exists(self): return self.bucket.get(self.manifest_key).exists() def journal_exists(self): return self.bucket.get(self.journal_key).exists() @staticmethod def __write(fd, line): fd.write(line) return len(line)
class SqlStepShould(unittest.TestCase): def setUp(self): self.schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)')) self.database = Database(psycopg2.connect(CONNECTION)) self.database.open() table = TargetTable(self.schema, self.database) table.create() self.database.commit() def tearDown(self): self.database.open() self.database.execute('DROP TABLE IF EXISTS %s', (AsIs(TABLE_NAME),)) self.database.commit() def test_run(self): SqlStep(self.database, ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1), ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2), ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))).run() self.database.open() self.database.execute( "SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME),)) count = int(self.database.cursor.fetchone()[0]) self.assertEqual(3, count) self.database.execute( "SELECT * FROM %s", (AsIs(TABLE_NAME),)) values = list(self.database.cursor.fetchall()) self.assertEqual([('1',), ('2',), ('3',)], values) def test_execute(self): self.database.open() sql = SqlStep(self.database) sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1)) sql.execute(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2)) sql.execute(("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME))) self.database.commit() self.database.open() self.database.execute( "SELECT COUNT(*) FROM %s", (AsIs(TABLE_NAME),)) count = int(self.database.cursor.fetchone()[0]) self.assertEqual(3, count) self.database.execute( "SELECT * FROM %s", (AsIs(TABLE_NAME),)) values = list(self.database.cursor.fetchall()) self.assertEqual([('1',), ('2',), ('3',)], values)