def test_parallelism(self, query_patch):
    query_patch.return_value = [{'type': 'CSV'}]

    minimal_config = {
        'account': "dummy-value",
        'dbname': "dummy-value",
        'user': "******",
        'password': "******",
        'warehouse': "dummy-value",
        'default_target_schema': "dummy-value",
        'file_format': "dummy-value"
    }

    # Using external stages should allow parallelism
    external_stage_with_parallel = {
        's3_bucket': 'dummy-bucket',
        'stage': 'dummy_schema.dummy_stage',
        'parallelism': 5
    }

    self.assertEqual(
        db_sync.DbSync({
            **minimal_config,
            **external_stage_with_parallel
        }).connection_config['parallelism'], 5)

    # Using Snowflake table stages should enforce single thread parallelism
    table_stage_with_parallel = {'parallelism': 5}
    self.assertEqual(
        db_sync.DbSync({
            **minimal_config,
            **table_stage_with_parallel
        }).connection_config['parallelism'], 1)
def test_sync_table_with_stream_that_changes_to_have_no_pk(self, query_patch):
    minimal_config = {
        'account': "dummy-account",
        'dbname': "dummy-db",
        'user': "******",
        'password': "******",
        'warehouse': "dummy-wh",
        'default_target_schema': "dummy-schema",
        'file_format': "dummy-file-format"
    }

    stream_schema_message = {
        "stream": "public-table1",
        "schema": {
            "properties": {
                "id": {
                    "type": ["integer"]
                },
                "c_str": {
                    "type": ["null", "string"]
                }
            }
        },
        "key_properties": []
    }

    table_cache = [{
        'SCHEMA_NAME': 'DUMMY-SCHEMA',
        'TABLE_NAME': 'TABLE1',
        'COLUMN_NAME': 'ID',
        'DATA_TYPE': 'NUMBER'
    }, {
        'SCHEMA_NAME': 'DUMMY-SCHEMA',
        'TABLE_NAME': 'TABLE1',
        'COLUMN_NAME': 'C_STR',
        'DATA_TYPE': 'TEXT'
    }]

    # query() is expected three times: file format lookup, existing primary keys,
    # then the batch of ALTER statements that drops the primary key
    query_patch.side_effect = [[{
        'type': 'CSV'
    }], [{
        'column_name': 'ID'
    }], None]

    dbsync = db_sync.DbSync(minimal_config, stream_schema_message, table_cache)
    dbsync.sync_table()

    query_patch.assert_has_calls([
        call('SHOW FILE FORMATS LIKE \'dummy-file-format\''),
        call('show primary keys in table dummy-db.dummy-schema."TABLE1";'),
        call([
            'alter table dummy-schema."TABLE1" drop primary key;',
            'alter table dummy-schema."TABLE1" alter column "ID" drop not null;'
        ])
    ])
def test_copy_to_archive(self, query_patch, copy_object_patch):
    query_patch.return_value = [{'type': 'CSV'}]

    minimal_config = {
        'account': "dummy-value",
        'dbname': "dummy-value",
        'user': "******",
        'password': "******",
        'warehouse': "dummy-value",
        'default_target_schema': "dummy-value",
        'file_format': "dummy-value",
        's3_bucket': 'dummy-bucket',
        'stage': 'dummy_schema.dummy_stage'
    }

    # Assert default values (same bucket, 'archive' as the archive prefix)
    s3_config = {}
    dbsync = db_sync.DbSync({**minimal_config, **s3_config})
    dbsync.copy_to_archive('source/file', 'tap/schema/file', {'meta': "data"})

    self.assertEqual(copy_object_patch.call_args[0][0], 'dummy-bucket/source/file')
    self.assertEqual(copy_object_patch.call_args[0][1], 'dummy-bucket')
    self.assertEqual(copy_object_patch.call_args[0][2], 'archive/tap/schema/file')

    # Assert custom archive bucket and prefix
    s3_config = {
        'archive_load_files_s3_bucket': "custom-bucket",
        'archive_load_files_s3_prefix': "custom-prefix"
    }
    dbsync = db_sync.DbSync({**minimal_config, **s3_config})
    dbsync.copy_to_archive('source/file', 'tap/schema/file', {'meta': "data"})

    self.assertEqual(copy_object_patch.call_args[0][0], 'dummy-bucket/source/file')
    self.assertEqual(copy_object_patch.call_args[0][1], 'custom-bucket')
    self.assertEqual(copy_object_patch.call_args[0][2], 'custom-prefix/tap/schema/file')
def test_snowpipe_detail_generation(self):
    with open(
            f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json',
            'r') as f:
        lines = f.readlines()

    self.config = test_utils.get_test_config()
    DbSync_obj = db_sync.DbSync(self.config, json.loads(lines[0]))
    schema_table_name = DbSync_obj.table_name('tap_mysql_test-test_table_one', False)

    pipe_name = DbSync_obj._generate_pipe_name(self.config['dbname'], schema_table_name)

    # The pipe name is the fully qualified table name, without quotes,
    # suffixed with '_s3_pipe'
    stripped_db_name = self.config['dbname'].replace('"', '')
    stripped_table_name = schema_table_name.replace('"', '')
    expected_pipe_name = f"{stripped_db_name}.{stripped_table_name}_s3_pipe"

    self.assertEqual(pipe_name, expected_pipe_name)
def test_generate_s3_key_prefix(self):
    with open(
            f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json',
            'r') as f:
        lines = f.readlines()

    self.config = test_utils.get_test_config()
    DbSync_obj = db_sync.DbSync(self.config, json.loads(lines[0]))

    # With Snowpipe enabled, the key prefix includes a table-specific folder
    expected_string = (
        f"{self.config['s3_key_prefix'].replace('/','')}/"
        f"{self.config['default_target_schema']}__test_table_one/")
    s3_key_with_snowpipe = DbSync_obj._generate_s3_key_prefix(
        'tap_mysql_test-test_table_one', True)
    self.assertEqual(s3_key_with_snowpipe, expected_string)

    # Without Snowpipe, only the configured prefix is used
    s3_key_without_snowpipe = DbSync_obj._generate_s3_key_prefix(
        'tap_mysql_test-test_table_one', False)
    self.assertEqual(s3_key_without_snowpipe,
                     f"{self.config['s3_key_prefix'].replace('/','')}/")
def test_copy_failure_message(self, load_file_copy_patch, query_patch):
    LOGGER_NAME = "target_snowflake"
    query_patch.return_value = [{'type': 'CSV'}]

    minimal_config = {
        'account': "dummy_account",
        'dbname': "dummy_dbname",
        'user': "******",
        'password': "******",
        'warehouse': "dummy_warehouse",
        'default_target_schema': "dummy_default_target_schema",
        'file_format': "dummy_file_format",
    }

    stream_schema_message = {
        "stream": "dummy_stream",
        "schema": {
            "properties": {
                "id": {
                    "type": ["integer"]
                },
                "c_str": {
                    "type": ["null", "string"]
                }
            }
        },
        "key_properties": []
    }

    # Make the COPY statement fail and assert that the error is logged
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    load_file_copy_patch.side_effect = Exception()

    expected_msg = (
        f'ERROR:{LOGGER_NAME}:Error while executing COPY query '
        f'for table "{minimal_config["default_target_schema"]}."{stream_schema_message["stream"].upper()}"" '
        f'in stream "{stream_schema_message["stream"]}"')

    with self.assertRaises(Exception), self.assertLogs(
            logger=LOGGER_NAME, level="ERROR") as captured_logs:
        dbsync.load_file(s3_key="dummy-key", count=256, size_bytes=256)

    self.assertIn(expected_msg, captured_logs.output)
def test_sync_table_with_new_pk_in_stream(self, query_patch):
    minimal_config = {
        'account': "dummy-account",
        'dbname': "dummy-db",
        'user': "******",
        'password': "******",
        'warehouse': "dummy-wh",
        'default_target_schema': "dummy-schema",
        'file_format': "dummy-file-format"
    }

    stream_schema_message = {
        "stream": "public-table1",
        "schema": {
            "properties": {
                "id": {
                    "type": ["integer"]
                },
                "c_str": {
                    "type": ["null", "string"]
                },
                "name": {
                    "type": ["string"]
                },
            }
        },
        "key_properties": ["id", "name"]
    }

    table_cache = [{
        'SCHEMA_NAME': 'DUMMY-SCHEMA',
        'TABLE_NAME': 'TABLE1',
        'COLUMN_NAME': 'ID',
        'DATA_TYPE': 'NUMBER'
    }, {
        'SCHEMA_NAME': 'DUMMY-SCHEMA',
        'TABLE_NAME': 'TABLE1',
        'COLUMN_NAME': 'C_STR',
        'DATA_TYPE': 'TEXT'
    }, {
        'SCHEMA_NAME': 'DUMMY-SCHEMA',
        'TABLE_NAME': 'TABLE1',
        'COLUMN_NAME': 'NAME',
        'DATA_TYPE': 'TEXT'
    }]

    query_patch.side_effect = [[{
        'type': 'CSV'
    }], [{
        'column_name': 'ID'
    }], None]

    dbsync = db_sync.DbSync(minimal_config, stream_schema_message, table_cache)
    dbsync.sync_table()

    # Due to the usage of sets in the code, the order of columns in the queries is not
    # guaranteed, so the assertions are broken up to account for this.
    calls = query_patch.call_args_list
    self.assertEqual(3, len(calls))

    self.assertEqual('SHOW FILE FORMATS LIKE \'dummy-file-format\'', calls[0][0][0])
    self.assertEqual(
        'show primary keys in table dummy-db.dummy-schema."TABLE1";',
        calls[1][0][0])

    self.assertEqual('alter table dummy-schema."TABLE1" drop primary key;',
                     calls[2][0][0][0])
    self.assertIn(
        calls[2][0][0][1], {
            'alter table dummy-schema."TABLE1" add primary key("ID", "NAME");',
            'alter table dummy-schema."TABLE1" add primary key("NAME", "ID");'
        })
    self.assertListEqual(sorted(calls[2][0][0][2:]), [
        'alter table dummy-schema."TABLE1" alter column "ID" drop not null;',
        'alter table dummy-schema."TABLE1" alter column "NAME" drop not null;',
    ])
def test_record_primary_key_string(self, query_patch):
    query_patch.return_value = [{'type': 'CSV'}]

    minimal_config = {
        'account': "dummy-value",
        'dbname': "dummy-value",
        'user': "******",
        'password': "******",
        'warehouse': "dummy-value",
        'default_target_schema': "dummy-value",
        'file_format': "dummy-value"
    }

    stream_schema_message = {
        "stream": "public-table1",
        "schema": {
            "properties": {
                "id": {
                    "type": ["integer"]
                },
                "c_str": {
                    "type": ["null", "string"]
                },
                "c_bool": {
                    "type": ["boolean"]
                }
            }
        },
        "key_properties": ["id"]
    }

    # Single primary key string
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    self.assertEqual(dbsync.record_primary_key_string({'id': 123}), '123')

    # Composite primary key string
    stream_schema_message['key_properties'] = ['id', 'c_str']
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    self.assertEqual(
        dbsync.record_primary_key_string({
            'id': 123,
            'c_str': 'xyz'
        }), '123,xyz')

    # Missing field as PK
    stream_schema_message['key_properties'] = ['invalid_col']
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    with self.assertRaisesRegex(
            PrimaryKeyNotFoundException,
            r"Primary key 'invalid_col' does not exist in record or is null\. Available "
            r"fields: \['id', 'c_str'\]"):
        dbsync.record_primary_key_string({'id': 123, 'c_str': 'xyz'})

    # Null PK field
    stream_schema_message['key_properties'] = ['id']
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    with self.assertRaisesRegex(
            PrimaryKeyNotFoundException,
            r"Primary key 'id' does not exist in record or is null\. Available "
            r"fields: \['id', 'c_str'\]"):
        dbsync.record_primary_key_string({'id': None, 'c_str': 'xyz'})

    # Falsy PK value (0) is accepted
    stream_schema_message['key_properties'] = ['id']
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    self.assertEqual(
        dbsync.record_primary_key_string({
            'id': 0,
            'c_str': 'xyz'
        }), '0')

    # Falsy PK value (False) is accepted
    stream_schema_message['key_properties'] = ['id', 'c_bool']
    dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
    self.assertEqual(
        dbsync.record_primary_key_string({
            'id': 1,
            'c_bool': False,
            'c_str': 'xyz'
        }), '1,False')