def test_archive_load_files_log_based_replication(self, os_remove_mock, dbSync_mock):
    """Archiving of load files with log-based replication"""
    self.config['tap_id'] = 'test_tap_id'
    self.config['archive_load_files'] = True

    with open(f'{os.path.dirname(__file__)}/resources/logical-streams.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    instance.put_to_stage.return_value = 'some-s3-folder/some-name_date_batch_hash.csg.gz'

    target_snowflake.persist_lines(self.config, lines)

    copy_to_archive_args = instance.copy_to_archive.call_args[0]
    self.assertEqual(copy_to_archive_args[0], 'some-s3-folder/some-name_date_batch_hash.csg.gz')
    self.assertEqual(copy_to_archive_args[1],
                     'test_tap_id/logical1_table2/some-name_date_batch_hash.csg.gz')
    self.assertDictEqual(copy_to_archive_args[2], {
        'tap': 'test_tap_id',
        'schema': 'logical1',
        'table': 'logical1_table2',
        'archived-by': 'pipelinewise_target_snowflake'
    })

def test_archive_load_files_incremental_replication(self, os_remove_mock, dbSync_mock):
    """Archiving of load files with incremental replication"""
    self.config['tap_id'] = 'test_tap_id'
    self.config['archive_load_files'] = True
    self.config['s3_bucket'] = 'dummy_bucket'

    with open(f'{os.path.dirname(__file__)}/resources/messages-simple-table.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    instance.put_to_stage.return_value = 'some-s3-folder/some-name_date_batch_hash.csg.gz'

    target_snowflake.persist_lines(self.config, lines)

    copy_to_archive_args = instance.copy_to_archive.call_args[0]
    self.assertEqual(copy_to_archive_args[0], 'some-s3-folder/some-name_date_batch_hash.csg.gz')
    self.assertEqual(copy_to_archive_args[1],
                     'test_tap_id/test_simple_table/some-name_date_batch_hash.csg.gz')
    self.assertDictEqual(copy_to_archive_args[2], {
        'tap': 'test_tap_id',
        'schema': 'tap_mysql_test',
        'table': 'test_simple_table',
        'archived-by': 'pipelinewise_target_snowflake',
        'incremental-key': 'id',
        'incremental-key-min': '1',
        'incremental-key-max': '5'
    })

def test_persist_lines_with_only_state_messages(self, dbSync_mock, flush_streams_mock):
    """Given only state messages, target should emit the last one"""
    self.config['batch_size_rows'] = 5

    with open(f'{os.path.dirname(__file__)}/resources/streams_only_state.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None

    # catch stdout
    buf = io.StringIO()
    with redirect_stdout(buf):
        target_snowflake.persist_lines(self.config, lines)

    flush_streams_mock.assert_not_called()

    self.assertEqual(
        buf.getvalue().strip(),
        '{"bookmarks": {"tap_mysql_test-test_simple_table": {"replication_key": "id", '
        '"replication_key_value": 100, "version": 1}}}')

def test_persist_40_records_with_batch_wait_limit(self, dbSync_mock, flush_streams_mock, dateTime_mock):
    """Expect one flush per record when the batch wait limit expires between records"""
    start_time = datetime(2021, 4, 6, 0, 0, 0)
    increment = 11
    counter = itertools.count()

    # Move time forward by {{increment}} seconds every time utcnow() is called
    dateTime_mock.utcnow.side_effect = lambda: start_time + timedelta(
        seconds=increment * next(counter))

    self.config['batch_size_rows'] = 100
    self.config['batch_wait_limit_seconds'] = 10
    self.config['flush_all_streams'] = True

    # Expecting 40 records
    with open(f'{os.path.dirname(__file__)}/resources/logical-streams.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    flush_streams_mock.return_value = '{"currently_syncing": null}'

    target_snowflake.persist_lines(self.config, lines)

    # Expecting a flush after every record + 1 at the end
    assert flush_streams_mock.call_count == 41

def test_loading_unicode_characters(self):
    """Loading unicode encoded characters"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-unicode-characters.json')

    # Load with default settings
    target_snowflake.persist_lines(self.config, tap_lines)

    # Get loaded rows from tables
    snowflake = DbSync(self.config)
    target_schema = self.config.get('schema', '')
    table_unicode = snowflake.query(
        "SELECT * FROM {}.test_table_unicode".format(target_schema))

    self.assertEqual(table_unicode, [
        {'C_INT': 1, 'C_PK': 1,
         'C_VARCHAR': 'Hello world, Καλημέρα κόσμε, コンニチハ'},
        {'C_INT': 2, 'C_PK': 2,
         'C_VARCHAR': 'Chinese: 和毛泽东 <<重上井冈山>>. 严永欣, 一九八八年.'},
        {'C_INT': 3, 'C_PK': 3,
         'C_VARCHAR': 'Russian: Зарегистрируйтесь сейчас на Десятую Международную Конференцию по'},
        {'C_INT': 4, 'C_PK': 4,
         'C_VARCHAR': 'Thai: แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช'},
        {'C_INT': 5, 'C_PK': 5,
         'C_VARCHAR': 'Arabic: لقد لعبت أنت وأصدقاؤك لمدة وحصلتم علي من إجمالي النقاط'},
        {'C_INT': 6, 'C_PK': 6,
         'C_VARCHAR': 'Special Characters: [",\'!@£$%^&*()]'}
    ])

def test_loading_tables_with_no_encryption(self):
    """Loading multiple tables from the same input tap with various column types"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Turning off client-side encryption and load
    self.config['client_side_encryption_master_key'] = ''
    target_snowflake.persist_lines(self.config, tap_lines)

    self.assert_three_streams_are_into_snowflake()

def test_loading_tables_with_client_side_encryption_and_wrong_master_key(self):
    """Loading multiple tables from the same input tap with various column types"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Turning on client-side encryption and load but using a well formatted but wrong master key
    self.config['client_side_encryption_master_key'] = "Wr0n6m45t3rKeY0123456789a0123456789a0123456="

    with assert_raises(snowflake.connector.errors.ProgrammingError):
        target_snowflake.persist_lines(self.config, tap_lines)

def test_loading_tables_with_client_side_encryption(self):
    """Loading multiple tables from the same input tap with various column types"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Turning on client-side encryption and load
    self.config['client_side_encryption_master_key'] = os.environ.get(
        'CLIENT_SIDE_ENCRYPTION_MASTER_KEY')
    target_snowflake.persist_lines(self.config, tap_lines)

    self.assert_three_streams_are_into_snowflake()

def test_loading_with_multiple_schema(self):
    """Loading table with multiple SCHEMA messages"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-multi-schemas.json')

    # Load with default settings
    target_snowflake.persist_lines(self.config, tap_lines)

    # Check if data loaded correctly
    self.assert_three_streams_are_into_snowflake(
        should_metadata_columns_exist=False,
        should_hard_deleted_rows=False)

def test_loading_tables_with_hard_delete(self):
    """Loading multiple tables from the same input tap with deleted rows"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Turning on hard delete mode
    self.config['hard_delete'] = True
    target_snowflake.persist_lines(self.config, tap_lines)

    # Check if data loaded correctly and metadata columns exist
    self.assert_three_streams_are_into_snowflake(
        should_metadata_columns_exist=True,
        should_hard_deleted_rows=True)

def test_loading_tables_with_metadata_columns(self):
    """Loading multiple tables from the same input tap with various column types"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Turning on adding metadata columns
    self.config['add_metadata_columns'] = True
    target_snowflake.persist_lines(self.config, tap_lines)

    # Check if data loaded correctly and metadata columns exist
    self.assert_three_streams_are_into_snowflake(should_metadata_columns_exist=True)

def persist_lines_with_cache(self, lines):
    """Enables the table caching option and loads singer messages into snowflake.

    The table caching mechanism creates and maintains an extra table in Snowflake
    that describes the table structures. It is very similar to the
    INFORMATION_SCHEMA.COLUMNS system view, but querying INFORMATION_SCHEMA is slow,
    especially when a lot of taps are running in parallel. Selecting from a real
    table instead of INFORMATION_SCHEMA and keeping it in memory while
    target-snowflake is running results in better load performance.
    """
    information_schema_cache = target_snowflake.load_information_schema_cache(self.config)
    target_snowflake.persist_lines(self.config, lines, information_schema_cache)

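# A minimal usage sketch for the helper above, assuming the same
# 'messages-with-three-streams.json' resource and assertion helper the other
# tests use; it is illustrative only, not part of the original suite.
def test_loading_tables_with_cached_information_schema(self):
    """Loading multiple tables through the cached-schema code path"""
    tap_lines = test_utils.get_test_tap_lines('messages-with-three-streams.json')

    # Route the messages through persist_lines_with_cache instead of calling
    # target_snowflake.persist_lines directly, so the information schema cache
    # is built once and reused for every stream.
    self.persist_lines_with_cache(tap_lines)

    self.assert_three_streams_are_into_snowflake()
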
def test_persist_lines_with_40_records_and_batch_size_of_20_expect_flushing_once(
        self, dbSync_mock, flush_streams_mock, temp_file_mock):
    """Expect a single flush when loading 40 records with flush_all_streams enabled"""
    self.config['batch_size_rows'] = 20
    self.config['flush_all_streams'] = True

    with open(f'{os.path.dirname(__file__)}/resources/logical-streams.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    flush_streams_mock.return_value = '{"currently_syncing": null}'

    target_snowflake.persist_lines(self.config, lines)

    flush_streams_mock.assert_called_once()

def test_persist_lines_with_same_schema_expect_flushing_once(self, dbSync_mock, flush_streams_mock):
    """Receiving the same SCHEMA message multiple times should not trigger extra flushes"""
    self.config['batch_size_rows'] = 20

    with open(f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    flush_streams_mock.return_value = '{"currently_syncing": null}'

    target_snowflake.persist_lines(self.config, lines)

    self.assertEqual(1, flush_streams_mock.call_count)

def test_verify_snowpipe_usage(self, dbSync_mock, flush_streams_mock):
    """Verify snowpipe usage reporting after a single flush"""
    with open(f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json', 'r') as f:
        lines = f.readlines()

    instance = dbSync_mock.return_value
    instance.create_schema_if_not_exists.return_value = None
    instance.sync_table.return_value = None
    flush_streams_mock.return_value = '{"currently_syncing": null}'

    target_snowflake.persist_lines(self.config, lines)

    flush_streams_mock.assert_called_once()
    # _verify_snowpipe_usage is expected to return a dict with every value equal to 1
    assert all(value == 1 for value in target_snowflake._verify_snowpipe_usage().values())

def test_message_order(self):
    """RECORD message without a previously received SCHEMA message should raise an exception"""
    tap_lines = test_utils.get_test_tap_lines('invalid-message-order.json')

    with assert_raises(Exception):
        target_snowflake.persist_lines(self.config, tap_lines)

def test_invalid_json(self):
    """Receiving invalid JSONs should raise an exception"""
    tap_lines = test_utils.get_test_tap_lines('invalid-json.json')

    with assert_raises(json.decoder.JSONDecodeError):
        target_snowflake.persist_lines(self.config, tap_lines)