    def test_parallelism(self, query_patch):
        query_patch.return_value = [{'type': 'CSV'}]

        minimal_config = {
            'account': "dummy-value",
            'dbname': "dummy-value",
            'user': "******",
            'password': "******",
            'warehouse': "dummy-value",
            'default_target_schema': "dummy-value",
            'file_format': "dummy-value"
        }

        # Using external stages should allow parallelism
        external_stage_with_parallel = {
            's3_bucket': 'dummy-bucket',
            'stage': 'dummy_schema.dummy_stage',
            'parallelism': 5
        }

        self.assertEqual(
            db_sync.DbSync({
                **minimal_config,
                **external_stage_with_parallel
            }).connection_config['parallelism'], 5)

        # Using snowflake table stages should enforce single thread parallelism
        table_stage_with_parallel = {'parallelism': 5}
        self.assertEqual(
            db_sync.DbSync({
                **minimal_config,
                **table_stage_with_parallel
            }).connection_config['parallelism'], 1)
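
    # Hypothetical helper, not part of target_snowflake's API: a minimal
    # sketch of the rule the assertions above encode, assuming external S3
    # stages honour the configured parallelism while Snowflake table stages
    # are always loaded with a single thread.
    @staticmethod
    def _effective_parallelism_sketch(config):
        if config.get('s3_bucket') and config.get('stage'):
            return config.get('parallelism', 1)
        return 1
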
    def test_sync_table_with_stream_that_changes_to_have_no_pk(
            self, query_patch):
        minimal_config = {
            'account': "dummy-account",
            'dbname': "dummy-db",
            'user': "******",
            'password': "******",
            'warehouse': "dummy-wh",
            'default_target_schema': "dummy-schema",
            'file_format': "dummy-file-format"
        }

        stream_schema_message = {
            "stream": "public-table1",
            "schema": {
                "properties": {
                    "id": {
                        "type": ["integer"]
                    },
                    "c_str": {
                        "type": ["null", "string"]
                    }
                }
            },
            "key_properties": []
        }

        table_cache = [{
            'SCHEMA_NAME': 'DUMMY-SCHEMA',
            'TABLE_NAME': 'TABLE1',
            'COLUMN_NAME': 'ID',
            'DATA_TYPE': 'NUMBER'
        }, {
            'SCHEMA_NAME': 'DUMMY-SCHEMA',
            'TABLE_NAME': 'TABLE1',
            'COLUMN_NAME': 'C_STR',
            'DATA_TYPE': 'TEXT'
        }]
        query_patch.side_effect = [[{
            'type': 'CSV'
        }], [{
            'column_name': 'ID'
        }], None]

        dbsync = db_sync.DbSync(minimal_config, stream_schema_message,
                                table_cache)
        dbsync.sync_table()

        query_patch.assert_has_calls([
            call('SHOW FILE FORMATS LIKE \'dummy-file-format\''),
            call('show primary keys in table dummy-db.dummy-schema."TABLE1";'),
            call([
                'alter table dummy-schema."TABLE1" drop primary key;',
                'alter table dummy-schema."TABLE1" alter column "ID" drop not null;'
            ])
        ])
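
    def test_mock_side_effect_sequencing_example(self):
        # Illustrative only, not part of the original suite: shows the
        # unittest.mock behaviour the test above relies on, where each call
        # to the patched query() consumes the next item from the side_effect
        # list (file format lookup, primary key lookup, then the ALTER batch).
        from unittest.mock import Mock

        query = Mock(
            side_effect=[[{'type': 'CSV'}], [{'column_name': 'ID'}], None])
        self.assertEqual(query(), [{'type': 'CSV'}])
        self.assertEqual(query(), [{'column_name': 'ID'}])
        self.assertIsNone(query())
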
    def test_copy_to_archive(self, query_patch, copy_object_patch):
        query_patch.return_value = [{'type': 'CSV'}]
        minimal_config = {
            'account': "dummy-value",
            'dbname': "dummy-value",
            'user': "******",
            'password': "******",
            'warehouse': "dummy-value",
            'default_target_schema': "dummy-value",
            'file_format': "dummy-value",
            's3_bucket': 'dummy-bucket',
            'stage': 'dummy_schema.dummy_stage'
        }

        # Assert default values (same bucket, 'archive' as the archive prefix)
        s3_config = {}
        dbsync = db_sync.DbSync({**minimal_config, **s3_config})
        dbsync.copy_to_archive('source/file', 'tap/schema/file',
                               {'meta': "data"})

        self.assertEqual(copy_object_patch.call_args[0][0],
                         'dummy-bucket/source/file')
        self.assertEqual(copy_object_patch.call_args[0][1], 'dummy-bucket')
        self.assertEqual(copy_object_patch.call_args[0][2],
                         'archive/tap/schema/file')

        # Assert custom archive bucket and prefix
        s3_config = {
            'archive_load_files_s3_bucket': "custom-bucket",
            'archive_load_files_s3_prefix': "custom-prefix"
        }
        dbsync = db_sync.DbSync({**minimal_config, **s3_config})
        dbsync.copy_to_archive('source/file', 'tap/schema/file',
                               {'meta': "data"})

        self.assertEqual(copy_object_patch.call_args[0][0],
                         'dummy-bucket/source/file')
        self.assertEqual(copy_object_patch.call_args[0][1], 'custom-bucket')
        self.assertEqual(copy_object_patch.call_args[0][2],
                         'custom-prefix/tap/schema/file')
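
    def test_mock_call_args_indexing_example(self):
        # Illustrative only, not part of the original suite: call_args[0],
        # used in the assertions above, is the tuple of positional arguments
        # from the most recent call to the mock.
        from unittest.mock import Mock

        copy_object = Mock()
        copy_object('dummy-bucket/source/file', 'dummy-bucket',
                    'archive/tap/schema/file', {'meta': "data"})
        self.assertEqual(copy_object.call_args[0][0],
                         'dummy-bucket/source/file')
        self.assertEqual(copy_object.call_args[0][1], 'dummy-bucket')
        self.assertEqual(copy_object.call_args[0][2],
                         'archive/tap/schema/file')
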
    def test_snowpipe_detail_generation(self):
        with open(
                f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json',
                'r') as f:
            lines = f.readlines()
        self.config = test_utils.get_test_config()

        DbSync_obj = db_sync.DbSync(self.config, json.loads(lines[0]))
        schema_table_name = DbSync_obj.table_name(
            'tap_mysql_test-test_table_one', False)
        pipe_name = DbSync_obj._generate_pipe_name(self.config['dbname'],
                                                   schema_table_name)
        stripped_db_name = self.config['dbname'].replace('"', '')
        stripped_table_name = schema_table_name.replace('"', '')
        expected_pipe_name = f"{stripped_db_name}.{stripped_table_name}_s3_pipe"

        self.assertEqual(pipe_name, expected_pipe_name)
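
    # Worked example (the values here are illustrative assumptions): for a
    # dbname of '"analytics_db"' and a schema table name of
    # 'tap_schema."TABLE_ONE"', the pipe name asserted above would be
    # 'analytics_db.tap_schema.TABLE_ONE_s3_pipe', i.e. double quotes
    # stripped and the '_s3_pipe' suffix appended.
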
    def test_generate_s3_key_prefix(self):

        with open(
                f'{os.path.dirname(__file__)}/resources/same-schemas-multiple-times.json',
                'r') as f:
            lines = f.readlines()

        self.config = test_utils.get_test_config()
        DbSync_obj = db_sync.DbSync(self.config, json.loads(lines[0]))

        expected_string = f"{self.config['s3_key_prefix'].replace('/','')}/{self.config['default_target_schema']}__test_table_one/"
        s3_key_with_snowpipe = DbSync_obj._generate_s3_key_prefix(
            'tap_mysql_test-test_table_one', True)
        self.assertEqual(s3_key_with_snowpipe, expected_string)

        s3_key_without_snowpipe = DbSync_obj._generate_s3_key_prefix(
            'tap_mysql_test-test_table_one', False)
        self.assertEqual(s3_key_without_snowpipe,
                         f"{self.config['s3_key_prefix'].replace('/','')}/")
    def test_copy_failure_message(self, load_file_copy_patch, query_patch):
        LOGGER_NAME = "target_snowflake"
        query_patch.return_value = [{'type': 'CSV'}]
        minimal_config = {
            'account': "dummy_account",
            'dbname': "dummy_dbname",
            'user': "******",
            'password': "******",
            'warehouse': "dummy_warehouse",
            'default_target_schema': "dummy_default_target_schema",
            'file_format': "dummy_file_format",
        }

        stream_schema_message = {
            "stream": "dummy_stream",
            "schema": {
                "properties": {
                    "id": {
                        "type": ["integer"]
                    },
                    "c_str": {
                        "type": ["null", "string"]
                    }
                }
            },
            "key_properties": []
        }

        # Single primary key string
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        load_file_copy_patch.side_effect = Exception()
        expected_msg = (
            f'ERROR:{LOGGER_NAME}:Error while executing COPY query '
            f'for table "{minimal_config["default_target_schema"]}."{stream_schema_message["stream"].upper()}"" '
            f'in stream "{stream_schema_message["stream"]}"')
        with self.assertRaises(Exception), self.assertLogs(
                logger=LOGGER_NAME, level="ERROR") as captured_logs:
            dbsync.load_file(s3_key="dummy-key", count=256, size_bytes=256)
        self.assertIn(expected_msg, captured_logs.output)
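
    def test_assertlogs_output_format_example(self):
        # Illustrative only, not part of the original suite: assertLogs
        # captures records as '<LEVEL>:<logger name>:<message>' strings,
        # which is why the expected message above starts with
        # 'ERROR:target_snowflake:'.
        import logging

        with self.assertLogs(logger="target_snowflake",
                             level="ERROR") as captured_logs:
            logging.getLogger("target_snowflake").error("dummy COPY failure")
        self.assertIn("ERROR:target_snowflake:dummy COPY failure",
                      captured_logs.output)
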
    def test_sync_table_with_new_pk_in_stream(self, query_patch):
        minimal_config = {
            'account': "dummy-account",
            'dbname': "dummy-db",
            'user': "******",
            'password': "******",
            'warehouse': "dummy-wh",
            'default_target_schema': "dummy-schema",
            'file_format': "dummy-file-format"
        }

        stream_schema_message = {
            "stream": "public-table1",
            "schema": {
                "properties": {
                    "id": {
                        "type": ["integer"]
                    },
                    "c_str": {
                        "type": ["null", "string"]
                    },
                    "name": {
                        "type": ["string"]
                    },
                }
            },
            "key_properties": ["id", "name"]
        }

        table_cache = [{
            'SCHEMA_NAME': 'DUMMY-SCHEMA',
            'TABLE_NAME': 'TABLE1',
            'COLUMN_NAME': 'ID',
            'DATA_TYPE': 'NUMBER'
        }, {
            'SCHEMA_NAME': 'DUMMY-SCHEMA',
            'TABLE_NAME': 'TABLE1',
            'COLUMN_NAME': 'C_STR',
            'DATA_TYPE': 'TEXT'
        }, {
            'SCHEMA_NAME': 'DUMMY-SCHEMA',
            'TABLE_NAME': 'TABLE1',
            'COLUMN_NAME': 'NAME',
            'DATA_TYPE': 'TEXT'
        }]
        query_patch.side_effect = [[{
            'type': 'CSV'
        }], [{
            'column_name': 'ID'
        }], None]

        dbsync = db_sync.DbSync(minimal_config, stream_schema_message,
                                table_cache)
        dbsync.sync_table()

        # Due to the use of sets in the code, the order of columns in the
        # generated queries is not guaranteed, so the assertions below are
        # broken up to account for this.
        calls = query_patch.call_args_list
        self.assertEqual(3, len(calls))

        self.assertEqual('SHOW FILE FORMATS LIKE \'dummy-file-format\'',
                         calls[0][0][0])
        self.assertEqual(
            'show primary keys in table dummy-db.dummy-schema."TABLE1";',
            calls[1][0][0])

        self.assertEqual('alter table dummy-schema."TABLE1" drop primary key;',
                         calls[2][0][0][0])

        self.assertIn(
            calls[2][0][0][1], {
                'alter table dummy-schema."TABLE1" add primary key("ID", "NAME");',
                'alter table dummy-schema."TABLE1" add primary key("NAME", "ID");'
            })

        self.assertListEqual(sorted(calls[2][0][0][2:]), [
            'alter table dummy-schema."TABLE1" alter column "ID" drop not null;',
            'alter table dummy-schema."TABLE1" alter column "NAME" drop not null;',
        ])
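
    def test_unordered_primary_key_columns_example(self):
        # Illustrative only, not part of the original suite: iterating a set
        # does not guarantee column order, which is why the test above
        # accepts either ("ID", "NAME") or ("NAME", "ID") in the generated
        # ADD PRIMARY KEY statement.
        primary_key_columns = {'"ID"', '"NAME"'}
        statement = 'add primary key({});'.format(
            ', '.join(primary_key_columns))
        self.assertIn(
            statement, {
                'add primary key("ID", "NAME");',
                'add primary key("NAME", "ID");'
            })
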
    def test_record_primary_key_string(self, query_patch):
        query_patch.return_value = [{'type': 'CSV'}]
        minimal_config = {
            'account': "dummy-value",
            'dbname': "dummy-value",
            'user': "******",
            'password': "******",
            'warehouse': "dummy-value",
            'default_target_schema': "dummy-value",
            'file_format': "dummy-value"
        }

        stream_schema_message = {
            "stream": "public-table1",
            "schema": {
                "properties": {
                    "id": {
                        "type": ["integer"]
                    },
                    "c_str": {
                        "type": ["null", "string"]
                    },
                    "c_bool": {
                        "type": ["boolean"]
                    }
                }
            },
            "key_properties": ["id"]
        }

        # Single primary key string
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        self.assertEqual(dbsync.record_primary_key_string({'id': 123}), '123')

        # Composite primary key string
        stream_schema_message['key_properties'] = ['id', 'c_str']
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        self.assertEqual(
            dbsync.record_primary_key_string({
                'id': 123,
                'c_str': 'xyz'
            }), '123,xyz')

        # Missing field as PK
        stream_schema_message['key_properties'] = ['invalid_col']
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        with self.assertRaisesRegex(
                PrimaryKeyNotFoundException,
                r"Primary key 'invalid_col' does not exist in record or is null\. Available "
                r"fields: \['id', 'c_str'\]"):
            dbsync.record_primary_key_string({'id': 123, 'c_str': 'xyz'})

        # Null PK field
        stream_schema_message['key_properties'] = ['id']
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        with self.assertRaisesRegex(
                PrimaryKeyNotFoundException,
                r"Primary key 'id' does not exist in record or is null\. Available "
                r"fields: \['id', 'c_str'\]"):
            dbsync.record_primary_key_string({'id': None, 'c_str': 'xyz'})

        # Falsy integer PK value (0) is accepted
        stream_schema_message['key_properties'] = ['id']
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        self.assertEqual(
            dbsync.record_primary_key_string({
                'id': 0,
                'c_str': 'xyz'
            }), '0')

        # Falsy boolean PK value (False) is accepted in a composite key
        stream_schema_message['key_properties'] = ['id', 'c_bool']
        dbsync = db_sync.DbSync(minimal_config, stream_schema_message)
        self.assertEqual(
            dbsync.record_primary_key_string({
                'id': 1,
                'c_bool': False,
                'c_str': 'xyz'
            }), '1,False')
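
    # Hypothetical sketch, mirroring only what the assertions above require
    # (not the target_snowflake implementation): primary key values are
    # stringified and comma-joined, a missing or None key raises
    # PrimaryKeyNotFoundException, and falsy values such as 0 and False are
    # still valid key parts.
    @staticmethod
    def _primary_key_string_sketch(key_properties, record):
        parts = []
        for key in key_properties:
            if record.get(key) is None:
                raise PrimaryKeyNotFoundException(
                    f"Primary key '{key}' does not exist in record or is "
                    f"null. Available fields: {list(record.keys())}")
            parts.append(str(record[key]))
        return ','.join(parts)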