def test_transform_postgres_ssh(self):
    input = {
        "host": "airbyte.io",
        "port": 5432,
        "username": "******",
        "password": "******",
        "database": "my_db",
        "schema": "public",
        "tunnel_method": {
            "tunnel_host": "1.2.3.4",
            "tunnel_method": "SSH_PASSWORD_AUTH",
            "tunnel_port": 22,
            "tunnel_user": "******",
            "tunnel_user_password": "******",
        },
    }
    port = TransformConfig.pick_a_port()

    actual = TransformConfig().transform_postgres(input)
    expected = {
        "type": "postgres",
        "dbname": "my_db",
        "host": "localhost",
        "pass": "******",
        "port": port,
        "schema": "public",
        "threads": 8,
        "user": "******",
    }

    assert expected == actual
    assert extract_schema(actual) == "public"

def test_parse(self):
    t = TransformConfig()
    assert {
        "integration_type": DestinationType.postgres,
        "config": "config.json",
        "output_path": "out.yml",
    } == t.parse(["--integration-type", "postgres", "--config", "config.json", "--out", "out.yml"])

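# test_parse above pins down the CLI contract: --integration-type, --config and --out map to the
# keys integration_type, config and output_path. A minimal argparse-based sketch of such a parse
# is shown below; parse_args_sketch is a hypothetical free function, not TransformConfig.parse,
# and the enum lookup assumes members are addressable by the lowercase CLI value (as the expected
# DestinationType.postgres suggests).
import argparse


def parse_args_sketch(args):
    parser = argparse.ArgumentParser(description="Generate a dbt profiles.yml from a destination config.json")
    parser.add_argument("--integration-type", type=str, required=True, help="destination dialect to generate for")
    parser.add_argument("--config", type=str, required=True, help="path to the destination config.json")
    parser.add_argument("--out", type=str, required=True, help="output directory for profiles.yml")
    parsed = parser.parse_args(args)
    return {
        "integration_type": DestinationType[parsed.integration_type],  # assumption: lookup by member name
        "config": parsed.config,
        "output_path": parsed.out,
    }
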
def test_transform(self):
    input = {
        "host": "airbyte.io",
        "port": 5432,
        "username": "******",
        "password": "******",
        "database": "my_db",
        "schema": "public",
    }

    expected = self.get_base_config()
    expected["normalize"]["outputs"]["prod"] = {
        "type": "postgres",
        "dbname": "my_db",
        "host": "airbyte.io",
        "pass": "******",
        "port": 5432,
        "schema": "public",
        "threads": 32,
        "user": "******",
    }
    actual = TransformConfig().transform(DestinationType.postgres, input)

    assert expected == actual
    assert extract_schema(actual["normalize"]["outputs"]["prod"]) == "public"

def test_transform_mssql(self):
    input = {
        "type": "sqlserver",
        "host": "airbyte.io",
        "port": 1433,
        "database": "my_db",
        "schema": "my_db",
        "username": "******",
        "password": "******",
    }

    actual = TransformConfig().transform_mssql(input)
    expected = {
        "type": "sqlserver",
        "server": "airbyte.io",
        "port": 1433,
        "schema": "my_db",
        "database": "my_db",
        "username": "******",
        "password": "******",
    }

    assert expected == actual

    # DBT schema is equivalent to the MSSQL schema
    assert extract_schema(actual) == "my_db"

def test_transform_bigquery(self):
    input = {
        "project_id": "my_project_id",
        "dataset_id": "my_dataset_id",
        "credentials_json": '{ "type": "service_account" }',
    }

    actual_output = TransformConfig().transform_bigquery(input)
    expected_output = {
        "type": "bigquery",
        "method": "service-account",
        "project": "my_project_id",
        "dataset": "my_dataset_id",
        "keyfile": "/tmp/bq_keyfile.json",
        "retries": 1,
        "threads": 32,
    }

    with open("/tmp/bq_keyfile.json", "r") as file:
        actual_keyfile = json.loads(file.read())
    expected_keyfile = {"type": "service_account"}

    if os.path.exists("/tmp/bq_keyfile.json"):
        os.remove("/tmp/bq_keyfile.json")

    assert expected_output == actual_output
    assert expected_keyfile == actual_keyfile
    assert extract_schema(actual_output) == "my_dataset_id"

def test_transform_bigquery(self):
    input = {
        "project_id": "my_project_id",
        "dataset_id": "my_dataset_id",
        "credentials_json": '{ "type": "service_account-json" }',
        "transformation_priority": "interactive",
        "dataset_location": "EU",
    }

    actual_output = TransformConfig().transform_bigquery(input)
    expected_output = {
        "type": "bigquery",
        "method": "service-account-json",
        "project": "my_project_id",
        "dataset": "my_dataset_id",
        "priority": "interactive",
        "keyfile_json": {"type": "service_account-json"},
        "location": "EU",
        "retries": 3,
        "threads": 8,
    }

    actual_keyfile = actual_output["keyfile_json"]
    expected_keyfile = {"type": "service_account-json"}

    assert expected_output == actual_output
    assert expected_keyfile == actual_keyfile
    assert extract_schema(actual_output) == "my_dataset_id"

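# The two bigquery tests above exercise different credential handling: the first writes the
# service-account JSON to /tmp/bq_keyfile.json and references it via "keyfile", the second embeds
# the parsed JSON directly under "keyfile_json". A minimal sketch of the embedded variant is
# below; bigquery_credentials_fragment is a hypothetical helper illustrating only the credential
# branch, not the full transform_bigquery.
def bigquery_credentials_fragment(config: dict) -> dict:
    # Assumption (consistent with the embedded project_id test further down): an absent or blank
    # credentials_json means "oauth"; otherwise the JSON is inlined as keyfile_json.
    credentials = config.get("credentials_json", "")
    if credentials.strip():
        return {"method": "service-account-json", "keyfile_json": json.loads(credentials)}
    return {"method": "oauth"}
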
def test_transform_snowflake(self):
    input = {
        "host": "http://123abc.us-east-7.aws.snowflakecomputing.com",
        "role": "AIRBYTE_ROLE",
        "warehouse": "AIRBYTE_WAREHOUSE",
        "database": "AIRBYTE_DATABASE",
        "schema": "AIRBYTE_SCHEMA",
        "username": "******",
        "password": "******",
    }

    actual = TransformConfig().transform_snowflake(input)
    expected = {
        "account": "123abc.us-east-7.aws",
        "client_session_keep_alive": False,
        "database": "AIRBYTE_DATABASE",
        "password": "******",
        "query_tag": "normalization",
        "role": "AIRBYTE_ROLE",
        "schema": "AIRBYTE_SCHEMA",
        "threads": 32,
        "type": "snowflake",
        "user": "******",
        "warehouse": "AIRBYTE_WAREHOUSE",
    }

    assert expected == actual
    assert extract_schema(actual) == "AIRBYTE_SCHEMA"

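# The snowflake tests expect the account identifier ("123abc.us-east-7.aws") to be derived from
# the connector's "host" value. A minimal sketch of how that derivation could work is shown
# below; extract_snowflake_account is a hypothetical helper, not part of TransformConfig's
# documented API.
def extract_snowflake_account(host: str) -> str:
    # Strip an optional scheme, then drop the ".snowflakecomputing.com" suffix.
    host = host.replace("https://", "").replace("http://", "")
    return host.replace(".snowflakecomputing.com", "")


assert extract_snowflake_account("http://123abc.us-east-7.aws.snowflakecomputing.com") == "123abc.us-east-7.aws"
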
def test_transform_bigquery_with_invalid_format(self):
    input = {"project_id": "my_project_id", "dataset_id": "foo:bar:baz"}
    try:
        TransformConfig().transform_bigquery(input)
        assert False, "transform_bigquery should have raised an exception"
    except ValueError:
        pass

def test_transform_bigquery_with_embedded_mismatched_project_id(self):
    input = {"project_id": "my_project_id", "dataset_id": "bad_project_id:my_dataset_id"}
    try:
        TransformConfig().transform_bigquery(input)
        assert False, "transform_bigquery should have raised an exception"
    except ValueError:
        pass

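# The two ValueError tests above, together with test_transform_bigquery_with_embedded_project_id
# further down, imply the dataset_id parsing rules: a bare dataset name is accepted, a single
# "project:dataset" prefix is accepted only when the project matches project_id, and anything
# else is rejected. A minimal sketch of that validation (hypothetical helper name) follows.
def parse_bigquery_dataset_id(project_id: str, dataset_id: str) -> str:
    parts = dataset_id.split(":")
    if len(parts) == 1:
        return parts[0]
    if len(parts) == 2:
        embedded_project, dataset = parts
        if embedded_project != project_id:
            raise ValueError(f"Project ID in dataset_id ({embedded_project}) does not match project_id ({project_id})")
        return dataset
    raise ValueError(f"Invalid dataset_id format: {dataset_id}")
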
def test_is_port_free(self):
    # to test that this accurately identifies 'free' ports, we'll find a 'free' port and then try to use it
    test_port = 13055
    while not TransformConfig.is_port_free(test_port):
        test_port += 1
        if test_port > 65535:
            raise RuntimeError("couldn't find a free port...")

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("localhost", test_port))
        # if we haven't failed then we accurately identified a 'free' port.
        # now we can test for accurate identification of 'in-use' port since we're using it
        assert TransformConfig.is_port_free(test_port) is False

    # and just for good measure now that our context manager is closed (and port open again)
    time.sleep(1)
    assert TransformConfig.is_port_free(test_port) is True

def generate_profile_yaml_file(
    self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False
) -> Dict[str, Any]:
    """
    Each destination requires different settings to connect to. This step generates the adequate profiles.yml
    as described here: https://docs.getdbt.com/reference/profiles.yml
    """
    config_generator = TransformConfig()
    profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
    # Adapt credential file to look like destination config.json
    if destination_type.value == DestinationType.BIGQUERY.value:
        credentials = profiles_config["basic_bigquery_config"]
        profiles_config = {
            "credentials_json": json.dumps(credentials),
            "dataset_id": self.target_schema,
            "project_id": credentials["project_id"],
            "dataset_location": "US",
        }
    elif destination_type.value == DestinationType.MYSQL.value:
        profiles_config["database"] = self.target_schema
    elif destination_type.value == DestinationType.REDSHIFT.value:
        profiles_config["schema"] = self.target_schema
        if random_schema:
            profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5))
    else:
        profiles_config["schema"] = self.target_schema
    if destination_type.value == DestinationType.CLICKHOUSE.value:
        clickhouse_config = copy(profiles_config)
        profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
    else:
        profiles_yaml = config_generator.transform(destination_type, profiles_config)
    config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
    return profiles_config

def generate_profile_yaml_file(self, destination_type: DestinationType, test_root_dir: str) -> Dict[str, Any]:
    """
    Each destination requires different settings to connect to. This step generates the adequate profiles.yml
    as described here: https://docs.getdbt.com/reference/profiles.yml
    """
    config_generator = TransformConfig()
    profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
    # Adapt credential file to look like destination config.json
    if destination_type.value == DestinationType.BIGQUERY.value:
        credentials = profiles_config["basic_bigquery_config"]
        profiles_config = {
            "credentials_json": json.dumps(credentials),
            "dataset_id": self.target_schema,
            "project_id": credentials["project_id"],
        }
    elif destination_type.value == DestinationType.MYSQL.value:
        profiles_config["database"] = self.target_schema
    else:
        profiles_config["schema"] = self.target_schema
    if destination_type.value == DestinationType.CLICKHOUSE.value:
        # Python ClickHouse driver uses native port 9000, which is different
        # from the official ClickHouse JDBC driver
        clickhouse_config = copy(profiles_config)
        clickhouse_config["port"] = 9000
        profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
    else:
        profiles_yaml = config_generator.transform(destination_type, profiles_config)
    config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
    return profiles_config

def test_write_ssh_config(self):
    original_config_input = {
        "type": "postgres",
        "dbname": "my_db",
        "host": "airbyte.io",
        "pass": "******",
        "port": 5432,
        "schema": "public",
        "threads": 32,
        "user": "******",
        "tunnel_method": {
            "tunnel_host": "1.2.3.4",
            "tunnel_method": "SSH_PASSWORD_AUTH",
            "tunnel_port": 22,
            "tunnel_user": "******",
            "tunnel_user_password": "******",
        },
    }
    transformed_config_input = self.get_base_config()
    transformed_config_input["normalize"]["outputs"]["prod"] = {
        "port": 7890,
    }
    expected = {
        "db_host": "airbyte.io",
        "db_port": 5432,
        "tunnel_map": {
            "tunnel_host": "1.2.3.4",
            "tunnel_method": "SSH_PASSWORD_AUTH",
            "tunnel_port": 22,
            "tunnel_user": "******",
            "tunnel_user_password": "******",
        },
        "local_port": 7890,
    }
    tmp_path = tempfile.TemporaryDirectory().name

    TransformConfig.write_ssh_config(tmp_path, original_config_input, transformed_config_input)
    with open(os.path.join(tmp_path, "ssh.json"), "r") as f:
        assert json.load(f) == expected

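# test_write_ssh_config above fully determines the shape of ssh.json: db_host/db_port come from
# the original destination config, tunnel_map is the original tunnel_method block, and local_port
# is the port chosen for the transformed dbt profile. A minimal sketch consistent with that test
# follows; write_ssh_config_sketch is a free-standing illustration, not necessarily the actual
# helper on TransformConfig.
def write_ssh_config_sketch(output_path: str, original_config: dict, transformed_config: dict) -> None:
    ssh_config = {
        "db_host": original_config["host"],
        "db_port": original_config["port"],
        "tunnel_map": original_config["tunnel_method"],
        "local_port": transformed_config["normalize"]["outputs"]["prod"]["port"],
    }
    # Create the output directory if needed, since callers may pass a fresh path.
    os.makedirs(output_path, exist_ok=True)
    with open(os.path.join(output_path, "ssh.json"), "w") as f:
        json.dump(ssh_config, f)
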
def test_transform_bigquery_with_embedded_project_id(self):
    input = {"project_id": "my_project_id", "dataset_id": "my_project_id:my_dataset_id"}
    actual_output = TransformConfig().transform_bigquery(input)
    expected_output = {
        "type": "bigquery",
        "method": "oauth",
        "project": "my_project_id",
        "dataset": "my_dataset_id",
        "priority": "interactive",
        "retries": 3,
        "threads": 8,
    }

    assert expected_output == actual_output
    assert extract_schema(actual_output) == "my_dataset_id"

def test_transform_clickhouse(self):
    input = {
        "host": "airbyte.io",
        "port": 9440,
        "database": "default",
        "username": "******",
        "password": "******",
        "ssl": True,
    }

    actual = TransformConfig().transform_clickhouse(input)
    expected = {
        "type": "clickhouse",
        "host": "airbyte.io",
        "port": 9440,
        "schema": "default",
        "user": "******",
        "password": "******",
        "secure": True,
    }

    assert expected == actual
    assert extract_schema(actual) == "default"

def test_transform_snowflake_oauth(self):
    input = {
        "host": "http://123abc.us-east-7.aws.snowflakecomputing.com",
        "role": "AIRBYTE_ROLE",
        "warehouse": "AIRBYTE_WAREHOUSE",
        "database": "AIRBYTE_DATABASE",
        "schema": "AIRBYTE_SCHEMA",
        "username": "******",
        "credentials": {
            "auth_type": "OAuth2.0",
            "client_id": "AIRBYTE_CLIENT_ID",
            "access_token": "AIRBYTE_ACCESS_TOKEN",
            "client_secret": "AIRBYTE_CLIENT_SECRET",
            "refresh_token": "AIRBYTE_REFRESH_TOKEN",
        },
    }

    actual = TransformConfig().transform_snowflake(input)
    expected = {
        "account": "123abc.us-east-7.aws",
        "client_session_keep_alive": False,
        "database": "AIRBYTE_DATABASE",
        "query_tag": "normalization",
        "role": "AIRBYTE_ROLE",
        "schema": "AIRBYTE_SCHEMA",
        "threads": 5,
        "retry_all": True,
        "retry_on_database_errors": True,
        "connect_retries": 3,
        "connect_timeout": 15,
        "type": "snowflake",
        "user": "******",
        "warehouse": "AIRBYTE_WAREHOUSE",
        "authenticator": "oauth",
        "oauth_client_id": "AIRBYTE_CLIENT_ID",
        "oauth_client_secret": "AIRBYTE_CLIENT_SECRET",
        "token": "AIRBYTE_REFRESH_TOKEN",
    }

    assert expected == actual
    assert extract_schema(actual) == "AIRBYTE_SCHEMA"

def generate_profile_yaml_file(destination_type: DestinationType, test_root_dir: str) -> Dict[str, Any]:
    """
    Each destination requires different settings to connect to. This step generates the adequate profiles.yml
    as described here: https://docs.getdbt.com/reference/profiles.yml
    """
    config_generator = TransformConfig()
    profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
    # Adapt credential file to look like destination config.json
    if destination_type.value == DestinationType.BIGQUERY.value:
        profiles_config["credentials_json"] = json.dumps(profiles_config)
        profiles_config["dataset_id"] = target_schema
    else:
        profiles_config["schema"] = target_schema
    profiles_yaml = config_generator.transform(destination_type, profiles_config)
    config_generator.write_yaml_config(test_root_dir, profiles_yaml)
    return profiles_config

def test_transform_postgres(self):
    input = {
        "host": "airbyte.io",
        "port": 5432,
        "username": "******",
        "password": "******",
        "database": "my_db",
        "schema": "public",
    }

    actual = TransformConfig().transform_postgres(input)
    expected = {
        "type": "postgres",
        "dbname": "my_db",
        "host": "airbyte.io",
        "pass": "******",
        "port": 5432,
        "schema": "public",
        "threads": 32,
        "user": "******",
    }

    assert expected == actual
    assert extract_schema(actual) == "public"

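# test_transform_postgres above (and test_transform_postgres_ssh at the top of this section) pin
# down the postgres mapping: username->user, password->pass, database->dbname, and, when an SSH
# tunnel is configured, host/port are rewritten to localhost plus a locally picked port with a
# smaller thread count. A minimal sketch consistent with those tests follows; it is an
# illustration, not the actual TransformConfig.transform_postgres.
def transform_postgres_sketch(config: dict) -> dict:
    dbt_config = {
        "type": "postgres",
        "host": config["host"],
        "user": config["username"],
        "pass": config["password"],
        "port": config["port"],
        "dbname": config["database"],
        "schema": config["schema"],
        "threads": 32,
    }
    if TransformConfig.is_ssh_tunnelling(config):
        # Route dbt through the local end of the SSH tunnel instead of the remote host.
        dbt_config.update({"host": "localhost", "port": TransformConfig.pick_a_port(), "threads": 8})
    return dbt_config
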
def single_test(config, expected_output):
    assert TransformConfig.is_ssh_tunnelling(config) == expected_output

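# single_test above is a small helper used by the is_ssh_tunnelling test cases. A minimal sketch
# of the predicate it exercises is shown below, assuming (as the SSH tests in this section do)
# that tunnelling is signalled by a nested tunnel_method block whose own tunnel_method value is
# one of the SSH auth modes; a missing block or a NO_TUNNEL value would count as not tunnelling.
def is_ssh_tunnelling_sketch(config: dict) -> bool:
    tunnel_methods = ("SSH_KEY_AUTH", "SSH_PASSWORD_AUTH")
    tunnel_block = config.get("tunnel_method", {})
    return isinstance(tunnel_block, dict) and tunnel_block.get("tunnel_method", "").upper() in tunnel_methods
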
def test_pick_a_port(self):
    supposedly_open_port = TransformConfig.pick_a_port()
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("localhost", supposedly_open_port))

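# test_is_port_free and test_pick_a_port describe the contract of the two port helpers used by
# the SSH path: is_port_free(port) must reflect whether a local bind would currently succeed, and
# pick_a_port() must return a port that is free right now. A minimal sketch meeting that contract
# is below; the starting port is an arbitrary assumption, not the actual implementation's value.
def is_port_free_sketch(port: int) -> bool:
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("localhost", port))
    except OSError:
        return False
    return True


def pick_a_port_sketch() -> int:
    port = 50001  # assumed starting point, above the well-known/registered ranges
    while not is_port_free_sketch(port):
        port += 1
    return port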