Example #1
    def test_transform_postgres_ssh(self):
        input = {
            "host": "airbyte.io",
            "port": 5432,
            "username": "******",
            "password": "******",
            "database": "my_db",
            "schema": "public",
            "tunnel_method": {
                "tunnel_host": "1.2.3.4",
                "tunnel_method": "SSH_PASSWORD_AUTH",
                "tunnel_port": 22,
                "tunnel_user": "******",
                "tunnel_user_password": "******",
            },
        }
        port = TransformConfig.pick_a_port()

        actual = TransformConfig().transform_postgres(input)
        expected = {
            "type": "postgres",
            "dbname": "my_db",
            "host": "localhost",
            "pass": "******",
            "port": port,
            "schema": "public",
            "threads": 8,
            "user": "******",
        }

        assert expected == actual
        assert extract_schema(actual) == "public"
Example #2
    def test_parse(self):
        t = TransformConfig()
        assert {
            "integration_type": DestinationType.postgres,
            "config": "config.json",
            "output_path": "out.yml",
        } == t.parse([
            "--integration-type", "postgres", "--config", "config.json",
            "--out", "out.yml",
        ])
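A minimal sketch of how such a parse method could be built with argparse; DestinationType is stubbed here, and the real TransformConfig.parse may well differ:

    import argparse
    from enum import Enum

    class DestinationType(Enum):  # stand-in for the real enum in the code under test
        postgres = "postgres"

    def parse(args):
        # Map the CLI flags exercised above onto the dict shape the test asserts.
        parser = argparse.ArgumentParser()
        parser.add_argument("--integration-type", required=True)
        parser.add_argument("--config", required=True)
        parser.add_argument("--out", required=True)
        parsed = parser.parse_args(args)
        return {
            "integration_type": DestinationType(parsed.integration_type),
            "config": parsed.config,
            "output_path": parsed.out,
        }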
Example #3
    def test_transform(self):
        input = {
            "host": "airbyte.io",
            "port": 5432,
            "username": "******",
            "password": "******",
            "database": "my_db",
            "schema": "public",
        }

        expected = self.get_base_config()
        expected["normalize"]["outputs"]["prod"] = {
            "type": "postgres",
            "dbname": "my_db",
            "host": "airbyte.io",
            "pass": "******",
            "port": 5432,
            "schema": "public",
            "threads": 32,
            "user": "******",
        }
        actual = TransformConfig().transform(DestinationType.postgres, input)

        assert expected == actual
        assert extract_schema(
            actual["normalize"]["outputs"]["prod"]) == "public"
Example #4
    def test_transform_mssql(self):
        input = {
            "type": "sqlserver",
            "host": "airbyte.io",
            "port": 1433,
            "database": "my_db",
            "schema": "my_db",
            "username": "******",
            "password": "******",
        }

        actual = TransformConfig().transform_mssql(input)
        expected = {
            "type": "sqlserver",
            "server": "airbyte.io",
            "port": 1433,
            "schema": "my_db",
            "database": "my_db",
            "username": "******",
            "password": "******",
        }

        assert expected == actual
        # dbt schema is equivalent to the MSSQL schema
        assert extract_schema(actual) == "my_db"
Example #5
    def test_transform_bigquery(self):
        input = {
            "project_id": "my_project_id",
            "dataset_id": "my_dataset_id",
            "credentials_json": '{ "type": "service_account" }'
        }

        actual_output = TransformConfig().transform_bigquery(input)
        expected_output = {
            "type": "bigquery",
            "method": "service-account",
            "project": "my_project_id",
            "dataset": "my_dataset_id",
            "keyfile": "/tmp/bq_keyfile.json",
            "retries": 1,
            "threads": 32,
        }

        with open("/tmp/bq_keyfile.json", "r") as file:
            actual_keyfile = json.loads(file.read())
        expected_keyfile = {"type": "service_account"}
        # Clean up the generated keyfile before asserting, so a failed assert doesn't leave it behind.
        if os.path.exists("/tmp/bq_keyfile.json"):
            os.remove("/tmp/bq_keyfile.json")
        assert expected_output == actual_output
        assert expected_keyfile == actual_keyfile
        assert extract_schema(actual_output) == "my_dataset_id"
Example #6
    def test_transform_bigquery(self):
        input = {
            "project_id": "my_project_id",
            "dataset_id": "my_dataset_id",
            "credentials_json": '{ "type": "service_account-json" }',
            "transformation_priority": "interactive",
            "dataset_location": "EU",
        }

        actual_output = TransformConfig().transform_bigquery(input)
        expected_output = {
            "type": "bigquery",
            "method": "service-account-json",
            "project": "my_project_id",
            "dataset": "my_dataset_id",
            "priority": "interactive",
            "keyfile_json": {"type": "service_account-json"},
            "location": "EU",
            "retries": 3,
            "threads": 8,
        }

        actual_keyfile = actual_output["keyfile_json"]
        expected_keyfile = {"type": "service_account-json"}
        assert expected_output == actual_output
        assert expected_keyfile == actual_keyfile
        assert extract_schema(actual_output) == "my_dataset_id"
Example #7
    def test_transform_snowflake(self):
        input = {
            "host": "http://123abc.us-east-7.aws.snowflakecomputing.com",
            "role": "AIRBYTE_ROLE",
            "warehouse": "AIRBYTE_WAREHOUSE",
            "database": "AIRBYTE_DATABASE",
            "schema": "AIRBYTE_SCHEMA",
            "username": "******",
            "password": "******",
        }

        actual = TransformConfig().transform_snowflake(input)
        expected = {
            "account": "123abc.us-east-7.aws",
            "client_session_keep_alive": False,
            "database": "AIRBYTE_DATABASE",
            "password": "******",
            "query_tag": "normalization",
            "role": "AIRBYTE_ROLE",
            "schema": "AIRBYTE_SCHEMA",
            "threads": 32,
            "type": "snowflake",
            "user": "******",
            "warehouse": "AIRBYTE_WAREHOUSE",
        }

        assert expected == actual
        assert extract_schema(actual) == "AIRBYTE_SCHEMA"
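The expected "account" value suggests the account identifier is derived from "host" by stripping the URL scheme and the snowflakecomputing.com suffix. A hedged sketch of that extraction (extract_account is a hypothetical helper; the shipped implementation may differ):

    import re

    def extract_account(host: str) -> str:
        # "http://123abc.us-east-7.aws.snowflakecomputing.com" -> "123abc.us-east-7.aws"
        account = re.sub(r"^https?://", "", host)
        return re.sub(r"\.snowflakecomputing\.com$", "", account)

    assert extract_account("http://123abc.us-east-7.aws.snowflakecomputing.com") == "123abc.us-east-7.aws"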
Example #8
    def test_transform_bigquery_with_invalid_format(self):
        input = {"project_id": "my_project_id", "dataset_id": "foo:bar:baz"}

        try:
            TransformConfig().transform_bigquery(input)
            assert False, "transform_bigquery should have raised an exception"
        except ValueError:
            pass
Example #9
    def test_transform_bigquery_with_embedded_mismatched_project_id(self):
        input = {"project_id": "my_project_id", "dataset_id": "bad_project_id:my_dataset_id"}

        try:
            TransformConfig().transform_bigquery(input)
            assert False, "transform_bigquery should have raised an exception"
        except ValueError:
            pass
Example #10
    def test_is_port_free(self):
        # to test that this accurately identifies 'free' ports, we'll find a 'free' port and then try to use it
        test_port = 13055
        while not TransformConfig.is_port_free(test_port):
            test_port += 1
            if test_port > 65535:
                raise RuntimeError("couldn't find a free port...")

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("localhost", test_port))
            # if we haven't failed then we accurately identified a 'free' port.
            # now we can test for accurate identification of 'in-use' port since we're using it
            assert TransformConfig.is_port_free(test_port) is False

        # and just for good measure now that our context manager is closed (and port open again)
        time.sleep(1)
        assert TransformConfig.is_port_free(test_port) is True
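For reference, a minimal bind-based sketch of what is_port_free could look like; this is an assumption consistent with the test above, not necessarily the shipped implementation:

    import socket

    def is_port_free(port: int) -> bool:
        # A port is treated as free if we can bind to it on localhost.
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(("localhost", port))
            return True
        except OSError:
            return False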
Example #11
 def generate_profile_yaml_file(
     self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False
 ) -> Dict[str, Any]:
     """
     Each destination requires different connection settings. This step generates the appropriate
     profiles.yml, as described here: https://docs.getdbt.com/reference/profiles.yml
     """
     config_generator = TransformConfig()
     profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
     # Adapt credential file to look like destination config.json
     if destination_type.value == DestinationType.BIGQUERY.value:
         credentials = profiles_config["basic_bigquery_config"]
         profiles_config = {
             "credentials_json": json.dumps(credentials),
             "dataset_id": self.target_schema,
             "project_id": credentials["project_id"],
             "dataset_location": "US",
         }
     elif destination_type.value == DestinationType.MYSQL.value:
         profiles_config["database"] = self.target_schema
     elif destination_type.value == DestinationType.REDSHIFT.value:
         profiles_config["schema"] = self.target_schema
         if random_schema:
             profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5))
     else:
         profiles_config["schema"] = self.target_schema
     if destination_type.value == DestinationType.CLICKHOUSE.value:
         # Work on a copy so transform() cannot mutate the profiles_config returned below.
         clickhouse_config = copy(profiles_config)
         profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
     else:
         profiles_yaml = config_generator.transform(destination_type, profiles_config)
     config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
     return profiles_config
Example #12
 def generate_profile_yaml_file(self, destination_type: DestinationType,
                                test_root_dir: str) -> Dict[str, Any]:
     """
     Each destination requires different connection settings. This step generates the appropriate
     profiles.yml, as described here: https://docs.getdbt.com/reference/profiles.yml
     """
     config_generator = TransformConfig()
     profiles_config = config_generator.read_json_config(
         f"../secrets/{destination_type.value.lower()}.json")
     # Adapt credential file to look like destination config.json
     if destination_type.value == DestinationType.BIGQUERY.value:
         credentials = profiles_config["basic_bigquery_config"]
         profiles_config = {
             "credentials_json": json.dumps(credentials),
             "dataset_id": self.target_schema,
             "project_id": credentials["project_id"],
         }
     elif destination_type.value == DestinationType.MYSQL.value:
         profiles_config["database"] = self.target_schema
     else:
         profiles_config["schema"] = self.target_schema
     if destination_type.value == DestinationType.CLICKHOUSE.value:
         # Python ClickHouse driver uses native port 9000, which is different
         # from official ClickHouse JDBC driver
         clickhouse_config = copy(profiles_config)
         clickhouse_config["port"] = 9000
         profiles_yaml = config_generator.transform(destination_type,
                                                    clickhouse_config)
     else:
         profiles_yaml = config_generator.transform(destination_type,
                                                    profiles_config)
     config_generator.write_yaml_config(test_root_dir, profiles_yaml,
                                        "profiles.yml")
     return profiles_config
Example #13
 def test_write_ssh_config(self):
     original_config_input = {
         "type": "postgres",
         "dbname": "my_db",
         "host": "airbyte.io",
         "pass": "******",
         "port": 5432,
         "schema": "public",
         "threads": 32,
         "user": "******",
         "tunnel_method": {
             "tunnel_host": "1.2.3.4",
             "tunnel_method": "SSH_PASSWORD_AUTH",
             "tunnel_port": 22,
             "tunnel_user": "******",
             "tunnel_user_password": "******",
         },
     }
     transformed_config_input = self.get_base_config()
     transformed_config_input["normalize"]["outputs"]["prod"] = {
         "port": 7890,
     }
     expected = {
         "db_host": "airbyte.io",
         "db_port": 5432,
         "tunnel_map": {
             "tunnel_host": "1.2.3.4",
             "tunnel_method": "SSH_PASSWORD_AUTH",
             "tunnel_port": 22,
             "tunnel_user": "******",
             "tunnel_user_password": "******",
         },
         "local_port": 7890,
     }
     # Use a context manager so the temporary directory still exists when write_ssh_config runs
     # (tempfile.TemporaryDirectory().name alone lets the directory be cleaned up immediately).
     with tempfile.TemporaryDirectory() as tmp_path:
         TransformConfig.write_ssh_config(tmp_path, original_config_input, transformed_config_input)
         with open(os.path.join(tmp_path, "ssh.json"), "r") as f:
             assert json.load(f) == expected
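Given the asserted file contents, write_ssh_config plausibly takes db_host/db_port from the original config, the tunnel block as tunnel_map, and the rewritten local port from the transformed config. A hedged sketch, not necessarily the shipped code:

    import json
    import os

    def write_ssh_config(output_path: str, original_config: dict, transformed_config: dict) -> None:
        # Persist the SSH tunnel details next to the dbt project as ssh.json.
        ssh_config = {
            "db_host": original_config["host"],
            "db_port": original_config["port"],
            "tunnel_map": original_config["tunnel_method"],
            "local_port": transformed_config["normalize"]["outputs"]["prod"]["port"],
        }
        with open(os.path.join(output_path, "ssh.json"), "w") as f:
            json.dump(ssh_config, f)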
Example #14
    def test_transform_bigquery_with_embedded_project_id(self):
        input = {"project_id": "my_project_id", "dataset_id": "my_project_id:my_dataset_id"}

        actual_output = TransformConfig().transform_bigquery(input)
        expected_output = {
            "type": "bigquery",
            "method": "oauth",
            "project": "my_project_id",
            "dataset": "my_dataset_id",
            "priority": "interactive",
            "retries": 3,
            "threads": 8,
        }

        assert expected_output == actual_output
        assert extract_schema(actual_output) == "my_dataset_id"
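Taken together with Examples #8 and #9, this test implies that a "project:dataset" prefix in dataset_id is accepted only when it matches project_id, and anything else raises ValueError. A hedged sketch of that parsing (parse_bigquery_dataset_id is a hypothetical helper):

    def parse_bigquery_dataset_id(project_id: str, dataset_id: str) -> str:
        # Strip an optional "project:" prefix, rejecting malformed or mismatched ids.
        parts = dataset_id.split(":")
        if len(parts) == 1:
            return dataset_id
        if len(parts) == 2 and parts[0] == project_id:
            return parts[1]
        raise ValueError(f"Invalid dataset_id format: '{dataset_id}'")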
Example #15
    def test_transform_clickhouse(self):
        input = {"host": "airbyte.io", "port": 9440, "database": "default", "username": "******", "password": "******", "ssl": True}

        actual = TransformConfig().transform_clickhouse(input)
        expected = {
            "type": "clickhouse",
            "host": "airbyte.io",
            "port": 9440,
            "schema": "default",
            "user": "******",
            "password": "******",
            "secure": True,
        }

        assert expected == actual
        assert extract_schema(actual) == "default"
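The field mapping implied by this test is direct: database becomes schema, ssl becomes secure, and username becomes user. A hedged sketch of transform_clickhouse consistent with the assertion (assumed, not authoritative):

    def transform_clickhouse(config: dict) -> dict:
        return {
            "type": "clickhouse",
            "host": config["host"],
            "port": config["port"],
            "schema": config["database"],  # dbt's schema maps onto the ClickHouse database
            "user": config["username"],
            "password": config["password"],
            "secure": config.get("ssl", False),
        }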
Example #16
    def test_transform_snowflake_oauth(self):
        input = {
            "host": "http://123abc.us-east-7.aws.snowflakecomputing.com",
            "role": "AIRBYTE_ROLE",
            "warehouse": "AIRBYTE_WAREHOUSE",
            "database": "AIRBYTE_DATABASE",
            "schema": "AIRBYTE_SCHEMA",
            "username": "******",
            "credentials": {
                "auth_type": "OAuth2.0",
                "client_id": "AIRBYTE_CLIENT_ID",
                "access_token": "AIRBYTE_ACCESS_TOKEN",
                "client_secret": "AIRBYTE_CLIENT_SECRET",
                "refresh_token": "AIRBYTE_REFRESH_TOKEN",
            },
        }

        actual = TransformConfig().transform_snowflake(input)
        expected = {
            "account": "123abc.us-east-7.aws",
            "client_session_keep_alive": False,
            "database": "AIRBYTE_DATABASE",
            "query_tag": "normalization",
            "role": "AIRBYTE_ROLE",
            "schema": "AIRBYTE_SCHEMA",
            "threads": 5,
            "retry_all": True,
            "retry_on_database_errors": True,
            "connect_retries": 3,
            "connect_timeout": 15,
            "type": "snowflake",
            "user": "******",
            "warehouse": "AIRBYTE_WAREHOUSE",
            "authenticator": "oauth",
            "oauth_client_id": "AIRBYTE_CLIENT_ID",
            "oauth_client_secret": "AIRBYTE_CLIENT_SECRET",
            "token": "AIRBYTE_REFRESH_TOKEN",
        }

        assert expected == actual
        assert extract_schema(actual) == "AIRBYTE_SCHEMA"
Example #17
def generate_profile_yaml_file(destination_type: DestinationType, test_root_dir: str) -> Dict[str, Any]:
    """
    Each destination requires different connection settings. This step generates the appropriate
    profiles.yml, as described here: https://docs.getdbt.com/reference/profiles.yml
    """
    config_generator = TransformConfig()
    profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
    # Adapt credential file to look like destination config.json
    if destination_type.value == DestinationType.BIGQUERY.value:
        # At this point the config still holds the raw credential fields, so the whole
        # credential file is serialized into credentials_json.
        profiles_config["credentials_json"] = json.dumps(profiles_config)
        profiles_config["dataset_id"] = target_schema  # target_schema is defined at module scope in the source test
    else:
        profiles_config["schema"] = target_schema
    profiles_yaml = config_generator.transform(destination_type, profiles_config)
    config_generator.write_yaml_config(test_root_dir, profiles_yaml)
    return profiles_config
Example #18
    def test_transform_postgres(self):
        input = {
            "host": "airbyte.io",
            "port": 5432,
            "username": "******",
            "password": "******",
            "database": "my_db",
            "schema": "public",
        }

        actual = TransformConfig().transform_postgres(input)
        expected = {
            "type": "postgres",
            "dbname": "my_db",
            "host": "airbyte.io",
            "pass": "******",
            "port": 5432,
            "schema": "public",
            "threads": 32,
            "user": "******",
        }

        assert expected == actual
        assert extract_schema(actual) == "public"
Example #19
 def single_test(config, expected_output):
     assert TransformConfig.is_ssh_tunnelling(config) == expected_output
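A hedged sketch of an is_ssh_tunnelling predicate consistent with the configs in Examples #1 and #13 (the actual check, and the NO_TUNNEL sentinel, are assumptions):

    def is_ssh_tunnelling(config: dict) -> bool:
        # A config tunnels over SSH when it carries a tunnel_method other than NO_TUNNEL.
        tunnel = config.get("tunnel_method", {})
        return tunnel.get("tunnel_method", "NO_TUNNEL") != "NO_TUNNEL"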
Example #20
 def test_pick_a_port(self):
     supposedly_open_port = TransformConfig.pick_a_port()
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.bind(("localhost", supposedly_open_port))