Пример #1
0
 def dialect(self) -> sa.engine.Dialect:
     # TODO: AJB 20220512 move this dialect retrieval to a separate class from the SqlAlchemyExecutionEngine
     #  and then use it here.
     dialect_name: GESqlDialect = self._dialect_name
     if dialect_name == GESqlDialect.ORACLE:
         return import_library_module(
             module_name="sqlalchemy.dialects.oracle"
         ).dialect()
     elif dialect_name == GESqlDialect.SNOWFLAKE:
         return import_library_module(
             module_name="snowflake.sqlalchemy.snowdialect"
         ).dialect()
     elif dialect_name == GESqlDialect.DREMIO:
         # WARNING: Dremio Support is experimental, functionality is not fully under test
         return import_library_module(
             module_name="sqlalchemy_dremio.pyodbc"
         ).dialect()
     # NOTE: AJB 20220512 Redshift dialect is not yet fully supported.
     # The below throws an `AttributeError: type object 'RedshiftDialect_psycopg2' has no attribute 'positional'`
     # elif dialect_name == "redshift":
     #     return import_library_module(
     #         module_name="sqlalchemy_redshift.dialect"
     #     ).RedshiftDialect
     elif dialect_name == GESqlDialect.BIGQUERY:
         return import_library_module(
             module_name=self._BIGQUERY_MODULE_NAME
         ).dialect()
     elif dialect_name == GESqlDialect.TERADATASQL:
         # WARNING: Teradata Support is experimental, functionality is not fully under test
         return import_library_module(
             module_name="teradatasqlalchemy.dialect"
         ).dialect()
     else:
         return sa.create_engine(self._connection_string).dialect
Пример #2
0
def reload_modules(module_names: list) -> None:
    module_name: str
    for module_name in module_names:
        module_obj: Optional[ModuleType] = import_library_module(
            module_name=module_name)
        if module_obj is not None:
            try:
                _ = importlib.reload(module_obj)
            except RuntimeError:
                pass
Пример #3
0
    def __init__(
        self,
        name: Optional[str] = None,
        credentials: Optional[dict] = None,
        data_context: Optional[Any] = None,
        engine=None,
        connection_string: Optional[str] = None,
        url: Optional[str] = None,
        batch_data_dict: Optional[dict] = None,
        create_temp_table: bool = True,
        concurrency: Optional[ConcurrencyConfig] = None,
        **kwargs,  # These will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine
    ) -> None:
        """Builds a SqlAlchemyExecutionEngine, using a provided connection string/url/engine/credentials to access the
        desired database. Also initializes the dialect to be used and configures usage statistics.

            Args:
                name (str): \
                    The name of the SqlAlchemyExecutionEngine
                credentials: \
                    If the Execution Engine is not provided, the credentials can be used to build the Execution
                    Engine. If the Engine is provided, it will be used instead
                data_context (DataContext): \
                    An object representing a Great Expectations project that can be used to access Expectation
                    Suites and the Project Data itself
                engine (Engine): \
                    A SqlAlchemy Engine used to set the SqlAlchemyExecutionEngine being configured, useful if an
                    Engine has already been configured and should be reused. Will override Credentials
                    if provided.
                connection_string (string): \
                    If neither the engines nor the credentials have been provided, a connection string can be used
                    to access the data. This will be overridden by both the engine and credentials if those are
                    provided.
                url (string): \
                    If neither the engines, the credentials, nor the connection_string have been provided,
                    a url can be used to access the data. This will be overridden by all other configuration
                    options if any are provided.
                concurrency (ConcurrencyConfig): Concurrency config used to configure the sqlalchemy engine.
        """
        super().__init__(name=name, batch_data_dict=batch_data_dict)
        self._name = name

        self._credentials = credentials
        self._connection_string = connection_string
        self._url = url
        self._create_temp_table = create_temp_table

        if engine is not None:
            if credentials is not None:
                logger.warning(
                    "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. "
                    "Ignoring credentials."
                )
            self.engine = engine
        else:
            concurrency: ConcurrencyConfig
            if data_context is None or data_context.concurrency is None:
                concurrency = ConcurrencyConfig()
            else:
                concurrency = data_context.concurrency

            concurrency.add_sqlalchemy_create_engine_parameters(kwargs)

            if credentials is not None:
                self.engine = self._build_engine(credentials=credentials, **kwargs)
            elif connection_string is not None:
                self.engine = sa.create_engine(connection_string, **kwargs)
            elif url is not None:
                parsed_url = make_url(url)
                self.drivername = parsed_url.drivername
                self.engine = sa.create_engine(url, **kwargs)
            else:
                raise InvalidConfigError(
                    "Credentials or an engine are required for a SqlAlchemyExecutionEngine."
                )

        # these are two backends where temp_table_creation is not supported we set the default value to False.
        if self.engine.dialect.name.lower() in [
            "trino",
            "awsathena",  # WKS 202201 - AWS Athena currently doesn't support temp_tables.
        ]:
            self._create_temp_table = False

        # Get the dialect **for purposes of identifying types**
        if self.engine.dialect.name.lower() in [
            "postgresql",
            "mysql",
            "sqlite",
            "oracle",
            "mssql",
        ]:
            # These are the officially included and supported dialects by sqlalchemy
            self.dialect_module = import_library_module(
                module_name=f"sqlalchemy.dialects.{self.engine.dialect.name}"
            )

        elif self.engine.dialect.name.lower() == "snowflake":
            self.dialect_module = import_library_module(
                module_name="snowflake.sqlalchemy.snowdialect"
            )
        elif self.engine.dialect.name.lower() == "dremio":
            # WARNING: Dremio Support is experimental, functionality is not fully under test
            self.dialect_module = import_library_module(
                module_name="sqlalchemy_dremio.pyodbc"
            )
        elif self.engine.dialect.name.lower() == "redshift":
            self.dialect_module = import_library_module(
                module_name="sqlalchemy_redshift.dialect"
            )
        elif self.engine.dialect.name.lower() == "bigquery":
            self.dialect_module = import_library_module(
                module_name=_BIGQUERY_MODULE_NAME
            )
        elif self.engine.dialect.name.lower() == "teradatasql":
            # WARNING: Teradata Support is experimental, functionality is not fully under test
            self.dialect_module = import_library_module(
                module_name="teradatasqlalchemy.dialect"
            )
        else:
            self.dialect_module = None

        # <WILL> 20210726 - engine_backup is used by the snowflake connector, which requires connection and engine
        # to be closed and disposed separately. Currently self.engine can refer to either a Connection or Engine,
        # depending on the backend. This will need to be cleaned up in an upcoming refactor, so that Engine and
        # Connection can be handled separately.
        self._engine_backup = None
        if self.engine and self.engine.dialect.name.lower() in [
            "sqlite",
            "mssql",
            "snowflake",
            "mysql",
        ]:
            self._engine_backup = self.engine
            # sqlite/mssql temp tables only persist within a connection so override the engine
            self.engine = self.engine.connect()

        # Send a connect event to provide dialect type
        if data_context is not None and getattr(
            data_context, "_usage_statistics_handler", None
        ):
            handler = data_context._usage_statistics_handler
            handler.send_usage_message(
                event=UsageStatsEvents.EXECUTION_ENGINE_SQLALCHEMY_CONNECT.value,
                event_payload={
                    "anonymized_name": handler.anonymizer.anonymize(self.name),
                    "sqlalchemy_dialect": self.engine.name,
                },
                success=True,
            )

        # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values,
        # and set the instance "_config" variable equal to the resulting dictionary.
        self._config = {
            "name": name,
            "credentials": credentials,
            "data_context": data_context,
            "engine": engine,
            "connection_string": connection_string,
            "url": url,
            "batch_data_dict": batch_data_dict,
            "module_name": self.__class__.__module__,
            "class_name": self.__class__.__name__,
        }
        self._config.update(kwargs)
        filter_properties_dict(properties=self._config, clean_falsy=True, inplace=True)

        self._data_splitter = SqlAlchemyDataSplitter()
        self._data_sampler = SqlAlchemyDataSampler()
Пример #4
0
    def __init__(
        self,
        name=None,
        credentials=None,
        data_context=None,
        engine=None,
        connection_string=None,
        url=None,
        batch_data_dict=None,
        **kwargs,  # These will be passed as optional parameters to the SQLAlchemy engine, **not** the ExecutionEngine
    ):
        """Builds a SqlAlchemyExecutionEngine, using a provided connection string/url/engine/credentials to access the
        desired database. Also initializes the dialect to be used and configures usage statistics.

            Args:
                name (str): \
                    The name of the SqlAlchemyExecutionEngine
                credentials: \
                    If the Execution Engine is not provided, the credentials can be used to build the Execution
                    Engine. If the Engine is provided, it will be used instead
                data_context (DataContext): \
                    An object representing a Great Expectations project that can be used to access Expectation
                    Suites and the Project Data itself
                engine (Engine): \
                    A SqlAlchemy Engine used to set the SqlAlchemyExecutionEngine being configured, useful if an
                    Engine has already been configured and should be reused. Will override Credentials
                    if provided.
                connection_string (string): \
                    If neither the engines nor the credentials have been provided, a connection string can be used
                    to access the data. This will be overridden by both the engine and credentials if those are
                    provided.
                url (string): \
                    If neither the engines, the credentials, nor the connection_string have been provided,
                    a url can be used to access the data. This will be overridden by all other configuration
                    options if any are provided.
        """
        super().__init__(name=name, batch_data_dict=batch_data_dict)  # , **kwargs)
        self._name = name

        self._credentials = credentials
        self._connection_string = connection_string
        self._url = url

        if engine is not None:
            if credentials is not None:
                logger.warning(
                    "Both credentials and engine were provided during initialization of SqlAlchemyExecutionEngine. "
                    "Ignoring credentials."
                )
            self.engine = engine
        elif credentials is not None:
            self.engine = self._build_engine(credentials=credentials, **kwargs)
        elif connection_string is not None:
            self.engine = sa.create_engine(connection_string, **kwargs)
        elif url is not None:
            self.drivername = urlparse(url).scheme
            self.engine = sa.create_engine(url, **kwargs)
        else:
            raise InvalidConfigError(
                "Credentials or an engine are required for a SqlAlchemyExecutionEngine."
            )

        # Get the dialect **for purposes of identifying types**
        if self.engine.dialect.name.lower() in [
            "postgresql",
            "mysql",
            "sqlite",
            "oracle",
            "mssql",
            "oracle",
        ]:
            # These are the officially included and supported dialects by sqlalchemy
            self.dialect = import_library_module(
                module_name="sqlalchemy.dialects." + self.engine.dialect.name
            )

        elif self.engine.dialect.name.lower() == "snowflake":
            self.dialect = import_library_module(
                module_name="snowflake.sqlalchemy.snowdialect"
            )
        elif self.engine.dialect.name.lower() == "redshift":
            self.dialect = import_library_module(
                module_name="sqlalchemy_redshift.dialect"
            )
        elif self.engine.dialect.name.lower() == "bigquery":
            self.dialect = import_library_module(
                module_name="pybigquery.sqlalchemy_bigquery"
            )
        else:
            self.dialect = None

        if self.engine and self.engine.dialect.name.lower() in [
            "sqlite",
            "mssql",
            "snowflake",
        ]:
            # sqlite/mssql temp tables only persist within a connection so override the engine
            self.engine = self.engine.connect()

        # Send a connect event to provide dialect type
        if data_context is not None and getattr(
            data_context, "_usage_statistics_handler", None
        ):
            handler = data_context._usage_statistics_handler
            handler.send_usage_message(
                event="execution_engine.sqlalchemy.connect",
                event_payload={
                    "anonymized_name": handler._execution_engine_anonymizer.anonymize(
                        self.name
                    ),
                    "sqlalchemy_dialect": self.engine.name,
                },
                success=True,
            )

        # Gather the call arguments of the present function (and add the "class_name"), filter out the Falsy values,
        # and set the instance "_config" variable equal to the resulting dictionary.
        self._config = get_currently_executing_function_call_arguments(
            **{"class_name": self.__class__.__name__}
        )
        filter_properties_dict(
            properties=self._config, inplace=True,
        )
Пример #5
0
def is_library_loadable(library_name: str) -> bool:
    module_obj: Union[ModuleType,
                      None] = import_library_module(module_name=library_name)
    return module_obj is not None