def _sqlalchemy_window(cls, column, _table, **kwargs): # Will - 20210126 # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table # more than once in the same query. So instead of passing dup_query as-is, a second temp_table is created with # the column we will be performing the expectation on, and the query is performed against it. dialect = kwargs.get("_dialect", None) sql_engine = kwargs.get("_sqlalchemy_engine", None) try: dialect_name = dialect.dialect.name except AttributeError: try: dialect_name = dialect.name except AttributeError: dialect_name = "" if sql_engine and dialect and dialect_name == "mysql": temp_table_name = generate_temporary_table_name() temp_table_stmt = "CREATE TEMPORARY TABLE {new_temp_table} AS SELECT tmp.{column_name} FROM {source_table} tmp".format( new_temp_table=temp_table_name, source_table=_table, column_name=column.name, ) sql_engine.execute(temp_table_stmt) dup_query = (sa.select([column]).select_from( sa.text(temp_table_name)).group_by(column).having( sa.func.count(column) > 1)) else: dup_query = (sa.select( [column]).select_from(_table).group_by(column).having( sa.func.count(column) > 1)) return column.notin_(dup_query)
def _get_column_quantiles_athena( column, quantiles: Iterable, selectable, sqlalchemy_engine, ) -> list: approx_percentiles = f"approx_percentile({column}, ARRAY{list(quantiles)})" selects_approx: List[TextClause] = [sa.text(approx_percentiles)] quantiles_query_approx: Select = sa.select(selects_approx).select_from( selectable) try: quantiles_results: Row = sqlalchemy_engine.execute( quantiles_query_approx).fetchone() # the ast literal eval is needed because the method is returning a json string and not a dict results = ast.literal_eval(quantiles_results[0]) return results return results except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' ) logger.error(exception_message) raise pe
def _get_column_quantiles_generic_sqlalchemy( column, quantiles: Iterable, allow_relative_error: bool, dialect, selectable, sqlalchemy_engine, ) -> list: selects: List[WithinGroup] = [ sa.func.percentile_disc(quantile).within_group(column.asc()) for quantile in quantiles ] quantiles_query: Select = sa.select(selects).select_from(selectable) try: quantiles_results: Row = sqlalchemy_engine.execute(quantiles_query).fetchone() return list(quantiles_results) except ProgrammingError: # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported; # use approximate percentile_disc or percentile_cont instead. if attempt_allowing_relative_error(dialect): # Redshift does not have a percentile_disc method, but does support an approximate version. sql_approx: str = get_approximate_percentile_disc_sql( selects=selects, sql_engine_dialect=dialect ) selects_approx: List[TextClause] = [sa.text(sql_approx)] quantiles_query_approx: Select = sa.select(selects_approx).select_from( selectable ) if allow_relative_error: try: quantiles_results: Row = sqlalchemy_engine.execute( quantiles_query_approx ).fetchone() return list(quantiles_results) except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' logger.error(exception_message) raise pe else: raise ValueError( f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles ' "without approximation error; set allow_relative_error to True to allow approximate quantiles." ) else: raise ValueError( f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles with ' "approximation error; set allow_relative_error to False to disable approximate quantiles." )
def _get_column_quantiles_trino(column, quantiles: Iterable, selectable, sqlalchemy_engine) -> list: # Trino does not have the percentile_disc func, but instead has approx_percentile sql_approx: str = f"approx_percentile({column}, ARRAY{list(quantiles)})" selects_approx: List[TextClause] = [sa.text(sql_approx)] quantiles_query: Select = sa.select(selects_approx).select_from(selectable) try: quantiles_results: Row = sqlalchemy_engine.execute( quantiles_query).fetchone() return list(quantiles_results)[0] except (ProgrammingError, TrinoUserError) as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' ) logger.error(exception_message) raise pe
def _sqlalchemy( cls, execution_engine: SqlAlchemyExecutionEngine, metric_domain_kwargs: dict, metric_value_kwargs: dict, metrics: Dict[str, Any], runtime_configuration: dict, ) -> List[sqlalchemy_engine_Row]: query: Optional[str] = metric_value_kwargs.get( "query" ) or cls.default_kwarg_values.get("query") selectable: Union[sa.sql.Selectable, str] selectable, _, _ = execution_engine.get_compute_domain( metric_domain_kwargs, domain_type=MetricDomainTypes.TABLE ) column: str = metric_value_kwargs.get("column") if isinstance(selectable, sa.Table): query = query.format(col=column, active_batch=selectable) elif isinstance( selectable, sa.sql.Subquery ): # Specifying a runtime query in a RuntimeBatchRequest returns the active bacth as a Subquery; sectioning the active batch off w/ parentheses ensures flow of operations doesn't break query = query.format(col=column, active_batch=f"({selectable})") elif isinstance( selectable, sa.sql.Select ): # Specifying a row_condition returns the active batch as a Select object, requiring compilation & aliasing when formatting the parameterized query query = query.format( col=column, active_batch=f'({selectable.compile(compile_kwargs={"literal_binds": True})}) AS subselect', ) else: query = query.format(col=column, active_batch=f"({selectable})") engine: sqlalchemy_engine_Engine = execution_engine.engine result: List[sqlalchemy_engine_Row] = engine.execute(sa.text(query)).fetchall() return result