def _get_column_quantiles_mysql( column, quantiles: Iterable, selectable, sqlalchemy_engine ) -> list: # MySQL does not support "percentile_disc", so we implement it as a compound query. # Please see https://stackoverflow.com/questions/19770026/calculate-percentile-value-using-mysql for reference. percent_rank_query: CTE = ( sa.select( [ column, sa.cast( sa.func.percent_rank().over(order_by=column.asc()), sa.dialects.mysql.DECIMAL(18, 15), ).label("p"), ] ) .order_by(sa.column("p").asc()) .select_from(selectable) .cte("t") ) selects: List[WithinGroup] = [] for idx, quantile in enumerate(quantiles): # pymysql cannot handle conversion of numpy float64 to float; convert just in case if np.issubdtype(type(quantile), np.float_): quantile = float(quantile) quantile_column: Label = ( sa.func.first_value(column) .over( order_by=sa.case( [ ( percent_rank_query.c.p <= sa.cast(quantile, sa.dialects.mysql.DECIMAL(18, 15)), percent_rank_query.c.p, ) ], else_=None, ).desc() ) .label(f"q_{idx}") ) selects.append(quantile_column) quantiles_query: Select = ( sa.select(selects).distinct().order_by(percent_rank_query.c.p.desc()) ) try: quantiles_results: RowProxy = sqlalchemy_engine.execute( quantiles_query ).fetchone() return list(quantiles_results) except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' ) logger.error(exception_message) raise pe
def _get_column_quantiles_generic_sqlalchemy( column, quantiles: Iterable, allow_relative_error: bool, dialect, selectable, sqlalchemy_engine, ) -> list: selects: List[WithinGroup] = [ sa.func.percentile_disc(quantile).within_group(column.asc()) for quantile in quantiles ] quantiles_query: Select = sa.select(selects).select_from(selectable) try: quantiles_results: RowProxy = sqlalchemy_engine.execute( quantiles_query ).fetchone() return list(quantiles_results) except ProgrammingError: # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported; # use approximate percentile_disc or percentile_cont instead. if attempt_allowing_relative_error(dialect): # Redshift does not have a percentile_disc method, but does support an approximate version. sql_approx: str = get_approximate_percentile_disc_sql( selects=selects, sql_engine_dialect=dialect ) selects_approx: List[TextClause] = [sa.text(sql_approx)] quantiles_query_approx: Select = sa.select(selects_approx).select_from( selectable ) if allow_relative_error: try: quantiles_results: RowProxy = sqlalchemy_engine.execute( quantiles_query_approx ).fetchone() return list(quantiles_results) except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' logger.error(exception_message) raise pe else: raise ValueError( f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles ' "without approximation error; set allow_relative_error to True to allow approximate quantiles." ) else: raise ValueError( f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles with ' "approximation error; set allow_relative_error to False to disable approximate quantiles." )
def _get_column_quantiles_bigquery( column, quantiles: Iterable, selectable, sqlalchemy_engine ) -> list: # BigQuery does not support "WITHIN", so we need a special case for it selects: List[WithinGroup] = [ sa.func.percentile_disc(column, quantile).over() for quantile in quantiles ] quantiles_query: Select = sa.select(selects).select_from(selectable) try: quantiles_results: Row = sqlalchemy_engine.execute(quantiles_query).fetchone() return list(quantiles_results) except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' ) logger.error(exception_message) raise pe
def _get_column_quantiles_mssql( column, quantiles: Iterable, selectable, sqlalchemy_engine ) -> list: # mssql requires over(), so we add an empty over() clause selects: List[WithinGroup] = [ sa.func.percentile_disc(quantile).within_group(column.asc()).over() for quantile in quantiles ] quantiles_query: Select = sa.select(selects).select_from(selectable) try: quantiles_results: Row = sqlalchemy_engine.execute(quantiles_query).fetchone() return list(quantiles_results) except ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() exception_message += ( f'{type(pe).__name__}: "{str(pe)}". Traceback: "{exception_traceback}".' ) logger.error(exception_message) raise pe