def apply_limit_to_sql(cls, sql: str, limit: int, database) -> str:
    """
    Alters the SQL statement to apply a LIMIT clause

    With ``LimitMethod.WRAP_SQL`` the statement is wrapped in an outer
    ``SELECT * FROM (<sql>) AS inner_qry`` and compiled through
    ``database.compile_sqla_query``. With ``LimitMethod.FORCE_LIMIT`` the
    statement is re-written by ``sql_parse.ParsedQuery``. For any other
    limit method the statement is returned unchanged.

    When ``database.backend`` is ``'db2'`` no limit is applied in either
    mode (the wrap is still performed for ``WRAP_SQL``).

    :param sql: SQL query
    :param limit: Maximum number of rows to be returned by the query
    :param database: Database instance
    :return: SQL query with limit clause
    """
    # TODO: Fix circular import caused by importing Database
    if cls.limit_method == LimitMethod.WRAP_SQL:
        # Trailing semicolons/whitespace would break the sub-select.
        sql = sql.strip("\t\n ;")
        qry = select("*").select_from(
            TextAsFrom(text(sql), ["*"]).alias("inner_qry")
        )
        if database.backend != 'db2':
            qry = qry.limit(limit)
        return database.compile_sqla_query(qry)
    elif cls.limit_method == LimitMethod.FORCE_LIMIT:
        # Compare against the configured method: a bare
        # ``LimitMethod.FORCE_LIMIT`` is always truthy and would force a
        # limit for every non-WRAP_SQL engine.
        if database.backend != 'db2':
            parsed_query = sql_parse.ParsedQuery(sql)
            sql = parsed_query.get_query_with_new_limit(limit)
    return sql
def runsql(self):
    """Runs arbitrary sql and returns an html table

    Reads a JSON payload from the ``data`` form field (keys ``sql`` and
    ``database_id``), wraps the statement in a ``SELECT * ... LIMIT 1000``
    and renders the result with ``DataFrame.to_html``. Returns an empty
    string when the database id matches no row, and an alert ``<div>``
    containing the error text when running the query fails.
    """
    session = db.session()
    limit = 1000
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()
    content = ""
    if mydb:
        eng = mydb.get_sqla_engine()
        if limit:
            # Strip the trailing semicolon so the statement can be nested
            # as a sub-select.
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*']).alias('inner_qry'))
                .limit(limit)
            )
            sql = str(
                qry.compile(eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            content = df.to_html(
                index=False,
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed sql_results"))
        except Exception as e:
            # ``e.message`` is not available on every exception (and not at
            # all on Python 3); fall back to ``str(e)`` so the handler
            # itself cannot raise AttributeError.
            # NOTE(review): the message is interpolated into HTML without
            # escaping -- confirm whether it should be escaped.
            message = getattr(e, 'message', None) or str(e)
            content = (
                '<div class="alert alert-danger">'
                "{}</div>").format(message)
    session.commit()
    return content
def get_from_clause(self):
    """Return the FROM target for this datasource.

    When ``self.sql`` is set, it is rendered through the object's template
    processor and wrapped as a text sub-select aliased ``expr_qry``;
    otherwise the result of ``self.get_sqla_table()`` is returned.
    """
    # Plain table: nothing to render.
    if not self.sql:
        return self.get_sqla_table()

    # Arbitrary SQL statement used in place of a table.
    processor = self.get_template_processor()
    rendered_sql = processor.process_template(self.sql)
    subquery = TextAsFrom(sa.text(rendered_sql), [])
    return subquery.alias('expr_qry')
def get_from_clause(self, template_processor=None):
    """Return the FROM target for this datasource.

    When ``self.sql`` is set, it is optionally rendered with
    ``template_processor``, stripped of comments via ``sqlparse.format``
    and wrapped as a text sub-select aliased ``expr_qry``. Otherwise the
    result of ``self.get_sqla_table()`` is returned.
    """
    # Plain table: no statement to prepare.
    if not self.sql:
        return self.get_sqla_table()

    # Arbitrary SQL statement used in place of a table.
    statement = self.sql
    if template_processor:
        statement = template_processor.process_template(statement)
    statement = sqlparse.format(statement, strip_comments=True)
    subquery = TextAsFrom(sa.text(statement), [])
    return subquery.alias('expr_qry')
def wrap_sql_limit(self, sql, limit=1000):
    """Wrap ``sql`` in an outer ``SELECT *`` with a LIMIT and compile it.

    The statement becomes a sub-select aliased ``inner_qry``; the wrapped
    query is handed to ``self.compile_sqla_query`` and that result is
    returned.
    """
    outer = select('*')
    inner = TextAsFrom(text(sql), ['*']).alias('inner_qry')
    limited = outer.select_from(inner).limit(limit)
    return self.compile_sqla_query(limited)
def _parse_from(self, sql):
    """Resolve a FROM specification to a selectable.

    A single word is looked up in ``self.ctes`` and, failing that, treated
    as a table name. Anything containing whitespace between tokens is
    treated as a SQL statement and wrapped with ``TextAsFrom``.

    Surrounding whitespace is ignored and *any* whitespace (not only a
    literal space) separates words, so ``"my_cte\\n"`` is still a single
    word and ``"select\\n1"`` is still a statement.
    """
    sql = sql.strip()
    if len(sql.split()) > 1:
        return TextAsFrom(text(sql), [])
    if sql in self.ctes:
        return self.ctes[sql]
    return sa.table(sql)
def get_from_clause(self, template_processor=None, db_engine_spec=None):
    """Return the FROM target for this datasource.

    When ``self.sql`` is set, it is optionally rendered with
    ``template_processor``, optionally passed through
    ``db_engine_spec.escape_sql``, and wrapped as a text sub-select aliased
    ``expr_qry``. Otherwise the result of ``self.get_sqla_table()`` is
    returned.
    """
    # Plain table: no statement to prepare.
    if not self.sql:
        return self.get_sqla_table()

    # Arbitrary SQL statement used in place of a table.
    statement = self.sql
    if template_processor:
        statement = template_processor.process_template(statement)
    if db_engine_spec:
        statement = db_engine_spec.escape_sql(statement)
    subquery = TextAsFrom(sa.text(statement), [])
    return subquery.alias('expr_qry')
def get_from_clause(
    self, template_processor: Optional[BaseTemplateProcessor] = None
) -> Union[table, TextAsFrom]:
    """Return the FROM target for this datasource.

    When ``self.sql`` is set, it is optionally rendered with
    ``template_processor``, stripped of comments via ``sqlparse.format``
    and wrapped as a text sub-select aliased ``expr_qry``. Otherwise the
    result of ``self.get_sqla_table()`` is returned.
    """
    # Plain table: no statement to prepare.
    if not self.sql:
        return self.get_sqla_table()

    # Arbitrary SQL statement used in place of a table.
    statement = self.sql
    if template_processor:
        statement = template_processor.process_template(statement)
    statement = sqlparse.format(statement, strip_comments=True)
    subquery = TextAsFrom(sa.text(statement), [])
    return subquery.alias("expr_qry")
def apply_limit_to_sql(cls, sql, limit, database):
    """Alters the SQL statement to apply a LIMIT clause

    With ``LimitMethod.WRAP_SQL`` the statement is wrapped in an outer
    ``SELECT * FROM (<sql>) AS inner_qry LIMIT <limit>`` and compiled
    through ``database.compile_sqla_query``. With
    ``LimitMethod.FORCE_LIMIT`` the statement is re-written by
    ``sql_parse.ParsedQuery``. For any other limit method the statement is
    returned unchanged.

    :param sql: SQL query
    :param limit: Maximum number of rows to be returned by the query
    :param database: Database instance used to compile the wrapped query
    :return: SQL query with limit clause
    """
    if cls.limit_method == LimitMethod.WRAP_SQL:
        # Trailing semicolons/whitespace would break the sub-select.
        sql = sql.strip("\t\n ;")
        qry = (
            select("*")
            .select_from(TextAsFrom(text(sql), ["*"]).alias("inner_qry"))
            .limit(limit)
        )
        return database.compile_sqla_query(qry)
    elif cls.limit_method == LimitMethod.FORCE_LIMIT:
        # Compare against the configured method: a bare
        # ``LimitMethod.FORCE_LIMIT`` is always truthy and would force a
        # limit for every non-WRAP_SQL engine.
        parsed_query = sql_parse.ParsedQuery(sql)
        sql = parsed_query.get_query_with_new_limit(limit)
    return sql
def sql_json(self):
    """Runs arbitrary sql and returns JSON

    Reads ``sql`` and ``database_id`` from the request form, wraps the
    statement in a ``SELECT * ... LIMIT 1000`` and runs it with
    ``pandas.read_sql_query``.

    Returns a JSON string with ``columns`` and ``data`` on success, or a
    500 ``Response`` carrying ``{"error": ...}`` when the database does not
    exist or the query fails.

    Raises ``utils.CaravelSecurityException`` when the caller has neither
    ``all_datasource_access`` nor access to the selected database.
    """
    session = db.session()
    limit = 1000
    sql = request.form.get('sql')
    database_id = request.form.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()

    # ``mydb`` is None for an unknown id; only read ``mydb.perm`` when the
    # row exists so a bad id cannot surface as an AttributeError.
    has_access = (
        self.can_access('all_datasource_access', 'all_datasource_access') or
        (mydb and self.can_access('database_access', mydb.perm))
    )
    if not has_access:
        raise utils.CaravelSecurityException(_(
            "SQL Lab requires the `all_datasource_access` or "
            "specific DB permission"))

    error_msg = ""
    if not mydb:
        error_msg = "The database selected doesn't seem to exist"
    else:
        eng = mydb.get_sqla_engine()
        if limit:
            # Strip the trailing semicolon so the statement can be nested
            # as a sub-select.
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*'])
                             .alias('inner_qry'))
                .limit(limit)
            )
            sql = '{}'.format(qry.compile(
                eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            df = df.fillna(0)  # TODO make sure NULL
        except Exception as e:
            logging.exception(e)
            error_msg = utils.error_msg_from_exception(e)

    session.commit()
    if error_msg:
        return Response(
            json.dumps({
                'error': error_msg,
            }),
            status=500,
            mimetype="application/json")

    data = {
        'columns': list(df.columns),
        'data': df.to_dict(orient='records'),
    }
    return json.dumps(
        data, default=utils.json_int_dttm_ser, allow_nan=False)
def get_from_clause(self, template_processor=None):
    """Return the FROM target for this datasource.

    When ``self.sql`` is set, it is optionally rendered with
    ``template_processor`` and wrapped as a text sub-select aliased
    ``expr_qry``; otherwise ``self.get_sqla_table()`` is used. If
    ``self.get_dim_acl_where`` is non-empty, the target is further wrapped
    in a ``SELECT *`` filtered by the conjunction of those conditions.
    """
    if not self.sql:
        source = self.get_sqla_table()
    else:
        # Arbitrary SQL statement used in place of a table.
        statement = self.sql
        if template_processor:
            statement = template_processor.process_template(statement)
        source = TextAsFrom(sa.text(statement), []).alias('expr_qry')

    # No access-control conditions: use the source as is.
    if len(self.get_dim_acl_where) == 0:
        return source

    wrapped = sa.select(columns="*").select_from(source)
    return wrapped.where(and_(*self.get_dim_acl_where))
def runsql(self):
    """Runs arbitrary sql and returns an html table

    Reads a JSON payload from the ``data`` form field (keys ``sql`` and
    ``database_id``), wraps the statement in a ``SELECT * ... LIMIT 1000``
    and renders the result with ``DataFrame.to_html``. Returns an empty
    string when the database id matches no row, and an alert ``<div>``
    containing the error text when running the query fails.

    Raises ``utils.CaravelSecurityException`` when the caller has neither
    ``all_datasource_access`` nor access to the selected database.
    """
    # TODO deprecate in favor on `sql_json`
    session = db.session()
    limit = 1000
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()

    # ``mydb`` is None for an unknown id; only read ``mydb.perm`` when the
    # row exists so a bad id cannot surface as an AttributeError.
    has_access = (
        self.can_access('all_datasource_access', 'all_datasource_access') or
        (mydb and self.can_access('database_access', mydb.perm))
    )
    if not has_access:
        raise utils.CaravelSecurityException(_(
            "SQL Lab requires the `all_datasource_access` or "
            "specific db permission"))

    content = ""
    if mydb:
        eng = mydb.get_sqla_engine()
        if limit:
            # Strip the trailing semicolon so the statement can be nested
            # as a sub-select.
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*'])
                             .alias('inner_qry'))
                .limit(limit)
            )
            sql = '{}'.format(qry.compile(
                eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            content = df.to_html(
                index=False,
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed sql_results").split(' '))
        except Exception as e:
            # ``e.message`` is not available on every exception (and not at
            # all on Python 3); fall back to ``str(e)`` so the handler
            # itself cannot raise AttributeError.
            # NOTE(review): the message is interpolated into HTML without
            # escaping -- confirm whether it should be escaped.
            message = getattr(e, 'message', None) or str(e)
            content = (
                '<div class="alert alert-danger">'
                "{}</div>"
            ).format(message)
    session.commit()
    return content
def get_sqla_query(  # sqla
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        form_data=None):
    """Querying any sqla table from this common interface

    :param groupby: column names to group by; each must be a
        ``column_name`` found in ``self.columns``
    :param metrics: metric names; each must be a ``metric_name`` found in
        ``self.metrics``. The first one drives the default ordering; when
        empty, ``COUNT(*)`` is used for ordering instead
    :param granularity: name of the datetime column; replaced by
        ``self.main_dttm_col`` when not listed in ``self.dttm_cols``
    :param from_dttm: lower time bound handed to the column's
        ``get_time_filter``
    :param to_dttm: upper time bound handed to the column's
        ``get_time_filter``
    :param filter: iterable of dicts with ``col``, ``op`` and ``val`` keys;
        entries missing any of them, or naming an unknown column, are
        skipped. ``None`` means no filters
    :param is_timeseries: when true, the time-grain expression is added to
        the select and group-by lists
    :param timeseries_limit: for grouped timeseries queries, number of
        top groups kept via a joined sub-query; falsy disables it
    :param timeseries_limit_metric: metric name used to rank groups in that
        sub-query instead of the main metric
    :param row_limit: LIMIT applied to the outer query
    :param inner_from_dttm: sub-query lower time bound (defaults to
        ``from_dttm``)
    :param inner_to_dttm: sub-query upper time bound (defaults to
        ``to_dttm``)
    :param orderby: iterable of ``(column, ascending)`` pairs, used only
        when ``groupby`` is empty
    :param extras: dict that may carry ``time_grain_sqla``, ``where`` and
        ``having``. ``None`` means no extras
    :param columns: column names selected without aggregation when
        ``groupby`` is empty; also disables GROUP BY
    :param form_data: passed through to the template processor
    :return: the select statement, bound to its FROM clause
    :raises Exception: when a timeseries is requested without a datetime
        column, or a metric name is unknown
    :raises KeyError: when a ``groupby``/``columns`` name is not a known
        column
    """
    # Both default to None in the signature, but are iterated / queried
    # unconditionally further down.
    filter = filter or []  # noqa
    extras = extras or {}

    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
        'form_data': form_data,
    }
    template_processor = self.get_template_processor(**template_kwargs)

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    cols = {col.column_name: col for col in self.columns}
    metrics_dict = {m.metric_name: m for m in self.metrics}

    if not granularity and is_timeseries:
        raise Exception(
            _("Datetime column not provided as part table configuration "
              "and is required by this type of chart"))
    for m in metrics:
        if m not in metrics_dict:
            # Format after translating so the untranslated template is
            # what gets looked up in the catalog.
            raise Exception(_("Metric '{}' is not valid").format(m))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
    timeseries_limit_metric_expr = None
    if timeseries_limit_metric:
        timeseries_limit_metric_expr = \
            timeseries_limit_metric.sqla_col
    if metrics:
        main_metric_expr = metrics_exprs[0]
    else:
        main_metric_expr = literal_column("COUNT(*)").label("ccount")

    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            # The inner (sub-query) copy gets a ``__`` suffix so the join
            # condition below can address it by name.
            inner = col.sqla_col.label(col.column_name + '__')

            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        # NOTE(review): this registers a compiler hook for ColumnClause
        # each time the method runs with a granularity -- confirm that
        # re-registration is intended.
        @compiles(ColumnClause)
        def visit_column(element, compiler, **kw):
            """Patch for sqlalchemy bug

            TODO: sqlalchemy 1.2 release should be doing this on its own.
            Patch only if the column clause is specific for DateTime
            set and granularity is selected.
            """
            text = compiler.visit_column(element, **kw)
            try:
                if (
                        element.is_literal and
                        hasattr(element.type, 'python_type') and
                        type(element.type) is DateTime
                ):
                    text = text.replace('%%', '%')
            except NotImplementedError:
                # Some elements raise NotImplementedError for python_type
                pass
            return text

        dttm_col = cols[granularity]
        time_grain = extras.get('time_grain_sqla')

        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)

    # Supporting arbitrary SQL statements in place of tables
    if self.sql:
        from_sql = template_processor.process_template(self.sql)
        tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')
    else:
        tbl = self.get_sqla_table()

    if not columns:
        qry = qry.group_by(*groupby_exprs)

    where_clause_and = []
    having_clause_and = []
    for flt in filter:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)

                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
            elif op == '==':
                where_clause_and.append(col_obj.sqla_col == eq)
            elif op == '!=':
                where_clause_and.append(col_obj.sqla_col != eq)
            elif op == '>':
                where_clause_and.append(col_obj.sqla_col > eq)
            elif op == '<':
                where_clause_and.append(col_obj.sqla_col < eq)
            elif op == '>=':
                where_clause_and.append(col_obj.sqla_col >= eq)
            elif op == '<=':
                where_clause_and.append(col_obj.sqla_col <= eq)
            elif op == 'LIKE':
                where_clause_and.append(col_obj.sqla_col.like(eq))
    if extras:
        where = extras.get('where')
        if where:
            where = template_processor.process_template(where)
            where_clause_and += [sa.text('({})'.format(where))]
        having = extras.get('having')
        if having:
            having = template_processor.process_template(having)
            having_clause_and += [sa.text('({})'.format(having))]
    if granularity:
        qry = qry.where(and_(*([time_filter] + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))
    if groupby:
        qry = qry.order_by(desc(main_metric_expr))
    elif orderby:
        for col, ascending in orderby:
            direction = asc if ascending else desc
            qry = qry.order_by(direction(col))

    qry = qry.limit(row_limit)

    if is_timeseries and timeseries_limit and groupby:
        # some sql dialects require for order by expressions
        # to also be in the select clause -- others, e.g. vertica,
        # require a unique inner alias
        inner_main_metric_expr = main_metric_expr.label('mme_inner__')
        inner_select_exprs += [inner_main_metric_expr]
        subq = select(inner_select_exprs)
        subq = subq.select_from(tbl)
        inner_time_filter = dttm_col.get_time_filter(
            inner_from_dttm or from_dttm,
            inner_to_dttm or to_dttm,
        )
        subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
        subq = subq.group_by(*inner_groupby_exprs)
        ob = inner_main_metric_expr
        if timeseries_limit_metric_expr is not None:
            ob = timeseries_limit_metric_expr
        subq = subq.order_by(desc(ob))
        subq = subq.limit(timeseries_limit)
        # Join the outer query to the top-N groups on every group-by
        # column, matching outer columns to their ``__``-suffixed copies.
        on_clause = []
        for i, gb in enumerate(groupby):
            on_clause.append(
                groupby_exprs[i] == column(gb + '__'))

        tbl = tbl.join(subq.alias(), and_(*on_clause))

    return qry.select_from(tbl)
def _build_cte(self, with_):
    """Build a mapping of alias -> CTE from the ``with_`` entries.

    Each entry is unpacked by ``dict_one`` into an ``(alias, sql)`` pair;
    the SQL text is wrapped with ``TextAsFrom`` and turned into a CTE named
    after the alias. Later entries replace earlier ones with the same
    alias.
    """
    pairs = (dict_one(entry) for entry in with_)
    return {
        name: TextAsFrom(text(statement), []).cte(name)
        for name, statement in pairs
    }