Exemplo n.º 1
0
    def apply_limit_to_sql(cls, sql: str, limit: int, database) -> str:
        """
        Alters the SQL statement to apply a LIMIT clause

        When ``cls.limit_method`` is ``LimitMethod.WRAP_SQL`` the statement is
        stripped of surrounding whitespace/semicolons, wrapped as a subquery
        aliased ``inner_qry`` and compiled through
        ``database.compile_sqla_query``. When it is ``LimitMethod.FORCE_LIMIT``
        the limit is rewritten in place via ``sql_parse.ParsedQuery``. For any
        other limit method the statement is returned untouched.

        No limit is applied when ``database.backend`` is ``'db2'``: the
        wrapped query is built without ``.limit()`` and the forced rewrite is
        skipped.

        :param sql: SQL query
        :param limit: Maximum number of rows to be returned by the query
        :param database: Database instance
        :return: SQL query with limit clause
        """
        # TODO: Fix circular import caused by importing Database
        if cls.limit_method == LimitMethod.WRAP_SQL:
            sql = sql.strip("\t\n ;")
            qry = (
                select("*")
                .select_from(TextAsFrom(text(sql), ["*"]).alias("inner_qry"))
            )
            if database.backend != 'db2':
                qry = qry.limit(limit)
            return database.compile_sqla_query(qry)
        # Compare against the configured method: a bare
        # ``elif LimitMethod.FORCE_LIMIT`` is always truthy and would rewrite
        # the query for every non-WRAP_SQL limit method.
        if cls.limit_method == LimitMethod.FORCE_LIMIT:
            if database.backend != 'db2':
                parsed_query = sql_parse.ParsedQuery(sql)
                sql = parsed_query.get_query_with_new_limit(limit)
        return sql
Exemplo n.º 2
0
 def runsql(self):
     """Runs arbitrary sql and returns an html table

     Reads a JSON payload from the ``data`` form field (keys ``sql`` and
     ``database_id``), wraps the statement in a ``SELECT * ... LIMIT 1000``
     subquery, runs it against the selected database's engine and renders
     the resulting dataframe as an HTML table. If the query fails, an
     ``alert-danger`` div containing the error text is returned instead.
     An empty string is returned when no database matches ``database_id``.
     """
     session = db.session()
     limit = 1000
     data = json.loads(request.form.get('data'))
     sql = data.get('sql')
     database_id = data.get('database_id')
     mydb = session.query(models.Database).filter_by(id=database_id).first()
     content = ""
     if mydb:
         eng = mydb.get_sqla_engine()
         if limit:
             # Wrap the user statement as a subquery so the row cap applies
             # regardless of what the statement itself contains.
             sql = sql.strip().strip(';')
             qry = (select('*').select_from(
                 TextAsFrom(text(sql),
                            ['*']).alias('inner_qry')).limit(limit))
             sql = str(
                 qry.compile(eng, compile_kwargs={"literal_binds": True}))
         try:
             df = pd.read_sql_query(sql=sql, con=eng)
             content = df.to_html(
                 index=False,
                 na_rep='',
                 classes=("dataframe table table-striped table-bordered "
                          "table-condensed sql_results"))
         except Exception as e:
             # ``e.message`` only exists on Python 2 exceptions; fall back to
             # ``str(e)`` so the handler itself cannot raise AttributeError.
             # NOTE(review): the error text is interpolated into HTML
             # unescaped -- consider escaping it before rendering.
             error_text = getattr(e, 'message', None) or str(e)
             content = ('<div class="alert alert-danger">'
                        "{}</div>").format(error_text)
     session.commit()
     return content
Exemplo n.º 3
0
    def get_from_clause(self):
        """Return the object to select from for this datasource.

        When ``self.sql`` is set, it is rendered through the template
        processor and wrapped as a text-based subquery aliased ``expr_qry``;
        otherwise the result of ``self.get_sqla_table()`` is returned.
        """
        if not self.sql:
            return self.get_sqla_table()

        # Supporting arbitrary SQL statements in place of tables
        rendered_sql = self.get_template_processor().process_template(self.sql)
        text_clause = sa.text(rendered_sql)
        return TextAsFrom(text_clause, []).alias('expr_qry')
Exemplo n.º 4
0
 def get_from_clause(self, template_processor=None):
     """Return the object to select from for this datasource.

     When ``self.sql`` is set, it is optionally rendered through
     ``template_processor``, stripped of comments with ``sqlparse.format``
     and wrapped as a text-based subquery aliased ``expr_qry``. Otherwise
     the result of ``self.get_sqla_table()`` is returned.
     """
     if not self.sql:
         return self.get_sqla_table()

     # Supporting arbitrary SQL statements in place of tables
     query_text = self.sql
     if template_processor:
         query_text = template_processor.process_template(query_text)
     query_text = sqlparse.format(query_text, strip_comments=True)
     text_clause = sa.text(query_text)
     return TextAsFrom(text_clause, []).alias('expr_qry')
Exemplo n.º 5
0
 def wrap_sql_limit(self, sql, limit=1000):
     """Wrap ``sql`` in ``SELECT * FROM (<sql>) AS inner_qry`` with a LIMIT.

     The wrapped query is handed to ``self.compile_sqla_query`` and that
     call's result is returned.
     """
     outer = select('*')
     inner = TextAsFrom(text(sql), ['*']).alias('inner_qry')
     limited = outer.select_from(inner).limit(limit)
     return self.compile_sqla_query(limited)
Exemplo n.º 6
0
 def _parse_from(self, sql):
     # If sql is a single word, it should be a table or cte
     # Otherwise, it should be a sql
     if ' ' in sql:
         return TextAsFrom(text(sql), [])
     elif sql in self.ctes:
         return self.ctes[sql]
     else:
         return sa.table(sql)
Exemplo n.º 7
0
 def get_from_clause(self, template_processor=None, db_engine_spec=None):
     """Return the object to select from for this datasource.

     When ``self.sql`` is set, it is optionally rendered through
     ``template_processor``, optionally passed through
     ``db_engine_spec.escape_sql``, and wrapped as a text-based subquery
     aliased ``expr_qry``. Otherwise the result of
     ``self.get_sqla_table()`` is returned.
     """
     if not self.sql:
         return self.get_sqla_table()

     # Supporting arbitrary SQL statements in place of tables
     query_text = self.sql
     if template_processor:
         query_text = template_processor.process_template(query_text)
     if db_engine_spec:
         query_text = db_engine_spec.escape_sql(query_text)
     text_clause = sa.text(query_text)
     return TextAsFrom(text_clause, []).alias('expr_qry')
Exemplo n.º 8
0
 def get_from_clause(
     self, template_processor: Optional[BaseTemplateProcessor] = None
 ) -> Union[table, TextAsFrom]:
     """Return the object to select from for this datasource.

     When ``self.sql`` is set, it is optionally rendered through
     ``template_processor``, stripped of comments with ``sqlparse.format``
     and wrapped as a text-based subquery aliased ``expr_qry``. Otherwise
     the result of ``self.get_sqla_table()`` is returned.
     """
     if not self.sql:
         return self.get_sqla_table()

     # Supporting arbitrary SQL statements in place of tables
     query_text = self.sql
     if template_processor:
         query_text = template_processor.process_template(query_text)
     query_text = sqlparse.format(query_text, strip_comments=True)
     text_clause = sa.text(query_text)
     return TextAsFrom(text_clause, []).alias("expr_qry")
Exemplo n.º 9
0
 def apply_limit_to_sql(cls, sql, limit, database):
     """Alters the SQL statement to apply a LIMIT clause

     When ``cls.limit_method`` is ``LimitMethod.WRAP_SQL`` the statement is
     stripped of surrounding whitespace/semicolons, wrapped as a subquery
     aliased ``inner_qry`` with ``limit`` applied, and compiled through
     ``database.compile_sqla_query``. When it is ``LimitMethod.FORCE_LIMIT``
     the limit is rewritten in place via ``sql_parse.ParsedQuery``. For any
     other limit method the statement is returned untouched.

     :param sql: SQL query
     :param limit: Maximum number of rows to be returned by the query
     :param database: Database instance
     :return: SQL query with limit clause
     """
     if cls.limit_method == LimitMethod.WRAP_SQL:
         sql = sql.strip("\t\n ;")
         qry = (select("*").select_from(
             TextAsFrom(text(sql), ["*"]).alias("inner_qry")).limit(limit))
         return database.compile_sqla_query(qry)
     # Compare against the configured method: a bare
     # ``elif LimitMethod.FORCE_LIMIT`` is always truthy and would rewrite
     # the query for every non-WRAP_SQL limit method.
     if cls.limit_method == LimitMethod.FORCE_LIMIT:
         parsed_query = sql_parse.ParsedQuery(sql)
         sql = parsed_query.get_query_with_new_limit(limit)
     return sql
Exemplo n.º 10
0
    def sql_json(self):
        """Runs arbitrary sql and returns the result as json

        Reads ``sql`` and ``database_id`` from the request form, wraps the
        statement in a ``SELECT * ... LIMIT 1000`` subquery and runs it
        against the selected database's engine.

        Returns a JSON string with ``columns`` and ``data`` on success, or a
        500 ``Response`` carrying ``{"error": ...}`` when the database does
        not exist or the query fails.

        Raises ``utils.CaravelSecurityException`` when the caller has neither
        ``all_datasource_access`` nor access to the selected database.
        """
        session = db.session()
        limit = 1000
        sql = request.form.get('sql')
        database_id = request.form.get('database_id')
        mydb = session.query(models.Database).filter_by(id=database_id).first()

        # ``mydb`` is None for an unknown id; only consult ``mydb.perm`` when
        # a database was found so the "doesn't seem to exist" error below is
        # reachable instead of an AttributeError.
        has_access = (
            self.can_access(
                'all_datasource_access', 'all_datasource_access') or
            (mydb and self.can_access('database_access', mydb.perm)))
        if not has_access:
            raise utils.CaravelSecurityException(_(
                "SQL Lab requires the `all_datasource_access` or "
                "specific DB permission"))

        error_msg = ""
        if not mydb:
            error_msg = "The database selected doesn't seem to exist"
        else:
            eng = mydb.get_sqla_engine()
            if limit:
                # Wrap the user statement as a subquery so the row cap
                # applies regardless of what the statement itself contains.
                sql = sql.strip().strip(';')
                qry = (
                    select('*')
                    .select_from(TextAsFrom(text(sql), ['*'])
                                 .alias('inner_qry'))
                    .limit(limit)
                )
                sql = '{}'.format(qry.compile(
                    eng, compile_kwargs={"literal_binds": True}))
            try:
                df = pd.read_sql_query(sql=sql, con=eng)
                df = df.fillna(0)  # TODO make sure NULL
            except Exception as e:
                logging.exception(e)
                error_msg = utils.error_msg_from_exception(e)

        session.commit()
        if error_msg:
            return Response(
                json.dumps({
                    'error': error_msg,
                }),
                status=500,
                mimetype="application/json")
        else:
            # ``df`` is always bound here: an empty ``error_msg`` means the
            # query above succeeded.
            data = {
                'columns': [c for c in df.columns],
                'data': df.to_dict(orient='records'),
            }
            return json.dumps(
                data, default=utils.json_int_dttm_ser, allow_nan=False)
Exemplo n.º 11
0
 def get_from_clause(self, template_processor=None):
     """Return the object to select from, filtered by dimension ACLs.

     The base source is a text-based subquery aliased ``expr_qry`` built
     from ``self.sql`` (optionally rendered through ``template_processor``)
     or, when ``self.sql`` is unset, ``self.get_sqla_table()``. If
     ``self.get_dim_acl_where`` is non-empty, the source is wrapped in a
     ``SELECT *`` restricted by the AND of those conditions.
     """
     # NOTE(review): ``get_dim_acl_where`` is used without being called, so
     # it is presumably a property returning a sized collection -- confirm.
     if self.sql:
         # Supporting arbitrary SQL statements in place of tables
         query_text = self.sql
         if template_processor:
             query_text = template_processor.process_template(query_text)
         source = TextAsFrom(sa.text(query_text), []).alias('expr_qry')
     else:
         source = self.get_sqla_table()

     if len(self.get_dim_acl_where) > 0:
         wrapped = sa.select(columns="*").select_from(source)
         source = wrapped.where(and_(*self.get_dim_acl_where))
     return source
Exemplo n.º 12
0
    def runsql(self):
        """Runs arbitrary sql and returns an html table

        Reads a JSON payload from the ``data`` form field (keys ``sql`` and
        ``database_id``), wraps the statement in a ``SELECT * ... LIMIT 1000``
        subquery, runs it against the selected database's engine and renders
        the resulting dataframe as an HTML table. If the query fails, an
        ``alert-danger`` div containing the error text is returned instead.
        An empty string is returned when no database matches ``database_id``.

        Raises ``utils.CaravelSecurityException`` when the caller has neither
        ``all_datasource_access`` nor access to the selected database.
        """
        # TODO deprecate in favor on `sql_json`
        session = db.session()
        limit = 1000
        data = json.loads(request.form.get('data'))
        sql = data.get('sql')
        database_id = data.get('database_id')
        mydb = session.query(models.Database).filter_by(id=database_id).first()

        # ``mydb`` is None for an unknown id; only consult ``mydb.perm`` when
        # a database was found so the lookup cannot raise AttributeError.
        has_access = (
            self.can_access(
                'all_datasource_access', 'all_datasource_access') or
            (mydb and self.can_access('database_access', mydb.perm)))
        if not has_access:
            raise utils.CaravelSecurityException(_(
                "SQL Lab requires the `all_datasource_access` or "
                "specific db permission"))

        content = ""
        if mydb:
            eng = mydb.get_sqla_engine()
            if limit:
                # Wrap the user statement as a subquery so the row cap
                # applies regardless of what the statement itself contains.
                sql = sql.strip().strip(';')
                qry = (
                    select('*')
                    .select_from(TextAsFrom(text(sql), ['*'])
                                 .alias('inner_qry'))
                    .limit(limit)
                )
                sql = '{}'.format(qry.compile(
                    eng, compile_kwargs={"literal_binds": True}))
            try:
                df = pd.read_sql_query(sql=sql, con=eng)
                content = df.to_html(
                    index=False,
                    na_rep='',
                    classes=(
                        "dataframe table table-striped table-bordered "
                        "table-condensed sql_results").split(' '))
            except Exception as e:
                # ``e.message`` only exists on Python 2 exceptions; fall back
                # to ``str(e)`` so the handler cannot raise AttributeError.
                # NOTE(review): the error text is interpolated into HTML
                # unescaped -- consider escaping it before rendering.
                error_text = getattr(e, 'message', None) or str(e)
                content = (
                    '<div class="alert alert-danger">'
                    "{}</div>"
                ).format(error_text)
        session.commit()
        return content
Exemplo n.º 13
0
    def get_sqla_query(  # sqla
            self,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            timeseries_limit_metric=None,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            orderby=None,
            extras=None,
            columns=None,
            form_data=None):
        """Querying any sqla table from this common interface

        Builds a SQLAlchemy select over either ``self.sql`` (rendered through
        the template processor and wrapped as ``expr_qry``) or the table
        returned by ``self.get_sqla_table()``.

        :param groupby: column names to group by (looked up in
            ``self.columns``)
        :param metrics: metric names (looked up in ``self.metrics``)
        :param granularity: name of the datetime column; falls back to
            ``self.main_dttm_col`` when not in ``self.dttm_cols``
        :param from_dttm: lower bound handed to the column's time filter
        :param to_dttm: upper bound handed to the column's time filter
        :param filter: iterable of dicts with ``col``, ``op`` and ``val``
            keys; entries missing any of them, or naming an unknown column,
            are ignored. ``None`` is treated as no filters.
        :param is_timeseries: when true, the timestamp expression is added
            to the select and group-by lists
        :param timeseries_limit: row limit of the inner "top groups"
            subquery joined in for grouped timeseries queries
        :param timeseries_limit_metric: optional metric name used to order
            that inner subquery
        :param row_limit: LIMIT applied to the outer query
        :param inner_from_dttm: optional time-range start for the inner
            subquery (defaults to ``from_dttm``)
        :param inner_to_dttm: optional time-range end for the inner
            subquery (defaults to ``to_dttm``)
        :param orderby: iterable of ``(col, ascending)`` pairs, used only
            when ``groupby`` is empty
        :param extras: optional dict read for ``time_grain_sqla``, ``where``
            and ``having``. ``None`` is treated as an empty dict.
        :param columns: column names to select when ``groupby`` is empty;
            disables metrics and grouping
        :param form_data: passed through to the template processor
        :return: the SQLAlchemy select object
        :raises Exception: when a timeseries is requested without a datetime
            column, or when a metric name is unknown
        """
        # Both default to None in the signature but are iterated / ``.get``-ed
        # below; normalise them so the defaults are actually usable.
        filter = filter or []  # noqa
        extras = extras or {}

        template_kwargs = {
            'from_dttm': from_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'to_dttm': to_dttm,
            'form_data': form_data,
        }
        template_processor = self.get_template_processor(**template_kwargs)

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}
        metrics_dict = {m.metric_name: m for m in self.metrics}

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        for m in metrics:
            if m not in metrics_dict:
                raise Exception(_("Metric '{}' is not valid".format(m)))
        metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
        timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
        timeseries_limit_metric_expr = None
        if timeseries_limit_metric:
            timeseries_limit_metric_expr = \
                timeseries_limit_metric.sqla_col
        if metrics:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr = literal_column("COUNT(*)").label("ccount")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.sqla_col
                # Inner (subquery) columns get a ``__`` suffix so the join
                # condition further down can reference them unambiguously.
                inner = col.sqla_col.label(col.column_name + '__')

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].sqla_col)
            metrics_exprs = []

        if granularity:

            @compiles(ColumnClause)
            def visit_column(element, compiler, **kw):
                """Patch for sqlalchemy bug

                TODO: sqlalchemy 1.2 release should be doing this on its own.
                Patch only if the column clause is specific for DateTime
                set and granularity is selected.
                """
                text = compiler.visit_column(element, **kw)
                try:
                    if (element.is_literal
                            and hasattr(element.type, 'python_type')
                            and type(element.type) is DateTime):
                        text = text.replace('%%', '%')
                except NotImplementedError:
                    # Some elements raise NotImplementedError for python_type
                    pass
                return text

            dttm_col = cols[granularity]
            time_grain = extras.get('time_grain_sqla')

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

        select_exprs += metrics_exprs
        qry = sa.select(select_exprs)

        # Supporting arbitrary SQL statements in place of tables
        if self.sql:
            from_sql = template_processor.process_template(self.sql)
            tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')
        else:
            tbl = self.get_sqla_table()

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for flt in filter:
            if not all([flt.get(s) for s in ['col', 'op', 'val']]):
                continue
            col = flt['col']
            op = flt['op']
            eq = flt['val']
            col_obj = cols.get(col)
            if col_obj:
                if op in ('in', 'not in'):
                    values = []
                    for v in eq:
                        # For backwards compatibility and edge cases
                        # where a column data type might have changed
                        if isinstance(v, basestring):
                            v = v.strip("'").strip('"')
                            if col_obj.is_num:
                                v = utils.string_to_num(v)

                        # Removing empty strings and non numeric values
                        # targeting numeric columns
                        if v is not None:
                            values.append(v)
                    cond = col_obj.sqla_col.in_(values)
                    if op == 'not in':
                        cond = ~cond
                    where_clause_and.append(cond)
                elif op == '==':
                    where_clause_and.append(col_obj.sqla_col == eq)
                elif op == '!=':
                    where_clause_and.append(col_obj.sqla_col != eq)
                elif op == '>':
                    where_clause_and.append(col_obj.sqla_col > eq)
                elif op == '<':
                    where_clause_and.append(col_obj.sqla_col < eq)
                elif op == '>=':
                    where_clause_and.append(col_obj.sqla_col >= eq)
                elif op == '<=':
                    where_clause_and.append(col_obj.sqla_col <= eq)
                elif op == 'LIKE':
                    where_clause_and.append(col_obj.sqla_col.like(eq))
        if extras:
            # Free-form WHERE / HAVING fragments are templated, then
            # parenthesised so they combine safely with the other conditions.
            where = extras.get('where')
            if where:
                where = template_processor.process_template(where)
                where_clause_and += [sa.text('({})'.format(where))]
            having = extras.get('having')
            if having:
                having = template_processor.process_template(having)
                having_clause_and += [sa.text('({})'.format(having))]
        if granularity:
            qry = qry.where(and_(*([time_filter] + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        elif orderby:
            for col, ascending in orderby:
                direction = asc if ascending else desc
                qry = qry.order_by(direction(col))

        qry = qry.limit(row_limit)

        if is_timeseries and timeseries_limit and groupby:
            # some sql dialects require for order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)
            ob = inner_main_metric_expr
            if timeseries_limit_metric_expr is not None:
                ob = timeseries_limit_metric_expr
            subq = subq.order_by(desc(ob))
            subq = subq.limit(timeseries_limit)
            # Join the outer query to the limited subquery on each group-by
            # column, matching against the ``__``-suffixed inner labels.
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))

        return qry.select_from(tbl)
Exemplo n.º 14
0
 def _build_cte(self, with_):
     ctes = {}
     for item in with_:
         alias, sql = dict_one(item)
         ctes[alias] = TextAsFrom(text(sql), []).cte(alias)
     return ctes