def _make_proxy(self, selectable, name=None, attach=True,
                name_is_truncatable=False, **kw):
    """Create a ``ColumnClause`` that proxies this element on ``selectable``.

    The proxy is built from this element's ``name`` and ``type`` and lists
    ``self`` as its only entry in ``_proxies``.

    :param selectable: object the proxy is created for; its
        ``_is_clone_of`` and ``_columns`` attributes are used.
    :param name: accepted for signature compatibility; not used here.
    :param attach: when true, the proxy is stored in
        ``selectable._columns`` under the proxy's ``key``.
    :param name_is_truncatable: accepted for signature compatibility; not
        used here.
    :param kw: ignored.
    :returns: the newly created ``ColumnClause``.
    """
    proxy = ColumnClause(self.name, self.type)
    proxy._proxies = [self]

    # When the selectable is itself a clone, link the proxy to the
    # same-keyed column of the selectable it was cloned from.
    clone_source = selectable._is_clone_of
    if clone_source is not None:
        proxy._is_clone_of = clone_source.columns.get(proxy.key)

    if attach:
        selectable._columns[proxy.key] = proxy
    return proxy
def _make_proxy(
    self, selectable, name=None, attach=True, name_is_truncatable=False, **kw
):
    """Create a ``ColumnClause`` that proxies this element on ``selectable``.

    If this element's name equals the name of its ``function``, the proxy
    column is named after the selectable; otherwise it keeps this element's
    own name. In both cases the proxy's ``key`` is this element's name.

    :param selectable: object the proxy is created for; it becomes the
        proxy's ``table`` and its ``name``, ``_is_clone_of`` and
        ``_columns`` attributes are used.
    :param name: accepted for signature compatibility; the incoming value
        is never read.
    :param attach: when true, the proxy is stored in
        ``selectable._columns`` under the proxy's ``key``.
    :param name_is_truncatable: accepted for signature compatibility; not
        used here.
    :param kw: ignored.
    :returns: the newly created ``ColumnClause``.
    """
    proxy_name = (
        selectable.name if self.name == self.function.name else self.name
    )

    proxy = ColumnClause(proxy_name, self.type)
    proxy.key = self.name
    proxy._proxies = [self]

    # When the selectable is itself a clone, link the proxy to the
    # same-keyed column of the selectable it was cloned from.
    clone_source = selectable._is_clone_of
    if clone_source is not None:
        proxy._is_clone_of = clone_source.columns.get(proxy.key)

    proxy.table = selectable
    proxy.named_with_table = False

    if attach:
        selectable._columns[proxy.key] = proxy
    return proxy
def query(  # sqla
        self, groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15, row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        extras=None,
        columns=None):
    """Querying any sqla table from this common interface

    Builds a SQLAlchemy ``select`` from the arguments, compiles it with
    literal binds against the database engine, runs it through
    ``pd.read_sql_query`` and returns the outcome as a ``QueryResult``.

    :param groupby: column names to group by; each must be a key of
        ``self.columns`` (by ``column_name``).
    :param metrics: metric names to select; the first one drives the
        descending ordering. ``COUNT(*)`` is used for ordering when empty.
    :param granularity: name of the datetime column; falls back to
        ``self.main_dttm_col`` when not listed in ``self.dttm_cols``.
    :param from_dttm: lower bound (inclusive) of the time filter.
    :param to_dttm: upper bound (inclusive) of the time filter.
    :param filter: iterable of ``(column, operator, value)`` triples; only
        the ``'in'`` and ``'not in'`` operators are applied, with ``value``
        split on commas. ``None`` means no filters.
    :param is_timeseries: when true, the (optionally grained) timestamp is
        added to the selected and grouped expressions.
    :param timeseries_limit: when truthy and ``groupby`` is given, the
        query is joined against a subquery limited to this many groups.
    :param row_limit: passed to ``limit()`` on the outer query.
    :param inner_from_dttm: optional lower time bound for the subquery.
    :param inner_to_dttm: optional upper time bound for the subquery.
    :param extras: optional dict; the keys ``'time_grain_sqla'``,
        ``'where'`` and ``'having'`` are read. ``None`` means no extras.
    :param columns: when given (and ``groupby`` is empty), these are
        selected as-is, metrics are dropped and no GROUP BY is emitted.
    :returns: ``QueryResult`` with the dataframe, the elapsed time and the
        reindented SQL text.
    :raises Exception: if no datetime column is available while
        ``is_timeseries`` is true.
    """
    # Both default to None in the signature; normalise them so the code
    # below can call .get() / iterate without crashing.
    extras = extras or {}
    filter = filter or []  # noqa

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    cols = {col.column_name: col for col in self.columns}
    qry_start_dttm = datetime.now()

    if not granularity and is_timeseries:
        raise Exception(
            "Datetime column not provided as part table configuration "
            "and is required by this type of chart")

    metrics_exprs = [
        literal_column(m.expression).label(m.metric_name)
        for m in self.metrics if m.metric_name in metrics
    ]

    if metrics:
        main_metric_expr = literal_column([
            m.expression for m in self.metrics
            if m.metric_name == metrics[0]
        ][0])
    else:
        main_metric_expr = literal_column("COUNT(*)")

    select_exprs = []
    groupby_exprs = []
    inner_select_exprs = []
    inner_groupby_exprs = []

    if groupby:
        for s in groupby:
            expr = cols[s].expression
            if expr:
                outer = literal_column(expr).label(s)
                inner = literal_column(expr).label('__' + s)
            else:
                outer = column(s).label(s)
                inner = column(s).label('__' + s)

            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        select_exprs.extend(columns)
        metrics_exprs = []

    # Without a datetime column there is nothing to filter on by time;
    # keep both lists defined so the subquery below never hits a NameError.
    time_filter = []
    inner_time_filter = []

    if granularity:
        dttm_expr = cols[granularity].expression or granularity
        timestamp = literal_column(dttm_expr).label('timestamp')

        # Transforming time grain into an expression based on configuration
        time_grain_sqla = extras.get('time_grain_sqla')
        if time_grain_sqla:
            # Unknown grain names fall back to the bare column expression.
            grain = self.database.grains_dict().get(time_grain_sqla)
            grain_template = grain.function if grain is not None else '{col}'
            timestamp_grain = literal_column(
                grain_template.format(col=dttm_expr)).label('timestamp')
        else:
            timestamp_grain = timestamp

        if is_timeseries:
            select_exprs += [timestamp_grain]
            groupby_exprs += [timestamp_grain]

        tf = '%Y-%m-%d %H:%M:%S.%f'
        time_filter = [
            timestamp >= from_dttm.strftime(tf),
            timestamp <= to_dttm.strftime(tf),
        ]
        inner_time_filter = copy(time_filter)
        if inner_from_dttm:
            inner_time_filter[0] = timestamp >= inner_from_dttm.strftime(tf)
        if inner_to_dttm:
            inner_time_filter[1] = timestamp <= inner_to_dttm.strftime(tf)

    select_exprs += metrics_exprs
    qry = select(select_exprs)
    from_clause = table(self.table_name)
    if not columns:
        qry = qry.group_by(*groupby_exprs)

    where_clause_and = []
    having_clause_and = []
    for col, op, eq in filter:
        col_obj = cols[col]
        if op in ('in', 'not in'):
            values = eq.split(",")
            if col_obj.expression:
                cond = ColumnClause(
                    col_obj.expression, is_literal=True).in_(values)
            else:
                cond = column(col).in_(values)
            if op == 'not in':
                cond = ~cond
            where_clause_and.append(cond)
    if 'where' in extras:
        where_clause_and += [text(extras['where'])]
    if 'having' in extras:
        having_clause_and += [text(extras['having'])]

    # time_filter is empty when there is no datetime column.
    qry = qry.where(and_(*(time_filter + where_clause_and)))
    qry = qry.having(and_(*having_clause_and))
    if groupby:
        qry = qry.order_by(desc(main_metric_expr))
    qry = qry.limit(row_limit)

    if timeseries_limit and groupby:
        # Restrict the result to the top groups by joining against a
        # limited subquery whose columns are labelled '__<name>'.
        subq = select(inner_select_exprs)
        subq = subq.select_from(table(self.table_name))
        subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
        subq = subq.group_by(*inner_groupby_exprs)
        subq = subq.order_by(desc(main_metric_expr))
        subq = subq.limit(timeseries_limit)
        on_clause = [
            outer_expr == column("__" + gb)
            for outer_expr, gb in zip(groupby_exprs, groupby)
        ]
        from_clause = from_clause.join(subq.alias(), and_(*on_clause))

    qry = qry.select_from(from_clause)

    engine = self.database.get_sqla_engine()
    sql = "{}".format(
        qry.compile(engine, compile_kwargs={"literal_binds": True}))
    df = pd.read_sql_query(sql=sql, con=engine)
    sql = sqlparse.format(sql, reindent=True)
    return QueryResult(
        df=df, duration=datetime.now() - qry_start_dttm, query=sql)
def query(
        self, groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        limit_spec=None,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15, row_limit=None,
        inner_from_dttm=None, inner_to_dttm=None,
        extras=None):
    """Query the table through SQLAlchemy and return a ``QueryResult``.

    Builds a grouped ``select`` over ``self.table_name`` filtered on
    ``self.main_dttm_col``, compiles it with literal binds, runs it through
    ``read_sql_query`` and returns the dataframe, elapsed time and
    reindented SQL.

    :param groupby: column names to group by; each must be a key of
        ``self.columns`` (by ``column_name``).
    :param metrics: metric names to select; the first one drives the
        descending ordering. ``COUNT(*)`` is used for ordering when empty.
    :param granularity: unless equal to ``"all"``, the timestamp column is
        added to the selected and grouped expressions.
    :param from_dttm: lower bound (inclusive) of the time filter.
    :param to_dttm: upper bound (inclusive) of the time filter.
    :param limit_spec: accepted but not used by this implementation.
    :param filter: iterable of ``(column, operator, value)`` triples; only
        the ``'in'`` and ``'not in'`` operators are applied, with ``value``
        split on commas. ``None`` means no filters.
    :param is_timeseries: accepted but not used by this implementation.
    :param timeseries_limit: when truthy and ``groupby`` is given, the
        query is joined against a subquery limited to this many groups.
    :param row_limit: passed to ``limit()`` on the outer query.
    :param inner_from_dttm: optional lower time bound for the subquery.
    :param inner_to_dttm: optional upper time bound for the subquery.
    :param extras: optional dict; only the ``'where'`` key is read.
    :returns: ``QueryResult`` with the dataframe, the elapsed time and the
        reindented SQL text.
    :raises Exception: if ``self.main_dttm_col`` is not set.
    """
    # ``filter`` defaults to None in the signature; treat that as "no filters"
    # instead of failing when iterating over it.
    filter = filter or []  # noqa

    cols = {col.column_name: col for col in self.columns}
    qry_start_dttm = datetime.now()

    if not self.main_dttm_col:
        raise Exception(
            "Datetime column not provided as part table configuration")
    timestamp = literal_column(self.main_dttm_col).label('timestamp')

    metrics_exprs = [
        literal_column(m.expression).label(m.metric_name)
        for m in self.metrics if m.metric_name in metrics]

    if metrics:
        main_metric_expr = literal_column([
            m.expression for m in self.metrics
            if m.metric_name == metrics[0]][0])
    else:
        main_metric_expr = literal_column("COUNT(*)")

    select_exprs = []
    groupby_exprs = []
    inner_select_exprs = []
    inner_groupby_exprs = []

    if groupby:
        for s in groupby:
            expr = cols[s].expression
            if expr:
                outer = ColumnClause(expr, is_literal=True).label(s)
                inner = ColumnClause(expr, is_literal=True).label('__' + s)
            else:
                outer = literal_column(s).label(s)
                inner = literal_column(s).label('__' + s)

            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)

    if granularity != "all":
        select_exprs += [timestamp]
        groupby_exprs += [timestamp]

    select_exprs += metrics_exprs
    qry = select(select_exprs)
    from_clause = table(self.table_name)
    qry = qry.group_by(*groupby_exprs)

    time_filter = [
        timestamp >= from_dttm.isoformat(),
        timestamp <= to_dttm.isoformat(),
    ]
    inner_time_filter = copy(time_filter)
    if inner_from_dttm:
        inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
    if inner_to_dttm:
        inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()

    where_clause_and = []
    for col, op, eq in filter:
        col_obj = cols[col]
        if op in ('in', 'not in'):
            values = eq.split(",")
            if col_obj.expression:
                cond = ColumnClause(
                    col_obj.expression, is_literal=True).in_(values)
            else:
                cond = literal_column(col).in_(values)
            if op == 'not in':
                cond = ~cond
            where_clause_and.append(cond)
    if extras and 'where' in extras:
        where_clause_and += [text(extras['where'])]

    qry = qry.where(and_(*(time_filter + where_clause_and)))
    qry = qry.order_by(desc(main_metric_expr))
    qry = qry.limit(row_limit)

    if timeseries_limit and groupby:
        # Restrict the result to the top groups by joining against a
        # limited subquery whose columns are labelled '__<name>'.
        subq = select(inner_select_exprs)
        subq = subq.select_from(table(self.table_name))
        subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
        subq = subq.group_by(*inner_groupby_exprs)
        subq = subq.order_by(desc(main_metric_expr))
        subq = subq.limit(timeseries_limit)
        on_clause = [
            outer_expr == literal_column("__" + gb)
            for outer_expr, gb in zip(groupby_exprs, groupby)
        ]
        from_clause = from_clause.join(subq.alias(), and_(*on_clause))

    qry = qry.select_from(from_clause)

    engine = self.database.get_sqla_engine()
    # The compiled SQL is returned on the QueryResult, so it is not echoed
    # to stdout here.
    sql = str(qry.compile(engine, compile_kwargs={"literal_binds": True}))
    df = read_sql_query(sql=sql, con=engine)
    sql = sqlparse.format(sql, reindent=True)
    return QueryResult(
        df=df, duration=datetime.now() - qry_start_dttm, query=sql)
def query(self, groupby, metrics, granularity, from_dttm, to_dttm,
          custom_query, limit_spec=None,
          filter=None,  # noqa
          is_timeseries=True, timeseries_limit=15, row_limit=None,
          inner_from_dttm=None, inner_to_dttm=None, extras=None,
          columns=None):
    """Run a query against the table and return a ``QueryResult``.

    When ``custom_query`` is falsy the SQL is built with the SQLAlchemy
    expression API from the other arguments. Otherwise ``custom_query`` is
    used as a ``str.format`` template filled from this method's local
    variables and executed as-is.

    :param groupby: column names to group by; each must be a key of
        ``self.columns`` (by ``column_name``).
    :param metrics: metric names to select; the first one drives the
        descending ordering. ``COUNT(*)`` is used for ordering when empty.
    :param granularity: name of the datetime column; falls back to
        ``self.main_dttm_col`` when not listed in ``self.dttm_cols``.
    :param from_dttm: lower bound (inclusive) of the time filter.
    :param to_dttm: upper bound (inclusive) of the time filter.
    :param custom_query: SQL template string, or a falsy value to build
        the query from the remaining arguments.
    :param limit_spec: accepted but not used by this implementation.
    :param filter: iterable of ``(column, operator, value)`` triples; only
        the ``'in'`` and ``'not in'`` operators are applied, with ``value``
        split on commas. ``None`` means no filters.
    :param is_timeseries: when true, the timestamp is added to the
        selected and grouped expressions.
    :param timeseries_limit: when truthy and ``groupby`` is given, the
        query is joined against a subquery limited to this many groups.
    :param row_limit: passed to ``limit()`` on the outer query.
    :param inner_from_dttm: optional lower time bound for the subquery.
    :param inner_to_dttm: optional upper time bound for the subquery.
    :param extras: optional dict; the keys ``'where'`` and ``'having'``
        are read.
    :param columns: when given (and ``groupby`` is empty), these are
        selected as-is, metrics are dropped and no GROUP BY is emitted.
    :returns: ``QueryResult`` with the dataframe, the elapsed time and the
        dedented SQL text.
    :raises Exception: if ``custom_query`` is falsy and
        ``self.main_dttm_col`` is not set.
    """
    qry_start_dttm = datetime.now()

    if not custom_query:
        # ``filter`` defaults to None; treat that as "no filters". This is
        # done only in this branch so the values exposed to the
        # custom_query template below stay untouched.
        filter = filter or []  # noqa

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}

        if not self.main_dttm_col:
            raise Exception(
                "Datetime column not provided as part table configuration")

        dttm_expr = cols[granularity].expression
        if dttm_expr:
            timestamp = ColumnClause(
                dttm_expr, is_literal=True).label('timestamp')
        else:
            timestamp = literal_column(granularity).label('timestamp')

        metrics_exprs = [
            literal_column(m.expression).label(m.metric_name)
            for m in self.metrics if m.metric_name in metrics
        ]

        if metrics:
            main_metric_expr = literal_column([
                m.expression for m in self.metrics
                if m.metric_name == metrics[0]
            ][0])
        else:
            main_metric_expr = literal_column("COUNT(*)")

        groupby_exprs = []
        select_exprs = []
        inner_select_exprs = []
        inner_groupby_exprs = []

        if groupby:
            for s in groupby:
                expr = cols[s].expression
                if expr:
                    outer = ColumnClause(expr, is_literal=True).label(s)
                    inner = ColumnClause(
                        expr, is_literal=True).label('__' + s)
                else:
                    outer = column(s).label(s)
                    inner = column(s).label('__' + s)

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            select_exprs.extend(columns)
            metrics_exprs = []

        if is_timeseries:
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        select_exprs += metrics_exprs
        qry = select(select_exprs)
        from_clause = table(self.table_name)
        if not columns:
            qry = qry.group_by(*groupby_exprs)

        time_filter = [
            timestamp >= from_dttm.isoformat(),
            timestamp <= to_dttm.isoformat(),
        ]
        inner_time_filter = copy(time_filter)
        if inner_from_dttm:
            inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
        if inner_to_dttm:
            inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()

        where_clause_and = []
        having_clause_and = []
        for col, op, eq in filter:
            col_obj = cols[col]
            if op in ('in', 'not in'):
                values = eq.split(",")
                if col_obj.expression:
                    cond = ColumnClause(
                        col_obj.expression, is_literal=True).in_(values)
                else:
                    cond = column(col).in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        if extras and 'where' in extras:
            where_clause_and += [text(extras['where'])]
        if extras and 'having' in extras:
            having_clause_and += [text(extras['having'])]

        qry = qry.where(and_(*(time_filter + where_clause_and)))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        qry = qry.limit(row_limit)

        if timeseries_limit and groupby:
            # Restrict the result to the top groups by joining against a
            # limited subquery whose columns are labelled '__<name>'.
            subq = select(inner_select_exprs)
            subq = subq.select_from(table(self.table_name))
            subq = subq.where(
                and_(*(where_clause_and + inner_time_filter)))
            subq = subq.group_by(*inner_groupby_exprs)
            subq = subq.order_by(desc(main_metric_expr))
            subq = subq.limit(timeseries_limit)
            on_clause = [
                outer_expr == column("__" + gb)
                for outer_expr, gb in zip(groupby_exprs, groupby)
            ]
            from_clause = from_clause.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(from_clause)

        engine = self.database.get_sqla_engine()
        sql = str(
            qry.compile(engine, compile_kwargs={"literal_binds": True}))
        df = read_sql_query(sql=sql, con=engine)
        # dedent() returns a new string; keep it so the reported query is
        # actually the dedented one.
        sql = textwrap.dedent(sql)
    else:
        # Legacy way of querying by building a SQL string without using
        # the sqlalchemy expression API (new approach which supports all
        # dialects)
        #
        # NOTE(review): the template is filled from locals() with plain
        # str.format, i.e. values are interpolated into the SQL text
        # without escaping. Only use with trusted templates and inputs.
        engine = self.database.get_sqla_engine()
        sql = custom_query.format(**locals())
        df = read_sql_query(sql=sql, con=engine)
        sql = textwrap.dedent(sql)

    return QueryResult(
        df=df, duration=datetime.now() - qry_start_dttm, query=sql)