def get_filters(self, raw_filters):  # noqa
    """Build a Druid filter tree from a list of raw filter dicts.

    Each entry needs 'col', 'op' and 'val' keys; malformed entries are
    skipped.  All resulting conditions are ANDed together.

    :param raw_filters: list of dicts like {'col': ..., 'op': ..., 'val': ...}
    :returns: a pydruid ``Filter`` (or ``None`` when nothing applies)
    """
    filters = None
    for flt in raw_filters:
        if not all(f in flt for f in ['col', 'op', 'val']):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        cond = None
        if op in ('in', 'not in'):
            # Strip quoting artifacts left over from the <select> widget
            eq = [
                types.replace("'", '').strip()
                if isinstance(types, string_types) else types
                for types in eq]
        elif not isinstance(flt['val'], string_types):
            eq = eq[0] if len(eq) > 0 else ''
        if col in self.num_cols:
            # Numeric columns: coerce string values to numbers
            if op in ('in', 'not in'):
                eq = [utils.string_to_num(v) for v in eq]
            else:
                eq = utils.string_to_num(eq)
        if op == '==':
            cond = Dimension(col) == eq
        elif op == '!=':
            cond = ~(Dimension(col) == eq)
        elif op in ('in', 'not in'):
            # BUGFIX: an empty value list used to leave cond=None, which
            # made `~cond` raise and produced a Filter with a None field;
            # ignore such filters instead (matches the behavior of the
            # later module-level get_filters in this file).
            if not eq:
                continue
            if len(eq) == 1:
                cond = Dimension(col) == eq[0]
            else:
                fields = [Dimension(col) == s for s in eq]
                cond = Filter(type="or", fields=fields)
            if op == 'not in':
                cond = ~cond
        elif op == 'regex':
            cond = Filter(type="regex", pattern=eq, dimension=col)
        elif op == '>=':
            cond = Dimension(col) >= eq
        elif op == '<=':
            cond = Dimension(col) <= eq
        elif op == '>':
            cond = Dimension(col) > eq
        elif op == '<':
            cond = Dimension(col) < eq
        if filters:
            filters = Filter(type="and", fields=[
                cond,
                filters,
            ])
        else:
            filters = cond
    return filters
def handle_single_value(v):
    """Normalize one value coming from a legacy <select> component."""
    if not isinstance(v, basestring):
        return v
    cleaned = v.strip('\t\n \'"')
    if target_column_is_numeric:
        # For backwards compatibility and edge cases
        # where a column data type might have changed
        cleaned = utils.string_to_num(cleaned)
    if cleaned == '<NULL>':
        return None
    if cleaned == '<empty string>':
        return ''
    return cleaned
def get_dim_acl_where(self):
    # Dimension-level access validation: for every filterable column the
    # current user has "dim_access" grants on, build an IN-clause that
    # restricts the column to the granted values.
    cols = {col.column_name: col
            for col in self.columns if self.has_col_access(col)}
    dim_acslist = security.get_permission_view_by_permission("dim_access")
    dim_acl_map = {}
    acl_where_clause_and = []
    for ac in dim_acslist:
        # Permission names are encoded as "<column>_<value>".
        # Split once instead of four times per iteration.
        parts = ac.split('_')
        if (sm.has_access('dim_access', ac) and len(parts) > 1 and
                parts[0] in self.filterable_column_names):
            # NOTE: only parts[1] is kept, so a granted value containing
            # '_' is truncated — preserved from the original behavior.
            # BUGFIX: dict.has_key() is Python-2-only; setdefault works
            # identically on both Python 2 and 3.
            dim_acl_map.setdefault(parts[0], []).append(parts[1])
    acl_filter = [{
        'col': dim,
        'op': 'in',
        'val': vals,
    } for dim, vals in dim_acl_map.items()]
    for flt in acl_filter:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        if col not in cols:
            raise Exception(("字段 '{}' 不存在,或没有权限访问".format(col)))
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                acl_where_clause_and.append(cond)
    return acl_where_clause_and
def get_sqla_query(  # sqla
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        order_desc=True,
        prequeries=None,
        is_prequery=False,
        ):
    """Querying any sqla table from this common interface

    Builds a SQLAlchemy SELECT for the requested groupby columns,
    metrics and time range, applies the ad-hoc filters and the
    free-form WHERE/HAVING from ``extras``, and (for timeseries
    charts) limits the series either via an inner join on a subquery
    or via a separate "prequery", depending on engine capabilities.
    """
    # Jinja context made available to templated SQL / filters
    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
    }
    template_processor = self.get_template_processor(**template_kwargs)
    db_engine_spec = self.database.db_engine_spec

    orderby = orderby or []

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    # Database spec supports join-free timeslot grouping
    time_groupby_inline = db_engine_spec.time_groupby_inline

    cols = {col.column_name: col for col in self.columns}
    metrics_dict = {m.metric_name: m for m in self.metrics}

    if not granularity and is_timeseries:
        raise Exception(_(
            'Datetime column not provided as part table configuration '
            'and is required by this type of chart'))
    if not groupby and not metrics and not columns:
        raise Exception(_('Empty query?'))
    for m in metrics:
        if m not in metrics_dict:
            raise Exception(_("Metric '{}' is not valid".format(m)))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    if metrics_exprs:
        main_metric_expr = metrics_exprs[0]
    else:
        # No metric requested: fall back to a row count
        main_metric_expr = literal_column('COUNT(*)').label('ccount')

    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        # inner_* lists feed the series-limiting subquery below;
        # inner labels get a '__' suffix to guarantee a unique alias
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            inner = col.sqla_col.label(col.column_name + '__')
            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        # Raw column listing (no aggregation): drop the metrics
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        dttm_col = cols[granularity]
        time_grain = extras.get('time_grain_sqla')
        time_filters = []

        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        # Use main dttm column to support index with secondary dttm columns
        if db_engine_spec.time_secondary_columns and \
                self.main_dttm_col in self.dttm_cols and \
                self.main_dttm_col != dttm_col.column_name:
            time_filters.append(cols[self.main_dttm_col].
                                get_time_filter(from_dttm, to_dttm))
        time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)

    tbl = self.get_from_clause(template_processor, db_engine_spec)

    if not columns:
        qry = qry.group_by(*groupby_exprs)

    # Translate the ad-hoc filter dicts into SQLAlchemy conditions;
    # entries missing/empty on 'col', 'op' or 'val' are skipped
    where_clause_and = []
    having_clause_and = []
    for flt in filter:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
            else:
                if col_obj.is_num:
                    eq = utils.string_to_num(flt['val'])
                if op == '==':
                    where_clause_and.append(col_obj.sqla_col == eq)
                elif op == '!=':
                    where_clause_and.append(col_obj.sqla_col != eq)
                elif op == '>':
                    where_clause_and.append(col_obj.sqla_col > eq)
                elif op == '<':
                    where_clause_and.append(col_obj.sqla_col < eq)
                elif op == '>=':
                    where_clause_and.append(col_obj.sqla_col >= eq)
                elif op == '<=':
                    where_clause_and.append(col_obj.sqla_col <= eq)
                elif op == 'LIKE':
                    where_clause_and.append(col_obj.sqla_col.like(eq))
    if extras:
        # Free-form WHERE / HAVING snippets are templated then wrapped
        # in parens and appended verbatim
        where = extras.get('where')
        if where:
            where = template_processor.process_template(where)
            where_clause_and += [sa.text('({})'.format(where))]
        having = extras.get('having')
        if having:
            having = template_processor.process_template(having)
            having_clause_and += [sa.text('({})'.format(having))]
    if granularity:
        qry = qry.where(and_(*(time_filters + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))

    # Default ordering: main metric, honoring order_desc
    if not orderby and not columns:
        orderby = [(main_metric_expr, not order_desc)]

    for col, ascending in orderby:
        direction = asc if ascending else desc
        qry = qry.order_by(direction(col))

    if row_limit:
        qry = qry.limit(row_limit)

    if is_timeseries and \
            timeseries_limit and groupby and not time_groupby_inline:
        if self.database.db_engine_spec.inner_joins:
            # some sql dialects require for order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)

            ob = inner_main_metric_expr
            if timeseries_limit_metric:
                timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
                ob = timeseries_limit_metric.sqla_col
            direction = desc if order_desc else asc
            subq = subq.order_by(direction(ob))
            subq = subq.limit(timeseries_limit)

            # Join the outer query against the top-N groups subquery
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(
                    groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))
        else:
            # run subquery to get top groups
            subquery_obj = {
                'prequeries': prequeries,
                'is_prequery': True,
                'is_timeseries': False,
                'row_limit': timeseries_limit,
                'groupby': groupby,
                'metrics': metrics,
                'granularity': granularity,
                'from_dttm': inner_from_dttm or from_dttm,
                'to_dttm': inner_to_dttm or to_dttm,
                'filter': filter,
                'orderby': orderby,
                'extras': extras,
                'columns': columns,
                'order_desc': True,
            }
            result = self.query(subquery_obj)
            dimensions = [c for c in result.df.columns if c not in metrics]
            top_groups = self._get_top_groups(result.df, dimensions)
            qry = qry.where(top_groups)

    return qry.select_from(tbl)
def get_filters(raw_filters, num_cols):  # noqa
    """Translate raw filter dicts into a Druid filter tree (ANDed)."""
    result = None
    for flt in raw_filters:
        if any(key not in flt for key in ('col', 'op', 'val')):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        cond = None
        if op in ('in', 'not in'):
            # scrub quoting artifacts from string values
            cleaned = []
            for item in eq:
                if isinstance(item, string_types):
                    item = item.replace('"', '').strip()
                cleaned.append(item)
            eq = cleaned
        elif not isinstance(flt['val'], string_types):
            eq = eq[0] if eq and len(eq) > 0 else ''
        is_numeric_col = col in num_cols
        if is_numeric_col:
            # numeric targets: coerce string values to numbers
            if op in ('in', 'not in'):
                eq = [utils.string_to_num(v) for v in eq]
            else:
                eq = utils.string_to_num(eq)
        if op == '==':
            cond = Dimension(col) == eq
        elif op == '!=':
            cond = Dimension(col) != eq
        elif op in ('in', 'not in'):
            # ignore the filter if it has no value
            if not eq:
                continue
            if len(eq) == 1:
                cond = Dimension(col) == eq[0]
            else:
                cond = Filter(
                    type='or',
                    fields=[Dimension(col) == v for v in eq])
            if op == 'not in':
                cond = ~cond
        elif op == 'regex':
            cond = Filter(type='regex', pattern=eq, dimension=col)
        elif op == '>=':
            cond = Bound(col, eq, None, alphaNumeric=is_numeric_col)
        elif op == '<=':
            cond = Bound(col, None, eq, alphaNumeric=is_numeric_col)
        elif op == '>':
            cond = Bound(col, eq, None, lowerStrict=True,
                         alphaNumeric=is_numeric_col)
        elif op == '<':
            cond = Bound(col, None, eq, upperStrict=True,
                         alphaNumeric=is_numeric_col)
        # AND the new condition into the accumulated tree
        if result:
            result = Filter(type='and', fields=[cond, result])
        else:
            result = cond
    return result
def get_filters(raw_filters, num_cols):  # noqa
    """Fold raw filter dicts into a single Druid filter (AND of all)."""
    combined = None
    for flt in raw_filters:
        if not all(k in flt for k in ('col', 'op', 'val')):
            continue
        col, op, eq = flt['col'], flt['op'], flt['val']
        cond = None
        multi = op in ('in', 'not in')
        if multi:
            # drop stray double quotes / whitespace from string values
            eq = [v.replace('"', '').strip()
                  if isinstance(v, string_types) else v
                  for v in eq]
        elif not isinstance(flt['val'], string_types):
            eq = eq[0] if eq and len(eq) > 0 else ''
        is_numeric_col = col in num_cols
        if is_numeric_col:
            eq = ([utils.string_to_num(v) for v in eq]
                  if multi else utils.string_to_num(eq))
        if op == '==':
            cond = Dimension(col) == eq
        elif op == '!=':
            cond = Dimension(col) != eq
        elif multi:
            # ignore the filter if it has no value
            if not len(eq):
                continue
            if len(eq) == 1:
                cond = Dimension(col) == eq[0]
            else:
                parts = []
                for v in eq:
                    parts.append(Dimension(col) == v)
                cond = Filter(type='or', fields=parts)
            if op == 'not in':
                cond = ~cond
        elif op == 'regex':
            cond = Filter(type='regex', pattern=eq, dimension=col)
        elif op == '>=':
            cond = Bound(col, eq, None, alphaNumeric=is_numeric_col)
        elif op == '<=':
            cond = Bound(col, None, eq, alphaNumeric=is_numeric_col)
        elif op == '>':
            cond = Bound(
                col, eq, None,
                lowerStrict=True,
                alphaNumeric=is_numeric_col,
            )
        elif op == '<':
            cond = Bound(
                col, None, eq,
                upperStrict=True,
                alphaNumeric=is_numeric_col,
            )
        combined = (Filter(type='and', fields=[cond, combined])
                    if combined else cond)
    return combined
def get_sqla_query(  # sqla
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        form_data=None):
    """Querying any sqla table from this common interface

    Older variant: takes ``form_data`` in the Jinja context, has no
    order_desc/prequery support, and limits series only via an inner
    join subquery.
    """
    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
        'form_data': form_data,
    }
    template_processor = self.get_template_processor(**template_kwargs)

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    cols = {col.column_name: col for col in self.columns}
    metrics_dict = {m.metric_name: m for m in self.metrics}

    if not granularity and is_timeseries:
        raise Exception(_(
            "Datetime column not provided as part table configuration "
            "and is required by this type of chart"))
    for m in metrics:
        if m not in metrics_dict:
            raise Exception(_("Metric '{}' is not valid".format(m)))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
    timeseries_limit_metric_expr = None
    if timeseries_limit_metric:
        timeseries_limit_metric_expr = \
            timeseries_limit_metric.sqla_col
    if metrics:
        main_metric_expr = metrics_exprs[0]
    else:
        # No metric requested: fall back to a row count
        main_metric_expr = literal_column("COUNT(*)").label("ccount")
    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        # inner_* lists feed the series-limiting subquery below
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            inner = col.sqla_col.label(col.column_name + '__')
            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        # NOTE(review): this registers a process-global sqlalchemy
        # compiler hook on every call with a granularity — confirm this
        # is intended and safe under concurrency.
        @compiles(ColumnClause)
        def visit_column(element, compiler, **kw):
            """Patch for sqlalchemy bug

            TODO: sqlalchemy 1.2 release should be doing this on its
            own. Patch only if the column clause is specific for
            DateTime set and granularity is selected.
            """
            text = compiler.visit_column(element, **kw)
            try:
                if (
                        element.is_literal and
                        hasattr(element.type, 'python_type') and
                        type(element.type) is DateTime
                ):
                    text = text.replace('%%', '%')
            except NotImplementedError:
                # Some elements raise NotImplementedError for python_type
                pass
            return text

        dttm_col = cols[granularity]
        time_grain = extras.get('time_grain_sqla')

        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)

    # Supporting arbitrary SQL statements in place of tables
    if self.sql:
        from_sql = template_processor.process_template(self.sql)
        tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')
    else:
        tbl = self.get_sqla_table()

    if not columns:
        qry = qry.group_by(*groupby_exprs)

    # Translate ad-hoc filter dicts into SQLAlchemy conditions
    where_clause_and = []
    having_clause_and = []
    for flt in filter:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
            elif op == '==':
                where_clause_and.append(col_obj.sqla_col == eq)
            elif op == '!=':
                where_clause_and.append(col_obj.sqla_col != eq)
            elif op == '>':
                where_clause_and.append(col_obj.sqla_col > eq)
            elif op == '<':
                where_clause_and.append(col_obj.sqla_col < eq)
            elif op == '>=':
                where_clause_and.append(col_obj.sqla_col >= eq)
            elif op == '<=':
                where_clause_and.append(col_obj.sqla_col <= eq)
            elif op == 'LIKE':
                where_clause_and.append(col_obj.sqla_col.like(eq))
    if extras:
        # Free-form WHERE / HAVING snippets, templated then parenthesized
        where = extras.get('where')
        if where:
            where = template_processor.process_template(where)
            where_clause_and += [sa.text('({})'.format(where))]
        having = extras.get('having')
        if having:
            having = template_processor.process_template(having)
            having_clause_and += [sa.text('({})'.format(having))]
    if granularity:
        qry = qry.where(and_(*([time_filter] + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))
    if groupby:
        qry = qry.order_by(desc(main_metric_expr))
    elif orderby:
        for col, ascending in orderby:
            direction = asc if ascending else desc
            qry = qry.order_by(direction(col))
    qry = qry.limit(row_limit)

    if is_timeseries and timeseries_limit and groupby:
        # some sql dialects require for order by expressions
        # to also be in the select clause -- others, e.g. vertica,
        # require a unique inner alias
        inner_main_metric_expr = main_metric_expr.label('mme_inner__')
        inner_select_exprs += [inner_main_metric_expr]
        subq = select(inner_select_exprs)
        subq = subq.select_from(tbl)
        inner_time_filter = dttm_col.get_time_filter(
            inner_from_dttm or from_dttm,
            inner_to_dttm or to_dttm,
        )
        subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
        subq = subq.group_by(*inner_groupby_exprs)
        ob = inner_main_metric_expr
        if timeseries_limit_metric_expr is not None:
            ob = timeseries_limit_metric_expr
        subq = subq.order_by(desc(ob))
        subq = subq.limit(timeseries_limit)
        # Join the outer query against the top-N groups subquery
        on_clause = []
        for i, gb in enumerate(groupby):
            on_clause.append(groupby_exprs[i] == column(gb + '__'))
        tbl = tbl.join(subq.alias(), and_(*on_clause))

    return qry.select_from(tbl)
def get_sqla_query(  # sqla
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        order_desc=True,
        prequeries=None,
        is_prequery=False,
        ):
    """Querying any sqla table from this common interface

    NOTE(review): this file contains several near-identical versions of
    get_sqla_query; this one appears to be a verbatim duplicate of an
    earlier definition — consider deduplicating.
    """
    # Jinja context made available to templated SQL / filters
    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
    }
    template_processor = self.get_template_processor(**template_kwargs)
    db_engine_spec = self.database.db_engine_spec

    orderby = orderby or []

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    # Database spec supports join-free timeslot grouping
    time_groupby_inline = db_engine_spec.time_groupby_inline

    cols = {col.column_name: col for col in self.columns}
    metrics_dict = {m.metric_name: m for m in self.metrics}

    if not granularity and is_timeseries:
        raise Exception(_(
            'Datetime column not provided as part table configuration '
            'and is required by this type of chart'))
    if not groupby and not metrics and not columns:
        raise Exception(_('Empty query?'))
    for m in metrics:
        if m not in metrics_dict:
            raise Exception(_("Metric '{}' is not valid".format(m)))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    if metrics_exprs:
        main_metric_expr = metrics_exprs[0]
    else:
        # No metric requested: fall back to a row count
        main_metric_expr = literal_column('COUNT(*)').label('ccount')

    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        # inner_* lists feed the series-limiting subquery below
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            inner = col.sqla_col.label(col.column_name + '__')
            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        # Raw column listing (no aggregation): drop the metrics
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        dttm_col = cols[granularity]
        time_grain = extras.get('time_grain_sqla')
        time_filters = []

        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]

        # Use main dttm column to support index with secondary dttm columns
        if db_engine_spec.time_secondary_columns and \
                self.main_dttm_col in self.dttm_cols and \
                self.main_dttm_col != dttm_col.column_name:
            time_filters.append(cols[self.main_dttm_col].
                                get_time_filter(from_dttm, to_dttm))
        time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)

    tbl = self.get_from_clause(template_processor, db_engine_spec)

    if not columns:
        qry = qry.group_by(*groupby_exprs)

    # Translate the ad-hoc filter dicts into SQLAlchemy conditions
    where_clause_and = []
    having_clause_and = []
    for flt in filter:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
            else:
                if col_obj.is_num:
                    eq = utils.string_to_num(flt['val'])
                if op == '==':
                    where_clause_and.append(col_obj.sqla_col == eq)
                elif op == '!=':
                    where_clause_and.append(col_obj.sqla_col != eq)
                elif op == '>':
                    where_clause_and.append(col_obj.sqla_col > eq)
                elif op == '<':
                    where_clause_and.append(col_obj.sqla_col < eq)
                elif op == '>=':
                    where_clause_and.append(col_obj.sqla_col >= eq)
                elif op == '<=':
                    where_clause_and.append(col_obj.sqla_col <= eq)
                elif op == 'LIKE':
                    where_clause_and.append(col_obj.sqla_col.like(eq))
    if extras:
        # Free-form WHERE / HAVING snippets, templated then parenthesized
        where = extras.get('where')
        if where:
            where = template_processor.process_template(where)
            where_clause_and += [sa.text('({})'.format(where))]
        having = extras.get('having')
        if having:
            having = template_processor.process_template(having)
            having_clause_and += [sa.text('({})'.format(having))]
    if granularity:
        qry = qry.where(and_(*(time_filters + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))

    # Default ordering: main metric, honoring order_desc
    if not orderby and not columns:
        orderby = [(main_metric_expr, not order_desc)]

    for col, ascending in orderby:
        direction = asc if ascending else desc
        qry = qry.order_by(direction(col))

    if row_limit:
        qry = qry.limit(row_limit)

    if is_timeseries and \
            timeseries_limit and groupby and not time_groupby_inline:
        if self.database.db_engine_spec.inner_joins:
            # some sql dialects require for order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)

            ob = inner_main_metric_expr
            if timeseries_limit_metric:
                timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
                ob = timeseries_limit_metric.sqla_col
            direction = desc if order_desc else asc
            subq = subq.order_by(direction(ob))
            subq = subq.limit(timeseries_limit)

            # Join the outer query against the top-N groups subquery
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(
                    groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))
        else:
            # run subquery to get top groups
            subquery_obj = {
                'prequeries': prequeries,
                'is_prequery': True,
                'is_timeseries': False,
                'row_limit': timeseries_limit,
                'groupby': groupby,
                'metrics': metrics,
                'granularity': granularity,
                'from_dttm': inner_from_dttm or from_dttm,
                'to_dttm': inner_to_dttm or to_dttm,
                'filter': filter,
                'orderby': orderby,
                'extras': extras,
                'columns': columns,
                'order_desc': True,
            }
            result = self.query(subquery_obj)
            dimensions = [c for c in result.df.columns if c not in metrics]
            top_groups = self._get_top_groups(result.df, dimensions)
            qry = qry.where(top_groups)

    return qry.select_from(tbl)
def get_sqla_query(  # sqla
        self,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None,
        form_data=None):
    """Querying any sqla table from this common interface

    ACL-aware variant: only columns/metrics the current user may access
    are exposed, and requests referencing anything else raise.  Adds
    'start_with' / 'end_with' operators on top of the standard set.
    """
    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
        'form_data': form_data,
    }
    template_processor = self.get_template_processor(**template_kwargs)
    db_engine_spec = self.database.db_engine_spec

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    # Database spec supports join-free timeslot grouping
    time_groupby_inline = db_engine_spec.time_groupby_inline

    # Only expose columns / metrics the current user has access to
    cols = {col.column_name: col
            for col in self.columns if self.has_col_access(col)}
    metrics_dict = {m.metric_name: m
                    for m in self.metrics if self.has_met_access(m)}

    if not granularity and is_timeseries:
        raise Exception((
            "缺少时间字段"))
    if metrics:
        for m in metrics:
            if m not in metrics_dict:
                raise Exception(("字段 '{}' 不存在,或没有权限访问".format(m)))
    if groupby:
        for s in groupby:
            if s not in cols:
                raise Exception(("字段 '{}' 不存在,或没有权限访问".format(s)))
    if columns:
        for s in columns:
            if s not in cols:
                raise Exception(("字段 '{}' 不存在,或没有权限访问".format(s)))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
    timeseries_limit_metric_expr = None
    if timeseries_limit_metric:
        timeseries_limit_metric_expr = \
            timeseries_limit_metric.sqla_col
    if metrics_exprs:
        main_metric_expr = metrics_exprs[0]
    else:
        # No metric requested: fall back to a row count
        main_metric_expr = literal_column("COUNT(*)").label("ccount")
    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        # inner_* lists feed the series-limiting subquery below
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            inner = col.sqla_col.label(col.column_name + '__')
            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        dttm_col = cols[granularity]
        time_grain = extras.get('time_grain_sqla')
        time_filters = []

        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            # timestamp goes first so it is the leading result column
            select_exprs.insert(0, timestamp)
            groupby_exprs += [timestamp]

        # Use main dttm column to support index with secondary dttm columns
        if db_engine_spec.time_secondary_columns and \
                self.main_dttm_col in self.dttm_cols and \
                self.main_dttm_col != dttm_col.column_name:
            time_filters.append(cols[self.main_dttm_col].
                                get_time_filter(from_dttm, to_dttm))
        time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)
    tbl = self.get_from_clause(template_processor)
    if not columns:
        qry = qry.group_by(*groupby_exprs)

    where_clause_and = []
    having_clause_and = []
    # Deep-copy so value coercion below never mutates the caller's filters
    filters = copy.deepcopy(filter)
    for flt in filters:
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        if col not in cols:
            raise Exception(("字段 '{}' 不存在,或没有权限访问".format(col)))
        col_obj = cols.get(col)
        if col_obj:
            if op in ('in', 'not in'):
                values = []
                for v in eq:
                    # For backwards compatibility and edge cases
                    # where a column data type might have changed
                    if isinstance(v, basestring):
                        v = v.strip("'").strip('"')
                        if col_obj.is_num:
                            v = utils.string_to_num(v)
                    # Removing empty strings and non numeric values
                    # targeting numeric columns
                    if v is not None:
                        values.append(v)
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
            elif op == '==':
                where_clause_and.append(col_obj.sqla_col == eq)
            elif op == '!=':
                where_clause_and.append(col_obj.sqla_col != eq)
            elif op == '>':
                where_clause_and.append(col_obj.sqla_col > eq)
            elif op == '<':
                where_clause_and.append(col_obj.sqla_col < eq)
            elif op == '>=':
                where_clause_and.append(col_obj.sqla_col >= eq)
            elif op == '<=':
                where_clause_and.append(col_obj.sqla_col <= eq)
            elif op == 'start_with':
                # BUGFIX: previously built LIKE '%{}' which is a *suffix*
                # match; a prefix match needs the wildcard at the end.
                where_clause_and.append(
                    col_obj.sqla_col.like('{}%'.format(eq)))
            elif op == 'end_with':
                # BUGFIX: previously built LIKE '{}%' which is a *prefix*
                # match; a suffix match needs the wildcard at the start.
                where_clause_and.append(
                    col_obj.sqla_col.like('%{}'.format(eq)))
            elif op == 'LIKE':
                where_clause_and.append(
                    col_obj.sqla_col.like('%{}%'.format(eq)))
    if extras:
        # Free-form WHERE / HAVING snippets, templated then parenthesized
        where = extras.get('where')
        if where:
            where = template_processor.process_template(where)
            where_clause_and += [sa.text('({})'.format(where))]
        having = extras.get('having')
        if having:
            having = template_processor.process_template(having)
            having_clause_and += [sa.text('({})'.format(having))]
    if granularity:
        qry = qry.where(and_(*(time_filters + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))
    if groupby:
        qry = qry.order_by(desc(main_metric_expr))
    elif orderby:
        for col, ascending in orderby:
            direction = asc if ascending else desc
            qry = qry.order_by(direction(col))
    if row_limit:
        qry = qry.limit(row_limit)

    if is_timeseries and \
            timeseries_limit and groupby and not time_groupby_inline:
        # some sql dialects require for order by expressions
        # to also be in the select clause -- others, e.g. vertica,
        # require a unique inner alias
        inner_main_metric_expr = main_metric_expr.label('mme_inner__')
        inner_select_exprs += [inner_main_metric_expr]
        subq = select(inner_select_exprs)
        subq = subq.select_from(tbl)
        inner_time_filter = dttm_col.get_time_filter(
            inner_from_dttm or from_dttm,
            inner_to_dttm or to_dttm,
        )
        subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
        subq = subq.group_by(*inner_groupby_exprs)
        ob = inner_main_metric_expr
        if timeseries_limit_metric_expr is not None:
            ob = timeseries_limit_metric_expr
        subq = subq.order_by(desc(ob))
        subq = subq.limit(timeseries_limit)
        # Join the outer query against the top-N groups subquery
        on_clause = []
        for i, gb in enumerate(groupby):
            on_clause.append(
                groupby_exprs[i] == column(gb + '__'))
        tbl = tbl.join(subq.alias(), and_(*on_clause))

    return qry.select_from(tbl)