def cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
    extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
    cache_key = (
        query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(self.datasource)
            if is_feature_enabled("ROW_LEVEL_SECURITY")
            and self.datasource.is_rls_supported
            else [],
            changed_on=self.datasource.changed_on,
            **kwargs,
        )
        if query_obj
        else None
    )
    return cache_key
def cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
    extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
    cache_key = (
        query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(self.datasource)
            if config["ENABLE_ROW_LEVEL_SECURITY"]
            and self.datasource.is_rls_supported
            else [],
            changed_on=self.datasource.changed_on,
            **kwargs,
        )
        if query_obj
        else None
    )
    return cache_key
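# Both cache_key variants above delegate key construction to
# query_obj.cache_key. The standalone sketch below (illustrative only, not
# Superset's actual helper) shows the underlying pattern: hash a canonical
# serialization of the query spec plus its invalidation inputs (changed_on,
# RLS ids), so a change to any of them yields a different key.
import hashlib
import json


def _sketch_cache_key(**parts: object) -> str:
    # Canonical JSON (sorted keys) makes the hash stable across dict ordering.
    payload = json.dumps(parts, sort_keys=True, default=str)
    return hashlib.md5(payload.encode("utf-8")).hexdigest()


# e.g. _sketch_cache_key(datasource="1__table", changed_on="2021-01-01", rls=[])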
def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
    """Returns a pandas dataframe based on the query object"""

    # Here, we assume that all the queries will use the same datasource, which
    # is a valid assumption for the current setting. In the long term, we may
    # support multiple queries from different data sources.

    timestamp_format = None
    if self.datasource.type == "table":
        dttm_col = self.datasource.get_column(query_object.granularity)
        if dttm_col:
            timestamp_format = dttm_col.python_date_format

    # The datasource here can be a different backend but the interface is common
    result = self.datasource.query(query_object.to_dict())
    df = result.df

    # Transform the timestamp we received from the database to a
    # pandas-supported datetime format. If no python_date_format is specified,
    # the pattern is treated as the default ISO date format. If the datetime
    # format is unix, the parse uses the corresponding parsing logic.
    if not df.empty:
        if DTTM_ALIAS in df.columns:
            if timestamp_format in ("epoch_s", "epoch_ms"):
                # Column has already been formatted as a timestamp.
                df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
            else:
                df[DTTM_ALIAS] = pd.to_datetime(
                    df[DTTM_ALIAS], utc=False, format=timestamp_format
                )
            if self.datasource.offset:
                df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
            df[DTTM_ALIAS] += query_object.time_shift

        if self.enforce_numerical_metrics:
            self.df_metrics_to_num(df, query_object)

        # replace() returns a new frame unless inplace=True; without it the
        # infinities would silently survive.
        df.replace([np.inf, -np.inf], np.nan, inplace=True)

    df = query_object.exec_post_processing(df)

    return {
        "query": result.query,
        "status": result.status,
        "error_message": result.error_message,
        "df": df,
    }
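# Minimal standalone sketch of the timestamp normalization performed above
# (column name, values, and the 2-hour offset are illustrative):
import pandas as pd
from datetime import timedelta

_df = pd.DataFrame({"__timestamp": ["2021-01-01 00:00:00", "2021-01-02 00:00:00"]})
# With no python_date_format, ISO strings parse directly; a format string from
# the column definition would be passed via the `format` argument instead.
_df["__timestamp"] = pd.to_datetime(_df["__timestamp"], utc=False)
# A datasource-level hour offset is then applied as a timedelta.
_df["__timestamp"] += timedelta(hours=2)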
def get_single_payload(
    self,
    query_obj: QueryObject,
    force_cached: Optional[bool] = False,
) -> Dict[str, Any]:
    """Return results payload for a single query"""
    if self.result_type == utils.ChartDataResultType.QUERY:
        return {
            "query": self.datasource.get_query_str(query_obj.to_dict()),
            "language": self.datasource.query_language,
        }

    if self.result_type == utils.ChartDataResultType.SAMPLES:
        row_limit = query_obj.row_limit or math.inf
        query_obj = copy.copy(query_obj)
        query_obj.is_timeseries = False
        query_obj.orderby = []
        query_obj.groupby = []
        query_obj.metrics = []
        query_obj.post_processing = []
        query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
        query_obj.row_offset = 0
        query_obj.columns = [o.column_name for o in self.datasource.columns]

    payload = self.get_df_payload(query_obj, force_cached=force_cached)
    df = payload["df"]
    status = payload["status"]
    if status != utils.QueryStatus.FAILED:
        payload["colnames"] = list(df.columns)
        payload["coltypes"] = utils.extract_dataframe_dtypes(df)
        payload["data"] = self.get_data(df)
    del payload["df"]

    filters = query_obj.filter
    filter_columns = cast(List[str], [flt.get("col") for flt in filters])
    columns = set(self.datasource.column_names)
    applied_time_columns, rejected_time_columns = utils.get_time_filter_status(
        self.datasource, query_obj.applied_time_extras
    )
    payload["applied_filters"] = [
        {"column": col} for col in filter_columns if col in columns
    ] + applied_time_columns
    payload["rejected_filters"] = [
        {"reason": "not_in_datasource", "column": col}
        for col in filter_columns
        if col not in columns
    ] + rejected_time_columns

    if (
        self.result_type == utils.ChartDataResultType.RESULTS
        and status != utils.QueryStatus.FAILED
    ):
        return {"data": payload["data"]}
    return payload
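# Standalone illustration of the applied/rejected filter split above
# (column names are made up): a filter is "applied" only if its column exists
# on the datasource, otherwise it is reported back as rejected.
_filter_columns = ["country", "not_a_column"]
_columns = {"country", "region"}
_applied = [{"column": col} for col in _filter_columns if col in _columns]
_rejected = [
    {"reason": "not_in_datasource", "column": col}
    for col in _filter_columns
    if col not in _columns
]
# _applied  -> [{"column": "country"}]
# _rejected -> [{"reason": "not_in_datasource", "column": "not_a_column"}]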
def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
    """
    Returns a QueryObject cache key for objects in self.queries
    """
    datasource = self._qc_datasource
    extra_cache_keys = datasource.get_extra_cache_keys(query_obj.to_dict())

    cache_key = (
        query_obj.cache_key(
            datasource=datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_cache_key(datasource),
            changed_on=datasource.changed_on,
            **kwargs,
        )
        if query_obj
        else None
    )
    return cache_key
def create(  # pylint: disable=too-many-arguments
    self,
    parent_result_type: ChartDataResultType,
    datasource: Optional[DatasourceDict] = None,
    extras: Optional[Dict[str, Any]] = None,
    row_limit: Optional[int] = None,
    time_range: Optional[str] = None,
    time_shift: Optional[str] = None,
    **kwargs: Any,
) -> QueryObject:
    datasource_model_instance = None
    if datasource:
        datasource_model_instance = self._convert_to_model(datasource)
    processed_extras = self._process_extras(extras)
    result_type = kwargs.setdefault("result_type", parent_result_type)
    row_limit = self._process_row_limit(row_limit, result_type)
    from_dttm, to_dttm = self._get_dttms(time_range, time_shift, processed_extras)
    kwargs["from_dttm"] = from_dttm
    kwargs["to_dttm"] = to_dttm
    return QueryObject(
        datasource=datasource_model_instance,
        extras=extras,
        row_limit=row_limit,
        time_range=time_range,
        time_shift=time_shift,
        **kwargs,
    )
def get_query_result(self, query_object: QueryObject) -> QueryResult:
    """Returns a pandas dataframe based on the query object"""
    query_context = self._query_context
    # Here, we assume that all the queries will use the same datasource, which
    # is a valid assumption for the current setting. In the long term, we may
    # support multiple queries from different data sources.

    # The datasource here can be a different backend but the interface is common
    # pylint: disable=import-outside-toplevel
    from superset.models.sql_lab import Query

    query = ""
    if isinstance(query_context.datasource, Query):
        # todo(hugh): add logic to manage all sip68 models here
        result = query_context.datasource.exc_query(query_object.to_dict())
    else:
        result = query_context.datasource.query(query_object.to_dict())
        query = result.query + ";\n\n"

    df = result.df
    # Transform the timestamp we received from the database to a
    # pandas-supported datetime format. If no python_date_format is specified,
    # the pattern is treated as the default ISO date format. If the datetime
    # format is unix, the parse uses the corresponding parsing logic.
    if not df.empty:
        df = self.normalize_df(df, query_object)

        if query_object.time_offsets:
            time_offsets = self.processing_time_offsets(df, query_object)
            df = time_offsets["df"]
            queries = time_offsets["queries"]
            query += ";\n\n".join(queries)
            query += ";\n\n"

        # Re-raising QueryObjectValidationError
        try:
            df = query_object.exec_post_processing(df)
        except InvalidPostProcessingError as ex:
            raise QueryObjectValidationError from ex

    result.df = df
    result.query = query
    result.from_dttm = query_object.from_dttm
    result.to_dttm = query_object.to_dttm
    return result
def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]:
    """Returns a payload of metadata and data"""
    if self.result_type == utils.ChartDataResultType.QUERY:
        return {
            "query": self.datasource.get_query_str(query_obj.to_dict()),
            "language": self.datasource.query_language,
        }
    if self.result_type == utils.ChartDataResultType.SAMPLES:
        row_limit = query_obj.row_limit or math.inf
        query_obj = copy.copy(query_obj)
        query_obj.groupby = []
        query_obj.metrics = []
        query_obj.post_processing = []
        query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
        query_obj.row_offset = 0
        query_obj.columns = [o.column_name for o in self.datasource.columns]
    payload = self.get_df_payload(query_obj)
    df = payload["df"]
    status = payload["status"]
    if status != utils.QueryStatus.FAILED:
        if df.empty:
            payload["error"] = "No data"
        else:
            payload["data"] = self.get_data(df)
    del payload["df"]
    if self.result_type == utils.ChartDataResultType.RESULTS:
        return {"data": payload["data"]}
    return payload
def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> Optional[str]:
    """
    Returns a QueryObject cache key for objects in self.queries
    """
    datasource = self._qc_datasource
    extra_cache_keys = datasource.get_extra_cache_keys(query_obj.to_dict())

    cache_key = (
        query_obj.cache_key(
            datasource=datasource.uid,
            extra_cache_keys=extra_cache_keys,
            rls=security_manager.get_rls_ids(datasource)
            if is_feature_enabled("ROW_LEVEL_SECURITY")
            and datasource.is_rls_supported
            else [],
            changed_on=datasource.changed_on,
            **kwargs,
        )
        if query_obj
        else None
    )
    return cache_key
def _get_drill_detail(
    query_context: QueryContext, query_obj: QueryObject, force_cached: bool = False
) -> Dict[str, Any]:
    # todo(yongjie): Remove this function,
    #  when determining whether samples should be applied to the time filter.
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    qry_obj_cols = []
    for o in datasource.columns:
        if isinstance(o, dict):
            qry_obj_cols.append(o.get("column_name"))
        else:
            qry_obj_cols.append(o.column_name)
    query_obj.columns = qry_obj_cols
    return _get_full(query_context, query_obj, force_cached)
def _get_query(
    query_context: QueryContext,
    query_obj: QueryObject,
    _: bool,
) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    result = {"language": datasource.query_language}
    try:
        result["query"] = datasource.get_query_str(query_obj.to_dict())
    except QueryObjectValidationError as err:
        result["error"] = err.message
    return result
def _get_samples(
    query_context: QueryContext, query_obj: QueryObject, force_cached: bool = False
) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    query_obj.columns = [o.column_name for o in datasource.columns]
    query_obj.from_dttm = None
    query_obj.to_dttm = None
    return _get_full(query_context, query_obj, force_cached)
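# The samples and drill-detail helpers all shallow-copy the QueryObject and
# strip aggregation before re-running the query. A self-contained sketch of
# that pattern (illustrative dataclass, not Superset's QueryObject):
import copy
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class _SketchQuery:
    metrics: Optional[List[str]] = None
    orderby: List[str] = field(default_factory=list)
    is_timeseries: bool = True


_base = _SketchQuery(metrics=["count"], orderby=["count"])
_samples = copy.copy(_base)      # leave the original query untouched
_samples.is_timeseries = False   # samples are raw rows, not a time series
_samples.metrics = None          # drop aggregation entirely
_samples.orderby = []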
def __init__(  # pylint: disable=too-many-arguments
    self,
    datasource: Dict[str, Any],
    queries: List[Dict[str, Any]],
    force: bool = False,
    custom_cache_timeout: Optional[int] = None,
    result_type: Optional[utils.ChartDataResultType] = None,
    result_format: Optional[utils.ChartDataResultFormat] = None,
) -> None:
    self.datasource = ConnectorRegistry.get_datasource(
        str(datasource["type"]), int(datasource["id"]), db.session
    )
    self.queries = [QueryObject(**query_obj) for query_obj in queries]
    self.force = force
    self.custom_cache_timeout = custom_cache_timeout
    self.result_type = result_type or utils.ChartDataResultType.FULL
    self.result_format = result_format or utils.ChartDataResultFormat.JSON
def get_single_payload(self, query_obj: QueryObject) -> Dict[str, Any]:
    """Returns a payload of metadata and data"""
    if self.result_type == utils.ChartDataResultType.QUERY:
        return {
            "query": self.datasource.get_query_str(query_obj.to_dict()),
            "language": self.datasource.query_language,
        }
    if self.result_type == utils.ChartDataResultType.SAMPLES:
        row_limit = query_obj.row_limit or math.inf
        query_obj = copy.copy(query_obj)
        query_obj.orderby = []
        query_obj.groupby = []
        query_obj.metrics = []
        query_obj.post_processing = []
        query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
        query_obj.row_offset = 0
        query_obj.columns = [o.column_name for o in self.datasource.columns]
    payload = self.get_df_payload(query_obj)
    # TODO: implement
    payload["annotation_data"] = []
    df = payload["df"]
    status = payload["status"]
    if status != utils.QueryStatus.FAILED:
        payload["data"] = self.get_data(df)
    del payload["df"]
    filters = query_obj.filter
    filter_columns = cast(List[str], [flt.get("col") for flt in filters])
    columns = set(self.datasource.column_names)
    applied_time_columns, rejected_time_columns = utils.get_time_filter_status(
        self.datasource, query_obj.applied_time_extras
    )
    payload["applied_filters"] = [
        {"column": col} for col in filter_columns if col in columns
    ] + applied_time_columns
    payload["rejected_filters"] = [
        {"reason": "not_in_datasource", "column": col}
        for col in filter_columns
        if col not in columns
    ] + rejected_time_columns
    if self.result_type == utils.ChartDataResultType.RESULTS:
        return {"data": payload["data"]}
    return payload
def _get_samples(
    query_context: QueryContext, query_obj: QueryObject, force_cached: bool = False
) -> Dict[str, Any]:
    datasource = _get_datasource(query_context, query_obj)
    query_obj = copy.copy(query_obj)
    query_obj.is_timeseries = False
    query_obj.orderby = []
    query_obj.metrics = None
    query_obj.post_processing = []
    qry_obj_cols = []
    for o in datasource.columns:
        if isinstance(o, dict):
            qry_obj_cols.append(o.get("column_name"))
        else:
            qry_obj_cols.append(o.column_name)
    query_obj.columns = qry_obj_cols
    query_obj.from_dttm = None
    query_obj.to_dttm = None
    return _get_full(query_context, query_obj, force_cached)
def __init__(
    self,
    datasource: DatasourceDict,
    queries: List[Dict[str, Any]],
    force: bool = False,
    custom_cache_timeout: Optional[int] = None,
    result_type: Optional[ChartDataResultType] = None,
    result_format: Optional[ChartDataResultFormat] = None,
) -> None:
    self.datasource = ConnectorRegistry.get_datasource(
        str(datasource["type"]), int(datasource["id"]), db.session
    )
    self.queries = [QueryObject(**query_obj) for query_obj in queries]
    self.force = force
    self.custom_cache_timeout = custom_cache_timeout
    self.result_type = result_type or ChartDataResultType.FULL
    self.result_format = result_format or ChartDataResultFormat.JSON
    self.cache_values = {
        "datasource": datasource,
        "queries": queries,
        "result_type": self.result_type,
        "result_format": self.result_format,
    }
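# Unlike the earlier constructor, this one retains the raw inputs in
# cache_values. A standalone sketch of why that is useful (the hashing helper
# is illustrative, not Superset's API): the raw dicts are JSON-serializable,
# so a context-level cache key can be derived from them without the hydrated
# datasource model.
import hashlib
import json

_cache_values = {
    "datasource": {"type": "table", "id": 1},
    "queries": [{"metrics": ["count"]}],
    "result_type": "full",
    "result_format": "json",
}
_context_key = hashlib.md5(
    json.dumps(_cache_values, sort_keys=True).encode("utf-8")
).hexdigest()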