def execute( self, execution_context: SqlJsonExecutionContext, rendered_query: str, log_params: Optional[Dict[str, Any]], ) -> SqlJsonExecutionStatus: query_id = execution_context.query.id logger.info("Query %i: Running query on a Celery worker", query_id) try: task = self._get_sql_results_task.delay( # type: ignore query_id, rendered_query, return_results=False, store_results=not execution_context.select_as_cta, username=get_username(), start_time=now_as_float(), expand_data=execution_context.expand_data, log_params=log_params, ) try: task.forget() except NotImplementedError: logger.warning( "Unable to forget Celery task as backend" "does not support this operation" ) except Exception as ex: logger.exception("Query %i: %s", query_id, str(ex)) message = __("Failed to start remote query on a worker.") error = SupersetError( message=message, error_type=SupersetErrorType.ASYNC_WORKERS_ERROR, level=ErrorLevel.ERROR, ) error_payload = dataclasses.asdict(error) query = execution_context.query query.set_extra_json_key("errors", [error_payload]) query.status = QueryStatus.FAILED query.error_message = message raise SupersetErrorException(error) from ex self._query_dao.update_saved_query_exec_info(query_id) return SqlJsonExecutionStatus.QUERY_IS_RUNNING
def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches query_id: int, rendered_query: str, return_results: bool, store_results: bool, user_name: Optional[str], session: Session, start_time: Optional[float], expand_data: bool, log_params: Optional[Dict[str, Any]], ) -> Optional[Dict[str, Any]]: """Executes the sql query returns the results.""" if store_results and start_time: # only asynchronous queries stats_logger.timing("sqllab.query.time_pending", now_as_float() - start_time) query = get_query(query_id, session) payload: Dict[str, Any] = dict(query_id=query_id) database = query.database db_engine_spec = database.db_engine_spec db_engine_spec.patch() if database.allow_run_async and not results_backend: raise SupersetErrorException( SupersetError( message=__("Results backend is not configured."), error_type=SupersetErrorType.RESULTS_BACKEND_NOT_CONFIGURED_ERROR, level=ErrorLevel.ERROR, ) ) # Breaking down into multiple statements parsed_query = ParsedQuery(rendered_query, strip_comments=True) if not db_engine_spec.run_multiple_statements_as_one: statements = parsed_query.get_statements() logger.info( "Query %s: Executing %i statement(s)", str(query_id), len(statements) ) else: statements = [rendered_query] logger.info("Query %s: Executing query as a single statement", str(query_id)) logger.info("Query %s: Set query to 'running'", str(query_id)) query.status = QueryStatus.RUNNING query.start_running_time = now_as_float() session.commit() # Should we create a table or view from the select? if ( query.select_as_cta and query.ctas_method == CtasMethod.TABLE and not parsed_query.is_valid_ctas() ): raise SupersetErrorException( SupersetError( message=__( "CTAS (create table as select) can only be run with a query where " "the last statement is a SELECT. Please make sure your query has " "a SELECT as its last statement. Then, try running your query " "again." ), error_type=SupersetErrorType.INVALID_CTAS_QUERY_ERROR, level=ErrorLevel.ERROR, ) ) if ( query.select_as_cta and query.ctas_method == CtasMethod.VIEW and not parsed_query.is_valid_cvas() ): raise SupersetErrorException( SupersetError( message=__( "CVAS (create view as select) can only be run with a query with " "a single SELECT statement. Please make sure your query has only " "a SELECT statement. Then, try running your query again." ), error_type=SupersetErrorType.INVALID_CVAS_QUERY_ERROR, level=ErrorLevel.ERROR, ) ) engine = database.get_sqla_engine( schema=query.schema, nullpool=True, user_name=user_name, source=QuerySource.SQL_LAB, ) # Sharing a single connection and cursor across the # execution of all statements (if many) with closing(engine.raw_connection()) as conn: # closing the connection closes the cursor as well cursor = conn.cursor() statement_count = len(statements) for i, statement in enumerate(statements): # Check if stopped query = get_query(query_id, session) if query.status == QueryStatus.STOPPED: return None # For CTAS we create the table only on the last statement apply_ctas = query.select_as_cta and ( query.ctas_method == CtasMethod.VIEW or (query.ctas_method == CtasMethod.TABLE and i == len(statements) - 1) ) # Run statement msg = f"Running statement {i+1} out of {statement_count}" logger.info("Query %s: %s", str(query_id), msg) query.set_extra_json_key("progress", msg) session.commit() try: result_set = execute_sql_statement( statement, query, user_name, session, cursor, log_params, apply_ctas, ) except Exception as ex: # pylint: disable=broad-except msg = str(ex) prefix_message = ( f"[Statement {i+1} out of {statement_count}]" if statement_count > 1 else "" ) payload = handle_query_error( ex, query, session, payload, prefix_message ) return payload # Commit the connection so CTA queries will create the table. conn.commit() # Success, updating the query entry in database query.rows = result_set.size query.progress = 100 query.set_extra_json_key("progress", None) if query.select_as_cta: query.select_sql = database.select_star( query.tmp_table_name, schema=query.tmp_schema_name, limit=query.limit, show_cols=False, latest_partition=False, ) query.end_time = now_as_float() use_arrow_data = store_results and cast(bool, results_backend_use_msgpack) data, selected_columns, all_columns, expanded_columns = _serialize_and_expand_data( result_set, db_engine_spec, use_arrow_data, expand_data ) # TODO: data should be saved separately from metadata (likely in Parquet) payload.update( { "status": QueryStatus.SUCCESS, "data": data, "columns": all_columns, "selected_columns": selected_columns, "expanded_columns": expanded_columns, "query": query.to_dict(), } ) payload["query"]["state"] = QueryStatus.SUCCESS if store_results and results_backend: key = str(uuid.uuid4()) logger.info( "Query %s: Storing results in results backend, key: %s", str(query_id), key ) with stats_timing("sqllab.query.results_backend_write", stats_logger): with stats_timing( "sqllab.query.results_backend_write_serialization", stats_logger ): serialized_payload = _serialize_payload( payload, cast(bool, results_backend_use_msgpack) ) cache_timeout = database.cache_timeout if cache_timeout is None: cache_timeout = config["CACHE_DEFAULT_TIMEOUT"] compressed = zlib_compress(serialized_payload) logger.debug( "*** serialized payload size: %i", getsizeof(serialized_payload) ) logger.debug("*** compressed payload size: %i", getsizeof(compressed)) results_backend.set(key, compressed, cache_timeout) query.results_key = key query.status = QueryStatus.SUCCESS session.commit() if return_results: # since we're returning results we need to create non-arrow data if use_arrow_data: ( data, selected_columns, all_columns, expanded_columns, ) = _serialize_and_expand_data( result_set, db_engine_spec, False, expand_data ) payload.update( { "data": data, "columns": all_columns, "selected_columns": selected_columns, "expanded_columns": expanded_columns, } ) return payload return None
def execute_sql_statement( sql_statement: str, query: Query, user_name: Optional[str], session: Session, cursor: Any, log_params: Optional[Dict[str, Any]], apply_ctas: bool = False, ) -> SupersetResultSet: """Executes a single SQL statement""" database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) sql = parsed_query.stripped() # This is a test to see if the query is being # limited by either the dropdown or the sql. # We are testing to see if more rows exist than the limit. increased_limit = None if query.limit is None else query.limit + 1 if not db_engine_spec.is_readonly_query(parsed_query) and not database.allow_dml: raise SupersetErrorException( SupersetError( message=__("Only SELECT statements are allowed against this database."), error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR, level=ErrorLevel.ERROR, ) ) if apply_ctas: if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = "tmp_{}_table_{}".format( query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S") ) sql = parsed_query.as_create_table( query.tmp_table_name, schema_name=query.tmp_schema_name, method=query.ctas_method, ) query.select_as_cta_used = True # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true if db_engine_spec.is_select_query(parsed_query) and not ( query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT ): if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW): query.limit = SQL_MAX_ROW if query.limit: # We are fetching one more than the requested limit in order # to test whether there are more rows than the limit. # Later, the extra row will be dropped before sending # the results back to the user. sql = database.apply_limit_to_sql(sql, increased_limit, force=True) # Hook to allow environment-specific mutation (usually comments) to the SQL sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) try: query.executed_sql = sql if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, user_name, __name__, security_manager, log_params, ) session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): logger.debug("Query %d: Running query: %s", query.id, sql) db_engine_spec.execute(cursor, sql, async_=True) logger.debug("Query %d: Handling cursor", query.id) db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( "Query %d: Fetching data for query object: %s", query.id, str(query.to_dict()), ) data = db_engine_spec.fetch_data(cursor, increased_limit) if query.limit is None or len(data) <= query.limit: query.limiting_factor = LimitingFactor.NOT_LIMITED else: # return 1 row less than increased_query data = data[:-1] except SoftTimeLimitExceeded as ex: logger.warning("Query %d: Time limit exceeded", query.id) logger.debug("Query %d: %s", query.id, ex) raise SupersetErrorException( SupersetError( message=__( f"The query was killed after {SQLLAB_TIMEOUT} seconds. It might " "be too complex, or the database might be under heavy load." ), error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR, level=ErrorLevel.ERROR, ) ) except Exception as ex: logger.error("Query %d: %s", query.id, type(ex), exc_info=True) logger.debug("Query %d: %s", query.id, ex) raise SqlLabException(db_engine_spec.extract_error_message(ex)) logger.debug("Query %d: Fetching cursor description", query.id) cursor_description = cursor.description return SupersetResultSet(data, cursor_description, db_engine_spec)
def execute_sql_statement( # pylint: disable=too-many-arguments,too-many-statements sql_statement: str, query: Query, session: Session, cursor: Any, log_params: Optional[Dict[str, Any]], apply_ctas: bool = False, ) -> SupersetResultSet: """Executes a single SQL statement""" database: Database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) if is_feature_enabled("RLS_IN_SQLLAB"): # Insert any applicable RLS predicates parsed_query = ParsedQuery( str( insert_rls( parsed_query._parsed[0], # pylint: disable=protected-access database.id, query.schema, ))) sql = parsed_query.stripped() # This is a test to see if the query is being # limited by either the dropdown or the sql. # We are testing to see if more rows exist than the limit. increased_limit = None if query.limit is None else query.limit + 1 if not db_engine_spec.is_readonly_query( parsed_query) and not database.allow_dml: raise SupersetErrorException( SupersetError( message=__( "Only SELECT statements are allowed against this database." ), error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR, level=ErrorLevel.ERROR, )) if apply_ctas: if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = "tmp_{}_table_{}".format( query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")) sql = parsed_query.as_create_table( query.tmp_table_name, schema_name=query.tmp_schema_name, method=query.ctas_method, ) query.select_as_cta_used = True # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true if db_engine_spec.is_select_query(parsed_query) and not ( query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT): if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW): query.limit = SQL_MAX_ROW sql = apply_limit_if_exists(database, increased_limit, query, sql) # Hook to allow environment-specific mutation (usually comments) to the SQL sql = SQL_QUERY_MUTATOR( sql, user_name=get_username(), # TODO(john-bodley): Deprecate in 3.0. security_manager=security_manager, database=database, ) try: query.executed_sql = sql if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, get_username(), __name__, security_manager, log_params, ) session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): logger.debug("Query %d: Running query: %s", query.id, sql) db_engine_spec.execute(cursor, sql, async_=True) logger.debug("Query %d: Handling cursor", query.id) db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( "Query %d: Fetching data for query object: %s", query.id, str(query.to_dict()), ) data = db_engine_spec.fetch_data(cursor, increased_limit) if query.limit is None or len(data) <= query.limit: query.limiting_factor = LimitingFactor.NOT_LIMITED else: # return 1 row less than increased_query data = data[:-1] except SoftTimeLimitExceeded as ex: query.status = QueryStatus.TIMED_OUT logger.warning("Query %d: Time limit exceeded", query.id) logger.debug("Query %d: %s", query.id, ex) raise SupersetErrorException( SupersetError( message=__( "The query was killed after %(sqllab_timeout)s seconds. It might " "be too complex, or the database might be under heavy load.", sqllab_timeout=SQLLAB_TIMEOUT, ), error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR, level=ErrorLevel.ERROR, )) from ex except Exception as ex: # query is stopped in another thread/worker # stopping raises expected exceptions which we should skip session.refresh(query) if query.status == QueryStatus.STOPPED: raise SqlLabQueryStoppedException() from ex logger.error("Query %d: %s", query.id, type(ex), exc_info=True) logger.debug("Query %d: %s", query.id, ex) raise SqlLabException(db_engine_spec.extract_error_message(ex)) from ex logger.debug("Query %d: Fetching cursor description", query.id) cursor_description = cursor.description return SupersetResultSet(data, cursor_description, db_engine_spec)