def update_query_execution_by_id(query_execution_id, session=None):
    """Bring the search index in line with one query execution.

    The execution is looked up by id. When it cannot be found, or its status
    is anything other than ``QueryExecutionStatus.DONE``, the document with
    that id is deleted from the index. Otherwise the execution is formatted
    together with its data cell and upserted.

    Index failures are logged and swallowed on purpose; nothing is raised
    and nothing is returned.

    Args:
        query_execution_id: Id of the query execution; also used as the
            document id in the index.
        session: Optional DB session forwarded to the lookup helpers.
    """
    es_index = ES_CONFIG["query_executions"]["index_name"]
    execution = get_query_execution_by_id(query_execution_id, session=session)

    # Missing or unfinished executions must not stay in the index.
    if execution is None or execution.status != QueryExecutionStatus.DONE:
        try:
            _delete(es_index, id=query_execution_id)
        except Exception:
            LOG.error(
                "failed to delete {}. Will pass.".format(query_execution_id))
        return

    cell = get_data_cell_by_query_execution_id(
        query_execution_id, session=session)
    es_document = query_execution_to_es(
        execution, data_cell=cell, session=session)

    # ES requires this format for updates; "doc_as_upsert" creates the
    # document when it is not present yet.
    upsert_body = {"doc": es_document, "doc_as_upsert": True}
    try:
        _update(es_index, query_execution_id, upsert_body)
    except Exception:
        LOG.error(
            "failed to upsert {}. Will pass.".format(query_execution_id))
def create_data_doc_from_execution(
    environment_id,
    execution_id,
    engine_id,
    query_string,
    title=None,
):
    """Create a data doc from one of the current user's query executions.

    Environment permission is verified first. The request is rejected via
    ``api_assert`` unless the execution's ``uid`` equals the current user's
    id. The new doc is created un-archived, with empty ``meta``, and its
    ``public`` flag is taken from the environment's ``shareable`` attribute.

    Args:
        environment_id: Environment the doc is created in.
        execution_id: Query execution the doc is created from.
        engine_id: Forwarded unchanged to the logic layer.
        query_string: Forwarded unchanged to the logic layer.
        title: Optional title, forwarded unchanged.

    Returns:
        Whatever ``logic.create_data_doc_from_execution`` returns.
    """
    with DBSession() as session:
        verify_environment_permission([environment_id])

        target_env = Environment.get(id=environment_id, session=session)
        source_execution = get_query_execution_by_id(
            execution_id, session=session
        )

        requester_uid = current_user.id
        api_assert(
            source_execution.uid == requester_uid,
            "You can only create from your own executions.",
        )

        return logic.create_data_doc_from_execution(
            environment_id=environment_id,
            owner_uid=requester_uid,
            engine_id=engine_id,
            query_string=query_string,
            execution_id=execution_id,
            public=target_env.shareable,
            archived=False,
            title=title,
            meta={},
            session=session,
        )
def verify_query_execution_owner(execution_id, session=None):
    """Assert that the current user owns the given query execution.

    The owner id is read with ``getattr(..., "uid", None)``, so a missing
    execution yields ``None`` as owner instead of raising here. The
    comparison result is handed to ``api_assert``.

    Args:
        execution_id: Id of the query execution to check.
        session: Optional DB session forwarded to the lookup.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session
    )
    requester_uid = current_user.id
    owner_uid = getattr(execution, "uid", None)
    api_assert(
        requester_uid == owner_uid,
        "Action can only be preformed by execution owner",
    )
def notifiy_on_execution_completion(query_execution_id, session=None):
    """Send a completion notification to every subscriber of an execution.

    Nothing is sent when the execution has no notifications, or when no
    default environment name can be resolved for the execution's owner.
    Doc id, cell id and title come from the first cell attached to the
    execution; without a cell they fall back to ``None``, ``None`` and
    ``"Untitled"``.

    Args:
        query_execution_id: Id of the completed query execution.
        session: Optional DB session forwarded to every helper call.
    """
    execution = qe_logic.get_query_execution_by_id(
        query_execution_id, session=session
    )

    subscriptions = execution.notifications
    if not len(subscriptions):
        return

    first_cell = next(iter(execution.cells), None)

    # TODO: this should be determined by the notification.user?
    # Come up with a more efficient way to determine env per user
    default_env = qe_perm_logic.get_default_user_environment_by_execution_id(
        execution_id=query_execution_id,
        uid=execution.uid,
        session=session,
    )
    env_name = getattr(default_env, "name", None)

    # If the query execution is not associated with any environment
    # then no notification can be done
    if not env_name:
        return

    for subscription in subscriptions:
        recipient = user_logic.get_user_by_id(
            subscription.user, session=session
        )

        if first_cell is None:
            cell_id = None
            doc_id = None
            query_title = "Untitled"
        else:
            cell_id = first_cell.id
            doc_id = first_cell.doc.id
            query_title = first_cell.meta.get("title", "Untitled")

        notify_user(
            user=recipient,
            template_name="query_completion_notification",
            template_params={
                "query_execution": execution,
                "doc_id": doc_id,
                "cell_id": cell_id,
                "query_title": query_title,
                "public_url": QuerybookSettings.PUBLIC_URL,
                "env_name": env_name,
            },
            session=session,
        )
def cancel_query_execution(query_execution_id):
    """Abort the task behind a query execution, on behalf of its owner.

    Checks run in this order: the execution must exist, the caller must
    have permission on its query engine, and the caller must be the user
    recorded in the execution's ``uid``. If the serialized execution has a
    ``task_id``, the matching task result is looked up and aborted.

    Args:
        query_execution_id: Id of the query execution to cancel.

    Raises:
        Whatever ``api_assert`` raises when the execution does not exist or
        the caller is not its owner, and whatever
        ``verify_query_engine_permission`` raises on a permission failure.
    """
    with DBSession() as session:
        execution = logic.get_query_execution_by_id(
            query_execution_id, session=session
        )
        # Check existence before the first attribute access. Previously an
        # unknown id crashed on `execution.engine_id` (AttributeError), and
        # the later `is not None` guard could never take effect.
        api_assert(execution is not None, "Invalid execution")
        verify_query_engine_permission(execution.engine_id, session=session)

        execution_dict = execution.to_dict(True)

        requestor = current_user.id
        api_assert(
            requestor == execution_dict["uid"], "You can only cancel your own queries"
        )

        if "task_id" in execution_dict:
            task = run_query_task.AsyncResult(execution_dict["task_id"])
            if task is not None:
                task.abort()
def on_join_room(query_execution_id):
    """Join the room of a query execution and emit its current state.

    After the existence assertion and the query-engine permission check,
    the caller joins the room named by ``query_execution_id``. The
    serialized execution is then emitted as a ``"query"`` event to that
    room.

    Before emitting, the last statement execution (if any) is enriched
    in place:

    * ``"log"`` gets the streamed log lines when ``"has_log"`` is truthy.
    * While the execution status is RUNNING and a ``task_id`` is present,
      ``"percent_complete"`` is copied from the task's progress info and
      ``"total"`` is set on the execution dict. Any exception in this step
      is logged at info level and ignored.

    Args:
        query_execution_id: Id of the query execution; also the room name.
    """
    with DBSession() as session:
        execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session
        )
        assert execution, "Invalid execution"
        verify_query_engine_permission(execution.engine_id, session=session)

        execution_dict = execution.to_dict(True)
        join_room(query_execution_id)

        statement_executions = (
            execution_dict.get("statement_executions", [])
            if execution_dict
            else []
        )
        if len(statement_executions):
            latest_statement = statement_executions[-1]

            # Format statement execution's logs
            if latest_statement["has_log"]:
                log_rows = qe_logic.get_statement_execution_stream_logs(
                    latest_statement["id"], from_end=True, session=session
                )
                latest_statement["log"] = [row.log for row in log_rows]

            # Getting task's running data
            is_running = (
                "task_id" in execution_dict
                and execution_dict.get("status", None)
                == QueryExecutionStatus.RUNNING.value
            )
            if is_running:
                task = tasks.run_query_task.AsyncResult(
                    execution_dict["task_id"]
                )
                try:
                    if task is not None and task.info is not None:
                        progress = task.info
                        # Progress entries are looked up by the statement
                        # execution id rendered as a string.
                        statement_key = str(latest_statement["id"])
                        if statement_key in progress:
                            latest_statement["percent_complete"] = progress[
                                statement_key
                            ].get("percent_complete")
                        execution_dict["total"] = progress.get("total", 0)
                except Exception as e:
                    LOG.info(e)

        emit(
            "query",
            execution_dict,
            namespace=QUERY_EXECUTION_NAMESPACE,
            room=query_execution_id,
        )
def user_can_access_query_execution(uid, execution_id, session=None):
    """Tell whether a user may access a query execution.

    Access is granted when any of these holds, checked in order:

    1. The execution's ``uid`` equals ``uid``.
    2. ``user_can_read`` is truthy for at least one data doc linked to the
       execution.
    3. A ``QueryExecutionViewer`` row exists for this user and execution.

    Args:
        uid: Id of the user whose access is being checked.
        execution_id: Id of the query execution.
        session: Optional DB session forwarded to the lookups.

    Returns:
        ``True`` if access is allowed, ``False`` otherwise.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session
    )
    if execution.uid == uid:
        return True

    # Each entry unpacks into a (doc_id, cell_id) pair; only the doc id
    # matters for the read check.
    doc_cell_pairs = query_execution_logic.get_datadoc_id_from_query_execution_id(
        execution_id, session=session
    )
    if doc_cell_pairs and any(
        user_can_read(doc_id=doc_id, uid=uid, session=session)
        for doc_id, _cell_id in doc_cell_pairs
    ):
        return True

    viewer = QueryExecutionViewer.get(uid=uid, query_execution_id=execution_id)
    return viewer is not None
def send_query_execution_access_request_notification(execution_id, uid, session=None):
    """Notify an execution's owner that another user requests access.

    The notification uses the ``"query_execution_access_request"``
    template. It carries the requestor's display name, the execution id,
    and a URL built from the public URL, the requestor's default
    environment name for this execution, and the execution id.

    Args:
        execution_id: Id of the query execution access is requested for.
        uid: Id of the user requesting access.
        session: Optional DB session, forwarded to every helper call
            including the notification.
    """
    requestor = user_logic.get_user_by_id(uid, session=session)
    query_execution = logic.get_query_execution_by_id(execution_id, session=session)
    environment = get_default_user_environment_by_execution_id(
        execution_id=execution_id, uid=uid, session=session
    )
    execution_url = f"{QuerybookSettings.PUBLIC_URL}/{environment.name}/query_execution/{execution_id}/"

    owner = user_logic.get_user_by_id(query_execution.uid, session=session)
    requestor_username = requestor.get_name()
    notify_user(
        user=owner,
        template_name="query_execution_access_request",
        template_params=dict(
            username=requestor_username,
            execution_id=execution_id,
            execution_url=execution_url,
        ),
        # Send within the caller's session, like every other call here;
        # `owner` was loaded through that same session.
        session=session,
    )
def _get_query_execution_info(query_execution_id, session=None):
    """Load and validate what is needed to run a query execution.

    Args:
        query_execution_id: Id of the query execution to load.
        session: Optional DB session forwarded to the helpers.

    Returns:
        A ``(query, statement_ranges, uid, engine_id)`` tuple.

    Raises:
        InvalidQueryExecution: No execution exists for the id.
        AlreadyExecutedException: The execution's status is no longer
            ``INITIALIZED``.
        Whatever ``_assert_safe_query`` raises for a rejected query.
    """
    execution = qe_logic.get_query_execution_by_id(
        query_execution_id, session=session
    )
    if not execution:
        raise InvalidQueryExecution(
            f"Query {query_execution_id} does not exist")

    # Double check to see query has been executed since
    # it could be re-inserted after celery worker failure
    if execution.status != QueryExecutionStatus.INITIALIZED:
        raise AlreadyExecutedException(
            f"Query {query_execution_id} is already executed. This is likely caused by a worker crash."
        )

    query_text = execution.query
    ranges = get_statement_ranges(query_text)
    owner_uid = execution.uid
    engine_id = execution.engine_id

    _assert_safe_query(query_text, engine_id, session=session)
    return query_text, ranges, owner_uid, engine_id
def get_query_execution_final_status(query_execution_id, executor, error_message, session=None):
    """Determine the final status of a query execution and report it.

    A truthy ``executor`` supplies the status directly. Without one, the
    status stored on the execution record is used. If no record exists
    either, the status stays ``QueryExecutionStatus.INITIALIZED``. The
    result is always passed to ``log_if_incomplete_query_status`` before
    being returned.

    Args:
        query_execution_id: Id of the query execution.
        executor: Executor object exposing ``status``, or a falsy value.
        error_message: Forwarded to ``log_if_incomplete_query_status``.
        session: Optional DB session forwarded to the helpers.

    Returns:
        The resolved status.
    """
    if executor:
        resolved_status = executor.status
    else:
        resolved_status = QueryExecutionStatus.INITIALIZED
        # If the error happens before the executor is initialized
        # we check the existing query execution status in db
        # for reference
        stored_execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session)
        if stored_execution is not None:
            resolved_status = stored_execution.status

    log_if_incomplete_query_status(
        resolved_status, query_execution_id, error_message, session=session)
    return resolved_status
def log_query_per_table_task(self, query_execution_id):
    """Record which tables a finished query execution touched.

    Steps, all inside one DB session:

    1. Assert that the execution's status is ``DONE``.
    2. Stop if the execution's engine has no ``metastore_id``.
    3. Derive statement types and per-statement tables from the query text
       and sync those tables to the metastore.
    4. If ``"CREATE"`` or ``"INSERT"`` is among the statement types, create
       lineage from the query.
    5. If the execution has a first cell whose doc is public, log the
       tables per statement against that cell.

    Args:
        self: Not used in the body; presumably the bound task instance
            (confirm against the decorator, which is not visible here).
        query_execution_id: Id of the query execution to process.
    """
    with DBSession() as session:
        execution = qe_logic.get_query_execution_by_id(
            query_execution_id, session=session)
        assert execution.status == QueryExecutionStatus.DONE

        metastore_id = execution.engine.metastore_id
        if metastore_id is None:
            # This query engine has no metastore configured
            return

        statement_types = get_table_statement_type(execution.query)
        tables_by_statement, _ = process_query(
            execution.query, execution.engine.language)

        sync_table_to_metastore(
            tables_by_statement, statement_types, metastore_id, session=session)

        first_cell = next(iter(execution.cells), None)

        if "CREATE" in statement_types or "INSERT" in statement_types:
            create_lineage_from_query(
                execution, metastore_id, first_cell, session=session)

        # Per-table logging needs a cell that belongs to a public doc.
        if first_cell is not None and first_cell.doc.public:
            log_table_per_statement(
                tables_by_statement,
                statement_types,
                query_execution_id,
                metastore_id,
                first_cell.id,
                session=session,
            )
def get_query_execution(query_execution_id):
    """Return a query execution serialized as a dict.

    Permission on the execution is verified before it is loaded.

    Args:
        query_execution_id: Id of the query execution to fetch.

    Returns:
        The result of ``execution.to_dict(True)``, or ``None`` when no
        execution exists for the id.
    """
    verify_query_execution_permission(query_execution_id)

    execution = logic.get_query_execution_by_id(query_execution_id)
    if execution is None:
        return None
    return execution.to_dict(True)