Example #1
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(models.Query).filter_by(id=query_id).one()
    except Exception as e:
        logging.error("Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(models.Query).filter_by(id=query_id).one()

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(
            query.executed_sql, **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    try:
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    # Capture the column metadata before the connection (and with it the
    # cursor) is closed; some drivers invalidate cursor.description on close.
    column_names = (
        [col[0] for col in cursor.description] if cursor.description else [])
    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps({
            'query_id': query.id,
            'status': query.status,
            'query': query.to_dict(),
        }, default=utils.json_iso_dttm_ser)

    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(pd.DataFrame(
        list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema
        ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.merge(query)
    session.commit()

    if return_results:
        return payload
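
For context, a minimal sketch of how a stored payload might be read back out of the results backend; it assumes `results_backend` exposes a werkzeug-cache-style `get(key)` that mirrors the `set()` call above. The helper name is hypothetical.

import json
import zlib


def read_results(results_backend, key):
    """Fetch, decompress and deserialize a stored query payload.

    Illustrative helper only; returns None when the key has expired or
    was never written.
    """
    blob = results_backend.get(key)
    if blob is None:
        return None
    return json.loads(zlib.decompress(blob))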
Example #2
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(db_engine_spec.extract_error_message(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        column_names = dedup(column_names)
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema
        ))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.flush()
    session.commit()

    if return_results:
        return payload
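
`dedup` is used throughout these examples but never shown; below is a minimal sketch of what it plausibly does, suffixing repeated column names so the DataFrame columns stay unique. The suffix format is an assumption.

import collections


def dedup(column_names, suffix_format='{}__{}'):
    """Make duplicate column names unique by appending a counter.

    Sketch of the dedup() helper used above, e.g.
    dedup(['a', 'b', 'a']) -> ['a', 'b', 'a__1'].
    """
    seen = collections.Counter()
    result = []
    for name in column_names:
        result.append(
            suffix_format.format(name, seen[name]) if seen[name] else name)
        seen[name] += 1
    return result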
Example #3
def execute_sql(
    ctask, query_id, rendered_query, return_results=True, store_results=False,
    user_name=None, session=None, start_time=None,
):
    """Executes the sql query returns the results."""
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing(
            'sqllab.query.time_pending', now_as_float() - start_time)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_readonly() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if (superset_query.is_select() and SQL_MAX_ROWS and
            (not query.limit or query.limit > SQL_MAX_ROWS)):
        query.limit = SQL_MAX_ROWS
        executed_sql = database.apply_limit_to_sql(executed_sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=True,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        query_start_time = now_as_float()
        db_engine_spec.execute(cursor, query.executed_sql, async_=True)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        stats_logger.timing(
            'sqllab.query.time_executing_query',
            now_as_float() - query_start_time)
        fetching_start_time = now_as_float()
        data = db_engine_spec.fetch_data(cursor, query.limit)
        stats_logger.timing(
            'sqllab.query.time_fetching_results',
            now_as_float() - fetching_start_time)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description
    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        write_to_results_backend_start = now_as_float()
        json_payload = json.dumps(
            payload, default=json_iso_dttm_ser, ignore_nan=True)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        stats_logger.timing(
            'sqllab.query.results_backend_write',
            now_as_float() - write_to_results_backend_start)
    session.merge(query)
    session.commit()

    if return_results:
        return payload
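
The SQL_QUERY_MUTATOR hook above is looked up from the config; here is a minimal sketch of what such a mutator might look like in superset_config.py, assuming only the call-site signature shown above. The audit-comment format is an illustrative choice.

from datetime import datetime


def SQL_QUERY_MUTATOR(sql, user_name, security_manager, database):
    """Prepend an audit comment so executed statements are traceable in
    database logs; the signature matches the call site above."""
    dttm = datetime.now().isoformat()
    return '-- [SQL LAB] {user} {dttm}\n{sql}'.format(
        user=user_name, dttm=dttm, sql=sql)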
Example #4
def execute_sql(
    ctask,
    query_id,
    return_results=True,
    store_results=False,
    user_name=None,
    template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = ('Error: {}. You can find common superset errors and their '
               'resolutions at: {}'.format(msg, troubleshooting_link)
               if troubleshooting_link else msg)
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        tp = template_params or {}
        rendered_query = template_processor.process_template(query.sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(executed_sql, user_name,
                                         security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema,
                                 show_cols=False,
                                 latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
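
The template rendering step above delegates to get_template_processor(); here is a bare-bones stand-in, assuming the processor is essentially a Jinja2 render of the SQL with the supplied parameters. The class name and constructor are simplifications, not the real API.

import jinja2


class SimpleTemplateProcessor(object):
    """Minimal stand-in for the SQL Lab template processor."""

    def __init__(self, database=None, query=None):
        self.env = jinja2.Environment()

    def process_template(self, sql, **kwargs):
        """Render the SQL as a Jinja2 template, e.g.
        "SELECT * FROM logs WHERE ds = '{{ ds }}'" with ds='2018-01-01'."""
        template = self.env.from_string(sql)
        return template.render(**kwargs)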
Example #5
def execute_sql(ctask,
                query_id,
                return_results=True,
                store_results=False,
                user_name=None):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name)
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info("Running query: \n{}".format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info("Fetching cursor description")
    cursor_description = cursor.description

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    column_names = ([col[0] for col in cursor_description]
                    if cursor_description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False,
            ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        results_backend.set(key, utils.zlib_compress(json_payload))
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
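
wrap_sql_limit() above implements the LimitMethod.WRAP_SQL strategy; roughly, it wraps the statement in an outer query that carries the row limit. A simplified sketch follows; the real implementation builds this through SQLAlchemy with dialect-specific syntax.

def wrap_sql_limit(sql, limit):
    """Wrap a statement in an outer SELECT carrying the row limit; used for
    engines whose drivers can't enforce limits via fetchmany()."""
    return 'SELECT * FROM ({sql}) AS inner_qry LIMIT {limit}'.format(
        sql=sql, limit=limit)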
Example #6
def execute_norm(ctask,
                 query_id,
                 rendered_query,
                 return_results=True,
                 store_results=False,
                 user_name=None,
                 session=None):
    """ Executes the norm script and returns the results"""
    if 'select' in rendered_query.lower():
        return execute_sql(ctask, query_id, rendered_query, return_results,
                           store_results, user_name, session)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    query.executed_sql = rendered_query
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()
    try:
        data = norm.execute(query.executed_sql, session, query.user)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        return handle_error(db_engine_spec.extract_error_message(e))

    if query.status == utils.QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
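
The dispatch test at the top of execute_norm() matches the substring 'select' anywhere in the script, so a comment or identifier containing that word would also route to execute_sql(). If stricter dispatch were wanted, a sqlparse-based check is one alternative; this is a suggestion, not what the code above does.

import sqlparse


def first_statement_is_select(script):
    """Return True only when the first parsed statement is a SELECT,
    ignoring the word 'select' inside comments or identifiers."""
    statements = sqlparse.parse(script)
    return bool(statements) and statements[0].get_type() == 'SELECT'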
Example #7
def execute_sql(
    ctask, query_id, rendered_query, return_results=True, store_results=False,
    user_name=None, session=None,
):
    """Executes the sql query returns the results."""

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if (superset_query.is_select() and SQL_MAX_ROWS and
            (not query.limit or query.limit > SQL_MAX_ROWS)):
        query.limit = SQL_MAX_ROWS
        executed_sql = database.apply_limit_to_sql(executed_sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    column_names = db_engine_spec.get_normalized_column_names(cursor.description)

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = convert_results_to_df(column_names, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
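
All of these variants assume a results_backend with a werkzeug-cache-style set(key, value, timeout) / get(key) interface; below is a tiny in-memory stand-in along those lines, useful for exercising the task without a real backend. The class is hypothetical, not part of Superset.

import time


class InMemoryResultsBackend(object):
    """Dict-backed stand-in for the results backend; not for production."""

    def __init__(self):
        self._store = {}

    def set(self, key, value, timeout=0):
        # A timeout of 0 means "keep forever", mirroring common cache APIs.
        expires = time.time() + timeout if timeout else None
        self._store[key] = (value, expires)
        return True

    def get(self, key):
        value, expires = self._store.get(key, (None, None))
        if expires is not None and time.time() > expires:
            self._store.pop(key, None)
            return None
        return value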
Example #8
def execute_sql(
    ctask, query_id, return_results=True, store_results=False, user_name=None,
    template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = ('Error: {}. You can find common superset errors and their '
               'resolutions at: {}'.format(msg, troubleshooting_link)
               if troubleshooting_link else msg)
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(
            executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
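
convert_results_to_df() is called here and in example #4 without being shown; a plausible sketch based on the inline logic of example #5 follows. It assumes the `dataframe` module and a dedup() helper (as sketched after example #2) are in scope, as they are elsewhere in these examples.

import pandas as pd

from superset import dataframe  # module used throughout these examples


def convert_results_to_df(cursor_description, data):
    """Build a SupersetDataFrame from a DB-API cursor description plus the
    fetched rows; mirrors the inline version in example #5."""
    column_names = dedup(
        [col[0] for col in cursor_description] if cursor_description else [])
    return dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))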
Example #9
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (
            query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.flush()
    session.commit()

    if return_results:
        return payload
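
create_table_as() is called above with (sql, table_name, schema); here is a minimal sketch matching that call site. The real helper may also handle overwrite semantics, so treat this as an approximation.

def create_table_as(sql, table_name, schema=None):
    """Rewrite a SELECT into CREATE TABLE ... AS so the engine materializes
    the results into the temporary table."""
    qualified = '{}.{}'.format(schema, table_name) if schema else table_name
    return 'CREATE TABLE {} AS \n{}'.format(qualified, sql)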
Example #10
def get_one_report(id):
    o = db.session.query(SavedQuery).filter_by(id=id).first()
    desc = {}
    try:
        desc = json.loads(o.description)
    except ValueError:
        pass

    if request.method == 'GET':
        return jsonify({
            'id': o.id,
            'created_on': o.created_on.strftime('%Y-%m-%d'),
            'changed_on': o.changed_on.strftime('%Y-%m-%d'),
            'user_id': o.user_id or '',
            'db_id': o.db_id or '',
            'label': o.label or '',
            'schema': o.schema or '',
            'sql': o.sql or '',
            'description': desc,
        })

    elif request.method == 'POST':
        qjson = request.json

        sql = o.sql
        database_id = o.db_id
        schema = o.schema
        label = o.label

        session = db.session()
        mydb = session.query(models.Database).filter_by(id=database_id).first()

        # paginate
        page = qjson.get('page', 1)
        per_page = qjson.get('per_page', app.config['REPORT_PER_PAGE'])

        hkey = get_hash_key()

        # Parse config: filters, fields and sorts,
        # e.g. qsort = ["ds", "desc"]
        qsort = qjson.get('sort', [])
        sort = " order by _.%s %s" % (qsort[0], qsort[1]) if qsort else ""
        # TODO: date range conversion to be handled later, e.g.
        # filters = [{"field": "ds", "type": "range", "value1": "2016-01-01",
        #             "value2": "2017-01-03", "help": u"date field"}]
        filters = qjson.get('filterfield_set', [])

        fs = []
        for f in filters:
            if f['type'] == 'range':
                fs.append(
                    "(_.%(field)s >= '%(value1)s' and _.%(field)s < '%(value2)s')"
                    % f)
            elif f['type'] == 'like':
                fs.append("_.%(field)s like '%%%(value1)s%%'" % f)
            else:
                fs.append("_.%(field)s %(type)s '%(value1)s'" % f)

        where = " where " + (" and ".join(fs)) if fs else ""

        # NOTE: the wrapped SQL can't end with `;`; complicated SQL should
        # alias its columns with `select .. as ..`.

        count_sql = "SELECT count(1) as num FROM (%s) _ %s" % (sql, where)
        sql = "SELECT * FROM (%s) _ %s %s LIMIT %s,%s" % (sql, where, sort,
                                                          (page - 1) *
                                                          per_page, per_page)

        query = Query(
            database_id=int(database_id),
            limit=1000000,  # int(app.config.get('SQL_MAX_ROW', None)),
            sql=sql,
            schema=schema,
            select_as_cta=False,
            start_time=utils.now_as_float(),
            tab_name=label,
            status=QueryStatus.RUNNING,
            sql_editor_id=hkey[0] + hkey[1],
            tmp_table_name='',
            user_id=int(g.user.get_id()),
            client_id=hkey[2] + hkey[3],
        )
        session.add(query)

        cquery = Query(
            database_id=int(database_id),
            limit=1000000,  # int(app.config.get('SQL_MAX_ROW', None)),
            sql=count_sql,
            schema=schema,
            select_as_cta=False,
            start_time=utils.now_as_float(),
            tab_name=label,
            status=QueryStatus.RUNNING,
            sql_editor_id=hkey[0] + hkey[1],
            tmp_table_name='',
            user_id=int(g.user.get_id()),
            client_id=hkey[0] + hkey[1],
        )
        session.add(cquery)

        session.flush()
        db.session.commit()
        query_id = query.id
        cquery_id = cquery.id

        data = sql_lab.get_sql_results(query_id=query_id,
                                       return_results=True,
                                       template_params={})

        cdata = sql_lab.get_sql_results(query_id=cquery_id,
                                        return_results=True,
                                        template_params={})

        return jsonify({
            'data': data['data'],
            'id': id,
            'label': label,
            'query_id': data['query_id'],
            'limit': data['query']['limit'],
            'limit_reached': False,
            'page': page,
            'per_page': per_page,
            'pages': get_pages(cdata['data'][0]['num'], per_page),
            'total': cdata['data'][0]['num'],
            'rows': data['query']['rows'],
            'sort': qsort,
            'changed_on': data['query']['changed_on'],
            'displayfield_set': desc['displayfield_set'],
            'report_file': url_for('download_one_report',
                                   id=id,
                                   query_id=data['query_id']),
            'status': 'success',
        })
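
The filter loop in this example interpolates user-supplied values straight into the SQL string. A parameterized variant is sketched below as a suggested hardening, not as what the code above does; field and type still land in the SQL text, so they would need to be validated against a whitelist of known columns and operators.

def build_filters(filters):
    """Build a WHERE fragment with %s placeholders plus the bind values,
    keeping user-supplied values out of the SQL text itself."""
    clauses, params = [], []
    for f in filters:
        field = f['field']  # must be whitelisted against known columns
        if f['type'] == 'range':
            clauses.append('(_.{0} >= %s AND _.{0} < %s)'.format(field))
            params.extend([f['value1'], f['value2']])
        elif f['type'] == 'like':
            clauses.append('_.{} LIKE %s'.format(field))
            params.append('%{}%'.format(f['value1']))
        else:
            # f['type'] is assumed to be a vetted comparison operator
            clauses.append('_.{} {} %s'.format(field, f['type']))
            params.append(f['value1'])
    where = ' WHERE ' + ' AND '.join(clauses) if clauses else ''
    return where, params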