def test_get_columns_with_int(self):
    """Check ``columns`` for a string column followed by an int column.

    The string column is expected with ``is_dim`` True; the int column is
    expected with ``is_dim`` False and an ``agg`` of ``'sum'``.
    """
    rows = [('a1', 1), ('a2', 2)]
    description = (('a', 'string'), ('b', 'int'))
    expected_columns = [
        {'name': 'a', 'type': 'STRING', 'is_date': False, 'is_dim': True},
        {
            'name': 'b',
            'type': 'INT',
            'is_date': False,
            'is_dim': False,
            'agg': 'sum',
        },
    ]

    frame = SupersetDataFrame(rows, description, BaseEngineSpec)

    self.assertEqual(frame.columns, expected_columns)
    def test_df_conversion_no_dict(self):
        """Frame size and column count match the input rows and column specs."""
        column_specs = [
            ["string_col", "string"],
            ["int_col", "int"],
            ["float_col", "float"],
        ]
        rows = [["a", 4, 4.0]]

        frame = SupersetDataFrame(rows, column_specs, BaseEngineSpec)

        self.assertEqual(len(rows), frame.size)
        self.assertEqual(len(column_specs), len(frame.columns))
Пример #3
0
    def test_df_conversion_dict(self):
        """A row containing a dict value still yields one row and three columns."""
        cols = ["string_col", "dict_col", "int_col"]
        data = [["a", {"c1": 1, "c2": 2, "c3": 3}, 4]]
        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(data), cdf.size)
        self.assertEqual(len(cols), len(cdf.columns))
 def test_get_columns_basic(self):
     """Three string columns are each described as a non-date STRING with ``is_dim`` True."""
     names = ('a', 'b', 'c')
     rows = [
         ('a1', 'b1', 'c1'),
         ('a2', 'b2', 'c2'),
     ]
     description = tuple((name, 'string') for name in names)
     expected_columns = [
         {'name': name, 'type': 'STRING', 'is_date': False, 'is_dim': True}
         for name in names
     ]

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertEqual(frame.columns, expected_columns)
Пример #5
0
    def test_df_conversion_tuple(self):
        """A tuple row containing a list value yields one row and four columns."""
        cols = ["string_col", "int_col", "list_col", "float_col"]
        data = [("Text", 111, [123], 1.0)]
        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(data), cdf.size)
        self.assertEqual(len(cols), len(cdf.columns))
Пример #6
0
    def test_df_conversion_tuple(self):
        """A tuple row containing a list value yields one row and four columns."""
        cols = ['string_col', 'int_col', 'list_col', 'float_col']
        data = [(u'Text', 111, [123], 1.0)]
        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(data), cdf.size)
        self.assertEqual(len(cols), len(cdf.columns))
Пример #7
0
 def test_get_columns_basic(self):
     """Check ``columns`` for a cursor description made of three string columns."""
     rows = [
         ("a1", "b1", "c1"),
         ("a2", "b2", "c2"),
     ]
     description = (
         ("a", "string"),
         ("b", "string"),
         ("c", "string"),
     )
     # Every column is expected to be a non-date STRING dimension.
     expected_columns = [
         {"is_date": False, "type": "STRING", "name": "a", "is_dim": True},
         {"is_date": False, "type": "STRING", "name": "b", "is_dim": True},
         {"is_date": False, "type": "STRING", "name": "c", "is_dim": True},
     ]

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertEqual(frame.columns, expected_columns)
Пример #8
0
    def test_df_conversion_dict(self):
        """A row containing a dict value still yields one row and three columns."""
        cols = ['string_col', 'dict_col', 'int_col']
        data = [['a', {'c1': 1, 'c2': 2, 'c3': 3}, 4]]
        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(data), cdf.size)
        self.assertEqual(len(cols), len(cdf.columns))
Пример #9
0
    def test_msgpack_payload_serialization(self):
        """``sql_lab._serialize_payload`` returns bytes when new deserialization is on.

        The payload is assembled from the output of
        ``sql_lab._serialize_and_expand_data`` for a one-row frame holding a
        string, an int, a float and a datetime.
        """
        new_deserialization = True
        timestamp = datetime.datetime(2019, 8, 18, 16, 39, 16, 660000)
        rows = [("a", 4, 4.0, timestamp)]
        description = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        engine_spec = BaseEngineSpec()
        frame = SupersetDataFrame(rows, description, engine_spec)
        query_info = {
            "database_id": 1,
            "sql": "SELECT * FROM birth_names LIMIT 100",
            "status": QueryStatus.PENDING,
        }

        (
            serialized_data,
            selected_columns,
            all_columns,
            expanded_columns,
        ) = sql_lab._serialize_and_expand_data(
            frame, engine_spec, new_deserialization
        )

        payload = {
            "query_id": 1,
            "status": QueryStatus.SUCCESS,
            "state": QueryStatus.SUCCESS,
            "data": serialized_data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query_info,
        }
        result = sql_lab._serialize_payload(payload, new_deserialization)

        self.assertIsInstance(result, bytes)
 def test_get_columns_type_inference(self):
     """With ``None`` as the declared type, columns are expected as FLOAT and INT.

     Both columns are expected with ``is_dim`` False and an ``agg`` of ``'sum'``.
     """
     rows = [(1.2, 1), (3.14, 2)]
     # The cursor description carries no type information for either column.
     description = (('a', None), ('b', None))
     expected_columns = [
         {
             'name': col_name,
             'type': col_type,
             'is_date': False,
             'is_dim': False,
             'agg': 'sum',
         }
         for col_name, col_type in (('a', 'FLOAT'), ('b', 'INT'))
     ]

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertEqual(frame.columns, expected_columns)
Пример #11
0
 def test_empty_data(self):
     """With no rows, ``raw_df`` dtypes under PrestoEngineSpec are object and Int64."""
     description = [
         ("one", "varchar", None, None, None, None, True),
         ("two", "integer", None, None, None, None, True),
     ]

     frame = SupersetDataFrame([], description, PrestoEngineSpec)

     object_dtype = np.dtype("O")
     self.assertEqual(frame.raw_df.dtypes[0], object_dtype)
     nullable_int_dtype = pd.Int64Dtype()
     self.assertEqual(frame.raw_df.dtypes[1], nullable_int_dtype)
    def test_int64_with_missing_data(self):
        """Compare how a mostly-null bigint column comes out for two engine specs."""
        big_id = 1239162456494753670
        rows = [(None,), (big_id,), (None,), (None,), (None,), (None,)]
        description = [("user_id", "bigint", None, None, None, None, True)]

        # The base engine spec does not provide a dtype based on the cursor
        # description, so the column is inferred as float64 because of the
        # missing data.
        base_frame = SupersetDataFrame(rows, description, BaseEngineSpec)
        expected_as_float = [
            [np.nan],
            [1.2391624564947538e18],
            [np.nan],
            [np.nan],
            [np.nan],
            [np.nan],
        ]
        np.testing.assert_array_equal(
            base_frame.raw_df.values.tolist(), expected_as_float
        )

        # Currently only Presto provides a dtype based on the cursor
        # description, so the integer value is expected unchanged.
        presto_frame = SupersetDataFrame(rows, description, PrestoEngineSpec)
        expected_as_int = [
            [np.nan],
            [big_id],
            [np.nan],
            [np.nan],
            [np.nan],
            [np.nan],
        ]
        np.testing.assert_array_equal(
            presto_frame.raw_df.values.tolist(), expected_as_int
        )
Пример #13
0
    def test_df_conversion_no_dict(self):
        """Frame size and column count match the input rows and column specs."""
        cols = [
            ['string_col', 'string'],
            ['int_col', 'int'],
            ['float_col', 'float'],
        ]
        data = [['a', 4, 4.0]]
        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(len(data), cdf.size)
        self.assertEqual(len(cols), len(cdf.columns))
 def test_dedup_with_data(self):
     """Two columns both named 'a' are expected as 'a' and 'a__1' in ``column_names``."""
     rows = [('a', 1), ('a', 2)]
     # Both entries of the cursor description share the same column name.
     description = (('a', 'string'), ('a', 'string'))
     expected_names = ['a', 'a__1']

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertListEqual(frame.column_names, expected_names)
 def test_get_columns_with_int(self):
     """Check ``columns`` for a string column followed by an int column."""
     rows = [
         ("a1", 1),
         ("a2", 2),
     ]
     description = (
         ("a", "string"),
         ("b", "int"),
     )
     string_column = {
         "is_date": False,
         "type": "STRING",
         "name": "a",
         "is_dim": True,
     }
     # The int column is expected with is_dim False and a 'sum' aggregation.
     int_column = {
         "is_date": False,
         "type": "INT",
         "name": "b",
         "is_dim": False,
         "agg": "sum",
     }

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertEqual(frame.columns, [string_column, int_column])
Пример #16
0
    def test_new_data_serialization(self):
        """New-style serialization returns bytes and never calls ``expand_data``.

        ``expand_data`` on the engine spec instance is wrapped by a mock so
        the test can assert it was not invoked.
        """
        timestamp = datetime.datetime(2019, 8, 18, 16, 39, 16, 660000)
        rows = [("a", 4, 4.0, timestamp)]
        description = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        engine_spec = BaseEngineSpec()
        frame = SupersetDataFrame(rows, description, engine_spec)

        expand_data_spy = mock.patch.object(
            engine_spec, "expand_data", wraps=engine_spec.expand_data
        )
        with expand_data_spy as expand_data:
            (
                serialized,
                _selected_columns,
                _all_columns,
                _expanded_columns,
            ) = sql_lab._serialize_and_expand_data(frame, engine_spec, True)
            expand_data.assert_not_called()

        self.assertIsInstance(serialized, bytes)
Пример #17
0
 def test_get_columns_type_inference(self):
     """With ``None`` as the declared type, columns are expected as FLOAT and INT."""
     rows = [
         (1.2, 1),
         (3.14, 2),
     ]
     # The cursor description carries no type information for either column.
     description = (
         ("a", None),
         ("b", None),
     )
     float_column = {
         "is_date": False,
         "type": "FLOAT",
         "name": "a",
         "is_dim": False,
         "agg": "sum",
     }
     int_column = {
         "is_date": False,
         "type": "INT",
         "name": "b",
         "is_dim": False,
         "agg": "sum",
     }

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     self.assertEqual(frame.columns, [float_column, int_column])
Пример #18
0
def execute_sql_statement(sql_statement, query, user_name, session, cursor):
    """Execute a single SQL statement and return the fetched result.

    The statement is checked against ``database.allow_dml``, optionally
    rewritten into a CREATE TABLE AS statement, given a row limit when it is
    a SELECT, passed through ``SQL_QUERY_MUTATOR`` if one is configured, and
    then run on ``cursor`` via the database's engine spec.

    :param sql_statement: text of the statement to execute
    :param query: query object; its ``limit``, ``tmp_table_name``,
        ``select_as_cta_used`` and ``executed_sql`` attributes may be updated
    :param user_name: user name passed to ``SQL_QUERY_MUTATOR`` and
        ``log_query``
    :param session: session that is committed after ``query.executed_sql`` is
        set and that is handed to ``handle_cursor``
    :param cursor: database cursor the statement is executed on
    :returns: a ``SupersetDataFrame`` built from the fetched data and
        ``cursor.description``
    :raises SqlLabSecurityException: if the statement is not read-only and
        the database does not allow DML
    :raises SqlLabTimeoutException: if ``SoftTimeLimitExceeded`` is raised
        while executing or fetching
    :raises SqlLabException: if CREATE TABLE AS is requested for a non-SELECT
        statement, or on any other error while executing or fetching
    """
    database = query.database
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(sql_statement)
    sql = parsed_query.stripped()

    if not parsed_query.is_readonly() and not database.allow_dml:
        raise SqlLabSecurityException(
            _("Only `SELECT` statements are allowed against this database"))
    if query.select_as_cta:
        if not parsed_query.is_select():
            raise SqlLabException(
                _("Only `SELECT` statements can be used with the CREATE TABLE "
                  "feature."))
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S"))
        sql = parsed_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if parsed_query.is_select():
        # Clamp the requested limit to the configured maximum, if any.
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        if query.limit:
            sql = database.apply_limit_to_sql(sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    if SQL_QUERY_MUTATOR:
        sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

    try:
        # Record the final SQL *before* invoking the logging hook, so that
        # the hook receives the statement that is actually about to run
        # rather than whatever executed_sql held previously.
        query.executed_sql = sql
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
            )
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.info(f"Query {query.id}: Running query: \n{sql}")
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.info(f"Query {query.id}: Handling cursor")
            db_engine_spec.handle_cursor(cursor, query, session)

        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, query.limit)

    except SoftTimeLimitExceeded as e:
        logger.exception(f"Query {query.id}: {e}")
        # Chain the original exception so the root cause stays in tracebacks.
        raise SqlLabTimeoutException(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT)) from e
    except Exception as e:
        logger.exception(f"Query {query.id}: {e}")
        raise SqlLabException(db_engine_spec.extract_error_message(e)) from e

    logger.debug(f"Query {query.id}: Fetching cursor description")
    cursor_description = cursor.description
    return SupersetDataFrame(data, cursor_description, db_engine_spec)
Пример #19
0
 def test_dedup_with_data(self):
     """Two columns both named "a" are expected as "a" and "a__1" in ``column_names``."""
     rows = [
         ("a", 1),
         ("a", 2),
     ]
     # Both entries of the cursor description share the same column name.
     description = (
         ("a", "string"),
         ("a", "string"),
     )

     frame = SupersetDataFrame(rows, description, BaseEngineSpec)

     expected_names = ["a", "a__1"]
     self.assertListEqual(frame.column_names, expected_names)
Пример #20
0
 def test_pandas_datetime64(self):
     """A single null "timestamp" value under PrestoEngineSpec gives dtype ``<M8[ns]``."""
     rows = [(None,)]
     description = [
         ("ds", "timestamp", None, None, None, None, True),
     ]

     frame = SupersetDataFrame(rows, description, PrestoEngineSpec)

     expected_dtype = np.dtype("<M8[ns]")
     self.assertEqual(frame.raw_df.dtypes[0], expected_dtype)