def test_nullable_bool(self):
     """A nullable bool column keeps its None values and is typed BOOL."""
     rows = [(None,), (True,), (None,), (None,), (None,), (None,)]
     description = [("is_test", "bool", None, None, None, None, True)]
     result_set = SupersetResultSet(rows, description, BaseEngineSpec)
     self.assertEqual(result_set.columns[0]["type"], "BOOL")
     frame = result_set.to_pandas_df()
     # One record per input row, with None preserved rather than coerced.
     expected = [{"is_test": value} for (value,) in rows]
     self.assertEqual(df_to_records(frame), expected)
 def test_single_column_multidim_nested_types(self):
     """Deeply nested lists serialize into a single JSON STRING column."""
     nested_value = [
         "test",
         [
             [
                 "foo",
                 123456,
                 [
                     [["test"], 3432546, 7657658766],
                     [["fake"], 656756765, 324324324324],
                 ],
             ]
         ],
         ["test2", 43, 765765765],
         None,
         None,
     ]
     result_set = SupersetResultSet(
         [(nested_value,)], [("metadata",)], BaseEngineSpec
     )
     self.assertEqual(result_set.columns[0]["type"], "STRING")
     records = df_to_records(result_set.to_pandas_df())
     self.assertEqual(
         records,
         [
             {
                 "metadata": '["test", [["foo", 123456, [[["test"], 3432546, 7657658766], [["fake"], 656756765, 324324324324]]]], ["test2", 43, 765765765], null, null]'
             }
         ],
     )
def test_js_max_int() -> None:
    """Ints above the JavaScript safe range are stringified; small ints kept."""
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    rows = [(1, 1239162456494753670, "c1"), (2, 100, "c2")]
    cursor_descr: DbapiDescription = [
        (name, type_, None, None, None, None, False)
        for name, type_ in (("a", "int"), ("b", "int"), ("c", "string"))
    ]
    result_set = SupersetResultSet(rows, cursor_descr, BaseEngineSpec)
    records = df_to_records(result_set.to_pandas_df())

    assert records == [
        {"a": 1, "b": "1239162456494753670", "c": "c1"},
        {"a": 2, "b": 100, "c": "c2"},
    ]
 def test_nested_list_types(self):
     """A list of dicts is serialized to its JSON string representation."""
     rows = [([{"TestKey": [123456, "foo"]}],)]
     result_set = SupersetResultSet(rows, [("metadata",)], BaseEngineSpec)
     self.assertEqual(result_set.columns[0]["type"], "STRING")
     frame = result_set.to_pandas_df()
     self.assertEqual(
         df_to_records(frame), [{"metadata": '[{"TestKey": [123456, "foo"]}]'}]
     )
Example #5
    def test_results_msgpack_deserialization(self):
        """Round-trip a query payload through the msgpack serialization path.

        Serializes results with _serialize_and_expand_data /
        _serialize_payload, deserializes the payload, and checks it matches
        the original with data expanded exactly once by the engine spec.
        """
        use_new_deserialization = True
        data = [("a", 4, 4.0, "2019-08-18T16:39:16.660000")]
        cursor_descr = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        db_engine_spec = BaseEngineSpec()
        results = SupersetResultSet(data, cursor_descr, db_engine_spec)
        query = {
            "database_id": 1,
            "sql": "SELECT * FROM birth_names LIMIT 100",
            "status": utils.QueryStatus.PENDING,
        }
        (
            serialized_data,
            selected_columns,
            all_columns,
            expanded_columns,
        ) = sql_lab._serialize_and_expand_data(
            results, db_engine_spec, use_new_deserialization
        )
        payload = {
            "query_id": 1,
            "status": utils.QueryStatus.SUCCESS,
            "state": utils.QueryStatus.SUCCESS,
            "data": serialized_data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query,
        }

        # The msgpack path must produce bytes, not a JSON string.
        serialized_payload = sql_lab._serialize_payload(
            payload, use_new_deserialization
        )
        self.assertIsInstance(serialized_payload, bytes)

        # Wrap expand_data so the call can be observed without changing it.
        with mock.patch.object(
            db_engine_spec, "expand_data", wraps=db_engine_spec.expand_data
        ) as expand_data:
            query_mock = mock.Mock()
            query_mock.database.db_engine_spec.expand_data = expand_data

            deserialized_payload = superset.views.utils._deserialize_results_payload(
                serialized_payload, query_mock, use_new_deserialization
            )
            # Compute the expected records form of the data for comparison.
            df = results.to_pandas_df()
            payload["data"] = dataframe.df_to_records(df)

            self.assertDictEqual(deserialized_payload, payload)
            expand_data.assert_called_once()
def test_max_pandas_timestamp(input_, expected) -> None:
    """Parametrized: datetime rows convert to the expected records unchanged."""
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    cursor_descr: DbapiDescription = [
        (name, type_, None, None, None, None, False)
        for name, type_ in (("a", "datetime"), ("b", "int"))
    ]
    result_set = SupersetResultSet(input_, cursor_descr, BaseEngineSpec)
    frame = result_set.to_pandas_df()

    assert df_to_records(frame) == expected
Example #7
 def test_data_as_list_of_lists(self):
     """Rows supplied as lists (rather than tuples) are accepted."""
     rows = [[1, "a"], [2, "b"]]
     description = [
         (name, type_, None, None, None, None, True)
         for name, type_ in (("user_id", "INT"), ("username", "STRING"))
     ]
     result_set = SupersetResultSet(rows, description, BaseEngineSpec)
     frame = result_set.to_pandas_df()
     self.assertEqual(
         df_to_records(frame),
         [{"user_id": 1, "username": "******"}, {"user_id": 2, "username": "******"}],
     )
 def test_nested_types(self):
     """Nested containers become JSON-encoded STRING columns; scalars keep type."""
     rows = [
         (
             4,
             [{"table_name": "unicode_test", "database_id": 1}],
             [1, 2, 3],
             {"chart_name": "scatter"},
         ),
         (
             3,
             [{"table_name": "birth_names", "database_id": 1}],
             [4, 5, 6],
             {"chart_name": "plot"},
         ),
     ]
     description = [("id",), ("dict_arr",), ("num_arr",), ("map_col",)]
     result_set = SupersetResultSet(rows, description, BaseEngineSpec)
     for position, expected_type in enumerate(["INT", "STRING", "STRING", "STRING"]):
         self.assertEqual(result_set.columns[position]["type"], expected_type)
     frame = result_set.to_pandas_df()
     self.assertEqual(
         df_to_records(frame),
         [
             {
                 "id": 4,
                 "dict_arr": '[{"table_name": "unicode_test", "database_id": 1}]',
                 "num_arr": "[1, 2, 3]",
                 "map_col": '{"chart_name": "scatter"}',
             },
             {
                 "id": 3,
                 "dict_arr": '[{"table_name": "birth_names", "database_id": 1}]',
                 "num_arr": "[4, 5, 6]",
                 "map_col": '{"chart_name": "plot"}',
             },
         ],
     )
Example #9
    def get_df(  # pylint: disable=too-many-locals
        self,
        sql: str,
        schema: Optional[str] = None,
        mutator: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
    ) -> pd.DataFrame:
        """Run ``sql`` against this database and return a pandas DataFrame.

        Multi-statement SQL is supported: every statement but the last is
        executed and drained, and only the final statement's result set is
        converted to a DataFrame.

        :param sql: one or more SQL statements, split by the engine spec
        :param schema: optional schema to bind the engine to
        :param mutator: optional callable applied to the DataFrame; its
            return value replaces the DataFrame
        """
        sqls = self.db_engine_spec.parse_sql(sql)
        engine = self.get_sqla_engine(schema)

        def needs_conversion(df_series: pd.Series) -> bool:
            # True when the column holds lists/dicts that must be JSON-encoded.
            # Probes the first value via df_series[0] — assumes the default
            # RangeIndex produced by the result-set conversion.
            return (
                not df_series.empty
                and isinstance(df_series, pd.Series)
                and isinstance(df_series[0], (list, dict))
            )

        def _log_query(sql: str) -> None:
            # log_query is an optional module-level hook; no-op when unset.
            if log_query:
                log_query(
                    engine.url,
                    sql,
                    schema,
                    get_username(),
                    __name__,
                    security_manager,
                )

        with closing(engine.raw_connection()) as conn:
            cursor = conn.cursor()
            # Execute all but the last statement, draining each result set so
            # the cursor is ready for the next statement.
            for sql_ in sqls[:-1]:
                _log_query(sql_)
                self.db_engine_spec.execute(cursor, sql_)
                cursor.fetchall()

            # Only the last statement's results are returned to the caller.
            _log_query(sqls[-1])
            self.db_engine_spec.execute(cursor, sqls[-1])

            data = self.db_engine_spec.fetch_data(cursor)
            result_set = SupersetResultSet(
                data, cursor.description, self.db_engine_spec
            )
            df = result_set.to_pandas_df()
            if mutator:
                df = mutator(df)

            # JSON-encode object columns containing lists/dicts so the frame
            # is serializable downstream.
            for col, coltype in df.dtypes.to_dict().items():
                if coltype == numpy.object_ and needs_conversion(df[col]):
                    df[col] = df[col].apply(utils.json_dumps_w_dates)

            return df
 def test_is_date(self):
     """is_temporal() accepts temporal type names and rejects everything else."""
     results = SupersetResultSet(
         [("a", 1), ("a", 2)], (("a", "string"), ("a", "string")), BaseEngineSpec
     )
     for temporal_type in ("DATE", "DATETIME", "TIME", "TIMESTAMP"):
         self.assertEqual(results.is_temporal(temporal_type), True)
     for non_temporal in ("STRING", "", None):
         self.assertEqual(results.is_temporal(non_temporal), False)
Example #11
    def test_msgpack_payload_serialization(self):
        """Payloads serialized via the msgpack path come back as bytes."""
        use_new_deserialization = True
        rows = [("a", 4, 4.0, datetime.datetime(2019, 8, 18, 16, 39, 16, 660000))]
        description = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        db_engine_spec = BaseEngineSpec()
        results = SupersetResultSet(rows, description, db_engine_spec)
        query = {
            "database_id": 1,
            "sql": "SELECT * FROM birth_names LIMIT 100",
            "status": QueryStatus.PENDING,
        }
        (
            serialized_data,
            selected_columns,
            all_columns,
            expanded_columns,
        ) = sql_lab._serialize_and_expand_data(
            results, db_engine_spec, use_new_deserialization
        )
        payload = {
            "query_id": 1,
            "status": QueryStatus.SUCCESS,
            "state": QueryStatus.SUCCESS,
            "data": serialized_data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query,
        }

        serialized = sql_lab._serialize_payload(payload, use_new_deserialization)
        self.assertIsInstance(serialized, bytes)
    def test_pa_conversion_tuple(self):
        """Tuple rows convert with matching row and column counts."""
        column_names = ["string_col", "int_col", "list_col", "float_col"]
        rows = [("Text", 111, [123], 1.0)]
        results = SupersetResultSet(rows, column_names, BaseEngineSpec)

        self.assertEqual(results.size, len(rows))
        self.assertEqual(len(results.columns), len(column_names))
    def test_pa_conversion_dict(self):
        """Rows containing dict values convert with matching row/column counts."""
        column_names = ["string_col", "dict_col", "int_col"]
        rows = [["a", {"c1": 1, "c2": 2, "c3": 3}, 4]]
        results = SupersetResultSet(rows, column_names, BaseEngineSpec)

        self.assertEqual(results.size, len(rows))
        self.assertEqual(len(results.columns), len(column_names))
Example #14
def _serialize_and_expand_data(
    result_set: SupersetResultSet,
    db_engine_spec: BaseEngineSpec,
    use_msgpack: Optional[bool] = False,
    expand_data: bool = False,
) -> Tuple[Union[bytes, str], List[Any], List[Any], List[Any]]:
    """Serialize a result set and optionally expand nested columns.

    :param result_set: the query results to serialize
    :param db_engine_spec: engine spec used for nested-column expansion
    :param use_msgpack: when True, serialize the pyarrow table to bytes;
        otherwise convert the results to a list of records
    :param expand_data: when True (records path only), expand nested columns
        via the engine spec
    :returns: tuple of (data, selected_columns, all_columns, expanded_columns)
    """
    selected_columns = result_set.columns
    all_columns: List[Any]
    expanded_columns: List[Any]

    if use_msgpack:
        with stats_timing("sqllab.query.results_backend_pa_serialization",
                          stats_logger):
            # NOTE(review): pa.default_serialization_context() is deprecated
            # in newer pyarrow releases — confirm the pinned pyarrow version.
            data = (pa.default_serialization_context().serialize(
                result_set.pa_table).to_buffer().to_pybytes())

        # expand when loading data from results backend
        all_columns, expanded_columns = (selected_columns, [])
    else:
        df = result_set.to_pandas_df()
        # Fall back to an empty list when the frame has no rows.
        data = df_to_records(df) or []

        if expand_data:
            all_columns, data, expanded_columns = db_engine_spec.expand_data(
                selected_columns, data)
        else:
            all_columns = selected_columns
            expanded_columns = []

    return (data, selected_columns, all_columns, expanded_columns)
Example #15
    def test_ps_conversion_no_dict(self):
        """Column descriptors given as [name, type] lists are accepted."""
        descriptors = [
            ["string_col", "string"],
            ["int_col", "int"],
            ["float_col", "float"],
        ]
        rows = [["a", 4, 4.0]]
        results = SupersetResultSet(rows, descriptors, BaseEngineSpec)

        self.assertEqual(results.size, len(rows))
        self.assertEqual(len(results.columns), len(descriptors))
Example #16
def test_msgpack_payload_serialization():
    """_serialize_payload returns bytes when msgpack serialization is enabled."""
    use_new_deserialization = True
    db_engine_spec = BaseEngineSpec()
    results = SupersetResultSet(SERIALIZATION_DATA, CURSOR_DESCR, db_engine_spec)
    query = {
        "database_id": 1,
        "sql": "SELECT * FROM birth_names LIMIT 100",
        "status": QueryStatus.PENDING,
    }
    serialized_data, selected_columns, all_columns, expanded_columns = (
        sql_lab._serialize_and_expand_data(
            results, db_engine_spec, use_new_deserialization
        )
    )
    payload = {
        "query_id": 1,
        "status": QueryStatus.SUCCESS,
        "state": QueryStatus.SUCCESS,
        "data": serialized_data,
        "columns": all_columns,
        "selected_columns": selected_columns,
        "expanded_columns": expanded_columns,
        "query": query,
    }

    serialized = sql_lab._serialize_payload(payload, use_new_deserialization)
    assert isinstance(serialized, bytes)
    def test_new_data_serialization(self):
        """msgpack serialization yields bytes and skips data expansion."""
        rows = [("a", 4, 4.0, datetime.datetime(2019, 8, 18, 16, 39, 16, 660000))]
        description = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        db_engine_spec = BaseEngineSpec()
        results = SupersetResultSet(rows, description, db_engine_spec)

        with mock.patch.object(
            db_engine_spec, "expand_data", wraps=db_engine_spec.expand_data
        ) as expand_data:
            serialized = sql_lab._serialize_and_expand_data(
                results, db_engine_spec, True
            )
            expand_data.assert_not_called()

        # First element of the returned tuple is the serialized data.
        self.assertIsInstance(serialized[0], bytes)
Example #18
 def test_mssql_engine_spec_pymssql(self):
     """pymssql-style tuple rows convert, keeping datetimes as pd.Timestamp."""
     # Test for case when tuple is returned (pymssql)
     rows = [
         (1, 1, datetime.datetime(2017, 10, 19, 23, 39, 16, 660000)),
         (2, 2, datetime.datetime(2018, 10, 19, 23, 39, 16, 660000)),
     ]
     results = SupersetResultSet(
         list(rows), [["col1"], ["col2"], ["col3"]], MssqlEngineSpec
     )
     records = dataframe.df_to_records(results.to_pandas_df())
     self.assertEqual(len(records), 2)
     self.assertEqual(
         records[0],
         {"col1": 1, "col2": 1, "col3": pd.Timestamp("2017-10-19 23:39:16.660000")},
     )
def test_column_names_as_bytes() -> None:
    """
    Test that we can handle column names as bytes.
    """
    from superset.db_engine_specs.redshift import RedshiftEngineSpec
    from superset.result_set import SupersetResultSet

    data = (
        [
            "2016-01-26",
            392.002014,
            397.765991,
            390.575012,
            392.153015,
            392.153015,
            58147000,
        ],
        [
            "2016-01-27",
            392.444,
            396.842987,
            391.782013,
            394.971985,
            394.971985,
            47424400,
        ],
    )
    # Column names arrive as raw bytes, as some drivers return them.
    names = (b"date", b"open", b"high", b"low", b"close", b"adj close", b"volume")
    type_codes = (1043, 701, 701, 701, 701, 701, 20)
    description = [
        (name, type_code, None, None, None, None, None)
        for name, type_code in zip(names, type_codes)
    ]
    result_set = SupersetResultSet(data, description, RedshiftEngineSpec)  # type: ignore

    expected = """
|    | date       |    open |    high |     low |   close |   adj close |   volume |
|---:|:-----------|--------:|--------:|--------:|--------:|------------:|---------:|
|  0 | 2016-01-26 | 392.002 | 397.766 | 390.575 | 392.153 |     392.153 | 58147000 |
|  1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 |     394.972 | 47424400 |
    """.strip()
    assert result_set.to_pandas_df().to_markdown() == expected
 def test_empty_data(self):
     """A result set with no rows produces no columns at all."""
     description = [
         ("emptyone", "varchar", None, None, None, None, True),
         ("emptytwo", "int", None, None, None, None, True),
     ]
     results = SupersetResultSet([], description, BaseEngineSpec)
     self.assertEqual(results.columns, [])
Example #21
    def test_df_to_records(self):
        """df_to_records yields one dict per row, keyed by column name."""
        rows = [("a1", "b1", "c1"), ("a2", "b2", "c2")]
        description = (("a", "string"), ("b", "string"), ("c", "string"))
        results = SupersetResultSet(rows, description, BaseEngineSpec)
        frame = results.to_pandas_df()

        expected = [
            {"a": "a1", "b": "b1", "c": "c1"},
            {"a": "a2", "b": "b2", "c": "c2"},
        ]
        self.assertEqual(df_to_records(frame), expected)
 def test_no_type_coercion(self):
     """Declared cursor types are kept verbatim, with no coercion applied."""
     rows = [("a", 1), ("b", 2)]
     description = [
         ("one", "varchar", None, None, None, None, True),
         ("two", "int", None, None, None, None, True),
     ]
     results = SupersetResultSet(rows, description, BaseEngineSpec)
     for position, expected_type in enumerate(("VARCHAR", "INT")):
         self.assertEqual(results.columns[position]["type"], expected_type)
Example #23
def test_new_data_serialization():
    """msgpack serialization returns bytes and never expands data."""
    db_engine_spec = BaseEngineSpec()
    results = SupersetResultSet(SERIALIZATION_DATA, CURSOR_DESCR, db_engine_spec)

    with mock.patch.object(
        db_engine_spec, "expand_data", wraps=db_engine_spec.expand_data
    ) as expand_data:
        serialized = sql_lab._serialize_and_expand_data(results, db_engine_spec, True)
        expand_data.assert_not_called()
    # First element of the returned tuple is the serialized data.
    assert isinstance(serialized[0], bytes)
Example #24
 def test_dataframe_timezone(self):
     """TZ-aware datetimes survive conversion and serialize to ISO strings."""
     tz = pytz.FixedOffset(60)
     rows = [
         (datetime.datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=tz),),
         (datetime.datetime(2017, 11, 18, 22, 6, 30, tzinfo=tz),),
     ]
     results = SupersetResultSet(list(rows), [["data"]], BaseEngineSpec)
     records = dataframe.df_to_records(results.to_pandas_df())
     json_str = json.dumps(records, default=utils.pessimistic_json_iso_dttm_ser)
     self.assertDictEqual(
         records[0],
         {"data": pd.Timestamp("2017-11-18 21:53:00.219225+0100", tz=tz)},
     )
     self.assertDictEqual(
         records[1], {"data": pd.Timestamp("2017-11-18 22:06:30+0100", tz=tz)}
     )
     self.assertEqual(
         json_str,
         '[{"data": "2017-11-18T21:53:00.219225+01:00"}, {"data": "2017-11-18T22:06:30+01:00"}]',
     )
 def test_get_columns_with_int(self):
     """An int column is typed INT and not flagged as a date."""
     rows = [("a1", 1), ("a2", 2)]
     description = (("a", "string"), ("b", "int"))
     results = SupersetResultSet(rows, description, BaseEngineSpec)
     self.assertEqual(
         results.columns,
         [
             {"is_date": False, "type": "STRING", "name": "a"},
             {"is_date": False, "type": "INT", "name": "b"},
         ],
     )
Example #26
    def test_js_max_int(self):
        """Ints above the JavaScript safe range are stringified; small ints kept."""
        rows = [(1, 1239162456494753670, "c1"), (2, 100, "c2")]
        description = (("a", "int"), ("b", "int"), ("c", "string"))
        results = SupersetResultSet(rows, description, BaseEngineSpec)
        frame = results.to_pandas_df()

        expected = [
            {"a": 1, "b": "1239162456494753670", "c": "c1"},
            {"a": 2, "b": 100, "c": "c2"},
        ]
        self.assertEqual(df_to_records(frame), expected)
 def test_get_columns_basic(self):
     """String columns are typed STRING with is_date False."""
     rows = [("a1", "b1", "c1"), ("a2", "b2", "c2")]
     description = (("a", "string"), ("b", "string"), ("c", "string"))
     results = SupersetResultSet(rows, description, BaseEngineSpec)
     expected = [
         {"is_date": False, "type": "STRING", "name": name}
         for name in ("a", "b", "c")
     ]
     self.assertEqual(results.columns, expected)
def test_df_to_records() -> None:
    """df_to_records yields one dict per row, keyed by column name."""
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    rows = [("a1", "b1", "c1"), ("a2", "b2", "c2")]
    cursor_descr: DbapiDescription = [
        (column, "string", None, None, None, None, False)
        for column in ("a", "b", "c")
    ]
    results = SupersetResultSet(rows, cursor_descr, BaseEngineSpec)
    frame = results.to_pandas_df()

    assert df_to_records(frame) == [
        {"a": "a1", "b": "b1", "c": "c1"},
        {"a": "a2", "b": "b2", "c": "c2"},
    ]
Example #29
    def test_results_default_deserialization(self):
        """Round-trip a query payload through the default (JSON) serialization.

        With msgpack disabled the serialized payload is a str, and
        deserialization restores the payload without touching the query
        object (no expansion call).
        """
        use_new_deserialization = False
        data = [("a", 4, 4.0, "2019-08-18T16:39:16.660000")]
        cursor_descr = (
            ("a", "string"),
            ("b", "int"),
            ("c", "float"),
            ("d", "datetime"),
        )
        db_engine_spec = BaseEngineSpec()
        results = SupersetResultSet(data, cursor_descr, db_engine_spec)
        query = {
            "database_id": 1,
            "sql": "SELECT * FROM birth_names LIMIT 100",
            "status": utils.QueryStatus.PENDING,
        }
        (
            serialized_data,
            selected_columns,
            all_columns,
            expanded_columns,
        ) = sql_lab._serialize_and_expand_data(
            results, db_engine_spec, use_new_deserialization
        )
        payload = {
            "query_id": 1,
            "status": utils.QueryStatus.SUCCESS,
            "state": utils.QueryStatus.SUCCESS,
            "data": serialized_data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query,
        }

        # The default path must produce a JSON string, not bytes.
        serialized_payload = sql_lab._serialize_payload(
            payload, use_new_deserialization
        )
        self.assertIsInstance(serialized_payload, str)

        query_mock = mock.Mock()
        deserialized_payload = superset.views.utils._deserialize_results_payload(
            serialized_payload, query_mock, use_new_deserialization
        )

        self.assertDictEqual(deserialized_payload, payload)
        # The query object must not be consulted on the default path.
        query_mock.assert_not_called()
 def test_get_columns_type_inference(self):
     """With no declared cursor types, column types are inferred from values."""
     rows = [
         (1.2, 1, "foo", datetime(2018, 10, 19, 23, 39, 16, 660000), True),
         (3.14, 2, "bar", datetime(2019, 10, 19, 23, 39, 16, 660000), False),
     ]
     description = (("a", None), ("b", None), ("c", None), ("d", None), ("e", None))
     results = SupersetResultSet(rows, description, BaseEngineSpec)
     self.assertEqual(
         results.columns,
         [
             {"is_date": False, "type": "FLOAT", "name": "a"},
             {"is_date": False, "type": "INT", "name": "b"},
             {"is_date": False, "type": "STRING", "name": "c"},
             {"is_date": True, "type": "DATETIME", "name": "d"},
             {"is_date": False, "type": "BOOL", "name": "e"},
         ],
     )