def test_run_async_cta_query_with_lower_limit(test_client, ctas_method):
    if backend() == "mysql":  # failing
        return

    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        ctas_method=ctas_method,
        async_=True,
        tmp_table=tmp_table,
    )
    query = wait_for_success(result)

    assert QueryStatus.SUCCESS == query.status
    sqlite_select_sql = f"SELECT *\nFROM {tmp_table}\nLIMIT {query.limit}\nOFFSET 0"
    assert query.select_sql == (
        sqlite_select_sql
        if backend() == "sqlite"
        else get_select_star(tmp_table, query.limit)
    )
    assert f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit == 10000
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(tmp_table, ctas_method)
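# The CTA tests above and below clean up with `delete_tmp_view_or_table`, which
# is defined elsewhere in this module. A minimal sketch of the assumed behavior
# (not the canonical implementation): it drops the temporary artifact created
# by the CTAS/CVAS statement, mirroring the CREATE issued by the test.
def delete_tmp_view_or_table(name: str, db_object_type: str) -> None:
    # `db_object_type` is the CtasMethod value ("TABLE" or "VIEW")
    get_example_database().get_sqla_engine().execute(
        f"DROP {db_object_type} IF EXISTS {name}"
    )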
def test_run_async_cta_query(test_client, ctas_method):
    if backend() == "mysql":  # failing
        return

    table_name = f"{TEST_ASYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        ctas_method=ctas_method,
        async_=True,
        tmp_table=table_name,
    )
    query = wait_for_success(result)

    assert QueryStatus.SUCCESS == query.status
    assert get_select_star(table_name, query.limit) in query.select_sql
    assert f"CREATE {ctas_method} {table_name} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(table_name, ctas_method)
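# `wait_for_success` replaces the fixed `time.sleep(CELERY_SLEEP_TIME)` used by
# the older variants further down: it polls the query until the Celery worker
# marks it as finished. A hedged sketch of the assumed polling loop:
def wait_for_success(result):
    # poll roughly twice per second, bounded by the configured Celery timeout
    for _ in range(CELERY_SLEEP_TIME * 2):
        time.sleep(0.5)
        query = get_query_by_id(result["query"]["serverId"])
        if QueryStatus.SUCCESS == query.status:
            break
    return query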
def test_import_excel(setup_csv_upload, create_excel_files):
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't support excel upload.")

    success_msg = (
        f'Excel file "{EXCEL_FILENAME}" uploaded to table "{EXCEL_UPLOAD_TABLE}"'
    )

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_excel(
            EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(
        EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg in resp

    # verify the uploaded data
    data = (
        get_upload_db()
        .get_sqla_engine()
        .execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}")
        .fetchall()
    )
    assert data == [(0, "john", 1), (1, "paul", 2)]
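# `upload_excel` posts a multipart form to the Excel upload view and returns
# the rendered response body. A hedged sketch under assumed form-field names
# and endpoint path -- both are illustrative, not authoritative:
def upload_excel(filename: str, table_name: str, extra: Optional[dict] = None) -> str:
    form_data = {
        "excel_file": open(filename, "rb"),
        "name": table_name,
        "con": get_upload_db().id,
        "sheet_name": "Sheet1",
        "if_exists": "fail",  # tests override this via `extra`
        "index_label": "test_label",
    }
    if extra:
        form_data.update(extra)
    # assumed endpoint path and response helper
    return get_resp(test_client, "/exceltodatabaseview/form", data=form_data)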
def test_import_csv_enforced_schema(mock_event_logger):
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}"

    # no schema specified, fail upload
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": None})
    assert (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "None" is not allowed for csv uploads'
        in resp
    )

    success_msg = f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"'
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "replace"},
    )
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_csv_upload",
        database=get_upload_db().name,
        schema="admin_database",
        table=CSV_UPLOAD_TABLE_W_SCHEMA,
    )

    engine = get_upload_db().get_sqla_engine()
    data = engine.execute(
        f"SELECT * from {ADMIN_SCHEMA_NAME}.{CSV_UPLOAD_TABLE_W_SCHEMA}"
    ).fetchall()
    assert data == [("john", 1), ("paul", 2)]

    # user specified schema doesn't match, fail
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"}
    )
    assert (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "gold" is not allowed for csv uploads'
        in resp
    )

    # user specified schema matches the expected schema, append
    if utils.backend() == "hive":
        pytest.skip("Hive database doesn't support append csv uploads.")
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "append"},
    )
    assert success_msg in resp
def test_import_excel(mock_event_logger):
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't support excel upload.")

    schema = utils.get_example_default_schema()
    full_table_name = f"{schema}.{EXCEL_UPLOAD_TABLE}" if schema else EXCEL_UPLOAD_TABLE
    test_db = get_upload_db()

    success_msg = f'Excel file "{EXCEL_FILENAME}" uploaded to table "{full_table_name}"'

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=schema,
        table=EXCEL_UPLOAD_TABLE,
    )

    # ensure user is assigned as an owner
    table = SupersetTestCase.get_table(name=EXCEL_UPLOAD_TABLE)
    assert security_manager.find_user("admin") in table.owners

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_excel(
            EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(
        EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=schema,
        table=EXCEL_UPLOAD_TABLE,
    )

    # verify the uploaded data
    data = (
        test_db.get_sqla_engine()
        .execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}")
        .fetchall()
    )
    assert data == [(0, "john", 1), (1, "paul", 2)]
def test_import_excel(mock_event_logger):
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't support excel upload.")

    test_db = get_upload_db()
    success_msg = (
        f'Excel file "{EXCEL_FILENAME}" uploaded to table "{EXCEL_UPLOAD_TABLE}"'
    )

    # initial upload with fail mode
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=None,
        table=EXCEL_UPLOAD_TABLE,
    )

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"'
    resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_excel(
            EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg in resp

    # upload again with replace mode
    resp = upload_excel(
        EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg in resp
    mock_event_logger.assert_called_with(
        action="successful_excel_upload",
        database=test_db.name,
        schema=None,
        table=EXCEL_UPLOAD_TABLE,
    )

    # verify the uploaded data
    data = (
        test_db.get_sqla_engine()
        .execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}")
        .fetchall()
    )
    assert data == [(0, "john", 1), (1, "paul", 2)]
def test_run_sync_query_dont_exist(setup_sqllab, ctas_method):
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(sql_dont_exist, cta=True, ctas_method=ctas_method)
    if backend() == "sqlite" and ctas_method == CtasMethod.VIEW:
        assert QueryStatus.SUCCESS == result["status"], result
    else:
        assert QueryStatus.FAILED == result["status"], result
def test_admin_permissions(self):
    if backend() == "hive":
        return

    self.assert_can_gamma(get_perm_tuples("Admin"))
    self.assert_can_alpha(get_perm_tuples("Admin"))
    self.assert_can_admin(get_perm_tuples("Admin"))
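# Most guards in this module branch on `backend()` / `utils.backend()`. The
# helper is assumed to report the engine of the example database; a minimal
# sketch of that contract:
def backend() -> str:
    # e.g. "sqlite", "mysql", "postgresql", "presto", or "hive"
    return get_example_database().backend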
def test_run_async_query_cta_config(setup_sqllab, ctas_method):
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_ASYNC_CTA_CONFIG}_{ctas_method.lower()}"
    result = run_sql(
        QUERY,
        cta=True,
        ctas_method=ctas_method,
        async_=True,
        tmp_table=tmp_table_name,
    )
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert QueryStatus.SUCCESS == query.status
    assert get_select_star(tmp_table_name, schema=CTAS_SCHEMA_NAME) == query.select_sql
    assert (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
        == query.executed_sql
    )

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
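# `get_query_by_id` maps the `serverId` returned by the SQL Lab endpoint back
# to a Query model row. A hedged sketch, assuming the module-level SQLAlchemy
# session `db.session` and the `Query` model from superset.models.sql_lab:
def get_query_by_id(query_id: int):
    # commit first so this session sees rows written by the Celery worker
    db.session.commit()
    return db.session.query(Query).filter_by(id=query_id).first()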
def get_select_star(table: str, limit: int, schema: Optional[str] = None):
    if backend() in {"presto", "hive"}:
        schema = quote_f(schema)
        table = quote_f(table)
    if schema:
        return f"SELECT *\nFROM {schema}.{table}\nLIMIT {limit}"
    return f"SELECT *\nFROM {table}\nLIMIT {limit}"
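# `get_select_star` quotes identifiers through `quote_f` on Presto/Hive, where
# the generated SELECT * wraps names in ANSI double quotes. A minimal sketch,
# assuming the helper passes falsy values through unchanged:
def quote_f(value: Optional[str]) -> Optional[str]:
    if not value:
        return value
    return f'"{value}"'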
def test_run_async_query_cta_config(test_client, ctas_method):
    if backend() in {"sqlite", "mysql"}:
        # sqlite doesn't support schemas, mysql is flaky
        return

    tmp_table_name = f"{TEST_ASYNC_CTA_CONFIG}_{ctas_method.lower()}"
    result = run_sql(
        test_client,
        QUERY,
        cta=True,
        ctas_method=ctas_method,
        async_=True,
        tmp_table=tmp_table_name,
    )
    query = wait_for_success(result)

    assert QueryStatus.SUCCESS == query.status
    assert (
        get_select_star(tmp_table_name, limit=query.limit, schema=CTAS_SCHEMA_NAME)
        == query.select_sql
    )
    assert (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
        == query.executed_sql
    )

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
def test_create_dataset_same_name_different_schema(self):
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    example_db = get_example_database()
    example_db.get_sqla_engine().execute(
        f"CREATE TABLE {CTAS_SCHEMA_NAME}.birth_names AS SELECT 2 as two"
    )

    self.login(username="******")
    table_data = {
        "database": example_db.id,
        "schema": CTAS_SCHEMA_NAME,
        "table_name": "birth_names",
    }
    uri = "api/v1/dataset/"
    rv = self.post_assert_metric(uri, table_data, "post")
    assert rv.status_code == 201

    # cleanup
    data = json.loads(rv.data.decode("utf-8"))
    uri = f'api/v1/dataset/{data.get("id")}'
    rv = self.client.delete(uri)
    assert rv.status_code == 200
    example_db.get_sqla_engine().execute(
        f"DROP TABLE {CTAS_SCHEMA_NAME}.birth_names"
    )
def test_get_dashboard_view__user_access_with_dashboard_permission(self):
    if backend() == "hive":
        return

    # arrange
    username = random_str()
    new_role = f"role_{random_str()}"
    self.create_user_with_roles(username, [new_role], should_create_roles=True)
    slice = (
        db.session.query(Slice)
        .filter_by(slice_name="Girl Name Cloud")
        .one_or_none()
    )
    dashboard_to_access = create_dashboard_to_db(published=True, slices=[slice])
    self.login(username)
    grant_access_to_dashboard(dashboard_to_access, new_role)

    # act
    response = self.get_dashboard_view_response(dashboard_to_access)

    # assert
    self.assert200(response)

    request_payload = get_query_context("birth_names")
    rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
    self.assertEqual(rv.status_code, 200)

    # cleanup
    revoke_access_to_dashboard(dashboard_to_access, new_role)
def test_run_sync_query_cta_config(test_client, ctas_method):
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_SYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        test_client, QUERY, cta=True, ctas_method=ctas_method, tmp_table=tmp_table_name
    )
    assert QueryStatus.SUCCESS == result["query"]["state"], result
    assert cta_result(ctas_method) == (result["data"], result["columns"])

    query = get_query_by_id(result["query"]["serverId"])
    assert (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
        == query.executed_sql
    )
    assert query.select_sql == get_select_star(
        tmp_table_name, limit=query.limit, schema=CTAS_SCHEMA_NAME
    )

    results = run_sql(test_client, query.select_sql)
    assert QueryStatus.SUCCESS == results["status"], result

    delete_tmp_view_or_table(f"{CTAS_SCHEMA_NAME}.{tmp_table_name}", ctas_method)
def test_run_sync_query_dont_exist(test_client, ctas_method):
    examples_db = get_example_database()
    engine_name = examples_db.db_engine_spec.engine_name
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(test_client, sql_dont_exist, cta=True, ctas_method=ctas_method)
    if backend() == "sqlite" and ctas_method == CtasMethod.VIEW:
        assert QueryStatus.SUCCESS == result["status"], result
    elif backend() == "presto":
        assert (
            result["errors"][0]["error_type"]
            == SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR
        )
        assert result["errors"][0]["level"] == ErrorLevel.ERROR
        assert result["errors"][0]["extra"] == {
            "engine_name": "Presto",
            "issue_codes": [
                {
                    "code": 1003,
                    "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                },
                {
                    "code": 1005,
                    "message": "Issue 1005 - The table was deleted or renamed in the database.",
                },
            ],
        }
    else:
        assert (
            result["errors"][0]["error_type"]
            == SupersetErrorType.GENERIC_DB_ENGINE_ERROR
        )
        assert result["errors"][0]["level"] == ErrorLevel.ERROR
        assert result["errors"][0]["extra"] == {
            "issue_codes": [
                {
                    "code": 1002,
                    "message": "Issue 1002 - The database returned an unexpected error.",
                }
            ],
            "engine_name": engine_name,
        }
def test_import_csv_explore_database(setup_csv_upload, create_csv_files):
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE)
    assert (
        f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE_W_EXPLORE}"'
        in resp
    )
    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE_W_EXPLORE)
    assert table.database_id == utils.get_example_database().id
def test_import_csv_explore_database(setup_csv_upload, create_csv_files):
    schema = utils.get_example_default_schema()
    full_table_name = (
        f"{schema}.{CSV_UPLOAD_TABLE_W_EXPLORE}" if schema else CSV_UPLOAD_TABLE_W_EXPLORE
    )

    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE)
    assert f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"' in resp
    table = SupersetTestCase.get_table(name=CSV_UPLOAD_TABLE_W_EXPLORE)
    assert table.database_id == utils.get_example_database().id
def test_run_async_cta_query_with_lower_limit(setup_sqllab, ctas_method):
    if backend() == "mysql":  # failing
        return

    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=tmp_table
    )
    query = wait_for_success(result)

    assert QueryStatus.SUCCESS == query.status
    assert get_select_star(tmp_table) == query.select_sql
    assert f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit is None
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(tmp_table, ctas_method)
def test_sql_json(self):
    examples_db = get_example_database()
    engine_name = examples_db.db_engine_spec.engine_name

    self.login("admin")

    data = self.run_sql("SELECT * FROM birth_names LIMIT 10", "1")
    self.assertLess(0, len(data["data"]))

    data = self.run_sql("SELECT * FROM unexistant_table", "2")
    if backend() == "presto":
        assert (
            data["errors"][0]["error_type"]
            == SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR
        )
        assert data["errors"][0]["level"] == ErrorLevel.ERROR
        assert data["errors"][0]["extra"] == {
            "engine_name": "Presto",
            "issue_codes": [
                {
                    "code": 1003,
                    "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                },
                {
                    "code": 1005,
                    "message": "Issue 1005 - The table was deleted or renamed in the database.",
                },
            ],
        }
    else:
        assert (
            data["errors"][0]["error_type"]
            == SupersetErrorType.GENERIC_DB_ENGINE_ERROR
        )
        assert data["errors"][0]["level"] == ErrorLevel.ERROR
        assert data["errors"][0]["extra"] == {
            "issue_codes": [
                {
                    "code": 1002,
                    "message": "Issue 1002 - The database returned an unexpected error.",
                }
            ],
            "engine_name": engine_name,
        }
def test_run_sync_query_dont_exist(setup_sqllab, ctas_method):
    sql_dont_exist = "SELECT name FROM table_dont_exist"
    result = run_sql(sql_dont_exist, cta=True, ctas_method=ctas_method)
    if backend() == "sqlite" and ctas_method == CtasMethod.VIEW:
        assert QueryStatus.SUCCESS == result["status"], result
    else:
        assert (
            result["errors"][0]["error_type"]
            == SupersetErrorType.GENERIC_DB_ENGINE_ERROR
        )
        assert result["errors"][0]["level"] == ErrorLevel.ERROR
        assert result["errors"][0]["extra"] == {
            "issue_codes": [
                {
                    "code": 1002,
                    "message": "Issue 1002 - The database returned an unexpected error.",
                }
            ]
        }
def cta_result(ctas_method: CtasMethod):
    if backend() != "presto":
        return [], []
    if ctas_method == CtasMethod.TABLE:
        return (
            [{"rows": 1}],
            [{"name": "rows", "type": "BIGINT", "is_dttm": False}],
        )
    return (
        [{"result": True}],
        [{"name": "result", "type": "BOOLEAN", "is_dttm": False}],
    )
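# Usage sketch for `cta_result`: the sync CTA tests compare its output against
# the (data, columns) pair returned by `run_sql`, e.g.
#
#     result = run_sql(QUERY, cta=True, ctas_method=CtasMethod.TABLE)
#     assert cta_result(CtasMethod.TABLE) == (result["data"], result["columns"])
#
# Only Presto reports CTAS metadata (a row count for TABLE, a boolean for
# VIEW); every other backend yields empty data and columns.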
def test_run_async_cta_query_with_lower_limit(setup_sqllab, ctas_method):
    tmp_table = f"{TEST_ASYNC_LOWER_LIMIT}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=tmp_table
    )
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert QueryStatus.SUCCESS == query.status
    assert get_select_star(tmp_table) == query.select_sql
    assert f"CREATE {ctas_method} {tmp_table} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.limit is None
    assert query.select_as_cta
    assert query.select_as_cta_used
def test_should_generate_closed_and_open_time_filter_range():
    with app.app_context():
        if backend() != "postgresql":
            pytest.skip(f"{backend()} has a different dialect for datetime column")

        table = SqlaTable(
            table_name="temporal_column_table",
            sql=(
                "SELECT '2021-12-31'::timestamp as datetime_col "
                "UNION SELECT '2022-01-01'::timestamp "
                "UNION SELECT '2022-03-10'::timestamp "
                "UNION SELECT '2023-01-01'::timestamp "
                "UNION SELECT '2023-03-10'::timestamp "
            ),
            database=get_example_database(),
        )
        TableColumn(
            column_name="datetime_col",
            type="TIMESTAMP",
            table=table,
            is_dttm=True,
        )
        SqlMetric(metric_name="count", expression="count(*)", table=table)

        result_object = table.query(
            {
                "metrics": ["count"],
                "is_timeseries": False,
                "filter": [],
                "from_dttm": datetime(2022, 1, 1),
                "to_dttm": datetime(2023, 1, 1),
                "granularity": "datetime_col",
            }
        )
        """
        >>> result_object.query
        SELECT count(*) AS count
        FROM
          (SELECT '2021-12-31'::timestamp as datetime_col
           UNION SELECT '2022-01-01'::timestamp
           UNION SELECT '2022-03-10'::timestamp
           UNION SELECT '2023-01-01'::timestamp
           UNION SELECT '2023-03-10'::timestamp) AS virtual_table
        WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
          AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        """
        # the range is closed at the start and open at the end, so only
        # 2022-01-01 and 2022-03-10 fall inside it
        assert result_object.df.iloc[0]["count"] == 2
def test_run_async_cta_query(setup_sqllab, ctas_method):
    table_name = f"{TEST_ASYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(
        QUERY, cta=True, ctas_method=ctas_method, async_=True, tmp_table=table_name
    )
    time.sleep(CELERY_SLEEP_TIME)

    query = get_query_by_id(result["query"]["serverId"])
    assert QueryStatus.SUCCESS == query.status
    assert get_select_star(table_name) in query.select_sql
    assert f"CREATE {ctas_method} {table_name} AS \n{QUERY}" == query.executed_sql
    assert QUERY == query.sql
    assert query.rows == (1 if backend() == "presto" else 0)
    assert query.select_as_cta
    assert query.select_as_cta_used

    delete_tmp_view_or_table(table_name, ctas_method)
def test_run_sync_query_cta_config(setup_sqllab, ctas_method):
    if backend() == "sqlite":
        # sqlite doesn't support schemas
        return

    tmp_table_name = f"{TEST_SYNC_CTA}_{ctas_method.lower()}"
    result = run_sql(QUERY, cta=True, ctas_method=ctas_method, tmp_table=tmp_table_name)
    assert QueryStatus.SUCCESS == result["query"]["state"], result
    assert cta_result(ctas_method) == (result["data"], result["columns"])

    query = get_query_by_id(result["query"]["serverId"])
    assert (
        f"CREATE {ctas_method} {CTAS_SCHEMA_NAME}.{tmp_table_name} AS \n{QUERY}"
        == query.executed_sql
    )
    assert query.select_sql == get_select_star(tmp_table_name, schema=CTAS_SCHEMA_NAME)

    time.sleep(CELERY_SLEEP_TIME)
    results = run_sql(query.select_sql)
    assert QueryStatus.SUCCESS == results["status"], result
def test_import_csv_enforced_schema(setup_csv_upload, create_csv_files):
    if utils.backend() == "sqlite":
        pytest.skip("Sqlite doesn't support schema / database creation")

    full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}"

    # no schema specified, fail upload
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA)
    assert (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "None" is not allowed for csv uploads'
        in resp
    )

    # user specified schema matches the expected schema, append
    success_msg = f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"'
    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "append"},
    )
    assert success_msg in resp

    resp = upload_csv(
        CSV_FILENAME1,
        CSV_UPLOAD_TABLE_W_SCHEMA,
        extra={"schema": "admin_database", "if_exists": "replace"},
    )
    assert success_msg in resp

    # user specified schema doesn't match, fail
    resp = upload_csv(
        CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"}
    )
    assert (
        f'Database "{CSV_UPLOAD_DATABASE}" schema "gold" is not allowed for csv uploads'
        in resp
    )
def test_export_database_command(self, mock_g):
    mock_g.user = security_manager.find_user("admin")

    example_db = get_example_database()
    db_uuid = example_db.uuid
    command = ExportDatabasesCommand([example_db.id])
    contents = dict(command.run())

    # TODO: this list shouldn't depend on the order in which unit tests are run
    # or on the backend; for now use a stable subset
    core_files = {
        "metadata.yaml",
        "databases/examples.yaml",
        "datasets/examples/energy_usage.yaml",
        "datasets/examples/wb_health_population.yaml",
        "datasets/examples/birth_names.yaml",
    }
    expected_extra = {
        "engine_params": {},
        "metadata_cache_timeout": {},
        "metadata_params": {},
        "schemas_allowed_for_csv_upload": [],
    }
    if backend() == "presto":
        expected_extra = {"engine_params": {"connect_args": {"poll_interval": 0.1}}}

    assert core_files.issubset(set(contents.keys()))

    if example_db.backend == "postgresql":
        ds_type = "TIMESTAMP WITHOUT TIME ZONE"
    elif example_db.backend == "hive":
        ds_type = "TIMESTAMP"
    elif example_db.backend == "presto":
        ds_type = "VARCHAR(255)"
    else:
        ds_type = "DATETIME"
    if example_db.backend == "mysql":
        big_int_type = "BIGINT(20)"
    else:
        big_int_type = "BIGINT"

    metadata = yaml.safe_load(contents["databases/examples.yaml"])
    assert metadata == {
        "allow_csv_upload": True,
        "allow_ctas": True,
        "allow_cvas": True,
        "allow_run_async": False,
        "cache_timeout": None,
        "database_name": "examples",
        "expose_in_sqllab": True,
        "extra": expected_extra,
        "sqlalchemy_uri": example_db.sqlalchemy_uri,
        "uuid": str(example_db.uuid),
        "version": "1.0.0",
    }

    metadata = yaml.safe_load(contents["datasets/examples/birth_names.yaml"])
    metadata.pop("uuid")
    metadata["columns"].sort(key=lambda x: x["column_name"])
    expected_metadata = {
        "cache_timeout": None,
        "columns": [
            {
                "column_name": "ds",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": True,
                "python_date_format": None,
                "type": ds_type,
                "verbose_name": None,
            },
            {
                "column_name": "gender",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(16)",
                "verbose_name": None,
            },
            {
                "column_name": "name",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(255)",
                "verbose_name": None,
            },
            {
                "column_name": "num",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
            {
                "column_name": "num_california",
                "description": None,
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": None,
                "verbose_name": None,
            },
            {
                "column_name": "state",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(10)",
                "verbose_name": None,
            },
            {
                "column_name": "num_boys",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
            {
                "column_name": "num_girls",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
        ],
        "database_uuid": str(db_uuid),
        "default_endpoint": None,
        "description": "",
        "extra": None,
        "fetch_values_predicate": None,
        "filter_select_enabled": True,
        "main_dttm_col": "ds",
        "metrics": [
            {
                "d3format": None,
                "description": None,
                "expression": "COUNT(*)",
                "extra": None,
                "metric_name": "count",
                "metric_type": "count",
                "verbose_name": "COUNT(*)",
                "warning_text": None,
            },
            {
                "d3format": None,
                "description": None,
                "expression": "SUM(num)",
                "extra": None,
                "metric_name": "sum__num",
                "metric_type": None,
                "verbose_name": None,
                "warning_text": None,
            },
        ],
        "offset": 0,
        "params": None,
        "schema": None,
        "sql": None,
        "table_name": "birth_names",
        "template_params": None,
        "version": "1.0.0",
    }
    expected_metadata["columns"].sort(key=lambda x: x["column_name"])
    assert metadata == expected_metadata
def test_export_database_command(self, mock_g):
    mock_g.user = security_manager.find_user("admin")

    example_db = get_example_database()
    command = ExportDatabasesCommand([example_db.id])
    contents = dict(command.run())

    # TODO: this list shouldn't depend on the order in which unit tests are run
    # or on the backend; for now use a stable subset
    core_files = {
        "metadata.yaml",
        "databases/examples.yaml",
        "datasets/examples/energy_usage.yaml",
        "datasets/examples/wb_health_population.yaml",
        "datasets/examples/birth_names.yaml",
    }
    expected_extra = {
        "engine_params": {},
        "metadata_cache_timeout": {},
        "metadata_params": {},
        "schemas_allowed_for_csv_upload": [],
    }
    if backend() == "presto":
        expected_extra = {"engine_params": {"connect_args": {"poll_interval": 0.1}}}

    assert core_files.issubset(set(contents.keys()))

    metadata = yaml.safe_load(contents["databases/examples.yaml"])
    assert metadata == {
        "allow_csv_upload": True,
        "allow_ctas": True,
        "allow_cvas": True,
        "allow_run_async": False,
        "cache_timeout": None,
        "database_name": "examples",
        "expose_in_sqllab": True,
        "extra": expected_extra,
        "sqlalchemy_uri": example_db.sqlalchemy_uri,
        "uuid": str(example_db.uuid),
        "version": "1.0.0",
    }

    metadata = yaml.safe_load(contents["datasets/examples/birth_names.yaml"])
    metadata.pop("uuid")
    assert metadata == {
        "table_name": "birth_names",
        "main_dttm_col": None,
        "description": "Adding a DESCRip",
        "default_endpoint": "",
        "offset": 66,
        "cache_timeout": 55,
        "schema": "",
        "sql": "",
        "params": None,
        "template_params": None,
        "filter_select_enabled": True,
        "fetch_values_predicate": None,
        "extra": None,
        "metrics": [
            {
                "metric_name": "ratio",
                "verbose_name": "Ratio Boys/Girls",
                "metric_type": None,
                "expression": "sum(sum_boys) / sum(sum_girls)",
                "description": "This represents the ratio of boys/girls",
                "d3format": ".2%",
                "extra": None,
                "warning_text": "no warning",
            },
            {
                "metric_name": "sum__num",
                "verbose_name": "Babies",
                "metric_type": None,
                "expression": "SUM(num)",
                "description": "",
                "d3format": "",
                "extra": None,
                "warning_text": "",
            },
            {
                "metric_name": "count",
                "verbose_name": "",
                "metric_type": None,
                "expression": "count(1)",
                "description": None,
                "d3format": None,
                "extra": None,
                "warning_text": None,
            },
        ],
        "columns": [
            {
                "column_name": "num_california",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "NUMBER",
                "groupby": False,
                "filterable": False,
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "ds",
                "verbose_name": "",
                "is_dttm": True,
                "is_active": None,
                "type": "DATETIME",
                "groupby": True,
                "filterable": True,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "sum_girls",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": False,
                "filterable": False,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "gender",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "VARCHAR(16)",
                "groupby": True,
                "filterable": True,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "state",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "VARCHAR(10)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "sum_boys",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "num",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "name",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "VARCHAR(255)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
        ],
        "version": "1.0.0",
        "database_uuid": str(example_db.uuid),
    }
def test_import_csv(setup_csv_upload, create_csv_files):
    success_msg_f1 = f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE}"'

    # initial upload with fail mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert success_msg_f1 in resp

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload CSV file "{CSV_FILENAME1}" to table "{CSV_UPLOAD_TABLE}"'
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"})
        assert success_msg_f1 in resp

    # upload again with replace mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    assert success_msg_f1 in resp

    # try to append to table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"})
    fail_msg_f2 = f'Unable to upload CSV file "{CSV_FILENAME2}" to table "{CSV_UPLOAD_TABLE}"'
    assert fail_msg_f2 in resp

    # replace table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    success_msg_f2 = f'CSV file "{CSV_FILENAME2}" uploaded to table "{CSV_UPLOAD_TABLE}"'
    assert success_msg_f2 in resp

    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE)
    # make sure the new column name is reflected in the table metadata
    assert "d" in table.column_names

    # null values are set
    upload_csv(
        CSV_FILENAME2,
        CSV_UPLOAD_TABLE,
        extra={"null_values": '["", "john"]', "if_exists": "replace"},
    )
    # make sure that john and empty string are replaced with None
    engine = get_upload_db().get_sqla_engine()
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    assert data == [(None, 1, "x"), ("paul", 2, None)]

    # default null values
    upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    # with the default null values only the empty string is replaced with None
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    assert data == [("john", 1, "x"), ("paul", 2, None)]
def test_import_csv(setup_csv_upload, create_csv_files):
    success_msg_f1 = f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE}"'

    # initial upload with fail mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert success_msg_f1 in resp

    # upload again with fail mode; should fail
    fail_msg = f'Unable to upload CSV file "{CSV_FILENAME1}" to table "{CSV_UPLOAD_TABLE}"'
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"})
        assert success_msg_f1 in resp

    # upload again with replace mode
    resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    assert success_msg_f1 in resp

    # try to append to table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"})
    fail_msg_f2 = f'Unable to upload CSV file "{CSV_FILENAME2}" to table "{CSV_UPLOAD_TABLE}"'
    assert fail_msg_f2 in resp

    # replace table from file with different schema
    resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    success_msg_f2 = f'CSV file "{CSV_FILENAME2}" uploaded to table "{CSV_UPLOAD_TABLE}"'
    assert success_msg_f2 in resp

    table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE)
    # make sure the new column name is reflected in the table metadata
    assert "d" in table.column_names

    # null values are set
    upload_csv(
        CSV_FILENAME2,
        CSV_UPLOAD_TABLE,
        extra={"null_values": '["", "john"]', "if_exists": "replace"},
    )
    # make sure that john and empty string are replaced with None
    engine = get_upload_db().get_sqla_engine()
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    if utils.backend() == "hive":
        # Be aware that Hive only uses the first value from the null values list;
        # this is a Hive database engine limitation.
        # TODO(bkyryliuk): preprocess the csv file for hive upload to match default
        # engine capabilities.
        assert data == [("john", 1, "x"), ("paul", 2, None)]
    else:
        assert data == [(None, 1, "x"), ("paul", 2, None)]

    # default null values
    upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
    # with the default null values only the empty string is replaced with None
    data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
    if utils.backend() == "hive":
        # by default Hive does not convert values to null, unlike other databases
        assert data == [("john", 1, "x"), ("paul", 2, "")]
    else:
        assert data == [("john", 1, "x"), ("paul", 2, None)]