def create_slice(
    datasource_id: Optional[int] = None,
    datasource: Optional[SqlaTable] = None,
    name: Optional[str] = None,
    owners: Optional[List[User]] = None,
) -> Slice:
    name = name if name is not None else random_str()
    owners = owners if owners is not None else []
    datasource_type = "table"
    if datasource:
        return Slice(
            slice_name=name,
            table=datasource,
            owners=owners,
            datasource_type=datasource_type,
        )
    datasource_id = (
        datasource_id
        if datasource_id is not None
        else create_datasource_table_to_db(name=name + "_table").id
    )
    return Slice(
        slice_name=name,
        datasource_id=datasource_id,
        owners=owners,
        datasource_type=datasource_type,
    )
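# A minimal usage sketch for the factory above. It relies on the same helpers
# the factory itself uses (random_str, create_datasource_table_to_db); the
# concrete names and arguments below are illustrative, not from the original.
def example_create_slice_usage() -> None:
    # No arguments: a random name and a fresh backing table are generated.
    anonymous_slice = create_slice()

    # An existing SqlaTable can be passed directly, skipping table creation.
    table = create_datasource_table_to_db(name="existing_example_table")
    named_slice = create_slice(datasource=table, name="named_example_slice")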
def create_slice(
    self,
    name,
    ds_id=None,
    id=None,
    db_name="examples",
    table_name="wb_health_population",
):
    params = {
        "num_period_compare": "10",
        "remote_id": id,
        "datasource_name": table_name,
        "database_name": db_name,
        "schema": "",
        # Test for trailing commas
        "metrics": ["sum__signup_attempt_email", "sum__signup_attempt_facebook"],
    }
    if table_name and not ds_id:
        table = self.get_table_by_name(table_name)
        if table:
            ds_id = table.id
    return Slice(
        slice_name=name,
        datasource_type="table",
        viz_type="bubble",
        params=json.dumps(params),
        datasource_id=ds_id,
        id=id,
    )
def load_multi_line(only_metadata: bool = False) -> None:
    load_world_bank_health_n_pop(only_metadata)
    load_birth_names(only_metadata)
    ids = [
        row.id
        for row in db.session.query(Slice).filter(
            Slice.slice_name.in_(["Growth Rate", "Trends"])
        )
    ]

    slc = Slice(
        datasource_type=DatasourceType.TABLE,  # not true, but needed
        datasource_id=1,  # cannot be empty
        slice_name="Multi Line",
        viz_type="line_multi",
        params=json.dumps(
            {
                "slice_name": "Multi Line",
                "viz_type": "line_multi",
                "line_charts": [ids[0]],
                "line_charts_2": [ids[1]],
                "since": "1970",
                "until": "1995",
                "prefix_metric_with_slice_name": True,
                "show_legend": False,
                "x_axis_format": "%Y",
            }
        ),
    )

    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def decode_dashboards(  # pylint: disable=too-many-return-statements
    o: Dict[str, Any]
) -> Any:
    """
    Function to be passed into json.loads object_hook parameter
    Recreates the dashboard object from a json representation.
    """
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__DruidCluster__" in o:
        return DruidCluster(**o["__DruidCluster__"])
    if "__DruidColumn__" in o:
        return DruidColumn(**o["__DruidColumn__"])
    if "__DruidDatasource__" in o:
        return DruidDatasource(**o["__DruidDatasource__"])
    if "__DruidMetric__" in o:
        return DruidMetric(**o["__DruidMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def new_slice(name=None, table=None, tags=None):
    """Create a new test slice (and test table if none specified)"""
    if name is None:
        name = "slice-%s" % uuid.uuid4()
    if table is None:
        table = create_table(tags=tags)
    if tags is None:
        tags = ['test']
    slyce = Slice(
        slice_name=name,
        datasource_type='table',
        datasource_name=table.datasource_name,
        viz_type='bubble',
        params=json.dumps(
            dict(
                tags=tags,
                database_name=table.database_name,
                datasource_name=table.datasource_name,
                schema=table.schema,
                metrics=[],
            )
        ),
    )
    # NOTE that we don't actually import the slice here - it needs to
    # be attached to a dashboard for that to make sense
    return slyce
def test_treemap_migrate(app_context: SupersetApp) -> None:
    from superset.models.slice import Slice

    slc = Slice(
        viz_type=MigrateTreeMap.source_viz_type,
        datasource_type="table",
        params=treemap_form_data,
        query_context=f'{{"form_data": {treemap_form_data}}}',
    )

    slc = MigrateTreeMap.upgrade_slice(slc)
    assert slc.viz_type == MigrateTreeMap.target_viz_type

    # verify form_data
    new_form_data = json.loads(slc.params)
    assert new_form_data["metric"] == "sum__num"
    assert new_form_data["viz_type"] == "treemap_v2"
    assert "metrics" not in new_form_data
    assert json.dumps(new_form_data["form_data_bak"], sort_keys=True) == json.dumps(
        json.loads(treemap_form_data), sort_keys=True
    )

    # verify query_context
    new_query_context = json.loads(slc.query_context)
    assert new_query_context["form_data"]["viz_type"] == "treemap_v2"

    # downgrade
    slc = MigrateTreeMap.downgrade_slice(slc)
    assert slc.viz_type == MigrateTreeMap.source_viz_type
    assert json.dumps(json.loads(slc.params), sort_keys=True) == json.dumps(
        json.loads(treemap_form_data), sort_keys=True
    )
def test_data_for_slices_with_adhoc_column(self):
    # should perform sqla.model.BaseDatasource.data_for_slices() with adhoc
    # column and legacy chart
    tbl = self.get_table(name="birth_names")
    dashboard = self.get_dash_by_slug("births")
    slc = Slice(
        slice_name="slice with adhoc column",
        datasource_type="table",
        viz_type="table",
        params=json.dumps(
            {
                "adhoc_filters": [],
                "granularity_sqla": "ds",
                "groupby": [
                    "name",
                    {"label": "adhoc_column", "sqlExpression": "name"},
                ],
                "metrics": ["sum__num"],
                "time_range": "No filter",
                "viz_type": "table",
            }
        ),
        datasource_id=tbl.id,
    )
    dashboard.slices.append(slc)
    datasource_info = slc.datasource.data_for_slices([slc])
    assert "database" in datasource_info

    # clean up and auto commit
    metadata_db.session.delete(slc)
def insert_chart(
    self,
    slice_name: str,
    owners: List[int],
    datasource_id: int,
    created_by=None,
    datasource_type: str = "table",
    description: Optional[str] = None,
    viz_type: Optional[str] = None,
    params: Optional[str] = None,
    cache_timeout: Optional[int] = None,
) -> Slice:
    obj_owners = list()
    for owner in owners:
        user = db.session.query(security_manager.user_model).get(owner)
        obj_owners.append(user)
    datasource = ConnectorRegistry.get_datasource(
        datasource_type, datasource_id, db.session
    )
    slice = Slice(
        cache_timeout=cache_timeout,
        created_by=created_by,
        datasource_id=datasource.id,
        datasource_name=datasource.name,
        datasource_type=datasource.type,
        description=description,
        owners=obj_owners,
        params=params,
        slice_name=slice_name,
        viz_type=viz_type,
    )
    db.session.add(slice)
    db.session.commit()
    return slice
def load_random_time_series_data(
    only_metadata: bool = False, force: bool = False
) -> None:
    """Loading random time series data from a zip file in the repo"""
    tbl_name = "random_time_series"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("random_time_series.json.gz")
        pdf = pd.read_json(data)
        if database.backend == "presto":
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")
            pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            pdf.ds = pd.to_datetime(pdf.ds, unit="s")

        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={"ds": DateTime if database.backend != "presto" else String(255)},
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    print(f"Creating table [{tbl_name}] reference")
    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        obj = TBL(table_name=tbl_name)
    obj.main_dttm_col = "ds"
    obj.database = database
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        "granularity_sqla": "day",
        "row_limit": config["ROW_LIMIT"],
        "since": "2019-01-01",
        "until": "2019-02-01",
        "metric": "count",
        "viz_type": "cal_heatmap",
        "domain_granularity": "month",
        "subdomain_granularity": "day",
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Calendar Heatmap",
        viz_type="cal_heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)
def create_slice(
    title: str, viz_type: str, table: SqlaTable, slices_dict: Dict[str, str]
) -> Slice:
    return Slice(
        slice_name=title,
        viz_type=viz_type,
        datasource_type=DatasourceType.TABLE,
        datasource_id=table.id,
        params=json.dumps(slices_dict, indent=4, sort_keys=True),
    )
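# A minimal usage sketch for the factory above. The table lookup and the
# form-data dict are illustrative assumptions, not part of the original code.
example_table = db.session.query(SqlaTable).filter_by(table_name="birth_names").one()
example_slice = create_slice(
    title="Example Table Chart",
    viz_type="table",
    table=example_table,
    slices_dict={"viz_type": "table", "metric": "count", "row_limit": "100"},
)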
def test_set_perm_slice(self):
    session = db.session
    database = Database(database_name="tmp_database", sqlalchemy_uri="sqlite://test")
    table = SqlaTable(table_name="tmp_perm_table", database=database)
    session.add(database)
    session.add(table)
    session.commit()

    # no schema permission
    slice = Slice(
        datasource_id=table.id,
        datasource_type="table",
        datasource_name="tmp_perm_table",
        slice_name="slice_name",
    )
    session.add(slice)
    session.commit()

    slice = session.query(Slice).filter_by(slice_name="slice_name").one()
    self.assertEqual(slice.perm, table.perm)
    self.assertEqual(slice.perm, f"[tmp_database].[tmp_perm_table](id:{table.id})")
    self.assertEqual(slice.schema_perm, table.schema_perm)
    self.assertIsNone(slice.schema_perm)

    table.schema = "tmp_perm_schema"
    table.table_name = "tmp_perm_table_v2"
    session.commit()
    # TODO(bogdan): modify slice permissions on the table update.
    self.assertNotEqual(slice.perm, table.perm)
    self.assertEqual(slice.perm, f"[tmp_database].[tmp_perm_table](id:{table.id})")
    self.assertEqual(
        table.perm, f"[tmp_database].[tmp_perm_table_v2](id:{table.id})"
    )
    # TODO(bogdan): modify slice schema permissions on the table update.
    self.assertNotEqual(slice.schema_perm, table.schema_perm)
    self.assertIsNone(slice.schema_perm)

    # updating slice refreshes the permissions
    slice.slice_name = "slice_name_v2"
    session.commit()
    self.assertEqual(slice.perm, table.perm)
    self.assertEqual(
        slice.perm, f"[tmp_database].[tmp_perm_table_v2](id:{table.id})"
    )
    self.assertEqual(slice.schema_perm, table.schema_perm)
    self.assertEqual(slice.schema_perm, "[tmp_database].[tmp_perm_schema]")

    session.delete(slice)
    session.delete(table)
    session.delete(database)
    session.commit()
def test_saved_chart_is_admin(mocker: MockFixture, app_context: AppContext) -> None:
    from superset.connectors.sqla.models import SqlaTable
    from superset.explore.form_data.utils import check_access
    from superset.models.slice import Slice

    mocker.patch(dataset_find_by_id, return_value=SqlaTable())
    mocker.patch(can_access_datasource, return_value=True)
    mocker.patch(is_user_admin, return_value=True)
    mocker.patch(chart_find_by_id, return_value=Slice())
    assert check_access(dataset_id=1, chart_id=1, actor=User()) == True
def create_slice(
    datasource_id: Optional[int], name: Optional[str], owners: Optional[List[User]]
) -> Slice:
    name = name or random_str()
    owners = owners or []
    datasource_id = (
        datasource_id or create_datasource_table_to_db(name=name + "_table").id
    )
    return Slice(
        slice_name=name,
        datasource_id=datasource_id,
        owners=owners,
        datasource_type="table",
    )
def test_saved_chart_no_access(mocker: MockFixture, app_context: AppContext) -> None:
    from superset.connectors.sqla.models import SqlaTable
    from superset.explore.form_data.utils import check_access
    from superset.models.slice import Slice

    with raises(ChartAccessDeniedError):
        mocker.patch(dataset_find_by_id, return_value=SqlaTable())
        mocker.patch(can_access_datasource, return_value=True)
        mocker.patch(is_user_admin, return_value=False)
        mocker.patch(is_owner, return_value=False)
        mocker.patch(can_access, return_value=False)
        mocker.patch(chart_find_by_id, return_value=Slice())
        check_access(dataset_id=1, chart_id=1, actor=User())
def test_saved_chart_is_admin(mocker: MockFixture) -> None:
    from superset.connectors.sqla.models import SqlaTable
    from superset.explore.utils import check_access as check_chart_access
    from superset.models.slice import Slice

    mocker.patch(dataset_find_by_id, return_value=SqlaTable())
    mocker.patch(can_access_datasource, return_value=True)
    mocker.patch(is_admin, return_value=True)
    mocker.patch(chart_find_by_id, return_value=Slice())
    with override_user(User()):
        check_chart_access(
            datasource_id=1,
            chart_id=1,
            datasource_type=DatasourceType.TABLE,
        )
def test_saved_chart_is_owner(mocker: MockFixture, app_context: AppContext) -> None:
    from superset.connectors.sqla.models import SqlaTable
    from superset.explore.utils import check_access as check_chart_access
    from superset.models.slice import Slice

    mocker.patch(dataset_find_by_id, return_value=SqlaTable())
    mocker.patch(can_access_datasource, return_value=True)
    mocker.patch(is_user_admin, return_value=False)
    mocker.patch(is_owner, return_value=True)
    mocker.patch(chart_find_by_id, return_value=Slice())
    check_chart_access(
        datasource_id=1,
        chart_id=1,
        actor=User(),
        datasource_type=DatasourceType.TABLE,
    )
def session_with_data(session: Session) -> Iterator[Session]:
    from superset.models.slice import Slice

    engine = session.get_bind()
    Slice.metadata.create_all(engine)  # pylint: disable=no-member

    slice_obj = Slice(
        id=1,
        datasource_id=1,
        datasource_type=DatasourceType.TABLE,
        datasource_name="tmp_perm_table",
        slice_name="slice_name",
    )

    session.add(slice_obj)
    session.commit()
    yield session
    session.rollback()
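# A minimal sketch of how a test might consume the generator above, assuming it
# is exposed as a pytest fixture and that a `session` fixture already exists.
# The fixture registration and the test body are illustrative, not from the
# original module.
import pytest


@pytest.fixture
def slice_session(session: Session) -> Iterator[Session]:
    yield from session_with_data(session)


def test_slice_row_exists(slice_session: Session) -> None:
    from superset.models.slice import Slice

    assert slice_session.query(Slice).filter_by(slice_name="slice_name").count() == 1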
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads object_hook parameter
    Recreates the dashboard object from a json representation.
    """
    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
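# A short usage sketch: the decoder above is designed to be plugged into
# json.loads via its object_hook parameter, which calls it for every decoded
# dict. The JSON payload here is an illustrative assumption.
payload = '{"__Slice__": {"slice_name": "Trends", "viz_type": "line"}}'
restored = json.loads(payload, object_hook=decode_dashboards)
assert isinstance(restored, Slice)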
def create_slice(self):
    with self.create_app().app_context():
        session = db.session
        dataset = (
            session.query(SqlaTable).filter_by(table_name="dummy_sql_table").first()
        )
        slice = Slice(
            datasource_id=dataset.id,
            datasource_type=DatasourceType.TABLE,
            datasource_name="tmp_perm_table",
            slice_name="slice_name",
        )

        session.add(slice)
        session.commit()

        yield slice

        # rollback
        session.delete(slice)
        session.commit()
def decode_dashboards(o):
    """
    Function to be passed into json.loads object_hook parameter
    Recreates the dashboard object from a json representation.
    """
    import superset.models.core as models

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    elif "__Slice__" in o:
        return Slice(**o["__Slice__"])
    elif "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    elif "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    elif "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    elif "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    else:
        return o
def insert_chart(
    self,
    slice_name: str,
    owners: List[int],
    datasource_id: int,
    created_by=None,
    datasource_type: str = "table",
    description: Optional[str] = None,
    viz_type: Optional[str] = None,
    params: Optional[str] = None,
    cache_timeout: Optional[int] = None,
    certified_by: Optional[str] = None,
    certification_details: Optional[str] = None,
) -> Slice:
    obj_owners = list()
    for owner in owners:
        user = db.session.query(security_manager.user_model).get(owner)
        obj_owners.append(user)
    datasource = (
        db.session.query(SqlaTable).filter_by(id=datasource_id).one_or_none()
    )
    slice = Slice(
        cache_timeout=cache_timeout,
        certified_by=certified_by,
        certification_details=certification_details,
        created_by=created_by,
        datasource_id=datasource.id,
        datasource_name=datasource.name,
        datasource_type=datasource.type,
        description=description,
        owners=obj_owners,
        params=params,
        slice_name=slice_name,
        viz_type=viz_type,
    )
    db.session.add(slice)
    db.session.commit()
    return slice
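# A minimal usage sketch for the helper above, e.g. from inside a test case
# that mixes it in. The owner id, the dataset lookup, and the params payload
# are illustrative assumptions, not from the original code.
def example_insert_chart(self) -> Slice:
    dataset = db.session.query(SqlaTable).filter_by(table_name="birth_names").one()
    return self.insert_chart(
        slice_name="certified example chart",
        owners=[1],
        datasource_id=dataset.id,
        viz_type="table",
        params='{"viz_type": "table", "metrics": ["count"]}',
        certified_by="QA team",
        certification_details="Verified against the source dataset",
    )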
def test_area_migrate(app_context: SupersetApp) -> None:
    from superset.models.slice import Slice

    slc = Slice(
        viz_type=MigrateAreaChart.source_viz_type,
        datasource_type="table",
        params=area_form_data,
        query_context=f'{{"form_data": {area_form_data}}}',
    )

    slc = MigrateAreaChart.upgrade_slice(slc)
    assert slc.viz_type == MigrateAreaChart.target_viz_type

    # verify form_data
    new_form_data = json.loads(slc.params)
    assert new_form_data["contributionMode"] == "row"
    assert "contribution" not in new_form_data
    assert new_form_data["show_extra_controls"] is True
    assert new_form_data["stack"] == "Stack"
    assert new_form_data["x_axis_title"] == "x asix label"
    assert new_form_data["x_axis_title_margin"] == 30
    assert json.dumps(new_form_data["form_data_bak"], sort_keys=True) == json.dumps(
        json.loads(area_form_data), sort_keys=True
    )

    # verify query_context
    new_query_context = json.loads(slc.query_context)
    assert (
        new_query_context["form_data"]["viz_type"] == MigrateAreaChart.target_viz_type
    )

    # downgrade
    slc = MigrateAreaChart.downgrade_slice(slc)
    assert slc.viz_type == MigrateAreaChart.source_viz_type
    assert json.dumps(json.loads(slc.params), sort_keys=True) == json.dumps(
        json.loads(area_form_data), sort_keys=True
    )
def load_birth_names(only_metadata: bool = False, force: bool = False, sample: bool = False) -> None: """Loading birth name dataset from a zip file in the repo""" # pylint: disable=too-many-locals tbl_name = "birth_names" database = get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): load_data(tbl_name, database, sample=sample) obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: print(f"Creating table [{tbl_name}] reference") obj = TBL(table_name=tbl_name) db.session.add(obj) obj.main_dttm_col = "ds" obj.database = database obj.filter_select_enabled = True if not any(col.column_name == "num_california" for col in obj.columns): col_state = str(column("state").compile(db.engine)) col_num = str(column("num").compile(db.engine)) obj.columns.append( TableColumn( column_name="num_california", expression= f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END", )) if not any(col.metric_name == "sum__num" for col in obj.metrics): col = str(column("num").compile(db.engine)) obj.metrics.append( SqlMetric(metric_name="sum__num", expression=f"SUM({col})")) db.session.commit() obj.fetch_metadata() tbl = obj metrics = [{ "expressionType": "SIMPLE", "column": { "column_name": "num", "type": "BIGINT" }, "aggregate": "SUM", "label": "Births", "optionName": "metric_11", }] metric = "sum__num" defaults = { "compare_lag": "10", "compare_suffix": "o10Y", "limit": "25", "granularity_sqla": "ds", "groupby": [], "row_limit": config["ROW_LIMIT"], "since": "100 years ago", "until": "now", "viz_type": "table", "markup_type": "markdown", } admin = security_manager.find_user("admin") print("Creating some slices") slices = [ Slice( slice_name="Participants", viz_type="big_number", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="big_number", granularity_sqla="ds", compare_lag="5", compare_suffix="over 5Y", metric=metric, ), ), Slice( slice_name="Genders", viz_type="pie", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(defaults, viz_type="pie", groupby=["gender"], metric=metric), ), Slice( slice_name="Trends", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="line", groupby=["name"], granularity_sqla="ds", rich_tooltip=True, show_legend=True, metrics=metrics, ), ), Slice( slice_name="Genders by State", viz_type="dist_bar", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, adhoc_filters=[{ "clause": "WHERE", "expressionType": "SIMPLE", "filterOptionName": "2745eae5", "comparator": ["other"], "operator": "NOT IN", "subject": "state", }], viz_type="dist_bar", metrics=[ { "expressionType": "SIMPLE", "column": { "column_name": "sum_boys", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Boys", "optionName": "metric_11", }, { "expressionType": "SIMPLE", "column": { "column_name": "sum_girls", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Girls", "optionName": "metric_12", }, ], groupby=["state"], ), ), Slice( slice_name="Girls", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, groupby=["name"], adhoc_filters=[gen_filter("gender", "girl")], row_limit=50, timeseries_limit_metric="sum__num", metrics=metrics, ), ), Slice( slice_name="Girl Name Cloud", viz_type="word_cloud", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="word_cloud", size_from="10", series="name", size_to="70", 
rotation="square", limit="100", adhoc_filters=[gen_filter("gender", "girl")], metric=metric, ), ), Slice( slice_name="Boys", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, groupby=["name"], adhoc_filters=[gen_filter("gender", "boy")], row_limit=50, metrics=metrics, ), ), Slice( slice_name="Boy Name Cloud", viz_type="word_cloud", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="word_cloud", size_from="10", series="name", size_to="70", rotation="square", limit="100", adhoc_filters=[gen_filter("gender", "boy")], metric=metric, ), ), Slice( slice_name="Top 10 Girl Name Share", viz_type="area", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, adhoc_filters=[gen_filter("gender", "girl")], comparison_type="values", groupby=["name"], limit=10, stacked_style="expand", time_grain_sqla="P1D", viz_type="area", x_axis_forma="smart_date", metrics=metrics, ), ), Slice( slice_name="Top 10 Boy Name Share", viz_type="area", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, adhoc_filters=[gen_filter("gender", "boy")], comparison_type="values", groupby=["name"], limit=10, stacked_style="expand", time_grain_sqla="P1D", viz_type="area", x_axis_forma="smart_date", metrics=metrics, ), ), ] misc_slices = [ Slice( slice_name="Average and Sum Trends", viz_type="dual_line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="dual_line", metric={ "expressionType": "SIMPLE", "column": { "column_name": "num", "type": "BIGINT(20)" }, "aggregate": "AVG", "label": "AVG(num)", "optionName": "metric_vgops097wej_g8uff99zhk7", }, metric_2="sum__num", granularity_sqla="ds", metrics=metrics, ), ), Slice( slice_name="Num Births Trend", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(defaults, viz_type="line", metrics=metrics), ), Slice( slice_name="Daily Totals", viz_type="table", datasource_type="table", datasource_id=tbl.id, created_by=admin, params=get_slice_json( defaults, groupby=["ds"], since="40 years ago", until="now", viz_type="table", metrics=metrics, ), ), Slice( slice_name="Number of California Births", viz_type="big_number_total", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, viz_type="big_number_total", granularity_sqla="ds", ), ), Slice( slice_name="Top 10 California Names Timeseries", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metrics=[{ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }], viz_type="line", granularity_sqla="ds", groupby=["name"], timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, limit="10", ), ), Slice( slice_name="Names Sorted by Num in California", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metrics=metrics, groupby=["name"], row_limit=50, timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": 
"num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, ), ), Slice( slice_name="Number of Girls", viz_type="big_number_total", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metric=metric, viz_type="big_number_total", granularity_sqla="ds", adhoc_filters=[gen_filter("gender", "girl")], subheader="total female participants", ), ), Slice( slice_name="Pivot Table", viz_type="pivot_table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="pivot_table", groupby=["name"], columns=["state"], metrics=metrics, ), ), ] for slc in slices: merge_slice(slc) for slc in misc_slices: merge_slice(slc) misc_dash_slices.add(slc.slice_name) print("Creating a dashboard") dash = db.session.query(Dashboard).filter_by(slug="births").first() if not dash: dash = Dashboard() db.session.add(dash) dash.published = True dash.json_metadata = textwrap.dedent("""\ { "label_colors": { "Girls": "#FF69B4", "Boys": "#ADD8E6", "girl": "#FF69B4", "boy": "#ADD8E6" } }""") js = textwrap.dedent( # pylint: disable=line-too-long """\ { "CHART-6GdlekVise": { "children": [], "id": "CHART-6GdlekVise", "meta": { "chartId": 5547, "height": 50, "sliceName": "Top 10 Girl Name Share", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" }, "CHART-6n9jxb30JG": { "children": [], "id": "CHART-6n9jxb30JG", "meta": { "chartId": 5540, "height": 36, "sliceName": "Genders by State", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW--EyBZQlDi" ], "type": "CHART" }, "CHART-Jj9qh1ol-N": { "children": [], "id": "CHART-Jj9qh1ol-N", "meta": { "chartId": 5545, "height": 50, "sliceName": "Boy Name Cloud", "width": 4 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" }, "CHART-ODvantb_bF": { "children": [], "id": "CHART-ODvantb_bF", "meta": { "chartId": 5548, "height": 50, "sliceName": "Top 10 Boy Name Share", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" }, "CHART-PAXUUqwmX9": { "children": [], "id": "CHART-PAXUUqwmX9", "meta": { "chartId": 5538, "height": 34, "sliceName": "Genders", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "CHART" }, "CHART-_T6n_K9iQN": { "children": [], "id": "CHART-_T6n_K9iQN", "meta": { "chartId": 5539, "height": 36, "sliceName": "Trends", "width": 7 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW--EyBZQlDi" ], "type": "CHART" }, "CHART-eNY0tcE_ic": { "children": [], "id": "CHART-eNY0tcE_ic", "meta": { "chartId": 5537, "height": 34, "sliceName": "Participants", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "CHART" }, "CHART-g075mMgyYb": { "children": [], "id": "CHART-g075mMgyYb", "meta": { "chartId": 5541, "height": 50, "sliceName": "Girls", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" }, "CHART-n-zGGE6S1y": { "children": [], "id": "CHART-n-zGGE6S1y", "meta": { "chartId": 5542, "height": 50, "sliceName": "Girl Name Cloud", "width": 4 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" }, "CHART-vJIPjmcbD3": { "children": [], "id": "CHART-vJIPjmcbD3", "meta": { "chartId": 5543, "height": 50, "sliceName": "Boys", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" }, "DASHBOARD_VERSION_KEY": "v2", "GRID_ID": { "children": [ "ROW-2n0XgiHDgs", "ROW--EyBZQlDi", "ROW-eh0w37bWbR", "ROW-kzWtcvo8R1" ], "id": "GRID_ID", "parents": [ "ROOT_ID" 
], "type": "GRID" }, "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "Births" }, "type": "HEADER" }, "MARKDOWN-zaflB60tbC": { "children": [], "id": "MARKDOWN-zaflB60tbC", "meta": { "code": "<div style=\\"text-align:center\\"> <h1>Birth Names Dashboard</h1> <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>", "height": 34, "width": 6 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "MARKDOWN" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW--EyBZQlDi": { "children": [ "CHART-_T6n_K9iQN", "CHART-6n9jxb30JG" ], "id": "ROW--EyBZQlDi", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" }, "ROW-2n0XgiHDgs": { "children": [ "CHART-eNY0tcE_ic", "MARKDOWN-zaflB60tbC", "CHART-PAXUUqwmX9" ], "id": "ROW-2n0XgiHDgs", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" }, "ROW-eh0w37bWbR": { "children": [ "CHART-g075mMgyYb", "CHART-n-zGGE6S1y", "CHART-6GdlekVise" ], "id": "ROW-eh0w37bWbR", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" }, "ROW-kzWtcvo8R1": { "children": [ "CHART-vJIPjmcbD3", "CHART-Jj9qh1ol-N", "CHART-ODvantb_bF" ], "id": "ROW-kzWtcvo8R1", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" } } """ # pylint: enable=line-too-long ) pos = json.loads(js) # dashboard v2 doesn't allow add markup slice dash.slices = [slc for slc in slices if slc.viz_type != "markup"] update_slice_ids(pos, dash.slices) dash.dashboard_title = "USA Births Names" dash.position_json = json.dumps(pos, indent=4) dash.slug = "births" db.session.commit()
def load_country_map_data(only_metadata: bool = False, force: bool = False) -> None: """Loading data for map with country map""" tbl_name = "birth_france_by_region" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): csv_bytes = get_example_data("birth_france_data_for_country_map.csv", make_bytes=False) data = pd.read_csv(csv_bytes, encoding="utf-8") data["dttm"] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=500, dtype={ "DEPT_ID": String(10), "2003": BigInteger, "2004": BigInteger, "2005": BigInteger, "2006": BigInteger, "2007": BigInteger, "2008": BigInteger, "2009": BigInteger, "2010": BigInteger, "2011": BigInteger, "2012": BigInteger, "2013": BigInteger, "2014": BigInteger, "dttm": Date(), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table reference") obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: obj = TBL(table_name=tbl_name) obj.main_dttm_col = "dttm" obj.database = database if not any(col.metric_name == "avg__2004" for col in obj.metrics): col = str(column("2004").compile(db.engine)) obj.metrics.append( SqlMetric(metric_name="avg__2004", expression=f"AVG({col})")) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "", "since": "", "until": "", "viz_type": "country_map", "entity": "DEPT_ID", "metric": { "expressionType": "SIMPLE", "column": { "type": "INT", "column_name": "2004" }, "aggregate": "AVG", "label": "Boys", "optionName": "metric_112342", }, "row_limit": 500000, } print("Creating a slice") slc = Slice( slice_name="Birth in France by department in 2016", viz_type="country_map", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_unicode_test_data(only_metadata: bool = False, force: bool = False, sample: bool = False) -> None: """Loading unicode test dataset from a csv file in the repo""" tbl_name = "unicode_test" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): data = get_example_data("unicode_utf8_unixnl_test.csv", is_gzip=False, make_bytes=True) df = pd.read_csv(data, encoding="utf-8") # generate date/numeric data df["dttm"] = datetime.datetime.now().date() df["value"] = [random.randint(1, 100) for _ in range(len(df))] df = df.head(100) if sample else df df.to_sql( # pylint: disable=no-member tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=500, dtype={ "phrase": String(500), "short_phrase": String(10), "with_missing": String(100), "dttm": Date(), "value": Float(), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table [unicode_test] reference") obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: obj = TBL(table_name=tbl_name) obj.main_dttm_col = "dttm" obj.database = database db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "dttm", "groupby": [], "metric": { "aggregate": "SUM", "column": { "column_name": "value" }, "expressionType": "SIMPLE", "label": "Value", }, "row_limit": config["ROW_LIMIT"], "since": "100 years ago", "until": "now", "viz_type": "word_cloud", "size_from": "10", "series": "short_phrase", "size_to": "70", "rotation": "square", "limit": "100", } print("Creating a slice") slc = Slice( slice_name="Unicode Cloud", viz_type="word_cloud", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) print("Creating a dashboard") dash = db.session.query(Dashboard).filter_by(slug="unicode-test").first() if not dash: dash = Dashboard() js = """\ { "CHART-Hkx6154FEm": { "children": [], "id": "CHART-Hkx6154FEm", "meta": { "chartId": 2225, "height": 30, "sliceName": "slice 1", "width": 4 }, "type": "CHART" }, "GRID_ID": { "children": [ "ROW-SyT19EFEQ" ], "id": "GRID_ID", "type": "GRID" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-SyT19EFEQ": { "children": [ "CHART-Hkx6154FEm" ], "id": "ROW-SyT19EFEQ", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """ dash.dashboard_title = "Unicode Test" pos = json.loads(js) update_slice_ids(pos, [slc]) dash.position_json = json.dumps(pos, indent=4) dash.slug = "unicode-test" dash.slices = [slc] db.session.merge(dash) db.session.commit()
def load_deck_dash() -> None: # pylint: disable=too-many-statements print("Loading deck.gl dashboard") slices = [] table = get_table_connector_registry() tbl = db.session.query(table).filter_by(table_name="long_lat").first() slice_data = { "spatial": { "type": "latlong", "lonCol": "LON", "latCol": "LAT" }, "color_picker": COLOR_RED, "datasource": "5__table", "granularity_sqla": None, "groupby": [], "mapbox_style": "mapbox://styles/mapbox/light-v9", "multiplier": 10, "point_radius_fixed": { "type": "metric", "value": "count" }, "point_unit": "square_m", "min_radius": 1, "max_radius": 250, "row_limit": 5000, "time_range": " : ", "size": "count", "time_grain_sqla": None, "viewport": { "bearing": -4.952916738791771, "latitude": 37.78926922909199, "longitude": -122.42613341901688, "pitch": 4.750411100577438, "zoom": 12.729132798697304, }, "viz_type": "deck_scatter", } print("Creating Scatterplot slice") slc = Slice( slice_name="Deck.gl Scatterplot", viz_type="deck_scatter", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slice_data = { "point_unit": "square_m", "row_limit": 5000, "spatial": { "type": "latlong", "lonCol": "LON", "latCol": "LAT" }, "mapbox_style": "mapbox://styles/mapbox/dark-v9", "granularity_sqla": None, "size": "count", "viz_type": "deck_screengrid", "time_range": "No filter", "point_radius": "Auto", "color_picker": { "a": 1, "r": 14, "b": 0, "g": 255 }, "grid_size": 20, "viewport": { "zoom": 14.161641703941438, "longitude": -122.41827069521386, "bearing": -4.952916738791771, "latitude": 37.76024135844065, "pitch": 4.750411100577438, }, "point_radius_fixed": { "type": "fix", "value": 2000 }, "datasource": "5__table", "time_grain_sqla": None, "groupby": [], } print("Creating Screen Grid slice") slc = Slice( slice_name="Deck.gl Screen grid", viz_type="deck_screengrid", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slice_data = { "spatial": { "type": "latlong", "lonCol": "LON", "latCol": "LAT" }, "row_limit": 5000, "mapbox_style": "mapbox://styles/mapbox/streets-v9", "granularity_sqla": None, "size": "count", "viz_type": "deck_hex", "time_range": "No filter", "point_radius_unit": "Pixels", "point_radius": "Auto", "color_picker": { "a": 1, "r": 14, "b": 0, "g": 255 }, "grid_size": 40, "extruded": True, "viewport": { "latitude": 37.789795085160335, "pitch": 54.08961642447763, "zoom": 13.835465702403654, "longitude": -122.40632230075536, "bearing": -2.3984797349335167, }, "point_radius_fixed": { "type": "fix", "value": 2000 }, "datasource": "5__table", "time_grain_sqla": None, "groupby": [], } print("Creating Hex slice") slc = Slice( slice_name="Deck.gl Hexagons", viz_type="deck_hex", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slice_data = { "autozoom": False, "spatial": { "type": "latlong", "lonCol": "LON", "latCol": "LAT" }, "row_limit": 5000, "mapbox_style": "mapbox://styles/mapbox/satellite-streets-v9", "granularity_sqla": None, "size": "count", "viz_type": "deck_grid", "point_radius_unit": "Pixels", "point_radius": "Auto", "time_range": "No filter", "color_picker": { "a": 1, "r": 14, "b": 0, "g": 255 }, "grid_size": 120, "extruded": True, "viewport": { "longitude": -122.42066918995666, "bearing": 155.80099696026355, "zoom": 12.699690845482069, "latitude": 37.7942314882596, "pitch": 53.470800300695146, }, "point_radius_fixed": { "type": "fix", "value": 2000 }, 
"datasource": "5__table", "time_grain_sqla": None, "groupby": [], } print("Creating Grid slice") slc = Slice( slice_name="Deck.gl Grid", viz_type="deck_grid", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) polygon_tbl = (db.session.query(table).filter_by( table_name="sf_population_polygons").first()) slice_data = { "datasource": "11__table", "viz_type": "deck_polygon", "slice_id": 41, "granularity_sqla": None, "time_grain_sqla": None, "time_range": " : ", "line_column": "contour", "metric": { "aggregate": "SUM", "column": { "column_name": "population", "description": None, "expression": None, "filterable": True, "groupby": True, "id": 1332, "is_dttm": False, "optionName": "_col_population", "python_date_format": None, "type": "BIGINT", "verbose_name": None, }, "expressionType": "SIMPLE", "hasCustomLabel": True, "label": "Population", "optionName": "metric_t2v4qbfiz1_w6qgpx4h2p", "sqlExpression": None, }, "line_type": "json", "linear_color_scheme": "oranges", "mapbox_style": "mapbox://styles/mapbox/light-v9", "viewport": { "longitude": -122.43388541747726, "latitude": 37.752020331384834, "zoom": 11.133995608594631, "bearing": 37.89506450385642, "pitch": 60, "width": 667, "height": 906, "altitude": 1.5, "maxZoom": 20, "minZoom": 0, "maxPitch": 60, "minPitch": 0, "maxLatitude": 85.05113, "minLatitude": -85.05113, }, "reverse_long_lat": False, "fill_color_picker": { "r": 3, "g": 65, "b": 73, "a": 1 }, "stroke_color_picker": { "r": 0, "g": 122, "b": 135, "a": 1 }, "filled": True, "stroked": False, "extruded": True, "multiplier": 0.1, "point_radius_fixed": { "type": "metric", "value": { "aggregate": None, "column": None, "expressionType": "SQL", "hasCustomLabel": None, "label": "Density", "optionName": "metric_c5rvwrzoo86_293h6yrv2ic", "sqlExpression": "SUM(population)/SUM(area)", }, }, "js_columns": [], "js_data_mutator": "", "js_tooltip": "", "js_onclick_href": "", "legend_format": ".1s", "legend_position": "tr", } print("Creating Polygon slice") slc = Slice( slice_name="Deck.gl Polygons", viz_type="deck_polygon", datasource_type="table", datasource_id=polygon_tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slice_data = { "datasource": "10__table", "viz_type": "deck_arc", "slice_id": 42, "granularity_sqla": None, "time_grain_sqla": None, "time_range": " : ", "start_spatial": { "type": "latlong", "latCol": "LATITUDE", "lonCol": "LONGITUDE", }, "end_spatial": { "type": "latlong", "latCol": "LATITUDE_DEST", "lonCol": "LONGITUDE_DEST", }, "row_limit": 5000, "mapbox_style": "mapbox://styles/mapbox/light-v9", "viewport": { "altitude": 1.5, "bearing": 8.546256357301871, "height": 642, "latitude": 44.596651438714254, "longitude": -91.84340711201104, "maxLatitude": 85.05113, "maxPitch": 60, "maxZoom": 20, "minLatitude": -85.05113, "minPitch": 0, "minZoom": 0, "pitch": 60, "width": 997, "zoom": 2.929837070560775, }, "color_picker": { "r": 0, "g": 122, "b": 135, "a": 1 }, "stroke_width": 1, } print("Creating Arc slice") slc = Slice( slice_name="Deck.gl Arcs", viz_type="deck_arc", datasource_type="table", datasource_id=db.session.query(table).filter_by( table_name="flights").first().id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slice_data = { "datasource": "12__table", "slice_id": 43, "viz_type": "deck_path", "time_grain_sqla": None, "time_range": " : ", "line_column": "path_json", "line_type": "json", "row_limit": 5000, "mapbox_style": "mapbox://styles/mapbox/light-v9", 
"viewport": { "longitude": -122.18885402582598, "latitude": 37.73671752604488, "zoom": 9.51847667620428, "bearing": 0, "pitch": 0, "width": 669, "height": 1094, "altitude": 1.5, "maxZoom": 20, "minZoom": 0, "maxPitch": 60, "minPitch": 0, "maxLatitude": 85.05113, "minLatitude": -85.05113, }, "color_picker": { "r": 0, "g": 122, "b": 135, "a": 1 }, "line_width": 150, "reverse_long_lat": False, "js_columns": ["color"], "js_data_mutator": "data => data.map(d => ({\n" " ...d,\n" " color: colors.hexToRGB(d.extraProps.color)\n" "}));", "js_tooltip": "", "js_onclick_href": "", } print("Creating Path slice") slc = Slice( slice_name="Deck.gl Path", viz_type="deck_path", datasource_type="table", datasource_id=db.session.query(table).filter_by( table_name="bart_lines").first().id, params=get_slice_json(slice_data), ) merge_slice(slc) slices.append(slc) slug = "deck" print("Creating a dashboard") title = "deck.gl Demo" dash = db.session.query(Dashboard).filter_by(slug=slug).first() if not dash: dash = Dashboard() dash.published = True js = POSITION_JSON pos = json.loads(js) slices = update_slice_ids(pos) dash.position_json = json.dumps(pos, indent=4) dash.dashboard_title = title dash.slug = slug dash.slices = slices db.session.merge(dash) db.session.commit()
def create_slices(tbl: BaseDatasource, admin_owner: bool) -> Tuple[List[Slice], List[Slice]]: metrics = [{ "expressionType": "SIMPLE", "column": { "column_name": "num", "type": "BIGINT" }, "aggregate": "SUM", "label": "Births", "optionName": "metric_11", }] metric = "sum__num" defaults = { "compare_lag": "10", "compare_suffix": "o10Y", "limit": "25", "time_range": "No filter", "time_range_endpoints": ["inclusive", "exclusive"], "granularity_sqla": "ds", "groupby": [], "row_limit": config["ROW_LIMIT"], "since": "100 years ago", "until": "now", "viz_type": "table", "markup_type": "markdown", } if admin_owner: slice_props = dict( datasource_id=tbl.id, datasource_type="table", owners=[admin], created_by=admin, ) else: slice_props = dict(datasource_id=tbl.id, datasource_type="table", owners=[], created_by=admin) print("Creating some slices") slices = [ Slice( **slice_props, slice_name="Participants", viz_type="big_number", params=get_slice_json( defaults, viz_type="big_number", granularity_sqla="ds", compare_lag="5", compare_suffix="over 5Y", metric=metric, ), ), Slice( **slice_props, slice_name="Genders", viz_type="pie", params=get_slice_json(defaults, viz_type="pie", groupby=["gender"], metric=metric), ), Slice( **slice_props, slice_name="Trends", viz_type="line", params=get_slice_json( defaults, viz_type="line", groupby=["name"], granularity_sqla="ds", rich_tooltip=True, show_legend=True, metrics=metrics, ), ), Slice( **slice_props, slice_name="Genders by State", viz_type="dist_bar", params=get_slice_json( defaults, adhoc_filters=[{ "clause": "WHERE", "expressionType": "SIMPLE", "filterOptionName": "2745eae5", "comparator": ["other"], "operator": "NOT IN", "subject": "state", }], viz_type="dist_bar", metrics=[ { "expressionType": "SIMPLE", "column": { "column_name": "num_boys", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Boys", "optionName": "metric_11", }, { "expressionType": "SIMPLE", "column": { "column_name": "num_girls", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Girls", "optionName": "metric_12", }, ], groupby=["state"], ), ), Slice( **slice_props, slice_name="Girls", viz_type="table", params=get_slice_json( defaults, groupby=["name"], adhoc_filters=[gen_filter("gender", "girl")], row_limit=50, timeseries_limit_metric="sum__num", metrics=metrics, ), ), Slice( **slice_props, slice_name="Girl Name Cloud", viz_type="word_cloud", params=get_slice_json( defaults, viz_type="word_cloud", size_from="10", series="name", size_to="70", rotation="square", limit="100", adhoc_filters=[gen_filter("gender", "girl")], metric=metric, ), ), Slice( **slice_props, slice_name="Boys", viz_type="table", params=get_slice_json( defaults, groupby=["name"], adhoc_filters=[gen_filter("gender", "boy")], row_limit=50, metrics=metrics, ), ), Slice( **slice_props, slice_name="Boy Name Cloud", viz_type="word_cloud", params=get_slice_json( defaults, viz_type="word_cloud", size_from="10", series="name", size_to="70", rotation="square", limit="100", adhoc_filters=[gen_filter("gender", "boy")], metric=metric, ), ), Slice( **slice_props, slice_name="Top 10 Girl Name Share", viz_type="area", params=get_slice_json( defaults, adhoc_filters=[gen_filter("gender", "girl")], comparison_type="values", groupby=["name"], limit=10, stacked_style="expand", time_grain_sqla="P1D", viz_type="area", x_axis_forma="smart_date", metrics=metrics, ), ), Slice( **slice_props, slice_name="Top 10 Boy Name Share", viz_type="area", params=get_slice_json( defaults, adhoc_filters=[gen_filter("gender", "boy")], 
comparison_type="values", groupby=["name"], limit=10, stacked_style="expand", time_grain_sqla="P1D", viz_type="area", x_axis_forma="smart_date", metrics=metrics, ), ), ] misc_slices = [ Slice( **slice_props, slice_name="Average and Sum Trends", viz_type="dual_line", params=get_slice_json( defaults, viz_type="dual_line", metric={ "expressionType": "SIMPLE", "column": { "column_name": "num", "type": "BIGINT(20)" }, "aggregate": "AVG", "label": "AVG(num)", "optionName": "metric_vgops097wej_g8uff99zhk7", }, metric_2="sum__num", granularity_sqla="ds", metrics=metrics, ), ), Slice( **slice_props, slice_name="Num Births Trend", viz_type="line", params=get_slice_json(defaults, viz_type="line", metrics=metrics), ), Slice( **slice_props, slice_name="Daily Totals", viz_type="table", params=get_slice_json( defaults, groupby=["ds"], since="40 years ago", until="now", viz_type="table", metrics=metrics, ), ), Slice( **slice_props, slice_name="Number of California Births", viz_type="big_number_total", params=get_slice_json( defaults, metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, viz_type="big_number_total", granularity_sqla="ds", ), ), Slice( **slice_props, slice_name="Top 10 California Names Timeseries", viz_type="line", params=get_slice_json( defaults, metrics=[{ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }], viz_type="line", granularity_sqla="ds", groupby=["name"], timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, limit="10", ), ), Slice( **slice_props, slice_name="Names Sorted by Num in California", viz_type="table", params=get_slice_json( defaults, metrics=metrics, groupby=["name"], row_limit=50, timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, ), ), Slice( **slice_props, slice_name="Number of Girls", viz_type="big_number_total", params=get_slice_json( defaults, metric=metric, viz_type="big_number_total", granularity_sqla="ds", adhoc_filters=[gen_filter("gender", "girl")], subheader="total female participants", ), ), Slice( **slice_props, slice_name="Pivot Table", viz_type="pivot_table", params=get_slice_json( defaults, viz_type="pivot_table", groupby=["name"], columns=["state"], metrics=metrics, ), ), ] for slc in slices: merge_slice(slc) for slc in misc_slices: merge_slice(slc) misc_dash_slices.add(slc.slice_name) return slices, misc_slices
def load_long_lat_data(only_metadata: bool = False, force: bool = False) -> None: """Loading lat/long data from a csv file in the repo""" tbl_name = "long_lat" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): data = get_example_data("san_francisco.csv", make_bytes=False) pdf = pd.read_csv(data, encoding="utf-8") start = datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0 ) pdf["datetime"] = [ start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1)) for i in range(len(pdf)) ] pdf["occupancy"] = [random.randint(1, 6) for _ in range(len(pdf))] pdf["radius_miles"] = [random.uniform(1, 3) for _ in range(len(pdf))] pdf["geohash"] = pdf[["LAT", "LON"]].apply( lambda x: geohash.encode(*x), axis=1) pdf["delimited"] = pdf["LAT"].map( str).str.cat(pdf["LON"].map(str), sep=",") pdf.to_sql( # pylint: disable=no-member tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=500, dtype={ "longitude": Float(), "latitude": Float(), "number": Float(), "street": String(100), "unit": String(10), "city": String(50), "district": String(50), "region": String(50), "postcode": Float(), "id": String(100), "datetime": DateTime(), "occupancy": Float(), "radius_miles": Float(), "geohash": String(12), "delimited": String(60), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table reference") obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: obj = TBL(table_name=tbl_name) obj.main_dttm_col = "datetime" obj.database = database db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "day", "since": "2014-01-01", "until": "now", "viz_type": "mapbox", "all_columns_x": "LON", "all_columns_y": "LAT", "mapbox_style": "mapbox://styles/mapbox/light-v9", "all_columns": ["occupancy"], "row_limit": 500000, } print("Creating a slice") slc = Slice( slice_name="Mapbox Long/Lat", viz_type="mapbox", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_world_bank_health_n_pop(only_metadata=False, force=False): # pylint: disable=too-many-locals """Loads the world bank health dataset, slices and a dashboard""" tbl_name = "wb_health_population" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): data = get_example_data("countries.json.gz") pdf = pd.read_json(data) pdf.columns = [col.replace(".", "_") for col in pdf.columns] pdf.year = pd.to_datetime(pdf.year) pdf.to_sql( tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=50, dtype={ "year": DateTime(), "country_code": String(3), "country_name": String(255), "region": String(255), }, index=False, ) print("Creating table [wb_health_population] reference") tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = utils.readfile( os.path.join(EXAMPLES_FOLDER, "countries.md")) tbl.main_dttm_col = "year" tbl.database = database tbl.filter_select_enabled = True metrics = [ "sum__SP_POP_TOTL", "sum__SH_DYN_AIDS", "sum__SH_DYN_AIDS", "sum__SP_RUR_TOTL_ZS", "sum__SP_DYN_LE00_IN", "sum__SP_RUR_TOTL", ] for metric in metrics: if not any(col.metric_name == metric for col in tbl.metrics): aggr_func = metric[:3] col = str(column(metric[5:]).compile(db.engine)) tbl.metrics.append( SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})")) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() defaults = { "compare_lag": "10", "compare_suffix": "o10Y", "limit": "25", "granularity_sqla": "year", "groupby": [], "metric": "sum__SP_POP_TOTL", "metrics": ["sum__SP_POP_TOTL"], "row_limit": config["ROW_LIMIT"], "since": "2014-01-01", "until": "2014-01-02", "time_range": "2014-01-01 : 2014-01-02", "markup_type": "markdown", "country_fieldtype": "cca3", "secondary_metric": { "aggregate": "SUM", "column": { "column_name": "SP_RUR_TOTL", "optionName": "_col_SP_RUR_TOTL", "type": "DOUBLE", }, "expressionType": "SIMPLE", "hasCustomLabel": True, "label": "Rural Population", }, "entity": "country_code", "show_bubbles": True, } print("Creating slices") slices = [ Slice( slice_name="Region Filter", viz_type="filter_box", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="filter_box", date_filter=False, filter_configs=[ { "asc": False, "clearable": True, "column": "region", "key": "2s98dfu", "metric": "sum__SP_POP_TOTL", "multiple": True, }, { "asc": False, "clearable": True, "key": "li3j2lk", "column": "country_name", "metric": "sum__SP_POP_TOTL", "multiple": True, }, ], ), ), Slice( slice_name="World's Population", viz_type="big_number", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="2000", viz_type="big_number", compare_lag="10", metric="sum__SP_POP_TOTL", compare_suffix="over 10Y", ), ), Slice( slice_name="Most Populated Countries", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="table", metrics=["sum__SP_POP_TOTL"], groupby=["country_name"], ), ), Slice( slice_name="Growth Rate", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="line", since="1960-01-01", metrics=["sum__SP_POP_TOTL"], num_period_compare="10", groupby=["country_name"], ), ), Slice( slice_name="% Rural", viz_type="world_map", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="world_map", metric="sum__SP_RUR_TOTL_ZS", 
num_period_compare="10", ), ), Slice( slice_name="Life Expectancy VS Rural %", viz_type="bubble", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="bubble", since="2011-01-01", until="2011-01-02", series="region", limit=0, entity="country_name", x="sum__SP_RUR_TOTL_ZS", y="sum__SP_DYN_LE00_IN", size="sum__SP_POP_TOTL", max_bubble_size="50", adhoc_filters=[{ "clause": "WHERE", "expressionType": "SIMPLE", "filterOptionName": "2745eae5", "comparator": [ "TCA", "MNP", "DMA", "MHL", "MCO", "SXM", "CYM", "TUV", "IMY", "KNA", "ASM", "ADO", "AMA", "PLW", ], "operator": "not in", "subject": "country_code", }], ), ), Slice( slice_name="Rural Breakdown", viz_type="sunburst", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="sunburst", groupby=["region", "country_name"], since="2011-01-01", until="2011-01-01", ), ), Slice( slice_name="World's Pop Growth", viz_type="area", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", viz_type="area", groupby=["region"], ), ), Slice( slice_name="Box plot", viz_type="box_plot", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", whisker_options="Min/max (no outliers)", x_ticks_layout="staggered", viz_type="box_plot", groupby=["region"], ), ), Slice( slice_name="Treemap", viz_type="treemap", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", viz_type="treemap", metrics=["sum__SP_POP_TOTL"], groupby=["region", "country_code"], ), ), Slice( slice_name="Parallel Coordinates", viz_type="para", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="2011-01-01", until="2011-01-01", viz_type="para", limit=100, metrics=[ "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS" ], secondary_metric="sum__SP_POP_TOTL", series="country_name", ), ), ] misc_dash_slices.add(slices[-1].slice_name) for slc in slices: merge_slice(slc) print("Creating a World's Health Bank dashboard") dash_name = "World Bank's Data" slug = "world_health" dash = db.session.query(Dashboard).filter_by(slug=slug).first() if not dash: dash = Dashboard() dash.published = True js = textwrap.dedent("""\ { "CHART-36bfc934": { "children": [], "id": "CHART-36bfc934", "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 }, "type": "CHART" }, "CHART-37982887": { "children": [], "id": "CHART-37982887", "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 }, "type": "CHART" }, "CHART-17e0f8d8": { "children": [], "id": "CHART-17e0f8d8", "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 }, "type": "CHART" }, "CHART-2ee52f30": { "children": [], "id": "CHART-2ee52f30", "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 }, "type": "CHART" }, "CHART-2d5b6871": { "children": [], "id": "CHART-2d5b6871", "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 }, "type": "CHART" }, "CHART-0fd0d252": { "children": [], "id": "CHART-0fd0d252", "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 }, "type": "CHART" }, "CHART-97f4cb48": { "children": [], "id": "CHART-97f4cb48", "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 }, "type": "CHART" }, "CHART-b5e05d6f": { "children": [], "id": "CHART-b5e05d6f", "meta": { 
"chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 }, "type": "CHART" }, "CHART-e76e9f5f": { "children": [], "id": "CHART-e76e9f5f", "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 }, "type": "CHART" }, "CHART-a4808bba": { "children": [], "id": "CHART-a4808bba", "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 }, "type": "CHART" }, "COLUMN-071bbbad": { "children": [ "ROW-1e064e3c", "ROW-afdefba9" ], "id": "COLUMN-071bbbad", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 }, "type": "COLUMN" }, "COLUMN-fe3914b8": { "children": [ "CHART-36bfc934", "CHART-37982887" ], "id": "COLUMN-fe3914b8", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 }, "type": "COLUMN" }, "GRID_ID": { "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ], "id": "GRID_ID", "type": "GRID" }, "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "World's Bank Data" }, "type": "HEADER" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-1e064e3c": { "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ], "id": "ROW-1e064e3c", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-3fa26c5d": { "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ], "id": "ROW-3fa26c5d", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-46632bc2": { "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ], "id": "ROW-46632bc2", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-812b3f13": { "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ], "id": "ROW-812b3f13", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-afdefba9": { "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ], "id": "ROW-afdefba9", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) update_slice_ids(pos, slices) dash.dashboard_title = dash_name dash.position_json = json.dumps(pos, indent=4) dash.slug = slug dash.slices = slices[:-1] db.session.merge(dash) db.session.commit()
def load_energy(only_metadata: bool = False, force: bool = False, sample: bool = False) -> None: """Loads an energy related dataset to use with sankey and graphs""" tbl_name = "energy_usage" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): data = get_example_data("energy.json.gz") pdf = pd.read_json(data) pdf = pdf.head(100) if sample else pdf pdf.to_sql( tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=500, dtype={ "source": String(255), "target": String(255), "value": Float() }, index=False, method="multi", ) print("Creating table [wb_health_population] reference") table = get_table_connector_registry() tbl = db.session.query(table).filter_by(table_name=tbl_name).first() if not tbl: tbl = table(table_name=tbl_name) tbl.description = "Energy consumption" tbl.database = database tbl.filter_select_enabled = True if not any(col.metric_name == "sum__value" for col in tbl.metrics): col = str(column("value").compile(db.engine)) tbl.metrics.append( SqlMetric(metric_name="sum__value", expression=f"SUM({col})")) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() slc = Slice( slice_name="Energy Sankey", viz_type="sankey", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent("""\ { "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "metric": "sum__value", "row_limit": "5000", "slice_name": "Energy Sankey", "viz_type": "sankey" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name="Energy Force Layout", viz_type="graph_chart", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent("""\ { "source": "source", "target": "target", "edgeLength": 400, "repulsion": 1000, "layout": "force", "metric": "sum__value", "row_limit": "5000", "slice_name": "Force", "viz_type": "graph_chart" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name="Heatmap", viz_type="heatmap", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent("""\ { "all_columns_x": "source", "all_columns_y": "target", "canvas_image_rendering": "pixelated", "collapsed_fieldsets": "", "linear_color_scheme": "blue_white_yellow", "metric": "sum__value", "normalize_across": "heatmap", "slice_name": "Heatmap", "viz_type": "heatmap", "xscale_interval": "1", "yscale_interval": "1" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)