Exemplo n.º 1
0
def sample_metrics() -> Dict["SqlMetric", Dict[str, Any]]:
    """
    Return sample ``SqlMetric`` objects mapped to a dictionary describing each.

    Two metrics are produced: a ``COUNT(*)`` metric named ``cnt`` and an
    ``AVG(revenue)`` metric named ``avg revenue``.

    :returns: dictionary keyed by ``SqlMetric`` instance; each value holds the
        name, expression, JSON-encoded metric type and boolean flags paired
        with that metric.
    """
    # SqlMetric is imported inside the function rather than at module level.
    from superset.connectors.sqla.models import SqlMetric

    count_metric = SqlMetric(
        metric_name="cnt",
        expression="COUNT(*)",
        metric_type="COUNT",
    )
    count_description: Dict[str, Any] = {
        "name": "cnt",
        "expression": "COUNT(*)",
        "extra_json": '{"metric_type": "COUNT"}',
        "type": "UNKNOWN",
        "is_additive": True,
        "is_aggregation": True,
        "is_filterable": False,
        "is_physical": False,
    }

    average_metric = SqlMetric(
        metric_name="avg revenue",
        expression="AVG(revenue)",
        metric_type="AVG",
    )
    # Unlike the count metric, the average is flagged as non-additive.
    average_description: Dict[str, Any] = {
        "name": "avg revenue",
        "expression": "AVG(revenue)",
        "extra_json": '{"metric_type": "AVG"}',
        "type": "UNKNOWN",
        "is_additive": False,
        "is_aggregation": True,
        "is_filterable": False,
        "is_physical": False,
    }

    return {
        count_metric: count_description,
        average_metric: average_description,
    }
Exemplo n.º 2
0
    def create_table(self,
                     name,
                     schema='',
                     id=0,
                     cols_names=[],
                     metric_names=[]):
        """
        Build a ``SqlaTable`` together with a dictionary mirroring its contents.

        :param name: table name; ``NAME_PREFIX`` is prepended to it.
        :param schema: schema stored on the table and in the dictionary.
        :param id: identifier used as the table ``id`` and stored in the
            params under the ``DBREF`` key.
        :param cols_names: one ``TableColumn`` is appended per name.
        :param metric_names: one ``SqlMetric`` is appended per name.
        :returns: ``(table, dict_rep)`` tuple.
        """
        # The default lists are only iterated here, never mutated.
        prefixed_name = '{0}{1}'.format(NAME_PREFIX, name)
        params_json = json.dumps({DBREF: id, 'database_name': 'main'})

        dict_rep = {
            'database_id': get_main_database(db.session).id,
            'table_name': prefixed_name,
            'schema': schema,
            'id': id,
            'params': params_json,
            'columns': [{'column_name': col} for col in cols_names],
            'metrics': [{'metric_name': metric} for metric in metric_names],
        }

        table = SqlaTable(id=id,
                          schema=schema,
                          table_name=prefixed_name,
                          params=params_json)
        for col in cols_names:
            new_column = TableColumn(column_name=col)
            table.columns.append(new_column)
        for metric in metric_names:
            new_metric = SqlMetric(metric_name=metric)
            table.metrics.append(new_metric)
        return table, dict_rep
Exemplo n.º 3
0
    def create_table(
        self, name, schema=None, id=0, cols_names=[], cols_uuids=None, metric_names=[]
    ):
        """
        Build a ``SqlaTable`` and a dictionary representation of the same data.

        :param name: table name; ``NAME_PREFIX`` is prepended to it.
        :param schema: schema stored on the table and in the dictionary.
        :param id: identifier used as the table ``id`` and stored in the
            params under the ``DBREF`` key.
        :param cols_names: column names to create.
        :param cols_uuids: uuids paired positionally with ``cols_names``; when
            ``None`` every column gets a ``None`` uuid.
        :param metric_names: metric names; each metric gets an empty expression.
        :returns: ``(table, dict_rep)`` tuple.
        """
        prefixed_name = "{0}{1}".format(NAME_PREFIX, name)
        params_json = json.dumps({DBREF: id, "database_name": "main"})

        # Without explicit uuids, pair every column name with None.
        if cols_uuids is None:
            cols_uuids = [None] * len(cols_names)

        dict_rep = {
            "database_id": get_example_database().id,
            "table_name": prefixed_name,
            "schema": schema,
            "id": id,
            "params": params_json,
            "columns": [
                {"column_name": col, "uuid": col_uuid}
                for col, col_uuid in zip(cols_names, cols_uuids)
            ],
            "metrics": [
                {"metric_name": metric, "expression": ""} for metric in metric_names
            ],
        }

        table = SqlaTable(
            id=id,
            schema=schema,
            table_name=prefixed_name,
            params=params_json,
        )
        for col, col_uuid in zip(cols_names, cols_uuids):
            new_column = TableColumn(column_name=col, uuid=col_uuid)
            table.columns.append(new_column)
        for metric in metric_names:
            new_metric = SqlMetric(metric_name=metric, expression="")
            table.metrics.append(new_metric)
        return table, dict_rep
Exemplo n.º 4
0
    def test_query_cache_key_changes_when_metric_is_updated(self):
        """
        Check that editing a datasource metric yields a different query cache key.

        A metric is appended and committed, a baseline cache key is computed,
        the expression of ``datasource.metrics[0]`` is changed and committed,
        and a second cache key is computed from the same payload. The two keys
        must differ.
        """
        self.login(username="******")
        payload = get_query_context("birth_names")

        def current_cache_key():
            # Load a fresh QueryContext from the unchanged payload and return
            # the cache key of its first query.
            context = ChartDataQueryContextSchema().load(payload)
            first_query = context.queries[0]
            return context.query_cache_key(first_query)

        # make temporary change and revert it to refresh the changed_on property
        datasource_ref = payload["datasource"]
        datasource = DatasourceDAO.get_datasource(
            session=db.session,
            datasource_type=DatasourceType(datasource_ref["type"]),
            datasource_id=datasource_ref["id"],
        )

        temporary_metric = SqlMetric(metric_name="foo", expression="select 1;")
        datasource.metrics.append(temporary_metric)
        db.session.commit()

        # baseline key, taken before the metric is edited
        cache_key_original = current_cache_key()

        # wait a second since mysql records timestamps in second granularity
        time.sleep(1)

        datasource.metrics[0].expression = "select 2;"
        db.session.commit()

        # same payload, but the datasource has been updated in between
        cache_key_new = current_cache_key()

        # drop the datasource's metrics again before asserting
        datasource.metrics = []
        db.session.commit()

        # the new cache_key should be different due to updated datasource
        self.assertNotEqual(cache_key_original, cache_key_new)
Exemplo n.º 5
0
def _add_table_metrics(datasource: SqlaTable) -> None:
    """
    Ensure the datasource has the extra column and metric used by the examples.

    Adds a ``num_california`` expression column and a ``sum__num`` metric when
    they are missing, and marks the ``ds`` column (if present) as a datetime
    column.
    """
    # Read both attributes up front so `datasource.columns` and
    # `datasource.metrics` are already loaded; loading them later may trigger
    # an unnecessary and unexpected `after_update` event.
    columns = datasource.columns
    metrics = datasource.metrics

    has_california_column = any(
        existing.column_name == "num_california" for existing in columns)
    if not has_california_column:
        # Column names are compiled against the engine before being embedded
        # in the SQL expressions.
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        columns.append(california_column)

    has_sum_metric = any(
        existing.metric_name == "sum__num" for existing in metrics)
    if not has_sum_metric:
        num_name = str(column("num").compile(db.engine))
        sum_metric = SqlMetric(metric_name="sum__num",
                               expression=f"SUM({num_name})")
        metrics.append(sum_metric)

    # Only the first column named "ds" is flagged.
    ds_column = next(
        (existing for existing in columns if existing.column_name == "ds"),
        None,
    )
    if ds_column is not None:
        ds_column.is_dttm = True
Exemplo n.º 6
0
def decode_dashboards(o):
    """
    Hook for the ``object_hook`` parameter of ``json.loads``.

    Rebuilds model objects from their tagged JSON representation: when ``o``
    contains one of the ``__<Model>__`` markers, an empty instance of that
    model is created and its ``__dict__`` is updated from the stored
    attributes. A ``__datetime__`` marker is parsed into a ``datetime``; any
    other dictionary is returned unchanged.
    """
    import superset.models.core as models
    from superset.connectors.sqla.models import (
        SqlaTable, SqlMetric, TableColumn,
    )

    # Checked in this order; the first marker found in `o` wins.
    model_markers = (
        ('__Dashboard__', models.Dashboard),
        ('__Slice__', models.Slice),
        ('__TableColumn__', TableColumn),
        ('__SqlaTable__', SqlaTable),
        ('__SqlMetric__', SqlMetric),
    )
    for marker, model_cls in model_markers:
        if marker in o:
            instance = model_cls()
            # Attributes are written straight into the instance dictionary
            # rather than passed through the constructor.
            instance.__dict__.update(o[marker])
            return instance

    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
Exemplo n.º 7
0
def decode_dashboards(  # pylint: disable=too-many-return-statements
        o: Dict[str, Any]) -> Any:
    """
    Hook for the ``object_hook`` parameter of ``json.loads``.

    Rebuilds dashboard-related objects from their tagged JSON representation:
    the first ``__<Model>__`` marker found in ``o`` selects the class, which
    is instantiated with the stored attributes as keyword arguments. A
    ``__datetime__`` marker is parsed into a ``datetime``; any other
    dictionary is returned unchanged.
    """
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    # Checked in this order; the first marker found in `o` wins.
    model_markers = (
        ("__Dashboard__", Dashboard),
        ("__Slice__", Slice),
        ("__TableColumn__", TableColumn),
        ("__SqlaTable__", SqlaTable),
        ("__SqlMetric__", SqlMetric),
        ("__DruidCluster__", DruidCluster),
        ("__DruidColumn__", DruidColumn),
        ("__DruidDatasource__", DruidDatasource),
        ("__DruidMetric__", DruidMetric),
    )
    for marker, model_cls in model_markers:
        if marker in o:
            return model_cls(**o[marker])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return o
Exemplo n.º 8
0
def _create_energy_table(df: DataFrame, table_name: str):
    """
    Create the energy table in the example database and its slices.

    The table is created through ``create_table_for_dashboard``, a
    ``sum__value`` metric is added when missing, the table is merged and
    committed, and one slice is created per entry of ``_get_energy_slices()``.

    :param df: data passed on to ``create_table_for_dashboard``.
    :param table_name: name passed on to ``create_table_for_dashboard``.
    """
    database = get_example_database()
    column_types = {
        "source": String(255),
        "target": String(255),
        "value": Float(),
    }
    energy_table = create_table_for_dashboard(
        df, table_name, database, column_types, "Energy consumption")
    energy_table.fetch_metadata()

    # Add the metric only if no existing metric already carries its name.
    if all(metric.metric_name != "sum__value"
           for metric in energy_table.metrics):
        value_name = str(column("value").compile(db.engine))
        sum_metric = SqlMetric(metric_name="sum__value",
                               expression=f"SUM({value_name})")
        energy_table.metrics.append(sum_metric)

    db.session.merge(energy_table)
    db.session.commit()
    energy_table.fetch_metadata()

    for slice_data in _get_energy_slices():
        slice_args = [
            slice_data[key] for key in ("slice_title", "viz_type", "params")
        ]
        _create_and_commit_energy_slice(energy_table, *slice_args)
 def create_table(self, name, schema=None, id=0, cols_names=[], metric_names=[]):
     """
     Build a ``SqlaTable`` with the given columns and metrics attached.

     ``id`` is used as the table id and stored in the params as ``remote_id``;
     every metric is created with an empty expression.
     """
     # The default lists are only iterated here, never mutated.
     params_json = json.dumps({"remote_id": id, "database_name": "examples"})
     table = SqlaTable(
         id=id,
         schema=schema,
         table_name=name,
         params=params_json,
     )
     for col in cols_names:
         new_column = TableColumn(column_name=col)
         table.columns.append(new_column)
     for metric in metric_names:
         new_metric = SqlMetric(metric_name=metric, expression="")
         table.metrics.append(new_metric)
     return table
Exemplo n.º 10
0
def physical_dataset():
    """
    Generator that provides a ``SqlaTable`` backed by a real ``physical_dataset`` table.

    Before yielding, the table is created (if missing) in the example
    database, ten rows are inserted, and a ``SqlaTable`` with five columns and
    a ``count`` metric is merged and committed. After the ``yield`` the table
    is dropped and every ``SqlaTable`` named ``physical_dataset`` is deleted.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    create_table_sql = """
        CREATE TABLE IF NOT EXISTS physical_dataset(
          col1 INTEGER,
          col2 VARCHAR(255),
          col3 DECIMAL(4,2),
          col4 VARCHAR(255),
          col5 TIMESTAMP
        );
        """
    insert_rows_sql = """
        INSERT INTO physical_dataset values
        (0, 'a', 1.0, NULL, '2000-01-01 00:00:00'),
        (1, 'b', 1.1, NULL, '2000-01-02 00:00:00'),
        (2, 'c', 1.2, NULL, '2000-01-03 00:00:00'),
        (3, 'd', 1.3, NULL, '2000-01-04 00:00:00'),
        (4, 'e', 1.4, NULL, '2000-01-05 00:00:00'),
        (5, 'f', 1.5, NULL, '2000-01-06 00:00:00'),
        (6, 'g', 1.6, NULL, '2000-01-07 00:00:00'),
        (7, 'h', 1.7, NULL, '2000-01-08 00:00:00'),
        (8, 'i', 1.8, NULL, '2000-01-09 00:00:00'),
        (9, 'j', 1.9, NULL, '2000-01-10 00:00:00');
    """
    drop_table_sql = """
        DROP TABLE physical_dataset;
    """

    example_database = get_example_database()
    engine = example_database.get_sqla_engine()
    # sqlite can only execute one statement at a time
    engine.execute(create_table_sql)
    engine.execute(insert_rows_sql)

    dataset = SqlaTable(table_name="physical_dataset",
                        database=example_database)
    # Columns and the metric are attached to the dataset via `table=`.
    plain_columns = (
        ("col1", "INTEGER"),
        ("col2", "VARCHAR(255)"),
        ("col3", "DECIMAL(4,2)"),
        ("col4", "VARCHAR(255)"),
    )
    for column_name, column_type in plain_columns:
        TableColumn(column_name=column_name, type=column_type, table=dataset)
    TableColumn(column_name="col5", type="TIMESTAMP", is_dttm=True, table=dataset)
    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)
    db.session.commit()

    yield dataset

    # Teardown: drop the physical table, then delete the dataset records.
    engine.execute(drop_table_sql)
    stale_datasets = (
        db.session.query(SqlaTable)
        .filter_by(table_name="physical_dataset")
        .all()
    )
    for stale in stale_datasets:
        db.session.delete(stale)
    db.session.commit()
class SqlaConnectorTestCase(BaseConnectorTestCase):
    """Connector test case that runs against a ``SqlaTable`` stored in SQLite."""

    # Column definitions handed to the SqlaTable built in setUp; only
    # 'received' is flagged as a datetime column.
    columns = [
        TableColumn(column_name='region', type='VARCHAR(20)'),
        TableColumn(column_name='district', type='VARCHAR(20)'),
        TableColumn(column_name='project', type='VARCHAR(20)'),
        TableColumn(column_name='received', type='DATE', is_dttm=True),
        TableColumn(column_name='value', type='BIGINT'),
    ]
    # Metric definitions handed to the SqlaTable built in setUp: simple
    # aggregates plus 'custom' ratio/percentage expressions.
    metrics = [
        SqlMetric(metric_name='sum__value', metric_type='sum',
                  expression='SUM(value)'),
        SqlMetric(metric_name='avg__value', metric_type='avg',
                  expression='AVG(value)'),
        SqlMetric(metric_name='ratio', metric_type='avg',
                  expression='AVG(value/value2)'),
        SqlMetric(metric_name='value_percentage', metric_type='custom',
                  expression='SUM(value)/SUM(value + value2)'),
        SqlMetric(metric_name='category_percentage', metric_type='custom',
                  expression="SUM(CASE WHEN category='CategoryA' THEN 1 ELSE 0 END)/"
                             'CAST(COUNT(*) AS REAL)'),
    ]
    def setUp(self):
        """
        Create the SQLite-backed database and datasource and load the test data.

        ``self.df`` is not defined in this class; presumably it is provided by
        ``BaseConnectorTestCase`` -- confirm against the base class.
        """
        super(SqlaConnectorTestCase, self).setUp()
        # File-based SQLite database at a fixed path.
        sqlalchemy_uri = 'sqlite:////tmp/test.db'
        database = Database(
            database_name='test_database',
            sqlalchemy_uri=sqlalchemy_uri)
        # NOTE(review): this connection is not closed anywhere in the visible
        # code -- check whether a tearDown elsewhere handles it.
        self.connection = database.get_sqla_engine().connect()
        self.datasource = SqlaTable(table_name='test_datasource',
                                    database=database,
                                    columns=self.columns,
                                    metrics=self.metrics)
        # Write the dataframe into the datasource's table, replacing any
        # existing table of the same name; 'received' is stored as a Date.
        with database.get_sqla_engine().begin() as connection:
            self.df.to_sql(self.datasource.table_name,
                           connection,
                           if_exists='replace',
                           index=False,
                           dtype={'received': Date})
Exemplo n.º 12
0
 def create_table(
         self, name, schema='', id=0, cols_names=[], metric_names=[]):
     """
     Build a ``SqlaTable`` with the given columns and metrics attached.

     ``id`` is used as the table id and stored in the params as ``remote_id``.
     """
     # The default lists are only iterated here, never mutated.
     params_json = json.dumps({'remote_id': id, 'database_name': 'main'})
     table = SqlaTable(id=id,
                       schema=schema,
                       table_name=name,
                       params=params_json)
     for col in cols_names:
         new_column = TableColumn(column_name=col)
         table.columns.append(new_column)
     for metric in metric_names:
         new_metric = SqlMetric(metric_name=metric)
         table.metrics.append(new_metric)
     return table
Exemplo n.º 13
0
def text_column_table():
    """
    Generator that yields a virtual ``SqlaTable`` with a single text column.

    The table's SQL unions a handful of string values for column ``foo``: a
    plain word, an empty string, NULL, the word 'null', and values containing
    single or double quotes. The table is created and yielded inside the
    application context.
    """
    # Each fragment keeps its trailing space so the pieces join into one query.
    union_sql = ("SELECT 'foo' as foo "
                 "UNION SELECT '' "
                 "UNION SELECT NULL "
                 "UNION SELECT 'null' "
                 "UNION SELECT '\"text in double quotes\"' "
                 "UNION SELECT '''text in single quotes''' "
                 "UNION SELECT 'double quotes \" in text' "
                 "UNION SELECT 'single quotes '' in text' ")
    with app.app_context():
        table = SqlaTable(
            table_name="text_column_table",
            sql=union_sql,
            database=get_example_database(),
        )
        # The column and the metric are attached to the table via `table=`.
        TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        yield table
Exemplo n.º 14
0
def test_update_sqlatable_metric(mocker: MockFixture, app_context: None,
                                 session: Session) -> None:
    """
    Test that updating a ``SqlaTable`` also updates the corresponding ``Dataset``.

    The SQL expression of a metric belonging to a ``SqlaTable`` is changed and
    the test checks that the non-physical ``Column`` created for that metric
    reflects the new expression after a flush.
    """
    # Make the security manager hand out the test session.
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Dataset.metadata.create_all(bind)  # pylint: disable=no-member

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=[
            TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        ],
        metrics=[
            SqlMetric(metric_name="cnt", expression="COUNT(*)"),
        ],
        database=Database(database_name="my_database",
                          sqlalchemy_uri="sqlite://"),
    )
    session.add(sqla_table)
    session.flush()

    # The metric is expected to show up as the single non-physical column.
    metric_column = session.query(Column).filter_by(is_physical=False).one()
    assert metric_column.expression == "COUNT(*)"

    # Redefine the metric on the SqlaTable side and flush again.
    sqla_table.metrics[0].expression = "MAX(ds)"
    session.flush()

    assert metric_column.expression == "MAX(ds)"
Exemplo n.º 15
0
def test_should_generate_closed_and_open_time_filter_range():
    """
    Check that the time filter includes ``from_dttm`` and excludes ``to_dttm``.

    The test is skipped unless the backend is postgresql. A virtual table
    with five timestamps is queried for the range 2022-01-01 to 2023-01-01
    and the ``count`` metric is expected to be 2.
    """
    with app.app_context():
        if backend() != "postgresql":
            pytest.skip(
                f"{backend()} has different dialect for datetime column")

        timestamps_sql = ("SELECT '2021-12-31'::timestamp as datetime_col "
                          "UNION SELECT '2022-01-01'::timestamp "
                          "UNION SELECT '2022-03-10'::timestamp "
                          "UNION SELECT '2023-01-01'::timestamp "
                          "UNION SELECT '2023-03-10'::timestamp ")
        table = SqlaTable(
            table_name="temporal_column_table",
            sql=timestamps_sql,
            database=get_example_database(),
        )
        # The column and the metric are attached to the table via `table=`.
        TableColumn(column_name="datetime_col",
                    type="TIMESTAMP",
                    table=table,
                    is_dttm=True)
        SqlMetric(metric_name="count", expression="count(*)", table=table)

        query_obj = {
            "metrics": ["count"],
            "is_timeseries": False,
            "filter": [],
            "from_dttm": datetime(2022, 1, 1),
            "to_dttm": datetime(2023, 1, 1),
            "granularity": "datetime_col",
        }
        result_object = table.query(query_obj)
        # >>> result_object.query
        #     SELECT count(*) AS count
        #     FROM
        #       (SELECT '2021-12-31'::timestamp as datetime_col
        #        UNION SELECT '2022-01-01'::timestamp
        #        UNION SELECT '2022-03-10'::timestamp
        #        UNION SELECT '2023-01-01'::timestamp
        #        UNION SELECT '2023-03-10'::timestamp) AS virtual_table
        #     WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        #       AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        first_row = result_object.df.iloc[0]
        assert first_row["count"] == 2
Exemplo n.º 16
0
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Hook for the ``object_hook`` parameter of ``json.loads``.

    Rebuilds dashboard-related objects from their tagged JSON representation:
    the first ``__<Model>__`` marker found in ``o`` selects the class, which
    is instantiated with the stored attributes as keyword arguments. A
    ``__datetime__`` marker is parsed into a ``datetime``; any other
    dictionary is returned unchanged.
    """
    # Default: hand the dictionary back untouched.
    decoded: Any = o

    if "__Dashboard__" in o:
        decoded = Dashboard(**o["__Dashboard__"])
    elif "__Slice__" in o:
        decoded = Slice(**o["__Slice__"])
    elif "__TableColumn__" in o:
        decoded = TableColumn(**o["__TableColumn__"])
    elif "__SqlaTable__" in o:
        decoded = SqlaTable(**o["__SqlaTable__"])
    elif "__SqlMetric__" in o:
        decoded = SqlMetric(**o["__SqlMetric__"])
    elif "__datetime__" in o:
        # Timestamps are stored with second precision and no timezone.
        decoded = datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return decoded
Exemplo n.º 17
0
def virtual_dataset():
    """
    Generator that provides a virtual ``SqlaTable`` with ten literal rows.

    The dataset's SQL unions ten ``SELECT`` statements producing columns
    ``col1`` .. ``col5``. Five columns and a ``count`` metric are attached,
    the dataset is merged into the session and yielded; after the ``yield``
    it is deleted and the session is committed.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    rows_sql = (
        "SELECT 0 as col1, 'a' as col2, 1.0 as col3, NULL as col4, '2000-01-01 00:00:00' as col5 "
        "UNION ALL "
        "SELECT 1, 'b', 1.1, NULL, '2000-01-02 00:00:00' "
        "UNION ALL "
        "SELECT 2 as col1, 'c' as col2, 1.2, NULL, '2000-01-03 00:00:00' "
        "UNION ALL "
        "SELECT 3 as col1, 'd' as col2, 1.3, NULL, '2000-01-04 00:00:00' "
        "UNION ALL "
        "SELECT 4 as col1, 'e' as col2, 1.4, NULL, '2000-01-05 00:00:00' "
        "UNION ALL "
        "SELECT 5 as col1, 'f' as col2, 1.5, NULL, '2000-01-06 00:00:00' "
        "UNION ALL "
        "SELECT 6 as col1, 'g' as col2, 1.6, NULL, '2000-01-07 00:00:00' "
        "UNION ALL "
        "SELECT 7 as col1, 'h' as col2, 1.7, NULL, '2000-01-08 00:00:00' "
        "UNION ALL "
        "SELECT 8 as col1, 'i' as col2, 1.8, NULL, '2000-01-09 00:00:00' "
        "UNION ALL "
        "SELECT 9 as col1, 'j' as col2, 1.9, NULL, '2000-01-10 00:00:00' "
    )
    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=rows_sql,
        database=get_example_database(),
    )

    column_types = (
        ("col1", "INTEGER"),
        ("col2", "VARCHAR(255)"),
        ("col3", "DECIMAL(4,2)"),
        ("col4", "VARCHAR(255)"),
        # Different database dialect datetime type is not consistent, so
        # temporarily use varchar
        ("col5", "VARCHAR(255)"),
    )
    for column_name, column_type in column_types:
        TableColumn(column_name=column_name, type=column_type, table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    # Teardown once the consumer resumes the generator.
    db.session.delete(dataset)
    db.session.commit()
Exemplo n.º 18
0
def load_birth_names(only_metadata: bool = False,
                     force: bool = False,
                     sample: bool = False) -> None:
    """
    Load the birth names dataset and create its table reference and dashboard.

    :param only_metadata: when true, the data itself is not loaded.
    :param force: load the data even if the table already exists.
    :param sample: passed through to ``load_data``.
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    needs_data = not table_exists or force
    if not only_metadata and needs_data:
        load_data(tbl_name, database, sample=sample)

    table_ref = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not table_ref:
        print(f"Creating table [{tbl_name}] reference")
        table_ref = TBL(table_name=tbl_name)
        db.session.add(table_ref)
    table_ref.main_dttm_col = "ds"
    table_ref.database = database
    table_ref.filter_select_enabled = True
    table_ref.fetch_metadata()

    # Add the derived column only when no column already carries its name.
    if all(existing.column_name != "num_california"
           for existing in table_ref.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        table_ref.columns.append(california_column)

    # Same for the sum metric.
    if all(existing.metric_name != "sum__num"
           for existing in table_ref.metrics):
        num_name = str(column("num").compile(db.engine))
        sum_metric = SqlMetric(metric_name="sum__num",
                               expression=f"SUM({num_name})")
        table_ref.metrics.append(sum_metric)

    db.session.commit()

    # Only the first element returned by create_slices is used here.
    slices, _ = create_slices(table_ref)
    create_dashboard(slices)
Exemplo n.º 19
0
def decode_dashboards(o):
    """
    Hook for the ``object_hook`` parameter of ``json.loads``.

    Rebuilds dashboard-related objects from their tagged JSON representation:
    the first ``__<Model>__`` marker found in ``o`` selects the class, which
    is instantiated with the stored attributes as keyword arguments. A
    ``__datetime__`` marker is parsed into a ``datetime``; any other
    dictionary is returned unchanged.
    """
    import superset.models.core as models

    # Checked in this order; the first marker found in `o` wins.
    model_markers = (
        ("__Dashboard__", Dashboard),
        ("__Slice__", Slice),
        ("__TableColumn__", TableColumn),
        ("__SqlaTable__", SqlaTable),
        ("__SqlMetric__", SqlMetric),
    )
    for marker, model_cls in model_markers:
        if marker in o:
            return model_cls(**o[marker])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
Exemplo n.º 20
0
def _add_table_metrics(datasource: "BaseDatasource") -> None:
    """
    Ensure the datasource has the extra column and metric used by the examples.

    Adds a ``num_california`` expression column and a ``sum__num`` metric when
    they are missing, and marks the ``ds`` column (if present) as a datetime
    column.
    """
    has_california_column = any(
        existing.column_name == "num_california"
        for existing in datasource.columns)
    if not has_california_column:
        # Column names are compiled against the engine before being embedded
        # in the SQL expressions.
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        datasource.columns.append(california_column)

    has_sum_metric = any(
        existing.metric_name == "sum__num" for existing in datasource.metrics)
    if not has_sum_metric:
        num_name = str(column("num").compile(db.engine))
        sum_metric = SqlMetric(metric_name="sum__num",
                               expression=f"SUM({num_name})")
        datasource.metrics.append(sum_metric)

    # Only the first column named "ds" is flagged.
    ds_column = next(
        (existing for existing in datasource.columns
         if existing.column_name == "ds"),
        None,
    )
    if ds_column is not None:
        ds_column.is_dttm = True  # type: ignore
Exemplo n.º 21
0
def virtual_dataset_comma_in_column_value():
    """
    Generator that provides a virtual ``SqlaTable`` whose values contain commas.

    The dataset's SQL unions three rows with text columns ``col1`` and
    ``col2``. Both columns and a ``count`` metric are attached, the dataset is
    merged into the session and yielded; after the ``yield`` it is deleted
    and the session is committed.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    rows_sql = ("SELECT 'col1,row1' as col1, 'col2, row1' as col2 "
                "UNION ALL "
                "SELECT 'col1,row2' as col1, 'col2, row2' as col2 "
                "UNION ALL "
                "SELECT 'col1,row3' as col1, 'col2, row3' as col2 ")
    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=rows_sql,
        database=get_example_database(),
    )
    # Both columns share the same type and are attached via `table=`.
    for column_name in ("col1", "col2"):
        TableColumn(column_name=column_name, type="VARCHAR(255)", table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    # Teardown once the consumer resumes the generator.
    db.session.delete(dataset)
    db.session.commit()
Exemplo n.º 22
0
    def create_table(self, name, schema="", id=0, cols_names=[], metric_names=[]):
        """
        Build a ``SqlaTable`` together with a dictionary mirroring its contents.

        :param name: table name; ``NAME_PREFIX`` is prepended to it.
        :param schema: schema stored on the table and in the dictionary.
        :param id: identifier used as the table ``id`` and stored in the
            params under the ``DBREF`` key.
        :param cols_names: one ``TableColumn`` is appended per name.
        :param metric_names: one ``SqlMetric`` with an empty expression is
            appended per name.
        :returns: ``(table, dict_rep)`` tuple.
        """
        # The default lists are only iterated here, never mutated.
        prefixed_name = "{0}{1}".format(NAME_PREFIX, name)
        params_json = json.dumps({DBREF: id, "database_name": "main"})

        dict_rep = {
            "database_id": get_main_database().id,
            "table_name": prefixed_name,
            "schema": schema,
            "id": id,
            "params": params_json,
            "columns": [{"column_name": col} for col in cols_names],
            "metrics": [
                {"metric_name": metric, "expression": ""} for metric in metric_names
            ],
        }

        table = SqlaTable(
            id=id,
            schema=schema,
            table_name=prefixed_name,
            params=params_json,
        )
        for col in cols_names:
            new_column = TableColumn(column_name=col)
            table.columns.append(new_column)
        for metric in metric_names:
            new_metric = SqlMetric(metric_name=metric, expression="")
            table.metrics.append(new_metric)
        return table, dict_rep
Exemplo n.º 23
0
def decode_dashboards(o):
    """
    Hook for the ``object_hook`` parameter of ``json.loads``.

    Rebuilds model objects from their tagged JSON representation: the first
    ``__<Model>__`` marker found in ``o`` selects the class, which is
    instantiated with the stored attributes as keyword arguments. Besides the
    dashboard, slice and table models, database and security-manager models
    (user, role, view menu, permission view) are handled. A ``__datetime__``
    marker is parsed into a ``datetime``; any other dictionary is returned
    unchanged.
    """
    import superset.models.core as models
    from superset import security_manager
    from superset.connectors.sqla.models import (
        SqlaTable,
        SqlMetric,
        TableColumn,
    )

    # Each guard returns immediately, so the classes (and the security
    # manager attributes) are only looked up for the marker that matches.
    if '__Dashboard__' in o:
        return models.Dashboard(**o['__Dashboard__'])
    if '__Slice__' in o:
        return models.Slice(**o['__Slice__'])
    if '__TableColumn__' in o:
        return TableColumn(**o['__TableColumn__'])
    if '__SqlaTable__' in o:
        return SqlaTable(**o['__SqlaTable__'])
    if '__SqlMetric__' in o:
        return SqlMetric(**o['__SqlMetric__'])
    if '__Database__' in o:
        return models.Database(**o['__Database__'])

    if '__User__' in o:
        user_model = security_manager.user_model
        return user_model(**o['__User__'])
    if '__Role__' in o:
        role_model = security_manager.role_model
        return role_model(**o['__Role__'])
    if '__ViewMenu__' in o:
        viewmenu_model = security_manager.viewmenu_model
        return viewmenu_model(**o['__ViewMenu__'])
    if '__PermissionView__' in o:
        permissionview_model = security_manager.permissionview_model
        return permissionview_model(**o['__PermissionView__'])

    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
Exemplo n.º 24
0
def _create_energy_table():
    """
    Create the energy usage table metadata and its slices.

    The table metadata is created for ``ENERGY_USAGE_TBL_NAME`` in the example
    database, a ``sum__value`` metric is added when missing, the table is
    merged and committed, and one slice is created per entry of
    ``_get_energy_slices()``.
    """
    energy_table = create_table_metadata(
        table_name=ENERGY_USAGE_TBL_NAME,
        database=get_example_database(),
        table_description="Energy consumption",
    )
    energy_table.fetch_metadata()

    # Add the metric only if no existing metric already carries its name.
    if all(metric.metric_name != "sum__value"
           for metric in energy_table.metrics):
        value_name = str(column("value").compile(db.engine))
        sum_metric = SqlMetric(metric_name="sum__value",
                               expression=f"SUM({value_name})")
        energy_table.metrics.append(sum_metric)

    db.session.merge(energy_table)
    db.session.commit()
    energy_table.fetch_metadata()

    for slice_data in _get_energy_slices():
        slice_args = [
            slice_data[key] for key in ("slice_title", "viz_type", "params")
        ]
        _create_and_commit_energy_slice(energy_table, *slice_args)
Exemplo n.º 25
0
def load_country_map_data(only_metadata: bool = False,
                          force: bool = False) -> None:
    """
    Load the data and create the table reference and slice for the country map.

    :param only_metadata: when true, the CSV data is not loaded into the
        database.
    :param force: load the data even if the table already exists.
    """
    tbl_name = "birth_france_by_region"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    needs_data = not table_exists or force
    if not only_metadata and needs_data:
        csv_bytes = get_example_data("birth_france_data_for_country_map.csv",
                                     make_bytes=False)
        data = pd.read_csv(csv_bytes, encoding="utf-8")
        # Every row gets today's date as its "dttm" value.
        data["dttm"] = datetime.datetime.now().date()
        column_types = {
            "DEPT_ID": String(10),
            "2003": BigInteger,
            "2004": BigInteger,
            "2005": BigInteger,
            "2006": BigInteger,
            "2007": BigInteger,
            "2008": BigInteger,
            "2009": BigInteger,
            "2010": BigInteger,
            "2011": BigInteger,
            "2012": BigInteger,
            "2013": BigInteger,
            "2014": BigInteger,
            "dttm": Date(),
        }
        data.to_sql(  # pylint: disable=no-member
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype=column_types,
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    print("Creating table reference")
    table_ref = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not table_ref:
        table_ref = TBL(table_name=tbl_name)
    table_ref.main_dttm_col = "dttm"
    table_ref.database = database
    # Add the average metric only if no existing metric carries its name.
    if all(existing.metric_name != "avg__2004"
           for existing in table_ref.metrics):
        year_name = str(column("2004").compile(db.engine))
        avg_metric = SqlMetric(metric_name="avg__2004",
                               expression=f"AVG({year_name})")
        table_ref.metrics.append(avg_metric)
    db.session.merge(table_ref)
    db.session.commit()
    table_ref.fetch_metadata()

    # Simple AVG metric over the "2004" column, labelled "Boys".
    boys_metric = {
        "expressionType": "SIMPLE",
        "column": {
            "type": "INT",
            "column_name": "2004"
        },
        "aggregate": "AVG",
        "label": "Boys",
        "optionName": "metric_112342",
    }
    slice_data = {
        "granularity_sqla": "",
        "since": "",
        "until": "",
        "viz_type": "country_map",
        "entity": "DEPT_ID",
        "metric": boys_metric,
        "row_limit": 500000,
    }

    print("Creating a slice")
    country_map_slice = Slice(
        slice_name="Birth in France by department in 2016",
        viz_type="country_map",
        datasource_type="table",
        datasource_id=table_ref.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(country_map_slice.slice_name)
    merge_slice(country_map_slice)
Exemplo n.º 26
0
def load_energy(only_metadata: bool = False,
                force: bool = False,
                sample: bool = False) -> None:
    """Load the energy usage example dataset and the charts built on it.

    Unless ``only_metadata`` is set, the ``energy.json.gz`` example file is
    written to the ``energy_usage`` table of the example database when that
    table does not exist yet or when ``force`` is set.  A table reference
    carrying a ``sum__value`` metric is then registered, followed by three
    slices (Sankey, force layout graph and heatmap) that are also recorded in
    ``misc_dash_slices``.

    :param only_metadata: skip writing the data; only (re)create the table
        reference and the slices
    :param force: rewrite the data even if the table already exists
    :param sample: keep only the first 100 rows of the dataset
    """
    tbl_name = "energy_usage"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    # Write the raw data only when requested, and only if the table is
    # missing or a reload is explicitly forced.
    if not only_metadata and (not table_exists or force):
        data = get_example_data("energy.json.gz")
        pdf = pd.read_json(data)
        pdf = pdf.head(100) if sample else pdf
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "source": String(255),
                "target": String(255),
                "value": Float()
            },
            index=False,
            method="multi",
        )

    # The message previously named "wb_health_population" (copied from another
    # loader); report the table that is actually being registered.
    print(f"Creating table [{tbl_name}] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name)
    tbl.description = "Energy consumption"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Add the metric once; the column name is rendered by the engine's SQL
    # compiler before being embedded in the expression.
    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
        col = str(column("value").compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    # Each slice below is registered in misc_dash_slices and then merged.
    slc = Slice(
        slice_name="Energy Sankey",
        viz_type="sankey",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Energy Force Layout",
        viz_type="graph_chart",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "source": "source",
            "target": "target",
            "edgeLength": 400,
            "repulsion": 1000,
            "layout": "force",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "graph_chart"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Heatmap",
        viz_type="heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemplo n.º 27
0
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
    """Load the birth names example dataset, its slices and a dashboard.

    Unless ``only_metadata`` is set, ``birth_names.json.gz`` is written to the
    ``birth_names`` table of the example database when that table does not
    exist yet or when ``force`` is set.  The function then makes sure a table
    reference exists (with a ``num_california`` column and a ``sum__num``
    metric), merges a fixed list of slices built on that table, and creates
    or updates the dashboard whose slug is ``births``.

    :param only_metadata: skip writing the data; only (re)create the table
        reference, slices and dashboard
    :param force: rewrite the data even if the table already exists
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    # Write the raw data only when requested, and only if the table is
    # missing or a reload is explicitly forced.
    if not only_metadata and (not table_exists or force):
        pdf = pd.read_json(get_example_data("birth_names.json.gz"))
        # The "ds" values are interpreted as epoch milliseconds.
        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "ds": DateTime,
                "gender": String(16),
                "state": String(10),
                "name": String(255),
            },
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    # Reuse the existing table reference when there is one; otherwise create
    # it and add it to the session.
    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    # Add the derived "num_california" column once.  The column names are
    # rendered by the engine's SQL compiler before being embedded in the
    # expression.
    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=
                f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    # Add the "sum__num" metric once.
    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # Base form data passed to get_slice_json for every slice below.
    # NOTE(review): presumably the keyword arguments given to get_slice_json
    # override these defaults - confirm against its definition.
    defaults = {
        "compare_lag":
        "10",
        "compare_suffix":
        "o10Y",
        "limit":
        "25",
        "granularity_sqla":
        "ds",
        "groupby": [],
        "metric":
        "sum__num",
        "metrics": [{
            "expressionType": "SIMPLE",
            "column": {
                "column_name": "num",
                "type": "BIGINT"
            },
            "aggregate": "SUM",
            "label": "Births",
            "optionName": "metric_11",
        }],
        "row_limit":
        config.get("ROW_LIMIT"),
        "since":
        "100 years ago",
        "until":
        "now",
        "viz_type":
        "table",
        "where":
        "",
        "markup_type":
        "markdown",
    }

    # Only used as the creator of the "Daily Totals" slice.
    admin = security_manager.find_user("admin")

    print("Creating some slices")
    # All slices point at the table reference registered above.
    slices = [
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                row_limit=50,
                timeseries_limit_metric="sum__num",
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["boy"]
                }],
                row_limit=50,
            ),
        ),
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="pie",
                                  groupby=["gender"]),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "not in",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
            ),
        ),
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
            ),
        ),
        # Markup slice: merged like the others but left out of the dashboard
        # further down.
        Slice(
            slice_name="Title",
            viz_type="markup",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="markup",
                markup_type="html",
                code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src="/static/assets/images/babies.png">
    </div>
    """,
            ),
        ),
        Slice(
            slice_name="Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults,
                                  viz_type="pivot_table",
                                  groupby=["name"],
                                  columns=["state"]),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number_total",
                granularity_sqla="ds",
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                subheader="total female participants",
            ),
        ),
        # The next three slices aggregate the derived "num_california" column.
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                }],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line"),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a dashboard")
    # Reuse the dashboard with slug "births" when it exists; otherwise create
    # it and add it to the session.
    dash = db.session.query(Dash).filter_by(slug="births").first()

    if not dash:
        dash = Dash()
        db.session.add(dash)
    dash.published = True
    # Dashboard layout as a JSON document: chart, column, row and markdown
    # components keyed by id.
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
{
    "CHART-0dd270f0": {
        "meta": {
            "chartId": 51,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": {
            "chartId": 52,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": {
            "chartId": 53,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": {
            "chartId": 54,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": {
            "chartId": 55,
            "width": 8,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": {
            "chartId": 56,
            "width": 8,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": {
            "chartId": 57,
            "width": 3,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": {
            "chartId": 59,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": {
            "chartId": 60,
            "width": 12,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": {
            "chartId": 61,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": {
            "chartId": 62,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": {
            "chartId": 63,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": {
            "width": 4,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [
            "ROW-cc97c6ac",
            "CHART-e2cb4997"
        ]
    },
    "COLUMN-4557b6ba": {
        "meta": {
            "width": 8,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [
            "ROW-d2e78e59",
            "CHART-e9cd8f0b"
        ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [
            "ROW-8515ace3",
            "ROW-1890385f",
            "ROW-f0b64094",
            "ROW-be9526b8"
        ]
    },
    "HEADER_ID": {
        "meta": {
            "text": "Births"
        },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": {
            "width": 5,
            "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babies.png\\" style=\\"width:55%;\\">\\n</div>\\n",
            "height": 38
        },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [
            "GRID_ID"
        ]
    },
    "ROW-1890385f": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [
            "CHART-e440d205",
            "CHART-0dd270f0",
            "CHART-a3c21bcc"
        ]
    },
    "ROW-8515ace3": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [
            "COLUMN-25a865d6",
            "COLUMN-4557b6ba"
        ]
    },
    "ROW-be9526b8": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [
            "CHART-985bfd1e",
            "CHART-17f13246",
            "CHART-729324f6"
        ]
    },
    "ROW-cc97c6ac": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [
            "CHART-976960a5",
            "CHART-58575537"
        ]
    },
    "ROW-d2e78e59": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [
            "MARKDOWN-00178c27",
            "CHART-59444e0b"
        ]
    },
    "ROW-f0b64094": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [
            "CHART-e8774b49"
        ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
        """

        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    # NOTE(review): presumably replaces the hard-coded chartId values in the
    # layout with the ids of the dashboard's slices - confirm against
    # update_slice_ids.
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "Births"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
def load_world_bank_health_n_pop(only_metadata=False, force=False):  # pylint: disable=too-many-locals
    """Loads the world bank health dataset, slices and a dashboard"""
    tbl_name = "wb_health_population"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=50,
            dtype={
                "year": DateTime(),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            index=False,
        )

    print("Creating table [wb_health_population] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(
        os.path.join(EXAMPLES_FOLDER, "countries.md"))
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if not any(col.metric_name == metric for col in tbl.metrics):
            aggr_func = metric[:3]
            col = str(column(metric[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=metric,
                          expression=f"{aggr_func}({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "metric": "sum__SP_POP_TOTL",
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config["ROW_LIMIT"],
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": {
            "aggregate": "SUM",
            "column": {
                "column_name": "SP_RUR_TOTL",
                "optionName": "_col_SP_RUR_TOTL",
                "type": "DOUBLE",
            },
            "expressionType": "SIMPLE",
            "hasCustomLabel": True,
            "label": "Rural Population",
        },
        "entity": "country_code",
        "show_bubbles": True,
    }

    print("Creating slices")
    slices = [
        Slice(
            slice_name="Region Filter",
            viz_type="filter_box",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="filter_box",
                date_filter=False,
                filter_configs=[
                    {
                        "asc": False,
                        "clearable": True,
                        "column": "region",
                        "key": "2s98dfu",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                    {
                        "asc": False,
                        "clearable": True,
                        "key": "li3j2lk",
                        "column": "country_name",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                ],
            ),
        ),
        Slice(
            slice_name="World's Population",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2000",
                viz_type="big_number",
                compare_lag="10",
                metric="sum__SP_POP_TOTL",
                compare_suffix="over 10Y",
            ),
        ),
        Slice(
            slice_name="Most Populated Countries",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="table",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="Growth Rate",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                since="1960-01-01",
                metrics=["sum__SP_POP_TOTL"],
                num_period_compare="10",
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="% Rural",
            viz_type="world_map",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="world_map",
                metric="sum__SP_RUR_TOTL_ZS",
                num_period_compare="10",
            ),
        ),
        Slice(
            slice_name="Life Expectancy VS Rural %",
            viz_type="bubble",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="bubble",
                since="2011-01-01",
                until="2011-01-02",
                series="region",
                limit=0,
                entity="country_name",
                x="sum__SP_RUR_TOTL_ZS",
                y="sum__SP_DYN_LE00_IN",
                size="sum__SP_POP_TOTL",
                max_bubble_size="50",
                adhoc_filters=[{
                    "clause":
                    "WHERE",
                    "expressionType":
                    "SIMPLE",
                    "filterOptionName":
                    "2745eae5",
                    "comparator": [
                        "TCA",
                        "MNP",
                        "DMA",
                        "MHL",
                        "MCO",
                        "SXM",
                        "CYM",
                        "TUV",
                        "IMY",
                        "KNA",
                        "ASM",
                        "ADO",
                        "AMA",
                        "PLW",
                    ],
                    "operator":
                    "not in",
                    "subject":
                    "country_code",
                }],
            ),
        ),
        Slice(
            slice_name="Rural Breakdown",
            viz_type="sunburst",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="sunburst",
                groupby=["region", "country_name"],
                since="2011-01-01",
                until="2011-01-01",
            ),
        ),
        Slice(
            slice_name="World's Pop Growth",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="area",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Box plot",
            viz_type="box_plot",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                whisker_options="Min/max (no outliers)",
                x_ticks_layout="staggered",
                viz_type="box_plot",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Treemap",
            viz_type="treemap",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="treemap",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["region", "country_code"],
            ),
        ),
        Slice(
            slice_name="Parallel Coordinates",
            viz_type="para",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2011-01-01",
                until="2011-01-01",
                viz_type="para",
                limit=100,
                metrics=[
                    "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS",
                    "sum__SH_DYN_AIDS"
                ],
                secondary_metric="sum__SP_POP_TOTL",
                series="country_name",
            ),
        ),
    ]
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
    dash.published = True
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": {
            "chartId": 40,
            "height": 25,
            "sliceName": "Region Filter",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": {
            "chartId": 41,
            "height": 25,
            "sliceName": "World's Population",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": {
            "chartId": 42,
            "height": 92,
            "sliceName": "Most Populated Countries",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": {
            "chartId": 43,
            "height": 38,
            "sliceName": "Growth Rate",
            "width": 6
        },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": {
            "chartId": 44,
            "height": 52,
            "sliceName": "% Rural",
            "width": 7
        },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": {
            "chartId": 45,
            "height": 50,
            "sliceName": "Life Expectancy VS Rural %",
            "width": 8
        },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": {
            "chartId": 46,
            "height": 38,
            "sliceName": "Rural Breakdown",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": {
            "chartId": 47,
            "height": 50,
            "sliceName": "World's Pop Growth",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": {
            "chartId": 48,
            "height": 50,
            "sliceName": "Box plot",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": {
            "chartId": 49,
            "height": 50,
            "sliceName": "Treemap",
            "width": 8
        },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [
            "ROW-1e064e3c",
            "ROW-afdefba9"
        ],
        "id": "COLUMN-071bbbad",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 9
        },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [
            "CHART-36bfc934",
            "CHART-37982887"
        ],
        "id": "COLUMN-fe3914b8",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 2
        },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [
            "ROW-46632bc2",
            "ROW-3fa26c5d",
            "ROW-812b3f13"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "World's Bank Data"
        },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [
            "COLUMN-fe3914b8",
            "CHART-2d5b6871"
        ],
        "id": "ROW-1e064e3c",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [
            "CHART-b5e05d6f",
            "CHART-0fd0d252"
        ],
        "id": "ROW-3fa26c5d",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [
            "COLUMN-071bbbad",
            "CHART-17e0f8d8"
        ],
        "id": "ROW-46632bc2",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [
            "CHART-a4808bba",
            "CHART-e76e9f5f"
        ],
        "id": "ROW-812b3f13",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [
            "CHART-2ee52f30",
            "CHART-97f4cb48"
        ],
        "id": "ROW-afdefba9",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """)
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
Exemplo n.º 29
0
def load_birth_names(only_metadata: bool = False,
                     force: bool = False,
                     sample: bool = False) -> None:
    """Load the birth names example dataset, its slices and its dashboard.

    The function performs four steps:

    1. Unless ``only_metadata`` is set, load the raw data when the
       ``birth_names`` table is missing or ``force`` is set.
    2. Fetch or create the table reference, then make sure it carries the
       ``num_california`` calculated column and the ``sum__num`` metric.
    3. Build and merge the dashboard slices and a set of miscellaneous
       slices; the names of the latter are recorded in ``misc_dash_slices``.
    4. Fetch or create the dashboard with slug ``births`` and store its
       title, colour metadata, layout and slices.

    :param only_metadata: when true, skip loading the data and only
        (re)create the table reference, slices and dashboard
    :param force: when true, load the data even if the table already exists
    :param sample: forwarded unchanged to ``load_data``
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    # Add the calculated column only once so repeated runs do not pile up
    # duplicates; identifiers are compiled against the metadata engine.
    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=
                f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    # Same guard for the saved metric that several slices refer to by name.
    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # Ad-hoc metric used by most slices (labelled "Births").
    metrics = [{
        "expressionType": "SIMPLE",
        "column": {
            "column_name": "num",
            "type": "BIGINT"
        },
        "aggregate": "SUM",
        "label": "Births",
        "optionName": "metric_11",
    }]
    # Name of the saved metric ensured above.
    metric = "sum__num"

    # Ad-hoc SUM over the calculated ``num_california`` column; shared by
    # several of the miscellaneous slices below.
    california_metric = {
        "expressionType": "SIMPLE",
        "column": {
            "column_name": "num_california",
            "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
        },
        "aggregate": "SUM",
        "label": "SUM(num_california)",
    }

    # Base form data; each slice overrides individual keys through
    # ``get_slice_json``.
    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "row_limit": config["ROW_LIMIT"],
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    # Slices placed on the dashboard.
    slices = [
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults,
                                  viz_type="pie",
                                  groupby=["gender"],
                                  metric=metric),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "NOT IN",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "girl")],
                row_limit=50,
                timeseries_limit_metric="sum__num",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Girl Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "girl")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "boy")],
                row_limit=50,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Boy Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "boy")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Top 10 Girl Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "girl")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                # Was misspelled "x_axis_forma", so the key never matched
                # the chart's x-axis format control.
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Top 10 Boy Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "boy")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                # Was misspelled "x_axis_forma"; see the girl variant above.
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
    ]
    # Slices that are created but not attached to the births dashboard;
    # their names are registered in ``misc_dash_slices`` further down.
    misc_slices = [
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line", metrics=metrics),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric=california_metric,
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[california_metric],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric=california_metric,
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=metrics,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric=california_metric,
            ),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric=metric,
                viz_type="big_number_total",
                granularity_sqla="ds",
                adhoc_filters=[gen_filter("gender", "girl")],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="pivot_table",
                groupby=["name"],
                columns=["state"],
                metrics=metrics,
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    for slc in misc_slices:
        merge_slice(slc)
        misc_dash_slices.add(slc.slice_name)

    print("Creating a dashboard")
    dash = db.session.query(Dashboard).filter_by(slug="births").first()

    if not dash:
        dash = Dashboard()
        db.session.add(dash)
    dash.published = True
    dash.json_metadata = textwrap.dedent("""\
    {
        "label_colors": {
            "Girls": "#FF69B4",
            "Boys": "#ADD8E6",
            "girl": "#FF69B4",
            "boy": "#ADD8E6"
        }
    }""")
    # Dashboard layout. The "chartId" values are placeholders that
    # ``update_slice_ids`` is called to rewrite below.
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
        {
          "CHART-6GdlekVise": {
            "children": [],
            "id": "CHART-6GdlekVise",
            "meta": {
              "chartId": 5547,
              "height": 50,
              "sliceName": "Top 10 Girl Name Share",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-6n9jxb30JG": {
            "children": [],
            "id": "CHART-6n9jxb30JG",
            "meta": {
              "chartId": 5540,
              "height": 36,
              "sliceName": "Genders by State",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW--EyBZQlDi"
            ],
            "type": "CHART"
          },
          "CHART-Jj9qh1ol-N": {
            "children": [],
            "id": "CHART-Jj9qh1ol-N",
            "meta": {
              "chartId": 5545,
              "height": 50,
              "sliceName": "Boy Name Cloud",
              "width": 4
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "CHART-ODvantb_bF": {
            "children": [],
            "id": "CHART-ODvantb_bF",
            "meta": {
              "chartId": 5548,
              "height": 50,
              "sliceName": "Top 10 Boy Name Share",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "CHART-PAXUUqwmX9": {
            "children": [],
            "id": "CHART-PAXUUqwmX9",
            "meta": {
              "chartId": 5538,
              "height": 34,
              "sliceName": "Genders",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "CHART"
          },
          "CHART-_T6n_K9iQN": {
            "children": [],
            "id": "CHART-_T6n_K9iQN",
            "meta": {
              "chartId": 5539,
              "height": 36,
              "sliceName": "Trends",
              "width": 7
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW--EyBZQlDi"
            ],
            "type": "CHART"
          },
          "CHART-eNY0tcE_ic": {
            "children": [],
            "id": "CHART-eNY0tcE_ic",
            "meta": {
              "chartId": 5537,
              "height": 34,
              "sliceName": "Participants",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "CHART"
          },
          "CHART-g075mMgyYb": {
            "children": [],
            "id": "CHART-g075mMgyYb",
            "meta": {
              "chartId": 5541,
              "height": 50,
              "sliceName": "Girls",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-n-zGGE6S1y": {
            "children": [],
            "id": "CHART-n-zGGE6S1y",
            "meta": {
              "chartId": 5542,
              "height": 50,
              "sliceName": "Girl Name Cloud",
              "width": 4
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-vJIPjmcbD3": {
            "children": [],
            "id": "CHART-vJIPjmcbD3",
            "meta": {
              "chartId": 5543,
              "height": 50,
              "sliceName": "Boys",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "DASHBOARD_VERSION_KEY": "v2",
          "GRID_ID": {
            "children": [
              "ROW-2n0XgiHDgs",
              "ROW--EyBZQlDi",
              "ROW-eh0w37bWbR",
              "ROW-kzWtcvo8R1"
            ],
            "id": "GRID_ID",
            "parents": [
              "ROOT_ID"
            ],
            "type": "GRID"
          },
          "HEADER_ID": {
            "id": "HEADER_ID",
            "meta": {
              "text": "Births"
            },
            "type": "HEADER"
          },
          "MARKDOWN-zaflB60tbC": {
            "children": [],
            "id": "MARKDOWN-zaflB60tbC",
            "meta": {
              "code": "<div style=\\"text-align:center\\">  <h1>Birth Names Dashboard</h1>  <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>",
              "height": 34,
              "width": 6
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "MARKDOWN"
          },
          "ROOT_ID": {
            "children": [
              "GRID_ID"
            ],
            "id": "ROOT_ID",
            "type": "ROOT"
          },
          "ROW--EyBZQlDi": {
            "children": [
              "CHART-_T6n_K9iQN",
              "CHART-6n9jxb30JG"
            ],
            "id": "ROW--EyBZQlDi",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-2n0XgiHDgs": {
            "children": [
              "CHART-eNY0tcE_ic",
              "MARKDOWN-zaflB60tbC",
              "CHART-PAXUUqwmX9"
            ],
            "id": "ROW-2n0XgiHDgs",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-eh0w37bWbR": {
            "children": [
              "CHART-g075mMgyYb",
              "CHART-n-zGGE6S1y",
              "CHART-6GdlekVise"
            ],
            "id": "ROW-eh0w37bWbR",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-kzWtcvo8R1": {
            "children": [
              "CHART-vJIPjmcbD3",
              "CHART-Jj9qh1ol-N",
              "CHART-ODvantb_bF"
            ],
            "id": "ROW-kzWtcvo8R1",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          }
        }
        """

        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "USA Births Names"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
Exemplo n.º 30
0
def test_create_virtual_sqlatable(mocker: MockFixture, app_context: None,
                                  session: Session) -> None:
    """
    Test shadow write when creating a new virtual ``SqlaTable``.

    A physical ``Table`` with four columns is persisted first. Then a virtual
    ``SqlaTable`` (one defined by a SQL query over that table) is added to the
    session and flushed. The test asserts that, after the flush:

    * ten ``Column`` rows exist: the four columns of the physical table, five
      non-physical columns mirroring the ``TableColumn`` objects of the
      virtual dataset, and one aggregation column for the ``cnt`` metric;
    * one ``Dataset`` row exists that references the ``SqlaTable``, the
      non-physical columns and the physical table, and carries the SQL of the
      virtual dataset as its expression.

    :param mocker: pytest-mock fixture, used to make the security manager
        return the test session.
    :param app_context: not referenced in the body; presumably requested only
        so that an application context is active during the test.
    :param session: SQLAlchemy session the models are written to and read from.
    """
    # make the security manager hand out the test session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    def expected_column(column_id,
                        name,
                        type_,
                        expression=None,
                        *,
                        is_physical=False,
                        is_temporal=False,
                        is_aggregation=False):
        """
        Build the serialized payload expected for a single ``Column`` row.

        Only the fields that differ between the expected rows are parameters
        (``expression`` is a string or ``None``); every other field has the
        same value for all rows checked by this test.
        """
        return {
            "id": column_id,
            "name": name,
            "type": type_,
            "expression": expression,
            "is_physical": is_physical,
            "is_temporal": is_temporal,
            "is_aggregation": is_aggregation,
            # fields that are identical for every expected row
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "changed_by": None,
            "external_url": None,
            "is_managed_externally": False,
        }

    # create the ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database",
                        sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(name="ds", is_temporal=True, type="TIMESTAMP"),
            Column(name="user_id", type="INTEGER"),
            Column(name="revenue", type="INTEGER"),
            Column(name="expenses", type="INTEGER"),
        ],
    )
    session.add(table)
    session.commit()

    # SQL of the virtual dataset; also the expression expected on the
    # resulting ``Dataset``, so it is defined once and used for both
    virtual_sql = """
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table"""

    # create virtual dataset
    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(column_name="profit",
                    type="INTEGER",
                    expression="revenue-expenses"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint=
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=virtual_sql,
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    # flush (not commit) is enough for the assertions below to see the rows
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # check that columns were created; order explicitly by primary key because
    # the expected list is positional and SQL gives no ordering guarantee
    # without ORDER BY
    column_schema = ColumnSchema()
    column_schemas = [{
        k: v
        for k, v in column_schema.dump(column).items() if k not in ignored_keys
    } for column in session.query(Column).order_by(Column.id).all()]
    assert column_schemas == [
        # columns of the physical ``Table`` persisted above
        expected_column(1, "ds", "TIMESTAMP", is_physical=True,
                        is_temporal=True),
        expected_column(2, "user_id", "INTEGER", is_physical=True),
        expected_column(3, "revenue", "INTEGER", is_physical=True),
        expected_column(4, "expenses", "INTEGER", is_physical=True),
        # columns mirroring the ``TableColumn`` objects of the virtual dataset
        expected_column(5, "ds", "TIMESTAMP", "ds", is_temporal=True),
        expected_column(6, "user_id", "INTEGER", "user_id"),
        expected_column(7, "revenue", "INTEGER", "revenue"),
        expected_column(8, "expenses", "INTEGER", "expenses"),
        expected_column(9, "profit", "INTEGER", "revenue-expenses"),
        # column created for the ``cnt`` metric
        expected_column(10, "cnt", "Unknown", "COUNT(*)",
                        is_aggregation=True),
    ]

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [{
        k: v
        for k, v in dataset_schema.dump(dataset).items()
        if k not in ignored_keys
    } for dataset in session.query(Dataset).all()]
    assert datasets == [{
        "id": 1,
        "sqlatable_id": 1,
        "name": "old_dataset",
        "changed_by": None,
        "created_by": None,
        # only the non-physical columns (ids 5-10) belong to the dataset
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": virtual_sql,
    }]