示例#1
0
def test_import_database(session: Session) -> None:
    """
    Check the attributes of a database created by ``import_database``.
    """
    from superset.databases.commands.importers.v1.utils import import_database
    from superset.models.core import Database
    from tests.integration_tests.fixtures.importexport import database_config

    # create the model tables in the database bound to the test session
    Database.metadata.create_all(session.get_bind())  # pylint: disable=no-member

    # the importer receives a deep copy of the shared fixture
    imported = import_database(session, copy.deepcopy(database_config))

    assert imported.database_name == "imported_database"
    assert imported.sqlalchemy_uri == "sqlite:///test.db"
    assert imported.cache_timeout is None
    assert imported.expose_in_sqllab is True
    assert imported.allow_run_async is False
    assert imported.allow_ctas is True
    assert imported.allow_cvas is True
    assert imported.allow_file_upload is True
    assert imported.extra == "{}"
    assert imported.uuid == "b8a1ccd3-779d-4ab7-8ad8-9ab119d7fe89"
    assert imported.is_managed_externally is False
    assert imported.external_url is None
示例#2
0
    def _import(session: Session, configs: Dict[str, Any]) -> None:
        """
        Import every asset in ``configs``, overwriting existing ones.

        Assets are imported in dependency order (databases, saved queries,
        datasets, charts, dashboards) so that each config can be pointed at
        the ID of its already imported parent.  Finally, every dashboard/chart
        pair that is not yet stored in ``dashboard_slices`` is inserted.

        :param session: SQLAlchemy session used for all reads and writes
        :param configs: mapping of bundle file name (``databases/...``,
            ``queries/...``, ``datasets/...``, ``charts/...``,
            ``dashboards/...``) to its parsed config; query, dataset and
            chart configs are updated in place with the resolved IDs
        :raises KeyError: if a saved query, dataset or chart references a
            parent UUID that was not imported from this bundle
        """
        # import databases first
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("databases/"):
                database = import_database(session, config, overwrite=True)
                database_ids[str(database.uuid)] = database.id

        # import saved queries
        for file_name, config in configs.items():
            if file_name.startswith("queries/"):
                config["db_id"] = database_ids[config["database_uuid"]]
                import_saved_query(session, config, overwrite=True)

        # import datasets
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
                config["database_id"] = database_ids[config["database_uuid"]]
                dataset = import_dataset(session, config, overwrite=True)
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type": dataset.datasource_type,
                    "datasource_name": dataset.table_name,
                }

        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("charts/"):
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=True)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts as a
        # set of plain tuples, so the membership test below is a hash lookup
        existing_relationships = {
            tuple(row)
            for row in session.execute(
                select([
                    dashboard_slices.c.dashboard_id,
                    dashboard_slices.c.slice_id,
                ])).fetchall()
        }

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                config = update_id_refs(config, chart_ids, dataset_info)
                dashboard = import_dashboard(session, config, overwrite=True)
                for uuid in find_chart_uuids(config["position"]):
                    # NOTE(review): this stops at the first chart UUID that
                    # was not imported, so later charts of the same dashboard
                    # are not linked -- confirm ``continue`` was not intended
                    if uuid not in chart_ids:
                        break
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [{
            "dashboard_id": dashboard_id,
            "slice_id": chart_id
        } for (dashboard_id, chart_id) in dashboard_chart_ids]
        # An empty parameter list is treated by SQLAlchemy as "no parameters"
        # and would run a bare INSERT, adding a row with only default values,
        # so the statement is skipped when there is nothing to link.
        if values:
            # pylint: disable=no-value-for-parameter # sqlalchemy/issues/4656
            session.execute(dashboard_slices.insert(), values)
示例#3
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import the charts in ``configs`` together with their parents.

        Only datasets referenced by a chart, and databases referenced by one
        of those datasets, are imported; both are imported with
        ``overwrite=False``.  Each chart config is updated in place so that
        it points at the imported dataset before the chart is imported.

        :param session: SQLAlchemy session handed to the import helpers
        :param configs: mapping of bundle file name (``databases/...``,
            ``datasets/...``, ``charts/...``) to its parsed config
        :param overwrite: forwarded to ``import_chart`` only
        """
        # discover datasets associated with charts
        dataset_uuids: Set[str] = set()
        for file_name, config in configs.items():
            if file_name.startswith("charts/"):
                dataset_uuids.add(config["dataset_uuid"])

        # discover databases associated with datasets
        database_uuids: Set[str] = set()
        for file_name, config in configs.items():
            if file_name.startswith(
                    "datasets/") and config["uuid"] in dataset_uuids:
                database_uuids.add(config["database_uuid"])

        # import related databases
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith(
                    "databases/") and config["uuid"] in database_uuids:
                database = import_database(session, config, overwrite=False)
                database_ids[str(database.uuid)] = database.id

        # import datasets with the correct parent ref
        datasets: Dict[str, SqlaTable] = {}
        for file_name, config in configs.items():
            if (file_name.startswith("datasets/")
                    and config["database_uuid"] in database_ids):
                config["database_id"] = database_ids[config["database_uuid"]]
                dataset = import_dataset(session, config, overwrite=False)
                datasets[str(dataset.uuid)] = dataset

        # import charts with the correct parent ref
        for file_name, config in configs.items():
            if file_name.startswith(
                    "charts/") and config["dataset_uuid"] in datasets:
                # update datasource id, type, and name
                dataset = datasets[config["dataset_uuid"]]
                config.update({
                    "datasource_id":
                    dataset.id,
                    "datasource_type":
                    "view" if dataset.is_sqllab_view else "table",
                    "datasource_name":
                    dataset.table_name,
                })
                config["params"].update({"datasource": dataset.uid})
                # the key is not guaranteed to be present in the config, so
                # look it up without raising; a missing or empty value means
                # there is nothing to rewrite
                if config.get("query_context"):
                    # TODO (betodealmeida): export query_context as object, not string
                    query_context = json.loads(config["query_context"])
                    # NOTE(review): the type is always "table" here, while
                    # ``datasource_type`` above may be "view" -- confirm
                    query_context["datasource"] = {
                        "id": dataset.id,
                        "type": "table"
                    }
                    config["query_context"] = json.dumps(query_context)

                import_chart(session, config, overwrite=overwrite)
示例#4
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import charts and the datasets/databases they depend on.

        Parents are resolved bottom-up (charts -> datasets -> databases) and
        imported top-down with ``overwrite=False``; only the charts honour
        the ``overwrite`` argument.  Any ``query_context`` entry is removed
        from a chart config before the chart is imported.
        """
        # UUIDs of the datasets that at least one chart points to
        dataset_uuids: Set[str] = {
            chart_config["dataset_uuid"]
            for path, chart_config in configs.items()
            if path.startswith("charts/")
        }

        # UUIDs of the databases those datasets live in
        database_uuids: Set[str] = {
            dataset_config["database_uuid"]
            for path, dataset_config in configs.items()
            if path.startswith("datasets/")
            and dataset_config["uuid"] in dataset_uuids
        }

        # databases: imported first, remembered by UUID
        database_ids: Dict[str, int] = {}
        for path, database_config in configs.items():
            if not path.startswith("databases/"):
                continue
            if database_config["uuid"] not in database_uuids:
                continue
            imported_db = import_database(session,
                                          database_config,
                                          overwrite=False)
            database_ids[str(imported_db.uuid)] = imported_db.id

        # datasets: attached to the ID of their imported database
        datasets: Dict[str, SqlaTable] = {}
        for path, dataset_config in configs.items():
            if not path.startswith("datasets/"):
                continue
            if dataset_config["database_uuid"] not in database_ids:
                continue
            dataset_config["database_id"] = database_ids[
                dataset_config["database_uuid"]]
            imported_ds = import_dataset(session,
                                         dataset_config,
                                         overwrite=False)
            datasets[str(imported_ds.uuid)] = imported_ds

        # charts: pointed at their imported dataset, then imported
        for path, chart_config in configs.items():
            if not path.startswith("charts/"):
                continue
            if chart_config["dataset_uuid"] not in datasets:
                continue

            parent = datasets[chart_config["dataset_uuid"]]
            chart_config.update(
                datasource_id=parent.id,
                datasource_type="table",
                datasource_name=parent.table_name,
            )
            chart_config["params"].update({"datasource": parent.uid})

            # drop the query context if the bundle carries one
            chart_config.pop("query_context", None)

            import_chart(session, chart_config, overwrite=overwrite)
示例#5
0
    def _import_bundle(self, session: Session) -> None:
        """
        Import the bundled databases, then the datasets that belong to them.

        Databases are imported with ``overwrite=True``; datasets are imported
        with ``overwrite=False``.
        """
        # databases go first so the datasets can reference their IDs
        database_ids: Dict[str, int] = {}
        for path, database_config in self._configs.items():
            if not path.startswith("databases/"):
                continue
            imported = import_database(session,
                                       database_config,
                                       overwrite=True)
            database_ids[str(imported.uuid)] = imported.id

        # datasets whose parent database was imported above
        for path, dataset_config in self._configs.items():
            if not path.startswith("datasets/"):
                continue
            if dataset_config["database_uuid"] not in database_ids:
                continue
            dataset_config["database_id"] = database_ids[
                dataset_config["database_uuid"]]
            # overwrite=False prevents deleting any non-imported columns/metrics
            import_dataset(session, dataset_config, overwrite=False)
示例#6
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import charts plus the datasets and databases they depend on.

        Datasets and databases are imported with ``overwrite=False``; the
        ``overwrite`` argument is applied to the charts only.  Chart configs
        are updated in place with the datasource id, type and name of their
        imported dataset.
        """
        # UUIDs of the datasets that at least one chart points to
        dataset_uuids: Set[str] = {
            chart_config["dataset_uuid"]
            for path, chart_config in configs.items()
            if path.startswith("charts/")
        }

        # UUIDs of the databases those datasets live in
        database_uuids: Set[str] = {
            dataset_config["database_uuid"]
            for path, dataset_config in configs.items()
            if path.startswith("datasets/")
            and dataset_config["uuid"] in dataset_uuids
        }

        # databases: imported first, remembered by UUID
        database_ids: Dict[str, int] = {}
        for path, database_config in configs.items():
            if not path.startswith("databases/"):
                continue
            if database_config["uuid"] not in database_uuids:
                continue
            imported_db = import_database(session,
                                          database_config,
                                          overwrite=False)
            database_ids[str(imported_db.uuid)] = imported_db.id

        # datasets: attached to their database, summarised for the charts
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for path, dataset_config in configs.items():
            if not path.startswith("datasets/"):
                continue
            if dataset_config["database_uuid"] not in database_ids:
                continue
            dataset_config["database_id"] = database_ids[
                dataset_config["database_uuid"]]
            imported_ds = import_dataset(session,
                                         dataset_config,
                                         overwrite=False)
            dataset_info[str(imported_ds.uuid)] = dict(
                datasource_id=imported_ds.id,
                datasource_type=("view"
                                 if imported_ds.is_sqllab_view else "table"),
                datasource_name=imported_ds.table_name,
            )

        # charts: copy the datasource id, type and name, then import
        for path, chart_config in configs.items():
            if not path.startswith("charts/"):
                continue
            if chart_config["dataset_uuid"] not in dataset_info:
                continue
            chart_config.update(dataset_info[chart_config["dataset_uuid"]])
            import_chart(session, chart_config, overwrite=overwrite)
示例#7
0
def test_import_database_managed_externally(session: Session) -> None:
    """
    Check that the "managed externally" fields survive ``import_database``.
    """
    from superset.databases.commands.importers.v1.utils import import_database
    from superset.models.core import Database
    from tests.integration_tests.fixtures.importexport import database_config

    # create the model tables in the database bound to the test session
    Database.metadata.create_all(session.get_bind())  # pylint: disable=no-member

    # start from a deep copy of the shared fixture and flag it as external
    external_config = copy.deepcopy(database_config)
    external_config.update(
        is_managed_externally=True,
        external_url="https://example.org/my_database",
    )

    imported = import_database(session, external_config)
    assert imported.is_managed_externally is True
    assert imported.external_url == "https://example.org/my_database"
示例#8
0
    def _import_bundle(self, session: Session) -> None:
        """
        Import the bundled datasets and the databases they reference.

        Databases are imported with ``overwrite=False``; datasets are
        imported with ``overwrite=True``.
        """
        # UUIDs of the databases that at least one dataset points to
        database_uuids: Set[str] = {
            dataset_config["database_uuid"]
            for path, dataset_config in self._configs.items()
            if path.startswith("datasets/")
        }

        # databases: imported first, remembered by UUID
        database_ids: Dict[str, int] = {}
        for path, database_config in self._configs.items():
            if not path.startswith("databases/"):
                continue
            if database_config["uuid"] not in database_uuids:
                continue
            imported = import_database(session,
                                       database_config,
                                       overwrite=False)
            database_ids[str(imported.uuid)] = imported.id

        # datasets: attached to the ID of their imported database
        for path, dataset_config in self._configs.items():
            if not path.startswith("datasets/"):
                continue
            if dataset_config["database_uuid"] not in database_ids:
                continue
            dataset_config["database_id"] = database_ids[
                dataset_config["database_uuid"]]
            import_dataset(session, dataset_config, overwrite=True)
示例#9
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import saved queries and the databases they reference.

        Databases are imported with ``overwrite=False``; the ``overwrite``
        argument is applied to the saved queries only.
        """
        # UUIDs of the databases that at least one saved query points to
        database_uuids: Set[str] = {
            query_config["database_uuid"]
            for path, query_config in configs.items()
            if path.startswith("queries/")
        }

        # databases: imported first, remembered by UUID
        database_ids: Dict[str, int] = {}
        for path, database_config in configs.items():
            if not path.startswith("databases/"):
                continue
            if database_config["uuid"] not in database_uuids:
                continue
            imported = import_database(session,
                                       database_config,
                                       overwrite=False)
            database_ids[str(imported.uuid)] = imported.id

        # saved queries: attached to the ID of their imported database
        for path, query_config in configs.items():
            if not path.startswith("queries/"):
                continue
            if query_config["database_uuid"] not in database_ids:
                continue
            query_config["db_id"] = database_ids[
                query_config["database_uuid"]]
            import_saved_query(session, query_config, overwrite=overwrite)
示例#10
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import the dashboards in ``configs`` together with their parents.

        The dependency chain is discovered bottom-up (dashboards -> charts ->
        datasets -> databases) and imported top-down.  Databases, datasets
        and charts are imported with ``overwrite=False``; ``overwrite`` is
        applied to the dashboards only.  Finally, every dashboard/chart pair
        that is not yet stored in ``dashboard_slices`` is inserted.

        :param session: SQLAlchemy session used for all reads and writes
        :param configs: mapping of bundle file name to parsed config; dataset
            and chart configs are updated in place with the resolved IDs
        :param overwrite: forwarded to ``import_dashboard`` only
        :raises KeyError: if a dashboard position references a chart UUID
            that was not imported from this bundle
        """
        # discover charts associated with dashboards
        chart_uuids: Set[str] = set()
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                chart_uuids.update(find_chart_uuids(config["position"]))

        # discover datasets associated with charts
        dataset_uuids: Set[str] = set()
        for file_name, config in configs.items():
            if file_name.startswith(
                    "charts/") and config["uuid"] in chart_uuids:
                dataset_uuids.add(config["dataset_uuid"])

        # discover databases associated with datasets
        database_uuids: Set[str] = set()
        for file_name, config in configs.items():
            if file_name.startswith(
                    "datasets/") and config["uuid"] in dataset_uuids:
                database_uuids.add(config["database_uuid"])

        # import related databases
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith(
                    "databases/") and config["uuid"] in database_uuids:
                database = import_database(session, config, overwrite=False)
                database_ids[str(database.uuid)] = database.id

        # import datasets with the correct parent ref
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if (file_name.startswith("datasets/")
                    and config["database_uuid"] in database_ids):
                config["database_id"] = database_ids[config["database_uuid"]]
                dataset = import_dataset(session, config, overwrite=False)
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type":
                    "view" if dataset.is_sqllab_view else "table",
                    "datasource_name": dataset.table_name,
                }

        # import charts with the correct parent ref
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if (file_name.startswith("charts/")
                    and config["dataset_uuid"] in dataset_info):
                # update datasource id, type, and name
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=False)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts as a
        # set of plain tuples, so the membership test below is a hash lookup
        existing_relationships = {
            tuple(row)
            for row in session.execute(
                select([
                    dashboard_slices.c.dashboard_id,
                    dashboard_slices.c.slice_id,
                ])).fetchall()
        }

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                dashboard = import_dashboard(session,
                                             config,
                                             overwrite=overwrite)

                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [{
            "dashboard_id": dashboard_id,
            "slice_id": chart_id
        } for (dashboard_id, chart_id) in dashboard_chart_ids]
        # An empty parameter list is treated by SQLAlchemy as "no parameters"
        # and would run a bare INSERT, adding a row with only default values,
        # so the statement is skipped when there is nothing to link.
        if values:
            # pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656)
            session.execute(dashboard_slices.insert(), values)
示例#11
0
    def _import(session: Session,
                configs: Dict[str, Any],
                overwrite: bool = False) -> None:
        """
        Import the databases, datasets, charts and dashboards in ``configs``.

        Every dataset is attached to the database named ``examples``,
        regardless of the databases imported from the bundle.  Finally, every
        dashboard/chart pair that is not yet stored in ``dashboard_slices``
        is inserted.

        :param session: SQLAlchemy session handed to the import helpers and
            used for the ``dashboard_slices`` statements
        :param configs: mapping of bundle file name to parsed config; dataset
            and chart configs are updated in place with the resolved IDs
        :param overwrite: forwarded to every import helper
        :raises KeyError: if a chart references a dataset UUID, or a
            dashboard a chart UUID, that was not imported from this bundle
        """
        # import databases
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("databases/"):
                database = import_database(session,
                                           config,
                                           overwrite=overwrite)
                database_ids[str(database.uuid)] = database.id

        # import datasets
        # TODO (betodealmeida): once we have all examples being imported we can
        # have a stable UUID for the database stored in the dataset YAML; for
        # now we need to fetch the current ID.
        # ``.one()`` raises unless exactly one "examples" database exists.
        examples_id = (db.session.query(Database).filter_by(
            database_name="examples").one().id)
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
                config["database_id"] = examples_id
                dataset = import_dataset(session, config, overwrite=overwrite)
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type":
                    "view" if dataset.is_sqllab_view else "table",
                    "datasource_name": dataset.table_name,
                }

        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("charts/"):
                # update datasource id, type, and name
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=overwrite)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts as a
        # set of plain tuples, so the membership test below is a hash lookup
        existing_relationships = {
            tuple(row)
            for row in session.execute(
                select([
                    dashboard_slices.c.dashboard_id,
                    dashboard_slices.c.slice_id,
                ])).fetchall()
        }

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                config = update_id_refs(config, chart_ids)
                dashboard = import_dashboard(session,
                                             config,
                                             overwrite=overwrite)
                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [{
            "dashboard_id": dashboard_id,
            "slice_id": chart_id
        } for (dashboard_id, chart_id) in dashboard_chart_ids]
        # An empty parameter list is treated by SQLAlchemy as "no parameters"
        # and would run a bare INSERT, adding a row with only default values,
        # so the statement is skipped when there is nothing to link.
        if values:
            # pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656)
            session.execute(dashboard_slices.insert(), values)
示例#12
0
    def _import(
        session: Session,
        configs: Dict[str, Any],
        overwrite: bool = False,
        force_data: bool = False,
    ) -> None:
        """
        Import the databases, datasets, charts and dashboards in ``configs``.

        A dataset whose ``database_uuid`` does not match a database imported
        from the bundle is attached to the database named ``examples``.
        Imported dashboards are marked as published.  Finally, every
        dashboard/chart pair that is not yet stored in ``dashboard_slices``
        is inserted.

        :param session: SQLAlchemy session handed to the import helpers and
            used for the ``dashboard_slices`` statements
        :param configs: mapping of bundle file name to parsed config; dataset
            and chart configs are updated in place with the resolved IDs
        :param overwrite: forwarded to every import helper
        :param force_data: forwarded to ``import_dataset``
        :raises Exception: if a dataset needs the ``examples`` database and
            it does not exist
        :raises KeyError: if a chart references a dataset UUID, or a
            dashboard a chart UUID, that was not imported from this bundle
        """
        # import databases
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("databases/"):
                database = import_database(session, config, overwrite=overwrite)
                database_ids[str(database.uuid)] = database.id

        # import datasets
        # If database_uuid is not in the list of UUIDs it means that the examples
        # database was created before its UUID was frozen, so it has a random UUID.
        # We need to determine its ID so we can point the dataset to it.
        examples_db = (
            db.session.query(Database).filter_by(database_name="examples").first()
        )
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
                # find the ID of the corresponding database
                if config["database_uuid"] not in database_ids:
                    if examples_db is None:
                        raise Exception("Cannot find examples database")
                    config["database_id"] = examples_db.id
                else:
                    config["database_id"] = database_ids[config["database_uuid"]]

                dataset = import_dataset(
                    session, config, overwrite=overwrite, force_data=force_data
                )
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type": "view" if dataset.is_sqllab_view else "table",
                    "datasource_name": dataset.table_name,
                }

        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("charts/"):
                # update datasource id, type, and name
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=overwrite)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts as a
        # set of plain tuples, so the membership test below is a hash lookup
        existing_relationships = {
            tuple(row)
            for row in session.execute(
                select([dashboard_slices.c.dashboard_id, dashboard_slices.c.slice_id])
            ).fetchall()
        }

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                config = update_id_refs(config, chart_ids, dataset_info)
                dashboard = import_dashboard(session, config, overwrite=overwrite)
                dashboard.published = True

                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [
            {"dashboard_id": dashboard_id, "slice_id": chart_id}
            for (dashboard_id, chart_id) in dashboard_chart_ids
        ]
        # An empty parameter list is treated by SQLAlchemy as "no parameters" and
        # would run a bare INSERT, adding a row with only default values, so the
        # statement is skipped when there is nothing to link.
        if values:
            # pylint: disable=no-value-for-parameter # sqlalchemy/issues/4656
            session.execute(dashboard_slices.insert(), values)
示例#13
0
    def _import(  # pylint: disable=arguments-differ, too-many-locals, too-many-branches
        session: Session,
        configs: Dict[str, Any],
        overwrite: bool = False,
        force_data: bool = False,
    ) -> None:
        """
        Import the databases, datasets, charts and dashboards in ``configs``.

        A dataset whose ``database_uuid`` does not match a database imported
        from the bundle is attached to the examples database, and a dataset
        without a schema gets the default examples schema.  Datasets whose
        import raises ``MultipleResultsFound`` are skipped, as are the charts
        that depend on them and the dashboards whose references cannot be
        resolved.  Imported dashboards are marked as published.  Finally,
        every dashboard/chart pair that is not yet stored in
        ``dashboard_slices`` is inserted.

        :param session: SQLAlchemy session used for all reads and writes
        :param configs: mapping of bundle file name to parsed config; dataset
            and chart configs are updated in place with the resolved IDs
        :param overwrite: forwarded to every import helper
        :param force_data: forwarded to ``import_dataset``
        :raises Exception: if a dataset needs the examples database and it
            does not exist
        """
        # import databases
        database_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("databases/"):
                database = import_database(session, config, overwrite=overwrite)
                database_ids[str(database.uuid)] = database.id

        # import datasets
        # If database_uuid is not in the list of UUIDs it means that the examples
        # database was created before its UUID was frozen, so it has a random UUID.
        # We need to determine its ID so we can point the dataset to it.
        examples_db = get_example_database()
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
                # find the ID of the corresponding database
                if config["database_uuid"] not in database_ids:
                    if examples_db is None:
                        raise Exception("Cannot find examples database")
                    config["database_id"] = examples_db.id
                else:
                    config["database_id"] = database_ids[config["database_uuid"]]

                # set schema
                if config["schema"] is None:
                    config["schema"] = get_example_default_schema()

                # The dataset is imported exactly once, and only inside the
                # ``try``: a call outside it would let MultipleResultsFound
                # escape the handler below.
                try:
                    dataset = import_dataset(
                        session, config, overwrite=overwrite, force_data=force_data
                    )
                except MultipleResultsFound:
                    # Multiple result can be found for datasets. There was a bug in
                    # load-examples that resulted in datasets being loaded with a NULL
                    # schema. Users could then add a new dataset with the same name in
                    # the correct schema, resulting in duplicates, since the uniqueness
                    # constraint was not enforced correctly in the application logic.
                    # See https://github.com/apache/superset/issues/16051.
                    continue

                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type": "table",
                    "datasource_name": dataset.table_name,
                }

        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if (
                file_name.startswith("charts/")
                and config["dataset_uuid"] in dataset_info
            ):
                # update datasource id, type, and name
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=overwrite)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts as a
        # set of plain tuples, so the membership test below is a hash lookup
        existing_relationships = {
            tuple(row)
            for row in session.execute(
                select([dashboard_slices.c.dashboard_id, dashboard_slices.c.slice_id])
            ).fetchall()
        }

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                # skip dashboards whose references cannot be resolved
                try:
                    config = update_id_refs(config, chart_ids, dataset_info)
                except KeyError:
                    continue

                dashboard = import_dashboard(session, config, overwrite=overwrite)
                dashboard.published = True

                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [
            {"dashboard_id": dashboard_id, "slice_id": chart_id}
            for (dashboard_id, chart_id) in dashboard_chart_ids
        ]
        # An empty parameter list is treated by SQLAlchemy as "no parameters" and
        # would run a bare INSERT, adding a row with only default values, so the
        # statement is skipped when there is nothing to link.
        if values:
            session.execute(dashboard_slices.insert(), values)