def delete_unmanaged_views_and_tables_from_dataset(
    bq_client: BigQueryClient,
    dataset_id: str,
    managed_tables: Set[BigQueryAddress],
    dry_run: bool,
) -> Set[BigQueryAddress]:
    """Removes every table/view in the dataset that is not in |managed_tables|.

    The tables currently listed by BigQuery for |dataset_id| are compared against
    |managed_tables|; anything listed but not managed is treated as unmanaged.
    When |dry_run| is True the unmanaged tables/views are only logged, otherwise
    each one is deleted through |bq_client|.

    Returns the set of BigQueryAddress objects for the unmanaged tables/views
    (i.e. the ones that were deleted, or would be deleted in a regular run).

    Raises a ValueError if the dataset does not exist in BigQuery.
    """
    orphaned_addresses: Set[BigQueryAddress] = set()
    dataset_ref = bq_client.dataset_ref_for_id(dataset_id)
    if not bq_client.dataset_exists(dataset_ref):
        raise ValueError("Dataset %s does not exist in BigQuery" % dataset_id)

    # Collect the address of everything BigQuery lists that we do not manage.
    orphaned_addresses.update(
        address
        for address in map(
            BigQueryAddress.from_list_item, bq_client.list_tables(dataset_id)
        )
        if address not in managed_tables
    )

    if dry_run:
        # Report only; nothing is removed from BigQuery in a dry run.
        for address in orphaned_addresses:
            logging.info(
                "[DRY RUN] Regular run would delete unmanaged table/view %s from dataset %s.",
                address.table_id,
                address.dataset_id,
            )
        return orphaned_addresses

    for address in orphaned_addresses:
        logging.info(
            "Deleting unmanaged table/view %s from dataset %s.",
            address.table_id,
            address.dataset_id,
        )
        bq_client.delete_table(address.dataset_id, address.table_id)
    return orphaned_addresses
Пример #2
0
def delete_temp_table_if_exists(
    big_query_client: BigQueryClient,
    temp_table_name: str,
    cloud_sql_to_bq_config: CloudSqlToBQConfig,
) -> None:
    """Deletes the table named |temp_table_name| from the config's dataset, if present.

    The dataset is the one described by |cloud_sql_to_bq_config|. If the table is
    not found there, nothing is deleted and an informational message is logged.
    """
    target_dataset_ref = cloud_sql_to_bq_config.get_dataset_ref(big_query_client)
    table_is_present = big_query_client.table_exists(
        dataset_ref=target_dataset_ref, table_id=temp_table_name
    )

    if table_is_present:
        big_query_client.delete_table(
            dataset_id=cloud_sql_to_bq_config.dataset_id, table_id=temp_table_name
        )
        logging.info('Deleted temporary table [%s]', temp_table_name)
    else:
        # Nothing to clean up; note it in the logs and move on.
        logging.info(
            'Delete temp table failed, table [%s] does not exist.', temp_table_name
        )
Пример #3
0
def _create_or_update_view_and_materialize_if_necessary(
    bq_client: BigQueryClient,
    view: BigQueryView,
    parent_results: Dict[BigQueryView, bool],
    force_materialize: bool,
) -> bool:
    """Deploys |view| to BigQuery and, when warranted, materializes it into a table.

    The view is considered changed if it did not previously exist in BigQuery, if
    its query differs from the deployed one, or if its schema differs after being
    re-created. Materialization happens only for views with a materialized
    address, and only when this view changed, any entry in |parent_results| is
    True, the materialized table is missing, or |force_materialize| is set.

    Returns True if this view changed, any parent reported a change, or
    |force_materialize| was set.
    """
    parent_changed = any(parent_results.values())
    dataset_ref = bq_client.dataset_ref_for_id(view.dataset_id)

    try:
        deployed_view = bq_client.get_table(dataset_ref, view.view_id)
        # A differing query text means the view definition itself was edited.
        view_changed = deployed_view.view_query != view.view_query
        previous_schema = deployed_view.schema
    except exceptions.NotFound:
        # The view has never been deployed, so it counts as changed.
        view_changed, previous_schema = True, None

    # TODO(https://issuetracker.google.com/issues/180636362): Currently we have to delete and recreate the view for
    # changes from underlying tables to be reflected in its schema.
    view_previously_existed = previous_schema is not None
    if view_previously_existed:
        bq_client.delete_table(dataset_ref.dataset_id, view.view_id)
    refreshed_view = bq_client.create_or_update_view(view)

    # A schema difference is also treated as a change, just in case a parent
    # view or table has added a column.
    schema_changed = refreshed_view.schema != previous_schema
    view_changed = view_changed or schema_changed

    self_or_parent_changed = view_changed or parent_changed

    materialized_address = view.materialized_address
    if not materialized_address:
        return self_or_parent_changed or force_materialize

    materialized_dataset_ref = bq_client.dataset_ref_for_id(
        materialized_address.dataset_id
    )
    # The existence check only runs when neither this view nor a parent changed.
    should_materialize = (
        self_or_parent_changed
        or not bq_client.table_exists(
            materialized_dataset_ref, materialized_address.table_id
        )
        or force_materialize
    )
    if should_materialize:
        bq_client.materialize_view_to_table(view)
    else:
        logging.info(
            "Skipping materialization of view [%s.%s] which has not changed.",
            view.dataset_id,
            view.view_id,
        )
    return self_or_parent_changed or force_materialize