Exemplo n.º 1
0
def _delete_empty_datasets() -> None:
    """Remove BigQuery datasets that contain no tables and are old enough.

    Every dataset returned by the client is re-fetched to obtain its creation
    time. A dataset is deleted only when listing its tables yields nothing
    and its age in seconds exceeds DATASET_DELETION_MIN_SECONDS.
    """
    client = BigQueryClientImpl()

    for listed in client.list_datasets():
        ref = client.dataset_ref_for_id(listed.dataset_id)
        full_dataset = client.get_dataset(ref)

        # peekable lets us test for "any tables at all" via truthiness
        # (presumably more_itertools.peekable -- confirm against imports).
        table_iter = peekable(client.list_tables(full_dataset.dataset_id))

        created_at = full_dataset.created
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        age_seconds = (now_utc - created_at).total_seconds()

        if table_iter:
            # At least one table exists; leave the dataset alone.
            continue
        if age_seconds <= DATASET_DELETION_MIN_SECONDS:
            # Too new to delete; skip it for now.
            continue

        logging.info(
            "Dataset %s is empty and was not created very recently. Deleting...",
            ref.dataset_id,
        )
        client.delete_dataset(ref)
Exemplo n.º 2
0
def main(dry_run: bool) -> None:
    """Delete temporary BigQuery datasets not modified within the last day.

    Only datasets whose id starts with ``temp_dataset_`` are considered. A
    candidate is deleted (together with its contents) when its ``modified``
    timestamp is set and is older than 24 hours before now, measured in UTC.
    All other candidates are logged as skipped.

    Args:
        dry_run: When True, log which datasets would be deleted without
            deleting anything.
    """
    client = BigQueryClientImpl()
    candidate_deletable_datasets = [
        d for d in client.list_datasets()
        if d.dataset_id.startswith("temp_dataset_")
    ]

    # Take an aware "now" directly in UTC. Calling datetime.now() with no
    # tz and then replace(tzinfo=UTC) would relabel the host's local
    # wall-clock time as UTC, shifting the cutoff by the local UTC offset.
    cutoff_date = datetime.now(tz=pytz.UTC) - timedelta(days=1)

    for candidate in candidate_deletable_datasets:
        dataset = client.get_dataset(candidate.dataset_id)

        if dataset.modified is None or dataset.modified >= cutoff_date:
            # NOTE: the message says "created", but the check is on the
            # last-modified time (or its absence). Wording kept as-is.
            logging.info("Skipping %s because it was created too recently.",
                         dataset.dataset_id)
            continue

        if dry_run:
            logging.info("[Dry-run] Would delete %s", dataset.dataset_id)
        else:
            logging.info("Deleting %s...", dataset.dataset_id)
            client.delete_dataset(dataset,
                                  delete_contents=True,
                                  not_found_ok=True)