# The GCSHook is patched so the test makes no real GCS calls; the patch target
# assumes the operator module's import path.
@mock.patch("airflow.providers.google.cloud.operators.gcs.GCSHook")
def test_delete_bucket(self, mock_hook):
    operator = GCSDeleteBucketOperator(task_id=TASK_ID, bucket_name=TEST_BUCKET)
    operator.execute(None)
    mock_hook.return_value.delete_bucket.assert_called_once_with(
        bucket_name=TEST_BUCKET, force=True
    )
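# The test above references names defined elsewhere in the test module. A
# minimal standalone harness, with illustrative (hypothetical) constant values:
from unittest import mock

from airflow.providers.google.cloud.operators.gcs import GCSDeleteBucketOperator

TASK_ID = "test-task-id"     # hypothetical value
TEST_BUCKET = "test-bucket"  # hypothetical value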
)
copy_file = GCSToGCSOperator(
    task_id="copy_file",
    source_bucket=BUCKET_1,
    source_object=BUCKET_FILE_LOCATION,
    destination_bucket=BUCKET_2,
    destination_object=BUCKET_FILE_LOCATION,
)
delete_files = GCSDeleteObjectsOperator(
    task_id="delete_files", bucket_name=BUCKET_1, objects=[BUCKET_FILE_LOCATION]
)

# [START howto_operator_gcs_delete_bucket]
# Task ids must be unique within a DAG, so the two delete tasks get distinct ids.
delete_bucket_1 = GCSDeleteBucketOperator(task_id="delete_bucket_1", bucket_name=BUCKET_1)
delete_bucket_2 = GCSDeleteBucketOperator(task_id="delete_bucket_2", bucket_name=BUCKET_2)
# [END howto_operator_gcs_delete_bucket]

# Task dependencies; the remaining tasks are defined earlier in the example file.
[create_bucket1, create_bucket2] >> list_buckets >> list_buckets_result
[create_bucket1, create_bucket2] >> upload_file
upload_file >> [download_file, copy_file]
upload_file >> gcs_bucket_create_acl_entry_task >> gcs_object_create_acl_entry_task >> delete_files

create_bucket1 >> delete_bucket_1
create_bucket2 >> delete_bucket_2
create_bucket2 >> copy_file
create_bucket1 >> copy_file
list_buckets >> delete_bucket_1
upload_file >> delete_bucket_1
create_bucket1 >> upload_file >> delete_bucket_1
    # (closing arguments of the load_csv task, whose opening is above this excerpt)
    destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
    write_disposition='WRITE_TRUNCATE',
)
read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
    task_id="read_data_from_gcs_many_chunks",
    configuration={
        "query": {
            "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
            "useLegacySql": False,
        }
    },
)
delete_bucket = GCSDeleteBucketOperator(
    task_id="delete_bucket",
    bucket_name=GCS_BUCKET,
)
delete_dataset = BigQueryDeleteDatasetOperator(
    task_id="delete_dataset",
    project_id=GCP_PROJECT_ID,
    dataset_id=DATASET_NAME,
    delete_contents=True,
)
chain(
    create_bucket,
    create_dataset,
    create_table,
    run_operator,
    load_csv,
    # The excerpt cuts off here; the remaining tasks defined above complete
    # the pipeline in declaration order.
    read_data_from_gcs_many_chunks,
    delete_bucket,
    delete_dataset,
)
    schedule_interval='@once',
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=['example'],
) as dag:
    create_s3_bucket = S3CreateBucketOperator(
        task_id="create_s3_bucket", bucket_name=S3BUCKET_NAME, region_name='us-east-1'
    )
    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )
    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(
        task_id='s3_to_gcs_task', bucket=S3BUCKET_NAME, prefix=PREFIX, dest_gcs=GCS_BUCKET_URL
    )
    # [END howto_transfer_s3togcs_operator]
    delete_s3_bucket = S3DeleteBucketOperator(
        task_id='delete_s3_bucket', bucket_name=S3BUCKET_NAME, force_delete=True
    )
    delete_gcs_bucket = GCSDeleteBucketOperator(
        task_id='delete_gcs_bucket', bucket_name=GCS_BUCKET
    )

    (
        create_s3_bucket
        >> upload_file()
        >> create_gcs_bucket
        >> transfer_to_gcs
        >> delete_s3_bucket
        >> delete_gcs_bucket
    )
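# upload_file() is called in the chain above but not defined in this excerpt.
# In the full example it is declared before the DAG body; a minimal sketch,
# assuming UPLOAD_FILE is a module-level path to a local fixture file:
from airflow.decorators import task
from airflow.providers.amazon.aws.hooks.s3 import S3Hook


@task(task_id="upload_file")
def upload_file():
    """Seed the source S3 bucket so the transfer has an object to copy."""
    s3_hook = S3Hook()
    s3_hook.load_file(filename=UPLOAD_FILE, key=PREFIX, bucket_name=S3BUCKET_NAME)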
import os
import uuid

from airflow import models
from airflow.operators.bash import BashOperator
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSListObjectsOperator,
)
from airflow.utils.dates import days_ago
from airflow.utils.state import State

UUID = uuid.uuid4()
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "leah-playground")
BUCKET_NAME = f"leah-{UUID}"

with models.DAG(
    "example_gcs",
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
    )
    list_objects = GCSListObjectsOperator(task_id="list_objects", bucket=BUCKET_NAME)
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_objects') }}\"",
    )
    delete_bucket = GCSDeleteBucketOperator(task_id="delete_bucket", bucket_name=BUCKET_NAME)

    # list_buckets_result reads the listing from XCom, so it must run after
    # list_objects and before the bucket is deleted.
    create_bucket >> list_objects >> list_buckets_result >> delete_bucket

if __name__ == "__main__":
    dag.clear(dag_run_state=State.NONE)
    dag.run()
# Assumes existence of an Airflow Variable set to the name of the GCP project.
# It can be created with the CLI, e.g.: airflow variables set gcp_project my-project-id
import uuid

from airflow import models
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSListObjectsOperator,
)
from airflow.utils.dates import days_ago
from airflow.utils.state import State

PROJECT_ID = models.Variable.get("gcp_project")

with models.DAG(
    "example_gcs",
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    # Generate a unique bucket name at runtime and share it between tasks via XCom.
    generate_uuid = PythonOperator(
        task_id='generate_uuid', python_callable=lambda: str(uuid.uuid4())
    )
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}",
        project_id=PROJECT_ID,
    )
    list_objects = GCSListObjectsOperator(
        task_id="list_objects",
        bucket="{{ task_instance.xcom_pull('generate_uuid') }}",
    )
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_objects') }}\"",
    )
    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}",
    )

    generate_uuid >> create_bucket >> list_objects >> list_buckets_result >> delete_bucket

if __name__ == "__main__":
    dag.clear(dag_run_state=State.NONE)
    dag.run()