Example #1
    def test_delete_bucket(self, mock_hook):
        operator = GCSDeleteBucketOperator(task_id=TASK_ID,
                                           bucket_name=TEST_BUCKET)

        operator.execute(None)
        mock_hook.return_value.delete_bucket.assert_called_once_with(
            bucket_name=TEST_BUCKET, force=True)
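The mock_hook argument above is normally injected by patching GCSHook where the operator resolves it. A minimal sketch of the surrounding test scaffolding, assuming unittest.mock; the patch target, class name, and constant values are illustrative assumptions, not taken from the excerpt:

from unittest import mock

from airflow.providers.google.cloud.operators.gcs import GCSDeleteBucketOperator

TASK_ID = "delete-bucket-task"  # illustrative value
TEST_BUCKET = "test-bucket"     # illustrative value


class TestGCSDeleteBucketOperator:
    # Patch where the operator looks up GCSHook so no real GCS call is made;
    # the target string is an assumption about the operator's module path.
    @mock.patch("airflow.providers.google.cloud.operators.gcs.GCSHook")
    def test_delete_bucket(self, mock_hook):
        ...  # test body as shown above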

    copy_file = GCSToGCSOperator(
        task_id="copy_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        destination_bucket=BUCKET_2,
        destination_object=BUCKET_FILE_LOCATION,
    )

    delete_files = GCSDeleteObjectsOperator(
        task_id="delete_files", bucket_name=BUCKET_1, objects=[BUCKET_FILE_LOCATION]
    )

    # [START howto_operator_gcs_delete_bucket]
    delete_bucket_1 = GCSDeleteBucketOperator(task_id="delete_bucket_1", bucket_name=BUCKET_1)
    delete_bucket_2 = GCSDeleteBucketOperator(task_id="delete_bucket_2", bucket_name=BUCKET_2)
    # [END howto_operator_gcs_delete_bucket]
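    # force defaults to True for GCSDeleteBucketOperator, so any remaining objects
    # are removed along with the bucket (the unit test above asserts force=True).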

    [create_bucket1, create_bucket2] >> list_buckets >> list_buckets_result
    [create_bucket1, create_bucket2] >> upload_file
    upload_file >> [download_file, copy_file]
    upload_file >> gcs_bucket_create_acl_entry_task >> gcs_object_create_acl_entry_task >> delete_files

    create_bucket1 >> delete_bucket_1
    create_bucket2 >> delete_bucket_2
    create_bucket2 >> copy_file
    create_bucket1 >> copy_file
    list_buckets >> delete_bucket_1
    upload_file >> delete_bucket_1
    create_bucket1 >> upload_file >> delete_bucket_1
        destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
        write_disposition='WRITE_TRUNCATE',
    )

    read_data_from_gcs_many_chunks = BigQueryInsertJobOperator(
        task_id="read_data_from_gcs_many_chunks",
        configuration={
            "query": {
                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
                "useLegacySql": False,
            }
        },
    )

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name=GCS_BUCKET,
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset",
        project_id=GCP_PROJECT_ID,
        dataset_id=DATASET_NAME,
        delete_contents=True,
    )

    chain(
        create_bucket,
        create_dataset,
        create_table,
        run_operator,
        load_csv,
        read_data_from_gcs_many_chunks,
        delete_bucket,
        delete_dataset,
    )
Example #4
        schedule_interval='@once',
        start_date=datetime(2021, 1, 1),
        catchup=False,
        tags=['example'],
) as dag:
    create_s3_bucket = S3CreateBucketOperator(task_id="create_s3_bucket",
                                              bucket_name=S3BUCKET_NAME,
                                              region_name='us-east-1')

    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )
    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(task_id='s3_to_gcs_task',
                                      bucket=S3BUCKET_NAME,
                                      prefix=PREFIX,
                                      dest_gcs=GCS_BUCKET_URL)
    # [END howto_transfer_s3togcs_operator]

    delete_s3_bucket = S3DeleteBucketOperator(task_id='delete_s3_bucket',
                                              bucket_name=S3BUCKET_NAME,
                                              force_delete=True)

    delete_gcs_bucket = GCSDeleteBucketOperator(task_id='delete_gcs_bucket',
                                                bucket_name=GCS_BUCKET)

    (create_s3_bucket >> upload_file() >> create_gcs_bucket >> transfer_to_gcs
     >> delete_s3_bucket >> delete_gcs_bucket)
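The upload_file() task in the chain above is not included in this excerpt. A minimal sketch of what such a task might look like, assuming the TaskFlow API and S3Hook.load_string; the object key and payload are illustrative:

from airflow.decorators import task
from airflow.providers.amazon.aws.hooks.s3 import S3Hook

@task
def upload_file():
    # Put a small object under PREFIX so the S3-to-GCS transfer has something
    # to copy; S3BUCKET_NAME and PREFIX are the module-level constants above.
    hook = S3Hook()
    hook.load_string(
        string_data="example payload",
        key=f"{PREFIX}/example.txt",
        bucket_name=S3BUCKET_NAME,
        replace=True,
    )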
Example #5
import os
import uuid

from airflow import models
from airflow.operators.bash import BashOperator
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSListObjectsOperator,
)
from airflow.utils.dates import days_ago
from airflow.utils.state import State
UUID = uuid.uuid4()
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "leah-playground")
BUCKET_NAME = f"leah-{UUID}"  # UUID suffix keeps the bucket name globally unique

with models.DAG(
    "example_gcs",
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID)

    list_objects = GCSListObjectsOperator(task_id="list_objects", bucket=BUCKET_NAME)
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_objects') }}\"",
    )
    delete_bucket = GCSDeleteBucketOperator(task_id="delete_bucket", bucket_name=BUCKET_NAME)

    create_bucket >> list_objects >> list_buckets_result >> delete_bucket


if __name__ == "__main__":
    dag.clear(dag_run_state=State.NONE)
    dag.run()
Example #6
# Assumes an Airflow Variable named "gcp_project" is set to the GCP project id
PROJECT_ID = models.Variable.get("gcp_project")
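# The "gcp_project" Variable can be created ahead of time; a minimal sketch,
# assuming a reachable Airflow metadata database (the project id is illustrative):
#
#   from airflow.models import Variable
#   Variable.set("gcp_project", "my-gcp-project")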

with models.DAG(
        "example_gcs",
        start_date=days_ago(1),
        schedule_interval=None,
) as dag:
    generate_uuid = PythonOperator(task_id='generate_uuid',
                                   python_callable=lambda: str(uuid.uuid4()))
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}",
        project_id=PROJECT_ID)
    list_objects = GCSListObjectsOperator(
        task_id="list_objects",
        bucket="{{ task_instance.xcom_pull('generate_uuid') }}")
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_objects') }}\"",
    )
    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}")

    generate_uuid >> create_bucket >> list_objects >> list_buckets_result >> delete_bucket

if __name__ == "__main__":
    dag.clear(dag_run_state=State.NONE)
    dag.run()