Example #1
    # mock_hook is injected by patching the hook class used inside the operator.
    @mock.patch("airflow.providers.google.cloud.transfers.gcs_to_local.GCSHook")
    def test_execute(self, mock_hook):
        operator = GCSToLocalFilesystemOperator(
            task_id=TASK_ID,
            bucket=TEST_BUCKET,
            object_name=TEST_OBJECT,
            filename=LOCAL_FILE_PATH,
        )

        operator.execute(None)
        mock_hook.return_value.download.assert_called_once_with(
            bucket_name=TEST_BUCKET, object_name=TEST_OBJECT, filename=LOCAL_FILE_PATH
        )
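
For context, test_execute above lives in a test class and relies on module-level imports and constants along these lines; the constant values here are illustrative stand-ins, not the real test module's:

from unittest import mock

from airflow.providers.google.cloud.transfers.gcs_to_local import GCSToLocalFilesystemOperator

# Illustrative stand-ins for the module-level constants the test references.
TASK_ID = "download-file"
TEST_BUCKET = "test-bucket"
TEST_OBJECT = "path/to/test-object.txt"
LOCAL_FILE_PATH = "/tmp/test-object.txt"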
Example #2
    # The opening of this call is truncated in the source; the operator name,
    # task_id, runner, and jar below are reconstructed assumptions.
    start_java_job = BeamRunJavaPipelineOperator(
        task_id="start-java-job",
        runner="DataflowRunner",
        jar=GCS_JAR,
        pipeline_options={
            'output': GCS_OUTPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
        dataflow_config={
            "check_if_running": CheckJobRunning.IgnoreJob,
            "location": 'europe-west3',
            "poll_sleep": 10,
        },
    )
    # [END howto_operator_start_java_job_jar_on_gcs]

    # [START howto_operator_start_java_job_local_jar]
    jar_to_local = GCSToLocalFilesystemOperator(
        task_id="jar-to-local",
        bucket=GCS_JAR_BUCKET_NAME,
        object_name=GCS_JAR_OBJECT_NAME,
        filename="/tmp/dataflow-{{ ds_nodash }}.jar",
    )

    start_java_job_local = BeamRunJavaPipelineOperator(
        task_id="start-java-job-local",
        jar="/tmp/dataflow-{{ ds_nodash }}.jar",
        pipeline_options={
            'output': GCS_OUTPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
        dataflow_config={
            "check_if_running": CheckJobRunning.WaitForRun,
            "location": 'europe-west3',
            "poll_sleep": 10,
        },
Example #3
    # [END howto_operator_gcs_bucket_create_acl_entry_task]

    # [START howto_operator_gcs_object_create_acl_entry_task]
    gcs_object_create_acl_entry_task = GCSObjectCreateAclEntryOperator(
        bucket=BUCKET_1,
        object_name=BUCKET_FILE_LOCATION,
        entity=GCS_ACL_ENTITY,
        role=GCS_ACL_OBJECT_ROLE,
        task_id="gcs_object_create_acl_entry_task",
    )
    # [END howto_operator_gcs_object_create_acl_entry_task]

    # [START howto_operator_gcs_download_file_task]
    download_file = GCSToLocalFilesystemOperator(
        task_id="download_file",
        object_name=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
        filename=PATH_TO_SAVED_FILE,
    )
    # [END howto_operator_gcs_download_file_task]

    copy_file = GCSToGCSOperator(
        task_id="copy_file",
        source_bucket=BUCKET_1,
        source_object=BUCKET_FILE_LOCATION,
        destination_bucket=BUCKET_2,
        destination_object=BUCKET_FILE_LOCATION,
    )

    delete_files = GCSDeleteObjectsOperator(
        task_id="delete_files", bucket_name=BUCKET_1, objects=[BUCKET_FILE_LOCATION]
    )
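
The surrounding example DAG chains these tasks; one plausible ordering (the real DAG also contains bucket-creation and upload tasks not shown here) would be:

    gcs_object_create_acl_entry_task >> download_file >> copy_file >> delete_files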
Example #4
# Imports and the opening of this dict are truncated in the source; the lines
# down through DEFAULT_ARGS are a plausible reconstruction so the snippet parses.
from airflow import models
from airflow.providers.apache.beam.operators.beam import BeamRunJavaPipelineOperator
from airflow.providers.google.cloud.transfers.gcs_to_local import GCSToLocalFilesystemOperator
from airflow.utils.dates import days_ago

DEFAULT_ARGS = {
    'default_pipeline_options': {
        'output': '/tmp/example_beam',
    },
    "trigger_rule": "all_done",
}

with models.DAG(
        "example_beam_native_java_direct_runner",
        schedule_interval=None,  # Override to match your needs
        start_date=days_ago(1),
        tags=['example'],
) as dag_native_java_direct_runner:

    # [START howto_operator_start_java_direct_runner_pipeline]
    jar_to_local_direct_runner = GCSToLocalFilesystemOperator(
        task_id="jar_to_local_direct_runner",
        bucket=GCS_JAR_DIRECT_RUNNER_BUCKET_NAME,
        object_name=GCS_JAR_DIRECT_RUNNER_OBJECT_NAME,
        filename="/tmp/beam_wordcount_direct_runner_{{ ds_nodash }}.jar",
    )

    start_java_pipeline_direct_runner = BeamRunJavaPipelineOperator(
        task_id="start_java_pipeline_direct_runner",
        jar="/tmp/beam_wordcount_direct_runner_{{ ds_nodash }}.jar",
        pipeline_options={
            'output': '/tmp/start_java_pipeline_direct_runner',
            'inputFile': GCS_INPUT,
        },
        job_class='org.apache.beam.examples.WordCount',
    )

    jar_to_local_direct_runner >> start_java_pipeline_direct_runner
    # [END howto_operator_start_java_direct_runner_pipeline]
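
The GCS_JAR_DIRECT_RUNNER_* constants and GCS_INPUT come from earlier in the example file. As an illustrative sketch (the variable names and default value here are assumptions), a gs:// URI can be split into the bucket and object name the download operator expects like this:

import os
from urllib.parse import urlparse

# Illustrative: derive the bucket/object constants from a gs:// URI in an env var.
GCS_JAR_DIRECT_RUNNER = os.environ.get(
    "APACHE_BEAM_DIRECT_RUNNER_JAR", "gs://example-bucket/wordcount-direct-runner.jar"
)
_parsed = urlparse(GCS_JAR_DIRECT_RUNNER)
GCS_JAR_DIRECT_RUNNER_BUCKET_NAME = _parsed.netloc
GCS_JAR_DIRECT_RUNNER_OBJECT_NAME = _parsed.path.lstrip("/")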
Example #5

import os

from airflow import models
from airflow.providers.google.cloud.transfers.gcs_to_local import GCSToLocalFilesystemOperator
from airflow.utils.dates import days_ago

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-id")
BUCKET = os.environ.get("GCP_GCS_BUCKET", "test-gcs-example-bucket")

PATH_TO_REMOTE_FILE = os.environ.get("GCP_GCS_PATH_TO_UPLOAD_FILE",
                                     "test-gcs-example-remote.txt")
PATH_TO_LOCAL_FILE = os.environ.get("GCP_GCS_PATH_TO_SAVED_FILE",
                                    "test-gcs-example-local.txt")

with models.DAG(
        "example_gcs_to_local",
        start_date=days_ago(1),
        schedule_interval=None,
        tags=['example'],
) as dag:
    # [START howto_operator_gcs_download_file_task]
    download_file = GCSToLocalFilesystemOperator(
        task_id="download_file",
        object_name=PATH_TO_REMOTE_FILE,
        bucket=BUCKET,
        filename=PATH_TO_LOCAL_FILE,
    )
    # [END howto_operator_gcs_download_file_task]
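
For reference, the operator's execute boils down to the GCS hook download call asserted in Example #1. A rough hand-rolled equivalent, assuming the default google_cloud_default connection (a sketch, not the operator's exact implementation):

from airflow.providers.google.cloud.hooks.gcs import GCSHook

# Roughly what GCSToLocalFilesystemOperator does when `filename` is set.
hook = GCSHook()
hook.download(bucket_name=BUCKET, object_name=PATH_TO_REMOTE_FILE, filename=PATH_TO_LOCAL_FILE)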