Example No. 1
    def _add_benchmarks(self, task_group):
        with TaskGroup(task_group, prefix_group_id=True, dag=self.dag) as benchmarks:
            benchmark_tasks = self._get_e2e_benchmarks(task_group).get_benchmarks()
            chain(*benchmark_tasks)
        return benchmarks
Example No. 2
    def test_chain_not_support_type(self):
        dag = DAG(dag_id='test_chain', start_date=datetime.now())
        [op1, op2] = [DummyOperator(task_id='t{i}'.format(i=i), dag=dag) for i in range(1, 3)]
        with self.assertRaises(TypeError):
            chain([op1, op2], 1)  # noqa
Example No. 3
    def test_chain_different_length_iterable(self):
        dag = DAG(dag_id='test_chain', start_date=datetime.now())
        [op1, op2, op3, op4, op5] = [DummyOperator(task_id='t{i}'.format(i=i), dag=dag) for i in range(1, 6)]
        with self.assertRaises(AirflowException):
            chain([op1, op2], [op3, op4, op5])
Example No. 4
    def test_chain(self):
        dag = DAG(dag_id='test_chain', start_date=datetime.now())
        [op1, op2, op3, op4, op5, op6] = [DummyOperator(task_id=f't{i}', dag=dag) for i in range(1, 7)]
        chain(op1, [op2, op3], [op4, op5], op6)

        assert {op2, op3} == set(op1.get_direct_relatives(upstream=False))
        assert [op4] == op2.get_direct_relatives(upstream=False)
        assert [op5] == op3.get_direct_relatives(upstream=False)
        assert {op4, op5} == set(op6.get_direct_relatives(upstream=True))
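For reference, here is a minimal sketch (assuming Airflow 2.x; the DAG id is illustrative) of the explicit `>>` wiring that `chain(op1, [op2, op3], [op4, op5], op6)` produces. Adjacent lists of equal length are linked element-wise, while a single task next to a list fans out or fans in, which is exactly what the assertions above verify:

from datetime import datetime

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.dummy import DummyOperator

with DAG(dag_id="chain_equivalent_sketch", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    op1, op2, op3, op4, op5, op6 = (DummyOperator(task_id=f"t{i}") for i in range(1, 7))

    # chain(op1, [op2, op3], [op4, op5], op6) is equivalent to:
    op1 >> [op2, op3]   # single task -> list: op1 precedes both op2 and op3
    op2 >> op4          # equal-length lists are zipped pairwise,
    op3 >> op5          # not fully cross-linked
    [op4, op5] >> op6   # list -> single task: op6 waits for both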
Example No. 5
    def build(self):
        installer = self._get_openshift_installer()
        install_cluster = installer.get_install_task()
        with TaskGroup("benchmarks", prefix_group_id=False, dag=self.dag) as benchmarks:
            benchmark_tasks = self._get_e2e_benchmarks().get_benchmarks()
            chain(*benchmark_tasks)

        if self.config.cleanup_on_success:
            cleanup_cluster = installer.get_cleanup_task()
            install_cluster >> benchmarks >> cleanup_cluster
        else:
            install_cluster >> benchmarks
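The builder methods above group their benchmark tasks in a TaskGroup, chain them sequentially, and then wire the group between the install and cleanup tasks. A minimal self-contained sketch of that pattern, with illustrative BashOperator placeholders standing in for the installer and benchmark tasks of the original code:

from datetime import datetime

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator
from airflow.utils.task_group import TaskGroup

with DAG(dag_id="benchmarks_sketch", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    install_cluster = BashOperator(task_id="install_cluster", bash_command="echo install")
    cleanup_cluster = BashOperator(task_id="cleanup_cluster", bash_command="echo cleanup")

    # prefix_group_id=False keeps the original task_ids instead of prefixing them with "benchmarks."
    with TaskGroup("benchmarks", prefix_group_id=False) as benchmarks:
        benchmark_tasks = [BashOperator(task_id=f"benchmark_{i}", bash_command="echo run") for i in range(3)]
        chain(*benchmark_tasks)  # run the benchmarks one after another

    install_cluster >> benchmarks >> cleanup_cluster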
Example No. 6
    def test_chain(self):
        dag = DAG(dag_id='test_chain', start_date=datetime.now())
        [op1, op2, op3, op4, op5, op6] = [
            DummyOperator(task_id='t{i}'.format(i=i), dag=dag)
            for i in range(1, 7)
        ]
        chain(op1, [op2, op3], [op4, op5], op6)

        self.assertCountEqual([op2, op3], op1.get_direct_relatives(upstream=False))
        self.assertEqual([op4], op2.get_direct_relatives(upstream=False))
        self.assertEqual([op5], op3.get_direct_relatives(upstream=False))
        self.assertCountEqual([op4, op5], op6.get_direct_relatives(upstream=True))
Example No. 7
    def build(self):
        installer = self._get_openshift_installer()
        install_cluster = installer.get_install_task()
        connect_to_platform = self._get_platform_connector().get_task()
        final_status = final_dag_status.get_task(self.dag)
        with TaskGroup("benchmarks", prefix_group_id=False, dag=self.dag) as benchmarks:
            benchmark_tasks = self._get_e2e_benchmarks().get_benchmarks()
            chain(*benchmark_tasks)

        if self.config.cleanup_on_success:
            cleanup_cluster = installer.get_cleanup_task()
            install_cluster >> connect_to_platform >> benchmarks >> cleanup_cluster >> final_status
        else:
            install_cluster >> connect_to_platform >> benchmarks
Example No. 8
    def build(self):

        installer = self._get_openshift_installer()
        initialize_cluster = installer.initialize_cluster_task()
        connect_to_platform = self._get_platform_connector().get_task()

        with TaskGroup("utils", prefix_group_id=False, dag=self.dag) as utils:
            utils_tasks = self._get_scale_ci_diagnosis().get_utils()
            chain(*utils_tasks)

        with TaskGroup("benchmarks", prefix_group_id=False, dag=self.dag) as benchmarks:
            benchmark_tasks = self._get_e2e_benchmarks().get_benchmarks()
            chain(*benchmark_tasks)

        initialize_cluster >> connect_to_platform >> benchmarks >> utils
Example No. 9
    def build(self):
        installer = self._get_openshift_installer()
        install_cluster = installer.get_install_task()
        connect_to_platform = self._get_platform_connector().get_task()
        with TaskGroup("benchmarks", prefix_group_id=False,
                       dag=self.dag) as benchmarks:
            benchmark_tasks = self._get_e2e_benchmarks().get_benchmarks()
            chain(*benchmark_tasks)

        rosa_post_installation = self._get_rosa_postinstall_setup()._get_rosa_postinstallation()

        if self.config.cleanup_on_success:
            cleanup_cluster = installer.get_cleanup_task()
            install_cluster >> rosa_post_installation >> connect_to_platform >> benchmarks >> cleanup_cluster
        else:
            install_cluster >> rosa_post_installation >> connect_to_platform >> benchmarks
Example No. 10
    # [END howto_operator_gcp_transfer_wait_operation]

    # [START howto_operator_gcp_transfer_cancel_operation]
    cancel_operation = CloudDataTransferServiceCancelOperationOperator(
        task_id="cancel_operation",
        operation_name="{{task_instance.xcom_pull('wait_for_second_operation_to_start', "
        "key='sensed_operations')[0]['name']}}",
    )
    # [END howto_operator_gcp_transfer_cancel_operation]

    # [START howto_operator_gcp_transfer_delete_job]
    delete_transfer_from_aws_job = CloudDataTransferServiceDeleteJobOperator(
        task_id="delete_transfer_from_aws_job",
        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
        project_id=GCP_PROJECT_ID,
    )
    # [END howto_operator_gcp_transfer_delete_job]

    chain(
        create_transfer_job_from_aws,
        wait_for_operation_to_start,
        pause_operation,
        list_operations,
        get_operation,
        resume_operation,
        wait_for_operation_to_end,
        cancel_operation,
        delete_transfer_from_aws_job,
    )
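When every argument is a single task, as in the transfer-service example above, chain is simply shorthand for a linear `>>` pipeline. A short sketch with placeholder tasks (names are illustrative, not from the original DAG):

from datetime import datetime

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator

with DAG(dag_id="linear_chain_sketch", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    steps = [BashOperator(task_id=f"step_{i}", bash_command="echo step") for i in range(4)]

    # chain(*steps) is equivalent to steps[0] >> steps[1] >> steps[2] >> steps[3]
    chain(*steps)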
Example No. 11
                "sourceFormat": "DATASTORE_BACKUP",
                "compression": "NONE",
                "csvOptions": {
                    "skipLeadingRows": 1
                },
            },
        },
    )
    # [END howto_operator_create_external_table_multiple_types]

    read_data_from_gcs_multiple_types = BigQueryInsertJobOperator(
        task_id="execute_query",
        configuration={
            "query": {
                "query":
                f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.firestore_data`",
                "useLegacySql": False,
            }
        },
    )

    chain(
        # Firestore
        export_database_to_gcs,
        # BigQuery
        create_dataset,
        create_external_table_multiple_types,
        read_data_from_gcs_multiple_types,
        delete_dataset,
    )
Example No. 12
        dag_id=DAG_ID,
        default_args=default_args,
        description="An example DAG demonstrating lineage emission within an Airflow DAG.",
        schedule_interval=None,
        start_date=days_ago(1),
        catchup=False,
        dagrun_timeout=timedelta(minutes=5),
        tags=["datahub demo"],
) as dag:
    emit_lineage_task = DatahubEmitterOperator(
        task_id="emit_lineage",
        datahub_conn_id="datahub_rest",
        mces=[
            builder.make_lineage_mce(
                upstream_urns=[
                    builder.make_dataset_urn("glue", "mydb.tableA"),
                    builder.make_dataset_urn("glue", "mydb.tableB"),
                ],
                downstream_urn=builder.make_dataset_urn("glue", "mydb.tableC"),
            )
        ],
    )
    get_airflow_cfg_operator = PythonOperator(
        task_id="get_airflow_cfg_task", python_callable=print_airflow_cfg)
    get_print_env_vars_operator = PythonOperator(
        task_id="get_print_env_vars_task", python_callable=print_env_vars)

chain(emit_lineage_task, get_airflow_cfg_operator, get_print_env_vars_operator)
Example No. 13
            'type': 'video',
            'fields': 'items/id/videoId',
        },
        google_api_response_via_xcom='video_ids_response',
        s3_destination_key=f'{S3_BUCKET_NAME}/youtube_search.json',
        s3_overwrite=True,
    )
    # [END howto_transfer_google_api_youtube_search_to_s3]

    task_transform_video_ids = transform_video_ids()

    # [START howto_transfer_google_api_youtube_list_to_s3]
    task_video_data_to_s3 = GoogleApiToS3Operator(
        task_id='video_data_to_s3',
        google_api_service_name='youtube',
        google_api_service_version='v3',
        google_api_endpoint_path='youtube.videos.list',
        google_api_endpoint_params={
            'part': YOUTUBE_VIDEO_PARTS,
            'maxResults': 50,
            'fields': YOUTUBE_VIDEO_FIELDS,
        },
        google_api_endpoint_params_via_xcom='video_ids',
        s3_destination_key=f'{S3_BUCKET_NAME}/youtube_videos.json',
        s3_overwrite=True,
    )
    # [END howto_transfer_google_api_youtube_list_to_s3]

    chain(task_video_ids_to_s3, task_transform_video_ids,
          task_video_data_to_s3)
Example No. 14
        zone=GCE_ZONE,
        resource_id=GCE_INSTANCE,
        body={
            'machineType':
            f'zones/{GCE_ZONE}/machineTypes/{GCE_SHORT_MACHINE_TYPE_NAME}'
        },
        task_id='gcp_compute_set_machine_type',
    )
    # [END howto_operator_gce_set_machine_type]
    # Duplicate set machine type for idempotence testing
    # [START howto_operator_gce_set_machine_type_no_project_id]
    gce_set_machine_type2 = ComputeEngineSetMachineTypeOperator(
        zone=GCE_ZONE,
        resource_id=GCE_INSTANCE,
        body={
            'machineType':
            f'zones/{GCE_ZONE}/machineTypes/{GCE_SHORT_MACHINE_TYPE_NAME}'
        },
        task_id='gcp_compute_set_machine_type2',
    )
    # [END howto_operator_gce_set_machine_type_no_project_id]

    chain(
        gce_instance_start,
        gce_instance_start2,
        gce_instance_stop,
        gce_instance_stop2,
        gce_set_machine_type,
        gce_set_machine_type2,
    )
Example No. 15
    )

    # [START howto_operator_emr_add_steps]
    step_adder = EmrAddStepsOperator(
        task_id='add_steps',
        job_flow_id=cluster_creator.output,
        steps=SPARK_STEPS,
    )
    # [END howto_operator_emr_add_steps]

    # [START howto_sensor_emr_step_sensor]
    step_checker = EmrStepSensor(
        task_id='watch_step',
        job_flow_id=cluster_creator.output,
        step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
    )
    # [END howto_sensor_emr_step_sensor]

    # [START howto_operator_emr_terminate_job_flow]
    cluster_remover = EmrTerminateJobFlowOperator(
        task_id='remove_cluster',
        job_flow_id=cluster_creator.output,
    )
    # [END howto_operator_emr_terminate_job_flow]

    chain(
        step_adder,
        step_checker,
        cluster_remover,
    )
Example No. 16
    )
    # [END howto_operator_glue_crawler]

    # [START howto_sensor_glue_crawler]
    wait_for_crawl = GlueCrawlerSensor(task_id='wait_for_crawl', crawler_name=GLUE_CRAWLER_NAME)
    # [END howto_sensor_glue_crawler]

    # [START howto_operator_glue]
    job_name = 'example_glue_job'
    submit_glue_job = GlueJobOperator(
        task_id='submit_glue_job',
        job_name=job_name,
        wait_for_completion=False,
        script_location=f's3://{GLUE_EXAMPLE_S3_BUCKET}/etl_script.py',
        s3_bucket=GLUE_EXAMPLE_S3_BUCKET,
        iam_role_name=GLUE_CRAWLER_ROLE.split('/')[-1],
        create_job_kwargs={'GlueVersion': '3.0', 'NumberOfWorkers': 2, 'WorkerType': 'G.1X'},
    )
    # [END howto_operator_glue]

    # [START howto_sensor_glue]
    wait_for_job = GlueJobSensor(
        task_id='wait_for_job',
        job_name=job_name,
        # Job ID extracted from previous Glue Job Operator task
        run_id=submit_glue_job.output,
    )
    # [END howto_sensor_glue]

    chain(setup_upload_artifacts_to_s3, crawl_s3, wait_for_crawl, submit_glue_job, wait_for_job)
Example No. 17
SNS_TOPIC_ARN = getenv("SNS_TOPIC_ARN",
                       "arn:aws:sns:<region>:<account number>:MyTopic")
RDS_DB_IDENTIFIER = getenv("RDS_DB_IDENTIFIER", "database-identifier")

with DAG(
        dag_id='example_rds_event',
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        tags=['example'],
        catchup=False,
) as dag:
    # [START howto_operator_rds_create_event_subscription]
    create_subscription = RdsCreateEventSubscriptionOperator(
        task_id='create_subscription',
        subscription_name=SUBSCRIPTION_NAME,
        sns_topic_arn=SNS_TOPIC_ARN,
        source_type='db-instance',
        source_ids=[RDS_DB_IDENTIFIER],
        event_categories=['availability'],
    )
    # [END howto_operator_rds_create_event_subscription]

    # [START howto_operator_rds_delete_event_subscription]
    delete_subscription = RdsDeleteEventSubscriptionOperator(
        task_id='delete_subscription',
        subscription_name=SUBSCRIPTION_NAME,
    )
    # [END howto_operator_rds_delete_event_subscription]

    chain(create_subscription, delete_subscription)
Example No. 18
    delete_task = DmsDeleteTaskOperator(
        task_id='delete_task',
        replication_task_arn=create_task.output,
        trigger_rule='all_done',
    )
    # [END howto_operator_dms_delete_task]

    delete_db_instance = RdsDeleteDbInstanceOperator(
        task_id='delete_db_instance',
        db_instance_identifier=RDS_INSTANCE_NAME,
        rds_kwargs={
            "SkipFinalSnapshot": True,
        },
        trigger_rule='all_done',
    )

    chain(
        create_db_instance,
        create_sample_table(),
        create_dms_assets(),
        create_task,
        start_task,
        describe_tasks,
        await_task_start,
        stop_task,
        await_task_stop,
        delete_task,
        delete_dms_assets(),
        delete_db_instance,
    )
Example No. 19
with DAG(
    dag_id='example_ec2',
    start_date=datetime(2021, 1, 1),
    tags=['example'],
    catchup=False,
) as dag:
    # [START howto_operator_ec2_start_instance]
    start_instance = EC2StartInstanceOperator(
        task_id="ec2_start_instance",
        instance_id=INSTANCE_ID,
    )
    # [END howto_operator_ec2_start_instance]

    # [START howto_sensor_ec2_instance_state]
    instance_state = EC2InstanceStateSensor(
        task_id="ec2_instance_state",
        instance_id=INSTANCE_ID,
        target_state="running",
    )
    # [END howto_sensor_ec2_instance_state]

    # [START howto_operator_ec2_stop_instance]
    stop_instance = EC2StopInstanceOperator(
        task_id="ec2_stop_instance",
        instance_id=INSTANCE_ID,
    )
    # [END howto_operator_ec2_stop_instance]

    chain(start_instance, instance_state, stop_instance)
Example No. 20
        source=SOURCE_NAME,
        flow_name=FLOW_NAME,
        source_field="LastModifiedDate",
        filter_date="3000-01-01",  # Future date, so no records to dump
    )
    # [END howto_operator_appflow_run_after]

    # [START howto_operator_appflow_shortcircuit]
    campaign_dump_short_circuit = AppflowRecordsShortCircuitOperator(
        task_id="campaign_dump_short_circuit",
        flow_name=FLOW_NAME,
        appflow_run_task_id="campaign_dump_after",  # Should shortcircuit, no records expected
    )
    # [END howto_operator_appflow_shortcircuit]

    should_be_skipped = BashOperator(
        task_id="should_be_skipped",
        bash_command="echo 1",
    )

    chain(
        campaign_dump,
        campaign_dump_full,
        campaign_dump_daily,
        campaign_dump_before,
        campaign_dump_after,
        campaign_dump_short_circuit,
        should_be_skipped,
    )
Example No. 21
    },
}
# [END howto_operator_emr_eks_config]

with DAG(
        dag_id='example_emr_eks',
        start_date=datetime(2021, 1, 1),
        tags=['example'],
        catchup=False,
) as dag:
    # [START howto_operator_emr_container]
    job_starter = EmrContainerOperator(
        task_id="start_job",
        virtual_cluster_id=VIRTUAL_CLUSTER_ID,
        execution_role_arn=JOB_ROLE_ARN,
        release_label="emr-6.3.0-latest",
        job_driver=JOB_DRIVER_ARG,
        configuration_overrides=CONFIGURATION_OVERRIDES_ARG,
        name="pi.py",
        wait_for_completion=False,
    )
    # [END howto_operator_emr_container]

    # [START howto_sensor_emr_container]
    job_waiter = EmrContainerSensor(task_id="job_waiter",
                                    virtual_cluster_id=VIRTUAL_CLUSTER_ID,
                                    job_id=str(job_starter.output))
    # [END howto_sensor_emr_container]

    chain(job_starter, job_waiter)
Example No. 22
    update_queue = CloudTasksQueueUpdateOperator(
        task_queue=Queue(stackdriver_logging_config=dict(sampling_ratio=1)),
        location=LOCATION,
        queue_name=QUEUE_ID,
        update_mask={"paths": ["stackdriver_logging_config.sampling_ratio"]},
        task_id="update_queue",
    )

    list_queue = CloudTasksQueuesListOperator(location=LOCATION,
                                              task_id="list_queue")

    chain(
        create_queue,
        update_queue,
        pause_queue,
        resume_queue,
        purge_queue,
        get_queue,
        list_queue,
        delete_queue,
    )

    # Task operations
    create_task = CloudTasksTaskCreateOperator(
        location=LOCATION,
        queue_name=QUEUE_ID,
        task=TASK,
        task_name=TASK_NAME,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id="create_task_to_run",
    )
        configuration={
            "query": {
                "query": f"SELECT COUNT(*) FROM `{GCP_PROJECT_ID}.{DATASET_NAME}.{TABLE_NAME}`",
                "useLegacySql": False,
            }
        },
    )

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name=GCS_BUCKET,
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset",
        project_id=GCP_PROJECT_ID,
        dataset_id=DATASET_NAME,
        delete_contents=True,
    )

    chain(
        create_bucket,
        create_dataset,
        create_table,
        run_operator,
        load_csv,
        read_data_from_gcs_many_chunks,
        delete_bucket,
        delete_dataset,
    )
args = {"owner": "airflow", "start_date": START_DATE}

for dag_no in range(1, DAG_COUNT + 1):
    dag = DAG(
        dag_id=safe_dag_id("__".join([
            DAG_PREFIX,
            f"SHAPE={SHAPE.name.lower()}",
            f"DAGS_COUNT={dag_no}_of_{DAG_COUNT}",
            f"TASKS_COUNT=${TASKS_COUNT}",
            f"START_DATE=${START_DATE_ENV}",
            f"SCHEDULE_INTERVAL=${SCHEDULE_INTERVAL_ENV}",
        ])),
        is_paused_upon_creation=False,
        default_args=args,
        schedule_interval=SCHEDULE_INTERVAL,
    )

    tasks = [
        BashOperator(task_id="__".join(["tasks", f"{i}_of_{TASKS_COUNT}"]),
                     bash_command='echo test',
                     dag=dag) for i in range(1, TASKS_COUNT + 1)
    ]
    if SHAPE == DagShape.NO_STRUCTURE:
        # Do nothing
        pass
    elif SHAPE == DagShape.LINEAR:
        chain(*tasks)

    globals()[f"dag_{dag_no}"] = dag
Example No. 25
                                     python_callable=load_users_data)

    load_tweets_data = PythonOperator(task_id="load_tweets_data",
                                      python_callable=load_tweets_data)

    load_mentions_data = PythonOperator(task_id="load_mentions_data",
                                        python_callable=load_mentions_data)

    load_hashtags_data = PythonOperator(task_id="load_hashtags_data",
                                        python_callable=load_hashtags_data)

    load_urls_data = PythonOperator(task_id="load_urls_data",
                                    python_callable=load_urls_data)

    load_medias_data = PythonOperator(task_id="load_medias_data",
                                      python_callable=load_medias_data)

    load_scores_data = PythonOperator(task_id="load_scores_data",
                                      python_callable=load_scores_data)

    chain(
        parse_tweets_data,
        score_users,
        [
            transform_users_data,
            transform_tweets_data,
            transform_entities_data,
            transform_scores_data,
        ],
        [
            load_users_data,
            load_tweets_data,
            [load_mentions_data, load_hashtags_data, load_medias_data, load_urls_data],
            load_scores_data,
        ],
    )
Example No. 26
    # [START howto_operator_get_build_trigger]
    get_build_trigger = CloudBuildGetBuildTriggerOperator(
        task_id="get_build_trigger",
        project_id=GCP_PROJECT_ID,
        trigger_id=create_build_trigger.output['id'],
    )
    # [END howto_operator_get_build_trigger]

    # [START howto_operator_delete_build_trigger]
    delete_build_trigger = CloudBuildDeleteBuildTriggerOperator(
        task_id="delete_build_trigger",
        project_id=GCP_PROJECT_ID,
        trigger_id=create_build_trigger.output['id'],
    )
    # [END howto_operator_delete_build_trigger]

    # [START howto_operator_list_build_triggers]
    list_build_triggers = CloudBuildListBuildTriggersOperator(
        task_id="list_build_triggers", project_id=GCP_PROJECT_ID, location="global", page_size=5
    )
    # [END howto_operator_list_build_triggers]

    chain(
        create_build_trigger,
        run_build_trigger,
        update_build_trigger,
        get_build_trigger,
        delete_build_trigger,
        list_build_triggers,
    )
Example No. 27
from airflow.utils import dates

args = {
    'owner': 'airflow',
}

dag = DAG(
    dag_id='example_short_circuit_operator',
    default_args=args,
    start_date=dates.days_ago(2),
    tags=['example'],
)

cond_true = ShortCircuitOperator(
    task_id='condition_is_True',
    python_callable=lambda: True,
    dag=dag,
)

cond_false = ShortCircuitOperator(
    task_id='condition_is_False',
    python_callable=lambda: False,
    dag=dag,
)

ds_true = [DummyOperator(task_id='true_' + str(i), dag=dag) for i in [1, 2]]
ds_false = [DummyOperator(task_id='false_' + str(i), dag=dag) for i in [1, 2]]

chain(cond_true, *ds_true)
chain(cond_false, *ds_false)
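Note the star-unpacking here: chain(cond_true, *ds_true) expands to chain(cond_true, true_1, true_2) and builds a linear sequence, whereas passing the list unexpanded would fan out from the condition to both tasks. A short sketch of the difference (task ids are illustrative, not from the original DAG):

from datetime import datetime

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.dummy import DummyOperator

with DAG(dag_id="unpacking_sketch", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    cond = DummyOperator(task_id="cond")
    downstream = [DummyOperator(task_id=f"down_{i}") for i in (1, 2)]

    chain(cond, *downstream)   # linear: cond >> down_1 >> down_2
    # chain(cond, downstream)  # would fan out instead: cond >> [down_1, down_2]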
Example No. 28
    update_tag_template = BashOperator(task_id="update_tag_template",
                                       bash_command="echo update_tag_template")

    update_tag_template_field = BashOperator(
        task_id="update_tag_template_field",
        bash_command="echo update_tag_template_field")

    # Create
    create_tasks = [
        create_entry_group,
        create_entry_gcs,
        create_tag_template,
        create_tag_template_field,
        create_tag,
    ]
    chain(*create_tasks)

    create_entry_group >> delete_entry_group
    create_entry_group >> create_entry_group_result
    create_entry_group >> create_entry_group_result2

    create_entry_gcs >> delete_entry
    create_entry_gcs >> create_entry_gcs_result
    create_entry_gcs >> create_entry_gcs_result2

    create_tag_template >> delete_tag_template_field
    create_tag_template >> create_tag_template_result
    create_tag_template >> create_tag_template_result2

    create_tag_template_field >> delete_tag_template_field
    create_tag_template_field >> create_tag_template_field_result
Example No. 29
    # [END howto_transfer_dynamodb_to_s3]

    # [START howto_transfer_dynamodb_to_s3_segmented]
    # Segmenting allows the transfer to be parallelized across TotalSegments parallel tasks.
    backup_db_segment_1 = DynamoDBToS3Operator(
        task_id='backup-1',
        dynamodb_table_name=TABLE_NAME,
        s3_bucket_name=BUCKET_NAME,
        # Max output file size in bytes.  If the Table is too large, multiple files will be created.
        file_size=1000,
        dynamodb_scan_kwargs={
            "TotalSegments": 2,
            "Segment": 0,
        },
    )

    backup_db_segment_2 = DynamoDBToS3Operator(
        task_id="backup-2",
        dynamodb_table_name=TABLE_NAME,
        s3_bucket_name=BUCKET_NAME,
        # Max output file size in bytes.  If the Table is too large, multiple files will be created.
        file_size=1000,
        dynamodb_scan_kwargs={
            "TotalSegments": 2,
            "Segment": 1,
        },
    )
    # [END howto_transfer_dynamodb_to_s3_segmented]

    chain(backup_db, [backup_db_segment_1, backup_db_segment_2])
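Here chain receives a single task followed by a list, so both segmented backups run in parallel after the full backup; it is equivalent to backup_db >> [backup_db_segment_1, backup_db_segment_2]. A minimal sketch with placeholder operators (names are illustrative):

from datetime import datetime

from airflow import DAG
from airflow.models.baseoperator import chain
from airflow.operators.dummy import DummyOperator

with DAG(dag_id="fan_out_sketch", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    backup = DummyOperator(task_id="backup")
    segments = [DummyOperator(task_id=f"segment_{i}") for i in (1, 2)]

    chain(backup, segments)  # same as: backup >> segments (both segments run in parallel)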
Example No. 30
    # [START howto_operator_s3_delete_objects]
    delete_objects = S3DeleteObjectsOperator(
        task_id="s3_delete_objects",
        bucket=BUCKET_NAME_2,
        keys=KEY_2,
    )
    # [END howto_operator_s3_delete_objects]

    # [START howto_operator_s3_delete_bucket]
    delete_bucket = S3DeleteBucketOperator(task_id='s3_delete_bucket',
                                           bucket_name=BUCKET_NAME,
                                           force_delete=True)
    # [END howto_operator_s3_delete_bucket]

    chain(
        create_bucket,
        put_tagging,
        get_tagging,
        delete_tagging,
        create_object,
        list_prefixes,
        list_keys,
        [sensor_one_key, sensor_two_keys, sensor_key_with_function],
        copy_object,
        transforms_file,
        sensor_keys_unchanged,
        delete_objects,
        delete_bucket,
    )