Exemplo n.º 1
0
          schedule_interval="0 * * * *"
          )


# Sensor that holds this DAG back until task "end_task" of the external DAG
# "02_data_etl_dag" has completed.
# NOTE(review): no execution_delta / execution_date_fn is passed, so the sensor
# presumably looks for the external run with the same logical date -- confirm
# that both DAGs share a schedule.
wait_on_data_model_creation = ExternalTaskSensor(
    dag=dag,
    task_id='wait_on_data_model_creation',
    # What to wait for.
    external_dag_id='02_data_etl_dag',
    external_task_id='end_task',
    # How to wait: re-check every 120 (seconds, per the Airflow sensor docs)
    # in "reschedule" mode rather than the default poke mode.
    poke_interval=120,
    mode='reschedule',
)


# Data quality task: hands the `data_quality_checks` collection and the
# connection id AWS_REDSHIFT_CONN_ID (both defined outside this excerpt) to the
# project's AWSRedshiftDataQualityOperator.
# NOTE(review): the operator is project-specific; presumably it runs each check
# against Redshift and fails the task on a mismatch -- verify in its source.
run_data_quality_checks = AWSRedshiftDataQualityOperator(
    dag=dag,
    task_id="run_data_quality_checks",
    checks=data_quality_checks,
    conn_id=AWS_REDSHIFT_CONN_ID,
)


# No-op task marking the end of this DAG's task chain.
end_task = DummyOperator(dag=dag, task_id='end_task')

# Task ordering: sensor -> data quality checks -> end marker.
wait_on_data_model_creation >> run_data_quality_checks >> end_task
Exemplo n.º 2
0
    cluster_type="redshift",
    time_zone=local_tz)

# Sensor that waits for task "end_task" of the external DAG
# "03_data_quality_check_dag" to complete.
# NOTE(review): no execution_delta / execution_date_fn is passed, so the sensor
# presumably looks for the external run with the same logical date -- confirm
# that both DAGs share a schedule.
wait_on_tasks_completed = ExternalTaskSensor(
    dag=dag,
    task_id='wait_on_tasks_completed',
    # What to wait for.
    external_dag_id='03_data_quality_check_dag',
    external_task_id='end_task',
    # How to wait: re-check every 120 (seconds, per the Airflow sensor docs)
    # in "reschedule" mode rather than the default poke mode.
    poke_interval=120,
    mode='reschedule',
)

# Termination task for the EMR cluster, using the project's
# AWSTerminateClusterOperator with the AWS_CONN_ID connection.
# `cluster_creation_task` names the task that created the cluster as
# "<DAG_NAME>.create_emr_cluster".
# NOTE(review): presumably the operator uses that name to look up the cluster
# identifier produced by the creation task -- verify in the operator source.
terminate_emr_cluster = AWSTerminateClusterOperator(
    dag=dag,
    task_id='terminate_emr_cluster',
    conn_id=AWS_CONN_ID,
    cluster_type='emr',
    cluster_creation_task=DAG_NAME + '.create_emr_cluster',
)

# Termination task for the Redshift cluster, using the project's
# AWSTerminateClusterOperator with the AWS_CONN_ID connection.
# `cluster_creation_task` names the task that created the cluster as
# "<DAG_NAME>.create_redshift_cluster".
# NOTE(review): presumably the operator uses that name to look up the cluster
# identifier produced by the creation task -- verify in the operator source.
terminate_redshift_cluster = AWSTerminateClusterOperator(
    dag=dag,
    task_id='terminate_redshift_cluster',
    conn_id=AWS_CONN_ID,
    cluster_type='redshift',
    cluster_creation_task=DAG_NAME + '.create_redshift_cluster',
)

# Task ordering: both cluster-creation tasks sit upstream of the sensor, and
# both cluster-termination tasks sit downstream of it.
[create_emr_cluster, create_redshift_cluster] >> wait_on_tasks_completed
wait_on_tasks_completed >> [terminate_emr_cluster, terminate_redshift_cluster]