cluster_type="redshift", time_zone=local_tz) wait_on_tasks_completed = ExternalTaskSensor( task_id="wait_on_tasks_completed", dag=dag, external_dag_id="03_data_quality_check_dag", external_task_id="end_task", mode="reschedule", poke_interval=120) terminate_emr_cluster = AWSTerminateClusterOperator( task_id="terminate_emr_cluster", dag=dag, conn_id=AWS_CONN_ID, cluster_creation_task=DAG_NAME + ".create_emr_cluster", cluster_type="emr") terminate_redshift_cluster = AWSTerminateClusterOperator( task_id="terminate_redshift_cluster", dag=dag, conn_id=AWS_CONN_ID, cluster_creation_task=DAG_NAME + ".create_redshift_cluster", cluster_type="redshift") # set dependencies wait_on_tasks_completed.set_upstream( [create_emr_cluster, create_redshift_cluster]) wait_on_tasks_completed.set_downstream( [terminate_emr_cluster, terminate_redshift_cluster])
def set_dependencies(yaml_specs, tasks, latest_only=True, **kwargs):
    """Wire upstream dependencies between tasks as declared in the YAML specs.

    Two kinds of dependencies are handled:

    * External dependencies: for every ``{external_dag: external_task}``
      entry an ``ExternalTaskSensor`` is created (once per sensor name) and
      set upstream of the dependent task. An ``external_task`` equal to
      ``"all"`` waits for the whole external DAG (``external_task_id=None``).
    * Local dependencies: every declared dependency that exists in ``tasks``
      and is not the task itself is set upstream of the task.

    When ``latest_only`` is true, a ``LatestOnlyOperator`` with task id
    ``latest_only`` is created and set upstream of every sensor and of every
    task that has neither valid local dependencies nor external ones.

    Args:
        yaml_specs: Specs passed to ``get_dependencies`` and
            ``get_external_dependencies``.
        tasks: Mapping of task id to operator; operators are mutated in place
            through ``set_upstream``.
        latest_only: Whether to gate the graph behind a ``LatestOnlyOperator``.
        **kwargs: Must contain ``dag`` whenever an operator or sensor has to
            be created.

    Raises:
        KeyError: If ``dag`` is missing from ``kwargs`` when an operator is
            created, or if a spec references a task id absent from ``tasks``.
    """
    dependencies = get_dependencies(yaml_specs)
    external_dependencies = get_external_dependencies(yaml_specs)

    if latest_only:
        latest_only_operator = LatestOnlyOperator(task_id='latest_only',
                                                  dag=kwargs["dag"])

    # External dependencies.
    # Sensors are cached by task id so that several tasks waiting on the same
    # external task/DAG share a single sensor.
    # NOTE(review): the sensor name for a single external task does not
    # include the external DAG id, so equally named tasks living in different
    # DAGs resolve to the same (first created) sensor. Left as is because
    # changing it would rename existing sensor task ids.
    external_tasks = {}
    tasks_with_external_dependencies = set()
    for task in external_dependencies:
        task_id = task["task_id"]
        tasks_with_external_dependencies.add(task_id)

        # Use a distinct name: do not rebind the list being iterated above.
        task_external_dependencies = task["external_dependencies"]
        for external_dependency in task_external_dependencies:
            # Each entry is a single-item mapping {external_dag: external_task}.
            external_dag, external_task = list(external_dependency.items())[0]
            wait_for_whole_dag = external_task == "all"
            if wait_for_whole_dag:
                task_name = "wait_for_DAG_" + external_dag
            else:
                task_name = "wait_for_" + external_task

            if task_name not in external_tasks:
                # Whole-DAG sensors poke every 20s and pass no task id;
                # single-task sensors poke every 60s.
                wait_for_task = ExternalTaskSensor(
                    dag=kwargs["dag"],
                    task_id=task_name,
                    external_dag_id=external_dag,
                    external_task_id=(None if wait_for_whole_dag
                                      else external_task),
                    poke_interval=20 if wait_for_whole_dag else 60,
                    timeout=60,
                    retries=25)
                external_tasks[task_name] = wait_for_task
                if latest_only:
                    wait_for_task.set_upstream(latest_only_operator)

            tasks[task_id].set_upstream(external_tasks[task_name])

    # Local dependencies.
    for task in dependencies:
        task_id = task["task_id"]
        # Ignore unknown task ids and self-references.
        valid_dependencies = [
            spec_dependency for spec_dependency in task["dependencies"]
            if spec_dependency in tasks and spec_dependency != task_id
        ]

        is_root_task = (not valid_dependencies
                        and task_id not in tasks_with_external_dependencies)
        if latest_only and is_root_task:
            # Root tasks hang directly off the latest-only gate.
            tasks[task_id].set_upstream(latest_only_operator)
        else:
            for spec_dependency in valid_dependencies:
                tasks[task_id].set_upstream(tasks[spec_dependency])