def test_dagrun_success_when_all_skipped(self):
    """
    A running DAG run whose short-circuit task succeeded and whose two
    downstream tasks were skipped must be updated to the SUCCESS state.
    """
    skipped_dag = DAG(
        dag_id='test_dagrun_success_when_all_skipped',
        start_date=timezone.datetime(2017, 1, 1),
    )

    short_circuit = ShortCircuitOperator(
        task_id='test_short_circuit_false',
        dag=skipped_dag,
        python_callable=lambda: False,
    )
    first_skipped = DummyOperator(task_id='test_state_skipped1', dag=skipped_dag)
    second_skipped = DummyOperator(task_id='test_state_skipped2', dag=skipped_dag)

    # Linear chain: short circuit -> skipped1 -> skipped2.
    short_circuit.set_downstream(first_skipped)
    first_skipped.set_downstream(second_skipped)

    # Task instance states the run starts from, keyed by task_id.
    states_by_task_id = {
        'test_short_circuit_false': State.SUCCESS,
        'test_state_skipped1': State.SKIPPED,
        'test_state_skipped2': State.SKIPPED,
    }
    run = self.create_dag_run(
        dag=skipped_dag,
        state=State.RUNNING,
        task_states=states_by_task_id,
    )

    self.assertEqual(State.SUCCESS, run.update_state())
def test_dagrun_success_when_all_skipped(self):
    """
    Verify that update_state() reports SUCCESS for a running DAG run in
    which the short-circuit task succeeded and both tasks after it were
    skipped.
    """
    dag_under_test = DAG(
        dag_id='test_dagrun_success_when_all_skipped',
        start_date=datetime.datetime(2017, 1, 1),
    )

    # Build the three tasks, then wire them into a single chain.
    gate = ShortCircuitOperator(
        task_id='test_short_circuit_false',
        dag=dag_under_test,
        python_callable=lambda: False,
    )
    skipped_one = DummyOperator(task_id='test_state_skipped1', dag=dag_under_test)
    skipped_two = DummyOperator(task_id='test_state_skipped2', dag=dag_under_test)
    gate.set_downstream(skipped_one)
    skipped_one.set_downstream(skipped_two)

    starting_states = {
        'test_short_circuit_false': State.SUCCESS,
        'test_state_skipped1': State.SKIPPED,
        'test_state_skipped2': State.SKIPPED,
    }
    run_under_test = self.create_dag_run(
        dag=dag_under_test,
        state=State.RUNNING,
        task_states=starting_states,
    )

    resulting_state = run_under_test.update_state()
    self.assertEqual(State.SUCCESS, resulting_state)
<tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr> <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr> <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr> <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr> <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr> <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr> <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr> <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr> <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr> </table> <h2>Processes Killed</h2> <ul> {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %} <li>Process {{loop.index}}</li> <ul> {% for key, value in process_killed.iteritems() %} <li>{{ key }}: {{ value }}</li> {% endfor %} </ul> {% endfor %} </ul> </body> </html> """, dag=dag) kill_halted_tasks.set_downstream(email_or_not_branch) email_or_not_branch.set_downstream(send_processes_killed_email)
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime

from airflow.models import DAG
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator

# Three-task chain: a short-circuit task whose callable always returns False,
# followed by two dummy tasks placed downstream of it.
dag = DAG(
    dag_id='test_dagrun_short_circuit_false',
    start_date=datetime(2017, 1, 1),
)

dag_task1 = ShortCircuitOperator(
    task_id='test_short_circuit_false',
    dag=dag,
    python_callable=lambda: False,
)
dag_task2 = DummyOperator(
    task_id='test_state_skipped1',
    dag=dag,
)
dag_task3 = DummyOperator(
    task_id='test_state_skipped2',
    dag=dag,
)

# dag_task1 -> dag_task2 -> dag_task3
dag_task1.set_downstream(dag_task2)
dag_task2.set_downstream(dag_task3)
def docker_move_subdag(host_top_dir, input_path, output_path):
    """Build the ``docker_backup_db`` DAG that moves one file between directories.

    The DAG chains four tasks:

    1. ``view_file`` - bash task that sleeps, then pushes the name of the
       first file found under the host input directory to XCom.
    2. ``check_if_data_available`` - short-circuits the rest of the run when
       ``view_file`` pushed nothing.
    3. ``move_data`` - Docker task that moves that file from ``input_path``
       to ``output_path`` inside the container and echoes the new path.
    4. ``print`` - Docker task that ``cat``s the path pushed by ``move_data``.

    Args:
        host_top_dir: Host directory prefix; joined with ``input_path`` (and,
            for the ``print`` task, ``output_path``) to form host-side paths.
        input_path: Path appended to ``host_top_dir`` on the host and used as
            the source location inside the container.
        output_path: Target location inside the container.

    Returns:
        None. In the code visible here the DAG is created inside the
        ``with DAG(...)`` block and is not returned.
    """
    host_path = f"{host_top_dir}/{input_path}"

    with DAG(
        "docker_backup_db",
        default_args=default_args,
        schedule_interval=timedelta(minutes=10),
    ) as dag:
        locate_file_cmd = """
            sleep 10
            find {{params.source_location}} -type f -printf "%f\n" | head -1
        """

        t_view = BashOperator(
            task_id="view_file",
            bash_command=locate_file_cmd,
            xcom_push=True,
            params={"source_location": host_path},
        )

        def is_data_available(*args, **kwargs):
            """Return True when ``view_file`` pushed a value to XCom."""
            ti = kwargs["ti"]
            data = ti.xcom_pull(key=None, task_ids="view_file")
            return data is not None

        # provide_context=True is required for the task context (including
        # "ti") to be passed to the callable as keyword arguments; without it
        # kwargs["ti"] raises KeyError on the Airflow 1.x API this file uses.
        t_is_data_available = ShortCircuitOperator(
            task_id="check_if_data_available",
            python_callable=is_data_available,
            provide_context=True,
        )

        t_move = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",  # replace it with swarm/docker endpoint
            image="centos:latest",
            network_mode="bridge",
            # NOTE(review): both mounts use the same host directory
            # (host_top_dir/input_path), while the "print" task mounts
            # host_top_dir/output_path - confirm the second mapping is intended.
            volumes=[
                f"{host_path}:{input_path}",
                f"{host_top_dir}/{input_path}:{output_path}",
            ],
            command=[
                "/bin/bash",
                "-c",
                "/bin/sleep 30; "
                "/bin/mv {{params.source_location}}/{{ ti.xcom_pull('view_file') }} {{params.target_location}};"
                "/bin/echo '{{params.target_location}}/{{ ti.xcom_pull('view_file') }}';",
            ],
            task_id="move_data",
            xcom_push=True,
            params={
                "source_location": f"{input_path}",
                "target_location": f"{output_path}",
            },
        )

        print_templated_cmd = """
            cat {{ ti.xcom_pull('move_data') }}
        """

        t_print = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",
            image="centos:latest",
            volumes=[f"{host_top_dir}/{output_path}:{output_path}"],
            command=print_templated_cmd,
            task_id="print",
        )

        # view_file -> check_if_data_available -> move_data -> print
        t_view.set_downstream(t_is_data_available)
        t_is_data_available.set_downstream(t_move)
        t_move.set_downstream(t_print)
end.set_upstream(ssh_tasks) # if no hive scripts are generated, short circuit the step at the beginning of the main dag return ssh_dag dag = DAG( 's3_convert_json_to_parquet_emr_ssh', default_args=defautlt_args, dagrun_timeout=timedelta(hours=1), schedule_interval='0 3 * * *' ) step_entities_partitions = ShortCircuitOperator( task_id='step_entities_partitions', python_callable=gen_hive_scripts, provide_context=True, dag=dag) step_ssh_subdag = SubDagOperator( task_id='step_jobs_submit', subdag=get_sub_ssh_cmds_dag(dag, 'step_jobs_submit',defautlt_args), default_args=defautlt_args, dag=dag) step_end = DummyOperator( task_id='ssh_end', dag=dag) step_entities_partitions.set_downstream(step_ssh_subdag) step_ssh_subdag.set_downstream(step_end)
<table> <tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr> <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr> <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr> <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr> <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr> <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr> <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr> <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr> <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr> </table> <h2>Processes Killed</h2> <ul> {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %} <li>Process {{loop.index}}</li> <ul> {% for key, value in process_killed.iteritems() %} <li>{{ key }}: {{ value }}</li> {% endfor %} </ul> {% endfor %} </ul> </body> </html> """, dag=DAG_OBJ) KILL_HALTED_TASKS_OPR.set_downstream(EMAIL_OR_NOT_BRANCH_OPR) EMAIL_OR_NOT_BRANCH_OPR.set_downstream(SEND_PROCESSES_KILLED_EMAIL_OPR)