def test_bash_operator(self): op = BashOperator(task_id='test_bash_operator', bash_command="echo success", dag=self.dag) op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_task_fail_duration(self): """If a task fails, the duration should be recorded in TaskFail""" op1 = BashOperator( task_id='pass_sleepy', bash_command='sleep 3', dag=self.dag) op2 = BashOperator( task_id='fail_sleepy', bash_command='sleep 5', execution_timeout=timedelta(seconds=3), retry_delay=timedelta(seconds=0), dag=self.dag) session = settings.Session() try: op1.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) except Exception: # pylint: disable=broad-except pass try: op2.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) except Exception: # pylint: disable=broad-except pass op1_fails = session.query(TaskFail).filter_by( task_id='pass_sleepy', dag_id=self.dag.dag_id, execution_date=DEFAULT_DATE).all() op2_fails = session.query(TaskFail).filter_by( task_id='fail_sleepy', dag_id=self.dag.dag_id, execution_date=DEFAULT_DATE).all() self.assertEqual(0, len(op1_fails)) self.assertEqual(1, len(op2_fails)) self.assertGreaterEqual(sum([f.duration for f in op2_fails]), 3)
def test_bash_operator_multi_byte_output(self): op = BashOperator( task_id='test_multi_byte_bash_operator', bash_command="echo \u2600", dag=self.dag, output_encoding='utf-8') op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_echo_env_variables(self): """ Test that env variables are exported correctly to the task bash environment. """ now = datetime.utcnow() now = now.replace(tzinfo=timezone.utc) dag = DAG( dag_id='bash_op_test', default_args={ 'owner': 'airflow', 'retries': 100, 'start_date': DEFAULT_DATE }, schedule_interval='@daily', dagrun_timeout=timedelta(minutes=60), ) dag.create_dagrun( run_type=DagRunType.MANUAL, execution_date=DEFAULT_DATE, start_date=now, state=State.RUNNING, external_trigger=False, ) with NamedTemporaryFile() as tmp_file: task = BashOperator( task_id='echo_env_vars', dag=dag, bash_command='echo $AIRFLOW_HOME>> {0};' 'echo $PYTHONPATH>> {0};' 'echo $AIRFLOW_CTX_DAG_ID >> {0};' 'echo $AIRFLOW_CTX_TASK_ID>> {0};' 'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};' 'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(tmp_file.name), ) with mock.patch.dict( 'os.environ', { 'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME', 'PYTHONPATH': 'AWESOME_PYTHONPATH' }): task.run(DEFAULT_DATE, DEFAULT_DATE, ignore_first_depends_on_past=True, ignore_ti_state=True) with open(tmp_file.name) as file: output = ''.join(file.readlines()) self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output) # exported in run-tests as part of PYTHONPATH self.assertIn('AWESOME_PYTHONPATH', output) self.assertIn('bash_op_test', output) self.assertIn('echo_env_vars', output) self.assertIn(DEFAULT_DATE.isoformat(), output) self.assertIn( DagRun.generate_run_id(DagRunType.MANUAL, DEFAULT_DATE), output)
def test_bash_operator(self): op = BashOperator(task_id='test_bash_operator', bash_command="echo success", dag=self.dag) self.dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE) op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_bash_operator_multi_byte_output(self): op = BashOperator(task_id='test_multi_byte_bash_operator', bash_command="echo \u2600", dag=self.dag, output_encoding='utf-8') self.dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE) op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_on_failure_callback(self): mock_failure_callback = MagicMock() op = BashOperator( task_id='check_on_failure_callback', bash_command="exit 1", dag=self.dag, on_failure_callback=mock_failure_callback, ) with pytest.raises(AirflowException): op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) mock_failure_callback.assert_called_once()
def test_on_failure_callback(self): # Annoying workaround for nonlocal not existing in python 2 data = {'called': False} def check_failure(context, test_case=self): # pylint: disable=unused-argument data['called'] = True error = context.get("exception") test_case.assertIsInstance(error, AirflowException) op = BashOperator( task_id='check_on_failure_callback', bash_command="exit 1", dag=self.dag, on_failure_callback=check_failure, ) with pytest.raises(AirflowException): op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) assert data['called']
def test_bash_operator_kill(self): import psutil sleep_time = "100%d" % os.getpid() op = BashOperator( task_id='test_bash_operator_kill', execution_timeout=timedelta(seconds=1), bash_command=f"/bin/bash -c 'sleep {sleep_time}'", dag=self.dag, ) with pytest.raises(AirflowTaskTimeout): op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE) sleep(2) pid = -1 for proc in psutil.process_iter(): if proc.cmdline() == ['sleep', sleep_time]: pid = proc.pid if pid != -1: os.kill(pid, signal.SIGTERM) self.fail( "BashOperator's subprocess still running after stopping on timeout!" )
def test_external_task_sensor_fn_multiple_execution_dates(self): bash_command_code = """ {% set s=execution_date.time().second %} echo "second is {{ s }}" if [[ $(( {{ s }} % 60 )) == 1 ]] then exit 1 fi exit 0 """ dag_external_id = TEST_DAG_ID + '_external' dag_external = DAG( dag_external_id, default_args=self.args, schedule_interval=timedelta(seconds=1)) task_external_with_failure = BashOperator( task_id="task_external_with_failure", bash_command=bash_command_code, retries=0, dag=dag_external) task_external_without_failure = DummyOperator( task_id="task_external_without_failure", retries=0, dag=dag_external) task_external_without_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True) session = settings.Session() TI = TaskInstance try: task_external_with_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True) # The test_with_failure task is excepted to fail # once per minute (the run on the first second of # each minute). except Exception as e: # pylint: disable=broad-except failed_tis = session.query(TI).filter( TI.dag_id == dag_external_id, TI.state == State.FAILED, TI.execution_date == DEFAULT_DATE + timedelta(seconds=1)).all() if len(failed_tis) == 1 and \ failed_tis[0].task_id == 'task_external_with_failure': pass else: raise e dag_id = TEST_DAG_ID dag = DAG( dag_id, default_args=self.args, schedule_interval=timedelta(minutes=1)) task_without_failure = ExternalTaskSensor( task_id='task_without_failure', external_dag_id=dag_external_id, external_task_id='task_external_without_failure', execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)], allowed_states=['success'], retries=0, timeout=1, poke_interval=1, dag=dag) task_with_failure = ExternalTaskSensor( task_id='task_with_failure', external_dag_id=dag_external_id, external_task_id='task_external_with_failure', execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)], allowed_states=['success'], retries=0, timeout=1, poke_interval=1, dag=dag) task_without_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) with self.assertRaises(AirflowSensorTimeout): task_with_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)