def test_fractional_seconds(self):
    """Tests if fractional seconds are stored in the database"""
    dag = DAG(TEST_DAG_ID + 'test_fractional_seconds')
    dag.schedule_interval = '@once'
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime(2015, 1, 2, 0, 0)))

    # utcnow() carries microseconds; both stored columns must round-trip them.
    start_date = timezone.utcnow()

    run = dag.create_dagrun(
        run_id='test_' + start_date.isoformat(),
        execution_date=start_date,
        start_date=start_date,
        state=State.RUNNING,
        external_trigger=False,
    )

    # Re-read the row so we compare against what the database actually stored.
    run.refresh_from_db()

    self.assertEqual(start_date, run.execution_date,
                     "dag run execution_date loses precision")
    # fixed: message previously had a stray trailing space
    self.assertEqual(start_date, run.start_date,
                     "dag run start_date loses precision")
def test_schedule_dag_fake_scheduled_previous(self):
    """
    Test scheduling a dag where there is a prior DagRun
    which has the same run_id as the next run should have
    """
    interval = timedelta(hours=1)

    dag = DAG(
        self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
        schedule_interval=interval,
        start_date=DEFAULT_DATE)
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=DEFAULT_DATE))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # Pre-create a run whose run_id collides with what the scheduler would
    # generate for DEFAULT_DATE.
    dag.create_dagrun(
        run_id=DagRun.id_for_date(DEFAULT_DATE),
        execution_date=DEFAULT_DATE,
        state=State.SUCCESS,
        external_trigger=True)

    dag_run = processor.create_dag_run(dag)

    self.assertIsNotNone(dag_run)
    self.assertEqual(dag.dag_id, dag_run.dag_id)
    self.assertIsNotNone(dag_run.run_id)
    self.assertNotEqual('', dag_run.run_id)
    # The scheduler must have moved on to the next interval.
    self.assertEqual(
        DEFAULT_DATE + interval, dag_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'
        .format(dag_run.execution_date))
    self.assertEqual(State.RUNNING, dag_run.state)
    self.assertFalse(dag_run.external_trigger)
def test_externally_triggered_dagrun(self):
    TI = TaskInstance

    # Create the dagrun between two "scheduled" execution dates of the DAG
    execution_date = DEFAULT_DATE + timedelta(days=2)
    execution_ds = execution_date.strftime('%Y-%m-%d')
    execution_ds_nodash = execution_ds.replace('-', '')

    dag = DAG(
        TEST_DAG_ID,
        default_args=self.args,
        schedule_interval=timedelta(weeks=1),
        start_date=DEFAULT_DATE)
    task = DummyOperator(
        task_id='test_externally_triggered_dag_context',
        dag=dag)
    dag.create_dagrun(
        run_id=DagRun.id_for_date(execution_date),
        execution_date=execution_date,
        state=State.RUNNING,
        external_trigger=True)
    task.run(start_date=execution_date, end_date=execution_date)

    ti = TI(task=task, execution_date=execution_date)
    context = ti.get_template_context()

    # next_ds/prev_ds should be the execution date for manually triggered runs
    for key in ('next_ds', 'prev_ds'):
        self.assertEqual(context[key], execution_ds)
    for key in ('next_ds_nodash', 'prev_ds_nodash'):
        self.assertEqual(context[key], execution_ds_nodash)
class BashOperatorTestCase(unittest.TestCase):
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(dag_id='bash_op_test', default_args={
            'owner': 'airflow',
            'retries': 100,
            'start_date': DEFAULT_DATE
        },
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        import tempfile
        with tempfile.NamedTemporaryFile() as f:
            fname = f.name
            t = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                             'echo $PYTHONPATH>> {0};'
                             'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                             'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                             'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                             'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(fname))

            original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME']
            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            try:
                t.run(DEFAULT_DATE, DEFAULT_DATE,
                      ignore_first_depends_on_past=True, ignore_ti_state=True)
            finally:
                # Restore the real AIRFLOW_HOME even if the task run raises,
                # so a failure here cannot leak the fake value into other tests
                # (previously the restore was skipped on failure).
                os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME

            with open(fname, 'r') as fr:
                output = ''.join(fr.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run_unit_tests.sh as part of PYTHONPATH
                self.assertIn('tests/test_utils', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)
class BashOperatorTestCase(unittest.TestCase):
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(
            dag_id='bash_op_test', default_args={
                'owner': 'airflow',
                'retries': 100,
                'start_date': DEFAULT_DATE
            },
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        import tempfile
        with tempfile.NamedTemporaryFile() as f:
            fname = f.name
            t = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                             'echo $PYTHONPATH>> {0};'
                             'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                             'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                             'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                             'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(fname)
            )

            original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME']
            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            try:
                t.run(DEFAULT_DATE, DEFAULT_DATE,
                      ignore_first_depends_on_past=True, ignore_ti_state=True)
            finally:
                # Restore the real AIRFLOW_HOME even if the task run raises,
                # so a failure here cannot leak the fake value into other tests
                # (previously the restore was skipped on failure).
                os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME

            with open(fname, 'r') as fr:
                output = ''.join(fr.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run_unit_tests.sh as part of PYTHONPATH
                self.assertIn('tests/test_utils', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)
def test_echo_env_variables(self):
    """
    Test that env variables are exported correctly to the
    task bash environment.
    """
    now = datetime.utcnow().replace(tzinfo=timezone.utc)

    dag = DAG(
        dag_id='bash_op_test',
        default_args={'owner': 'airflow',
                      'retries': 100,
                      'start_date': DEFAULT_DATE},
        schedule_interval='@daily',
        dagrun_timeout=timedelta(minutes=60))

    dag.create_dagrun(
        run_id='manual__' + DEFAULT_DATE.isoformat(),
        execution_date=DEFAULT_DATE,
        start_date=now,
        state=State.RUNNING,
        external_trigger=False,
    )

    with NamedTemporaryFile() as tmp_file:
        command = ('echo $AIRFLOW_HOME>> {0};'
                   'echo $PYTHONPATH>> {0};'
                   'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                   'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                   'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                   'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};').format(tmp_file.name)
        task = BashOperator(task_id='echo_env_vars',
                            dag=dag,
                            bash_command=command)

        fake_env = {
            'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME',
            'PYTHONPATH': 'AWESOME_PYTHONPATH',
        }
        with unittest.mock.patch.dict('os.environ', fake_env):
            task.run(DEFAULT_DATE, DEFAULT_DATE,
                     ignore_first_depends_on_past=True,
                     ignore_ti_state=True)

        with open(tmp_file.name, 'r') as file:
            output = ''.join(file.readlines())

        self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
        # exported in run-tests as part of PYTHONPATH
        self.assertIn('AWESOME_PYTHONPATH', output)
        self.assertIn('bash_op_test', output)
        self.assertIn('echo_env_vars', output)
        self.assertIn(DEFAULT_DATE.isoformat(), output)
        self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)
def _create_dagruns(dag: DAG, execution_dates: List[datetime],
                    state: TaskInstanceState, run_type: DagRunType) -> List[DagRun]:
    """
    Infers from the dates which dag runs need to be created and does so.

    :param dag: the dag to create dag runs for
    :param execution_dates: list of execution dates to evaluate
    :param state: the state to set the dag run to
    :param run_type: The prefix will be used to construct dag run id: {run_id_prefix}__{execution_date}
    :return: newly created and existing dag runs for the execution dates supplied
    """
    # Runs that already exist for the requested dates.
    dag_runs = DagRun.find(dag_id=dag.dag_id, execution_date=execution_dates)
    existing_dates = {run.execution_date for run in dag_runs}

    # Create a run for every requested date that does not have one yet.
    for execution_date in set(execution_dates) - existing_dates:
        dag_runs.append(dag.create_dagrun(
            execution_date=execution_date,
            start_date=timezone.utcnow(),
            external_trigger=False,
            state=state,
            run_type=run_type,
        ))

    return dag_runs
class BranchOperatorTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: 'branch_1')

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.branch_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_op)
        self.dag.clear()

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        tis = session.query(TI).filter(
            TI.dag_id == self.dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )
        session.close()

        for ti in tis:
            if ti.task_id == 'make_choice':
                # deprecated assertEquals replaced with assertEqual
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                # should exist with state None
                self.assertEqual(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                # previously a bare `raise` (RuntimeError: no active exception)
                self.fail('unexpected task_id: {}'.format(ti.task_id))

    def test_with_dag_run(self):
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )
        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                self.fail('unexpected task_id: {}'.format(ti.task_id))
def test_with_dag_run(self):
    """ShortCircuitOperator skips downstream tasks when the callable is falsy
    and leaves them schedulable when it is truthy."""
    value = False
    dag = DAG('shortcircuit_operator_test_with_dag_run',
              default_args={
                  'owner': 'airflow',
                  'start_date': DEFAULT_DATE
              },
              schedule_interval=INTERVAL)
    # The lambda reads `value` at call time, so reassigning it below changes
    # the short-circuit decision on the second run.
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    dag=dag,
                                    python_callable=lambda: value)
    branch_1 = DummyOperator(task_id='branch_1', dag=dag)
    branch_1.set_upstream(short_op)
    branch_2 = DummyOperator(task_id='branch_2', dag=dag)
    branch_2.set_upstream(branch_1)
    upstream = DummyOperator(task_id='upstream', dag=dag)
    upstream.set_downstream(short_op)
    dag.clear()

    # lazy %-style args instead of eager .format
    logging.error("Tasks %s", dag.tasks)
    dr = dag.create_dagrun(
        run_id="manual__",
        start_date=datetime.datetime.now(),
        execution_date=DEFAULT_DATE,
        state=State.RUNNING
    )
    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 4)
    for ti in tis:
        if ti.task_id == 'make_choice':
            # deprecated assertEquals replaced with assertEqual
            self.assertEqual(ti.state, State.SUCCESS)
        elif ti.task_id == 'upstream':
            self.assertEqual(ti.state, State.SUCCESS)
        elif ti.task_id in ('branch_1', 'branch_2'):
            self.assertEqual(ti.state, State.SKIPPED)
        else:
            # previously a bare `raise` (RuntimeError: no active exception)
            self.fail('unexpected task_id: {}'.format(ti.task_id))

    value = True
    dag.clear()
    dr.verify_integrity()
    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 4)
    for ti in tis:
        if ti.task_id == 'make_choice':
            self.assertEqual(ti.state, State.SUCCESS)
        elif ti.task_id == 'upstream':
            self.assertEqual(ti.state, State.SUCCESS)
        elif ti.task_id in ('branch_1', 'branch_2'):
            self.assertEqual(ti.state, State.NONE)
        else:
            self.fail('unexpected task_id: {}'.format(ti.task_id))
def test_external_dag_sensor(self):
    # A sibling dag whose dagrun the sensor will wait on.
    other_dag = DAG('other_dag',
                    default_args=self.args,
                    end_date=DEFAULT_DATE,
                    schedule_interval='@once')
    other_dag.create_dagrun(run_id='test',
                            start_date=DEFAULT_DATE,
                            execution_date=DEFAULT_DATE,
                            state=State.SUCCESS)

    sensor = ExternalTaskSensor(task_id='test_external_dag_sensor_check',
                                external_dag_id='other_dag',
                                external_task_id=None,
                                dag=self.dag)
    sensor.run(start_date=DEFAULT_DATE,
               end_date=DEFAULT_DATE,
               ignore_ti_state=True)
def test_external_dag_sensor(self):
    # A sibling dag whose dagrun the sensor will wait on.
    other_dag = DAG('other_dag',
                    default_args=self.args,
                    end_date=DEFAULT_DATE,
                    schedule_interval='@once')
    other_dag.create_dagrun(run_id='test',
                            start_date=DEFAULT_DATE,
                            execution_date=DEFAULT_DATE,
                            state=State.SUCCESS)

    task = ExternalTaskSensor(task_id='test_external_dag_sensor_check',
                              external_dag_id='other_dag',
                              external_task_id=None,
                              dag=self.dag)
    task.run(start_date=DEFAULT_DATE,
             end_date=DEFAULT_DATE,
             ignore_ti_state=True)
class ShortCircuitOperatorTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('shortcircuit_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        # The callable reads self.value at poke time, so each test can flip
        # the short-circuit decision by assigning self.value.
        self.short_op = ShortCircuitOperator(task_id='make_choice',
                                             dag=self.dag,
                                             python_callable=lambda: self.value)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_1)
        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)
        self.dag.clear()

        self.value = True

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.value = False
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        try:
            tis = session.query(TI).filter(
                TI.dag_id == self.dag.dag_id,
                TI.execution_date == DEFAULT_DATE
            )

            for ti in tis:
                if ti.task_id == 'make_choice':
                    # deprecated assertEquals replaced with assertEqual
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'upstream':
                    # should not exist
                    self.fail('upstream task instance should not exist')
                elif ti.task_id in ('branch_1', 'branch_2'):
                    self.assertEqual(ti.state, State.SKIPPED)
                else:
                    # previously a bare `raise` (no active exception)
                    self.fail('unexpected task_id: {}'.format(ti.task_id))

            self.value = True
            self.dag.clear()

            self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            for ti in tis:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'upstream':
                    # should not exist
                    self.fail('upstream task instance should not exist')
                elif ti.task_id in ('branch_1', 'branch_2'):
                    self.assertEqual(ti.state, State.NONE)
                else:
                    self.fail('unexpected task_id: {}'.format(ti.task_id))
        finally:
            # close even when an assertion above fails (previously leaked)
            session.close()

    def test_with_dag_run(self):
        self.value = False
        # lazy %-style args instead of eager .format
        logging.error("Tasks %s", self.dag.tasks)
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )
        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id in ('branch_1', 'branch_2'):
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                self.fail('unexpected task_id: {}'.format(ti.task_id))

        self.value = True
        self.dag.clear()
        dr.verify_integrity()
        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id in ('branch_1', 'branch_2'):
                self.assertEqual(ti.state, State.NONE)
            else:
                self.fail('unexpected task_id: {}'.format(ti.task_id))
class TestBashOperator(unittest.TestCase):

    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        utc_now = datetime.utcnow().replace(tzinfo=timezone.utc)

        self.dag = DAG(
            dag_id='bash_op_test',
            default_args={'owner': 'airflow',
                          'retries': 100,
                          'start_date': DEFAULT_DATE},
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=utc_now,
            state=State.RUNNING,
            external_trigger=False,
        )

        with NamedTemporaryFile() as tmp_file:
            command = ('echo $AIRFLOW_HOME>> {0};'
                       'echo $PYTHONPATH>> {0};'
                       'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                       'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                       'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                       'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};').format(tmp_file.name)
            task = BashOperator(task_id='echo_env_vars',
                                dag=self.dag,
                                bash_command=command)

            fake_env = {
                'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME',
                'PYTHONPATH': 'AWESOME_PYTHONPATH',
            }
            with unittest.mock.patch.dict('os.environ', fake_env):
                task.run(DEFAULT_DATE, DEFAULT_DATE,
                         ignore_first_depends_on_past=True,
                         ignore_ti_state=True)

            with open(tmp_file.name, 'r') as file:
                output = ''.join(file.readlines())

            self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
            # exported in run-tests as part of PYTHONPATH
            self.assertIn('AWESOME_PYTHONPATH', output)
            self.assertIn('bash_op_test', output)
            self.assertIn('echo_env_vars', output)
            self.assertIn(DEFAULT_DATE.isoformat(), output)
            self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)

    def test_return_value(self):
        op = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_return_value',
            dag=None)
        # last line of stdout is the operator's return value
        self.assertEqual(op.execute(context={}), 'stdout')

    def test_raise_exception_on_non_zero_exit_code(self):
        op = BashOperator(
            bash_command='exit 42',
            task_id='test_return_value',
            dag=None)
        with self.assertRaisesRegex(
            AirflowException,
            "Bash command failed\\. The command returned a non-zero exit code\\."
        ):
            op.execute(context={})

    def test_task_retries(self):
        op = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_task_retries',
            retries=2,
            dag=None)
        self.assertEqual(op.retries, 2)

    def test_default_retries(self):
        op = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_default_retries',
            dag=None)
        self.assertEqual(op.retries, 0)

    @mock.patch.dict('os.environ', clear=True)
    @mock.patch(
        "airflow.operators.bash_operator.TemporaryDirectory",
        **{  # type: ignore
            'return_value.__enter__.return_value': '/tmp/airflowtmpcatcat'
        })
    @mock.patch(
        "airflow.operators.bash_operator.Popen",
        **{  # type: ignore
            'return_value.stdout.readline.side_effect': [b'BAR', b'BAZ'],
            'return_value.returncode': 0
        })
    def test_should_exec_subprocess(self, mock_popen, mock_temporary_directory):
        op = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_return_value',
            dag=None)

        op.execute({})

        mock_popen.assert_called_once_with(
            ['bash', '-c', 'echo "stdout"'],
            cwd='/tmp/airflowtmpcatcat',
            env={},
            preexec_fn=mock.ANY,
            stderr=STDOUT,
            stdout=PIPE)
class BaseSensorTest(unittest.TestCase):
    """Tests for BaseSensorOperator poke/reschedule/soft-fail behavior.

    All deprecated ``assertEquals`` aliases (removed in Python 3.12) have been
    replaced with ``assertEqual``.
    """

    def setUp(self):
        configuration.load_test_config()
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE
        }
        self.dag = DAG(TEST_DAG_ID, default_args=args)

        # start from a clean slate so dagrun/ti/reschedule counts are exact
        session = settings.Session()
        session.query(TaskReschedule).delete()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()

    def _make_dag_run(self):
        return self.dag.create_dagrun(
            run_id='manual__',
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

    def _make_sensor(self, return_value, **kwargs):
        # default to instant poke/timeout unless a test overrides them
        poke_interval = 'poke_interval'
        timeout = 'timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if timeout not in kwargs:
            kwargs[timeout] = 0

        sensor = DummySensor(
            task_id=SENSOR_OP,
            return_value=return_value,
            dag=self.dag,
            **kwargs
        )

        dummy_op = DummyOperator(
            task_id=DUMMY_OP,
            dag=self.dag
        )
        dummy_op.set_upstream(sensor)
        return sensor

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    def test_ok(self):
        sensor = self._make_sensor(True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_fail(self):
        sensor = self._make_sensor(False)
        dr = self._make_dag_run()

        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_soft_fail(self):
        sensor = self._make_sensor(False, soft_fail=True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_soft_fail_with_retries(self):
        sensor = self._make_sensor(
            return_value=False,
            soft_fail=True,
            retries=1,
            retry_delay=timedelta(milliseconds=1))
        dr = self._make_dag_run()

        # first run fails and task instance is marked up to retry
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        sleep(0.001)
        # after retry DAG run is skipped
        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_ok_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date1 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 2)
                self.assertEqual(task_reschedules[1].start_date, date2)
                self.assertEqual(task_reschedules[1].reschedule_date,
                                 date2 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns True and task succeeds
        date3 = date2 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_fail_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=False,
            poke_interval=10,
            timeout=5,
            mode='reschedule')
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False, timeout occurs
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            with self.assertRaises(AirflowSensorTimeout):
                self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_soft_fail_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=False,
            poke_interval=10,
            timeout=5,
            soft_fail=True,
            mode='reschedule')
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False, timeout occurs
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_ok_with_reschedule_and_retry(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=5,
            retries=1,
            retry_delay=timedelta(seconds=10),
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date1 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 1)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke fails and task instance is marked up to retry
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            with self.assertRaises(AirflowSensorTimeout):
                self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns False and task is rescheduled again
        date3 = date2 + timedelta(seconds=sensor.poke_interval) + sensor.retry_delay
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date3)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date3 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 2)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # fourth poke return True and task succeeds
        date4 = date3 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date4):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_should_include_ready_to_reschedule_dep(self):
        sensor = self._make_sensor(True)
        deps = sensor.deps
        # assertIn instead of assertTrue(x in y) for a clearer failure message
        self.assertIn(ReadyToRescheduleDep(), deps)

    def test_invalid_mode(self):
        with self.assertRaises(AirflowException):
            self._make_sensor(
                return_value=True,
                mode='foo')

    def test_ok_with_custom_reschedule_exception(self):
        sensor = self._make_sensor(
            return_value=None,
            mode='reschedule')
        date1 = timezone.utcnow()
        date2 = date1 + timedelta(seconds=60)
        date3 = date1 + timedelta(seconds=120)
        sensor.poke = Mock(side_effect=[
            AirflowRescheduleException(date2),
            AirflowRescheduleException(date3),
            True,
        ])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date, date2)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 2)
                self.assertEqual(task_reschedules[1].start_date, date2)
                self.assertEqual(task_reschedules[1].reschedule_date, date3)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns True and task succeeds
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_reschedule_with_test_mode(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False])
        dr = self._make_dag_run()

        # poke returns False and AirflowRescheduleException is raised
        date1 = timezone.utcnow()
        with freeze_time(date1):
            for dt in self.dag.date_range(DEFAULT_DATE, end_date=DEFAULT_DATE):
                TaskInstance(sensor, dt).run(
                    ignore_ti_state=True,
                    test_mode=True)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # in test mode state is not modified
                self.assertEqual(ti.state, State.NONE)
                # in test mode no reschedule request is recorded
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 0)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)
class LatestOnlyOperatorTest(unittest.TestCase):
    """Tests for LatestOnlyOperator: downstream tasks run only for the
    most recent scheduled execution date; earlier dates are skipped."""

    def setUp(self):
        super(LatestOnlyOperatorTest, self).setUp()
        configuration.load_test_config()
        self.dag = DAG(
            'test_dag',
            default_args={
                'owner': 'airflow',
                'start_date': DEFAULT_DATE
            },
            schedule_interval=INTERVAL)
        self.addCleanup(self.dag.clear)
        # Freeze "now" so which run counts as "latest" is deterministic.
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    def test_run(self):
        """The operator runs cleanly for a single execution date."""
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping(self):
        """Downstream tasks are skipped for all but the latest run."""
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_downstream_state)

    def test_skipping_dagrun(self):
        """Same skip semantics when explicit DagRuns exist for each date."""
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        # Fix: the original bound these runs to dr1/dr2 (re-binding dr2
        # for the third run) but never used them; drop the dead locals.
        self.dag.create_dagrun(run_id="manual__1",
                               start_date=timezone.utcnow(),
                               execution_date=DEFAULT_DATE,
                               state=State.RUNNING)
        self.dag.create_dagrun(run_id="manual__2",
                               start_date=timezone.utcnow(),
                               execution_date=timezone.datetime(2016, 1, 1, 12),
                               state=State.RUNNING)
        self.dag.create_dagrun(run_id="manual__3",
                               start_date=timezone.utcnow(),
                               execution_date=END_DATE,
                               state=State.RUNNING)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            },
            exec_date_to_downstream_state)
class LatestOnlyOperatorTest(unittest.TestCase):
    """Exercises LatestOnlyOperator skip behavior: only the most recent
    scheduled run lets its downstream tasks execute."""

    def setUp(self):
        super().setUp()
        configuration.load_test_config()
        self.dag = DAG(
            'test_dag',
            default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
            schedule_interval=INTERVAL)
        self.addCleanup(self.dag.clear)
        # Pin wall-clock time so "latest" is stable across the test.
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    @staticmethod
    def _states_by_date(task_id):
        # Collapse a task's instances into {execution_date: state}.
        return {ti.execution_date: ti.state
                for ti in get_task_instances(task_id)}

    def test_run(self):
        """A lone LatestOnlyOperator executes without error."""
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping(self):
        """Non-latest execution dates get their downstream tasks skipped."""
        gate = LatestOnlyOperator(task_id='latest', dag=self.dag)
        first_child = DummyOperator(task_id='downstream', dag=self.dag)
        second_child = DummyOperator(task_id='downstream_2', dag=self.dag)
        first_child.set_upstream(gate)
        second_child.set_upstream(first_child)

        for task in (gate, first_child, second_child):
            task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        all_success = {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'}
        skipped_until_latest = {
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'}

        self.assertEqual(all_success, self._states_by_date('latest'))
        self.assertEqual(skipped_until_latest,
                         self._states_by_date('downstream'))
        self.assertEqual(skipped_until_latest,
                         self._states_by_date('downstream_2'))

    def test_skipping_dagrun(self):
        """Skip behavior also holds when DagRuns exist for each date."""
        gate = LatestOnlyOperator(task_id='latest', dag=self.dag)
        first_child = DummyOperator(task_id='downstream', dag=self.dag)
        second_child = DummyOperator(task_id='downstream_2', dag=self.dag)
        first_child.set_upstream(gate)
        second_child.set_upstream(first_child)

        run_specs = (
            ("manual__1", DEFAULT_DATE),
            ("manual__2", timezone.datetime(2016, 1, 1, 12)),
            ("manual__3", END_DATE),
        )
        for run_id, execution_date in run_specs:
            self.dag.create_dagrun(
                run_id=run_id,
                start_date=timezone.utcnow(),
                execution_date=execution_date,
                state=State.RUNNING
            )

        for task in (gate, first_child, second_child):
            task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        all_success = {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'}
        skipped_until_latest = {
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'}

        self.assertEqual(all_success, self._states_by_date('latest'))
        self.assertEqual(skipped_until_latest,
                         self._states_by_date('downstream'))
        self.assertEqual(skipped_until_latest,
                         self._states_by_date('downstream_2'))
class TestBashOperator(unittest.TestCase):
    """Tests for BashOperator: env export, return value, retry config."""

    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(
            dag_id='bash_op_test', default_args={
                'owner': 'airflow',
                'retries': 100,
                'start_date': DEFAULT_DATE
            },
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        with NamedTemporaryFile() as tmp_file:
            task = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                             'echo $PYTHONPATH>> {0};'
                             'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                             'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                             'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                             'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(tmp_file.name)
            )

            original_airflow_home = os.environ['AIRFLOW_HOME']
            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            try:
                task.run(DEFAULT_DATE, DEFAULT_DATE,
                         ignore_first_depends_on_past=True,
                         ignore_ti_state=True)

                with open(tmp_file.name, 'r') as file:
                    output = ''.join(file.readlines())
                    self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                    # exported in run-tests as part of PYTHONPATH
                    self.assertIn('tests/test_utils', output)
                    self.assertIn('bash_op_test', output)
                    self.assertIn('echo_env_vars', output)
                    self.assertIn(DEFAULT_DATE.isoformat(), output)
                    self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)
            finally:
                # Bug fix: the original only restored AIRFLOW_HOME on the
                # success path; a failing run/assertion leaked the fake
                # value into every later test in the process.
                os.environ['AIRFLOW_HOME'] = original_airflow_home

    def test_return_value(self):
        """execute() returns the last line written to stdout."""
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_return_value',
            dag=None
        )
        return_value = bash_operator.execute(context={})
        self.assertEqual(return_value, 'stdout')

    def test_task_retries(self):
        """An explicit retries argument is honored."""
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_task_retries',
            retries=2,
            dag=None
        )
        self.assertEqual(bash_operator.retries, 2)

    @mock.patch.object(configuration.conf, 'getint', return_value=3)
    def test_default_retries(self, mock_config):
        """With no explicit retries, the configured default is used."""
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_default_retries',
            dag=None
        )
        self.assertEqual(bash_operator.retries, 3)
class BaseSensorTest(unittest.TestCase):
    """Tests for BaseSensorOperator poke/timeout/soft-fail semantics."""

    def setUp(self):
        configuration.load_test_config()
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=args)

        # Start from a clean slate so prior tests' runs don't interfere.
        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()

    def _make_dag_run(self):
        return self.dag.create_dagrun(
            run_id='manual__',
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

    def _make_sensor(self, return_value, **kwargs):
        # Default to immediate poke/timeout unless the test overrides them.
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('timeout', 0)
        sensor = DummySensor(
            task_id=SENSOR_OP,
            return_value=return_value,
            dag=self.dag,
            **kwargs
        )
        # Downstream dummy lets tests assert that sensor state does not
        # leak into dependents.
        dummy_op = DummyOperator(
            task_id=DUMMY_OP,
            dag=self.dag
        )
        dummy_op.set_upstream(sensor)
        return sensor

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                 ignore_ti_state=True)

    def test_ok(self):
        """A sensor whose poke succeeds ends up SUCCESS."""
        sensor = self._make_sensor(True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        # Fix: assertEquals is a deprecated alias (removed in Py 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_fail(self):
        """A sensor that never succeeds times out and FAILS."""
        sensor = self._make_sensor(False)
        dr = self._make_dag_run()

        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_soft_fail(self):
        """soft_fail turns a timeout into SKIPPED instead of FAILED."""
        sensor = self._make_sensor(False, soft_fail=True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_soft_fail_with_retries(self):
        """With retries, soft_fail skips only after retries are exhausted."""
        sensor = self._make_sensor(
            return_value=False,
            soft_fail=True,
            retries=1,
            retry_delay=timedelta(milliseconds=1))
        dr = self._make_dag_run()

        # first run fails and task instance is marked up to retry
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # Wait out the retry delay before re-running.
        sleep(0.001)
        # after retry DAG run is skipped
        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)
class SmartSensorTest(unittest.TestCase):
    """Tests for the smart-sensor mechanism: registration of sensor tasks
    as SensorInstance rows, dedup of identical poke work, and timeout
    handling by the consolidating smart operator."""

    def setUp(self):
        # NOTE(review): "SMART_SENSER" looks like a typo for "SMART_SENSOR";
        # confirm these keys actually match the config section the smart
        # sensor code reads, otherwise this toggle is a no-op.
        os.environ['AIRFLOW__SMART_SENSER__USE_SMART_SENSOR'] = 'true'
        os.environ[
            'AIRFLOW__SMART_SENSER__SENSORS_ENABLED'] = 'DummySmartSensor'

        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        # Separate DAGs: one hosts the smart operator, one the sensors.
        self.dag = DAG(TEST_DAG_ID, default_args=args)
        self.sensor_dag = DAG(TEST_SENSOR_DAG_ID, default_args=args)
        self.log = logging.getLogger('BaseSmartTest')

        # Clean all relevant tables so each test starts fresh.
        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.query(SensorInstance).delete()
        session.commit()

    def tearDown(self):
        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.query(SensorInstance).delete()
        session.commit()
        # Undo the env toggles set in setUp.
        os.environ.pop('AIRFLOW__SMART_SENSER__USE_SMART_SENSOR')
        os.environ.pop('AIRFLOW__SMART_SENSER__SENSORS_ENABLED')

    def _make_dag_run(self):
        # DagRun for the smart-operator DAG.
        return self.dag.create_dagrun(run_id='manual__' + TEST_DAG_ID,
                                      start_date=timezone.utcnow(),
                                      execution_date=DEFAULT_DATE,
                                      state=State.RUNNING)

    def _make_sensor_dag_run(self):
        # DagRun for the DAG hosting the individual sensor tasks.
        return self.sensor_dag.create_dagrun(run_id='manual__' + TEST_SENSOR_DAG_ID,
                                             start_date=timezone.utcnow(),
                                             execution_date=DEFAULT_DATE,
                                             state=State.RUNNING)

    def _make_sensor(self, return_value, **kwargs):
        # Build a DummySensor with zero poke_interval/timeout unless
        # the caller overrides them.
        poke_interval = 'poke_interval'
        timeout = 'timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if timeout not in kwargs:
            kwargs[timeout] = 0

        sensor = DummySensor(task_id=SENSOR_OP,
                             return_value=return_value,
                             dag=self.sensor_dag,
                             **kwargs)
        return sensor

    def _make_sensor_instance(self, index, return_value, **kwargs):
        # Like _make_sensor, but returns a TaskInstance for a sensor
        # whose task_id is suffixed with `index`.
        poke_interval = 'poke_interval'
        timeout = 'timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if timeout not in kwargs:
            kwargs[timeout] = 0

        task_id = SENSOR_OP + str(index)
        sensor = DummySensor(task_id=task_id,
                             return_value=return_value,
                             dag=self.sensor_dag,
                             **kwargs)

        ti = TaskInstance(task=sensor, execution_date=DEFAULT_DATE)
        return ti

    def _make_smart_operator(self, index, **kwargs):
        # Smart operator that consolidates the sensors' poke work; a
        # downstream dummy lets tests check state does not propagate.
        poke_interval = 'poke_interval'
        smart_sensor_timeout = 'smart_sensor_timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if smart_sensor_timeout not in kwargs:
            kwargs[smart_sensor_timeout] = 0

        smart_task = DummySmartSensor(task_id=SMART_OP + "_" + str(index),
                                      dag=self.dag,
                                      **kwargs)

        dummy_op = DummyOperator(task_id=DUMMY_OP, dag=self.dag)
        dummy_op.set_upstream(smart_task)
        return smart_task

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                 ignore_ti_state=True)

    def test_load_sensor_works(self):
        # Mock two sensor tasks return True and one return False
        # The hashcode for si1 and si2 should be same. Test dedup on these two instances
        si1 = self._make_sensor_instance(1, True)
        si2 = self._make_sensor_instance(2, True)
        si3 = self._make_sensor_instance(3, False)

        # Confirm initial state
        smart = self._make_smart_operator(0)
        smart.flush_cached_sensor_poke_results()
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

        si1.run(ignore_all_deps=True)
        # Test single sensor
        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 1)
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

        si2.run(ignore_all_deps=True)
        si3.run(ignore_all_deps=True)

        # Test multiple sensors with duplication
        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 3)
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

    def test_execute_single_task_with_dup(self):
        # si1 and si2 share a poke hash; executing one should also
        # resolve the other via the dedup cache.
        sensor_dr = self._make_sensor_dag_run()
        si1 = self._make_sensor_instance(1, True)
        si2 = self._make_sensor_instance(2, True)
        si3 = self._make_sensor_instance(3, False, timeout=0)

        si1.run(ignore_all_deps=True)
        si2.run(ignore_all_deps=True)
        si3.run(ignore_all_deps=True)

        smart = self._make_smart_operator(0)
        smart.flush_cached_sensor_poke_results()

        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 3)

        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "1":
                smart._execute_sensor_work(sensor_work)
                break

        self.assertEqual(len(smart.cached_dedup_works), 1)

        tis = sensor_dr.get_task_instances()
        for ti in tis:
            if ti.task_id == SENSOR_OP + "1":
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == SENSOR_OP + "2":
                # si2 succeeded without being executed directly: dedup hit.
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == SENSOR_OP + "3":
                self.assertEqual(ti.state, State.SENSING)

        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "2":
                smart._execute_sensor_work(sensor_work)
                break

        # Still one cache entry: si2 reused si1's poke result.
        self.assertEqual(len(smart.cached_dedup_works), 1)

        time.sleep(1)
        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "3":
                smart._execute_sensor_work(sensor_work)
                break

        self.assertEqual(len(smart.cached_dedup_works), 2)

        tis = sensor_dr.get_task_instances()
        for ti in tis:
            # Timeout=0, the Failed poke lead to task fail
            if ti.task_id == SENSOR_OP + "3":
                self.assertEqual(ti.state, State.FAILED)

    def test_smart_operator_timeout(self):
        # Sensor with a 10s timeout poked at 6s intervals: the third
        # poke (12s after start) is past the timeout and must fail it.
        sensor_dr = self._make_sensor_dag_run()
        si1 = self._make_sensor_instance(1, False, timeout=10)
        smart = self._make_smart_operator(0, poke_interval=6)
        smart.poke = Mock(side_effect=[False, False, False, False])

        date1 = timezone.utcnow()
        with freeze_time(date1):
            si1.run(ignore_all_deps=True)
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

            # Before timeout the state should be SENSING
            sis = sensor_dr.get_task_instances()
            for sensor_instance in sis:
                if sensor_instance.task_id == SENSOR_OP + "1":
                    self.assertEqual(sensor_instance.state, State.SENSING)

        date2 = date1 + datetime.timedelta(seconds=smart.poke_interval)
        with freeze_time(date2):
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

            # Second poke at 6s is still inside the 10s timeout.
            sis = sensor_dr.get_task_instances()
            for sensor_instance in sis:
                if sensor_instance.task_id == SENSOR_OP + "1":
                    self.assertEqual(sensor_instance.state, State.SENSING)

        date3 = date2 + datetime.timedelta(seconds=smart.poke_interval)
        with freeze_time(date3):
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

            # 12s elapsed > 10s timeout: the sensor must now be FAILED.
            sis = sensor_dr.get_task_instances()
            for sensor_instance in sis:
                if sensor_instance.task_id == SENSOR_OP + "1":
                    self.assertEqual(sensor_instance.state, State.FAILED)

    def test_register_in_sensor_service(self):
        # Running a sensor under smart-sensor mode should put the TI in
        # SENSING and create a matching SensorInstance row.
        si1 = self._make_sensor_instance(1, True)
        si1.run(ignore_all_deps=True)
        self.assertEqual(si1.state, State.SENSING)

        session = settings.Session()
        SI = SensorInstance
        sensor_instance = session.query(SI).filter(
            SI.dag_id == si1.dag_id,
            SI.task_id == si1.task_id,
            SI.execution_date == si1.execution_date) \
            .first()

        self.assertIsNotNone(sensor_instance)
        self.assertEqual(sensor_instance.state, State.SENSING)
        self.assertEqual(sensor_instance.operator, "DummySensor")
class TestLatestOnlyOperator(unittest.TestCase):
    """LatestOnlyOperator behavior with scheduled vs externally
    triggered DagRuns, including NONE_FAILED trigger-rule interaction."""

    RUN_DATES = (
        DEFAULT_DATE,
        timezone.datetime(2016, 1, 1, 12),
        END_DATE,
    )

    def setUp(self):
        super().setUp()
        self.dag = DAG(
            'test_dag',
            default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
            schedule_interval=INTERVAL)
        # Wipe prior runs/instances so assertions see only this test's work.
        with db.create_session() as session:
            session.query(DagRun).delete()
            session.query(TaskInstance).delete()
        # Pin "now" so the latest-only window is deterministic.
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    @staticmethod
    def _state_by_execution_date(task_id):
        # Collapse a task's instances into {execution_date: state}.
        return {ti.execution_date: ti.state
                for ti in get_task_instances(task_id)}

    def test_run(self):
        """A lone LatestOnlyOperator executes without error."""
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping_non_latest(self):
        """Scheduled non-latest runs skip direct downstreams; a
        NONE_FAILED downstream still runs."""
        gate = LatestOnlyOperator(task_id='latest', dag=self.dag)
        child = DummyOperator(task_id='downstream', dag=self.dag)
        grandchild = DummyOperator(task_id='downstream_2', dag=self.dag)
        lenient_grandchild = DummyOperator(
            task_id='downstream_3',
            trigger_rule=TriggerRule.NONE_FAILED,
            dag=self.dag)
        child.set_upstream(gate)
        grandchild.set_upstream(child)
        lenient_grandchild.set_upstream(child)

        for i, execution_date in enumerate(self.RUN_DATES, start=1):
            self.dag.create_dagrun(
                run_id="scheduled__%d" % i,
                start_date=timezone.utcnow(),
                execution_date=execution_date,
                state=State.RUNNING,
            )

        for task in (gate, child, grandchild, lenient_grandchild):
            task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        all_success = {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'
        }
        self.assertEqual(all_success,
                         self._state_by_execution_date('latest'))
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            },
            self._state_by_execution_date('downstream'))
        # ALL_SUCCESS downstream of a skipped task never runs (state None).
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): None,
                timezone.datetime(2016, 1, 1, 12): None,
                timezone.datetime(2016, 1, 2): 'success'
            },
            self._state_by_execution_date('downstream_2'))
        # NONE_FAILED tolerates the skip and succeeds everywhere.
        self.assertEqual(all_success,
                         self._state_by_execution_date('downstream_3'))

    def test_not_skipping_external(self):
        """Externally triggered runs are never skipped by latest-only."""
        gate = LatestOnlyOperator(task_id='latest', dag=self.dag)
        child = DummyOperator(task_id='downstream', dag=self.dag)
        grandchild = DummyOperator(task_id='downstream_2', dag=self.dag)
        child.set_upstream(gate)
        grandchild.set_upstream(child)

        for i, execution_date in enumerate(self.RUN_DATES, start=1):
            self.dag.create_dagrun(
                run_id="manual__%d" % i,
                start_date=timezone.utcnow(),
                execution_date=execution_date,
                state=State.RUNNING,
                external_trigger=True,
            )

        for task in (gate, child, grandchild):
            task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        all_success = {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'
        }
        for task_id in ('latest', 'downstream', 'downstream_2'):
            self.assertEqual(all_success,
                             self._state_by_execution_date(task_id))
class BaseSensorTest(unittest.TestCase):
    """Tests for BaseSensorOperator poke/timeout/soft-fail semantics."""

    def setUp(self):
        configuration.load_test_config()
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE
        }
        self.dag = DAG(TEST_DAG_ID, default_args=args)

        # Start from a clean slate so prior tests' runs don't interfere.
        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()

    def _make_dag_run(self):
        return self.dag.create_dagrun(
            run_id='manual__',
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

    def _make_sensor(self, return_value, **kwargs):
        # Default to immediate poke/timeout unless the test overrides them.
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('timeout', 0)
        sensor = DummySensor(
            task_id=SENSOR_OP,
            return_value=return_value,
            dag=self.dag,
            **kwargs
        )
        # Downstream dummy lets tests assert that sensor state does not
        # leak into dependents.
        dummy_op = DummyOperator(
            task_id=DUMMY_OP,
            dag=self.dag
        )
        dummy_op.set_upstream(sensor)
        return sensor

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                 ignore_ti_state=True)

    def test_ok(self):
        """A sensor whose poke succeeds ends up SUCCESS."""
        sensor = self._make_sensor(True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        # Fix: assertEquals is a deprecated alias (removed in Py 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_fail(self):
        """A sensor that never succeeds times out and FAILS."""
        sensor = self._make_sensor(False)
        dr = self._make_dag_run()

        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_soft_fail(self):
        """soft_fail turns a timeout into SKIPPED instead of FAILED."""
        sensor = self._make_sensor(False, soft_fail=True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_soft_fail_with_retries(self):
        """With retries, soft_fail skips only after retries are exhausted."""
        sensor = self._make_sensor(
            return_value=False,
            soft_fail=True,
            retries=1,
            retry_delay=timedelta(milliseconds=1))
        dr = self._make_dag_run()

        # first run fails and task instance is marked up to retry
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # Wait out the retry delay before re-running.
        sleep(0.001)
        # after retry DAG run is skipped
        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)