def test_python_callable_keyword_arguments_are_templatized(self):
    """Check that Jinja templates inside PythonSensor ``op_kwargs`` are rendered.

    The sensor run is expected to end in ``AirflowSensorTimeout``; the keyword
    arguments of the first recorded call are then compared with the rendered
    values.
    """
    captured_calls = []
    keyword_arguments = {
        'an_int': 4,
        'a_date': date(2019, 1, 1),
        'a_templated_string': "dag {{dag.dag_id}} ran on {{ds}}.",
    }

    # A Mock instance cannot be used as the callable: the test would fail with
    # "TypeError: Object of type Mock is not JSON serializable".
    sensor = PythonSensor(
        task_id='python_sensor',
        timeout=0.01,
        poke_interval=0.3,
        python_callable=build_recording_function(captured_calls),
        op_kwargs=keyword_arguments,
        dag=self.dag,
    )

    run_id = 'manual__' + DEFAULT_DATE.isoformat()
    self.dag.create_dagrun(
        run_id=run_id,
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
        state=State.RUNNING,
    )

    with self.assertRaises(AirflowSensorTimeout):
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # 2 calls: first: at start, second: before timeout
    self.assertEqual(2, len(captured_calls))

    rendered_ds = DEFAULT_DATE.date().isoformat()
    rendered_string = "dag {} ran on {}.".format(self.dag.dag_id, rendered_ds)
    expected_call = Call(
        an_int=4,
        a_date=date(2019, 1, 1),
        a_templated_string=rendered_string,
    )
    self._assert_calls_equal(captured_calls[0], expected_call)
def test_python_sensor_true(self):
    """Run a PythonSensor whose callable returns True; the run must not raise."""
    run_arguments = {
        'start_date': DEFAULT_DATE,
        'end_date': DEFAULT_DATE,
        'ignore_ti_state': True,
    }
    sensor = PythonSensor(
        task_id='python_sensor_check_true',
        python_callable=lambda: True,
        dag=self.dag,
    )
    sensor.run(**run_arguments)
def test_python_sensor_raise(self):
    """Verify that a ZeroDivisionError raised by the callable surfaces from ``run``."""
    run_arguments = {
        'start_date': DEFAULT_DATE,
        'end_date': DEFAULT_DATE,
        'ignore_ti_state': True,
    }
    sensor = PythonSensor(
        task_id='python_sensor_check_raise',
        python_callable=lambda: 1 / 0,
        dag=self.dag,
    )
    with self.assertRaises(ZeroDivisionError):
        sensor.run(**run_arguments)
def test_python_sensor_raise(self):
    """Verify that a ZeroDivisionError raised by the callable surfaces from ``run``."""
    failing_sensor = PythonSensor(
        task_id='python_sensor_check_raise',
        python_callable=lambda: 1 / 0,
        dag=self.dag,
    )
    run_window = dict(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    with self.assertRaises(ZeroDivisionError):
        failing_sensor.run(ignore_ti_state=True, **run_window)
def test_python_sensor_false(self):
    """A callable that always returns False must end in AirflowSensorTimeout.

    The sensor is created with ``timeout=1``.
    """
    run_arguments = {
        'start_date': DEFAULT_DATE,
        'end_date': DEFAULT_DATE,
        'ignore_ti_state': True,
    }
    sensor = PythonSensor(
        task_id='python_sensor_check_false',
        timeout=1,
        python_callable=lambda: False,
        dag=self.dag,
    )
    with self.assertRaises(AirflowSensorTimeout):
        sensor.run(**run_arguments)
def test_python_callable_arguments_are_templatized(self):
    """Check that Jinja templates inside PythonSensor ``op_args`` are rendered.

    The sensor run is expected to end in ``AirflowSensorTimeout``; the
    positional arguments of the first recorded call are then compared with the
    rendered values.
    """
    captured_calls = []

    # A named tuple is passed through as well, to ensure its type is still
    # preserved after the rendering is done.
    Named = namedtuple('Named', ['var1', 'var2'])
    positional_arguments = [
        4,
        date(2019, 1, 1),
        "dag {{dag.dag_id}} ran on {{ds}}.",
        Named('{{ ds }}', 'unchanged'),
    ]

    # A Mock instance cannot be used as the callable: the test would fail with
    # "TypeError: Object of type Mock is not JSON serializable".
    sensor = PythonSensor(
        task_id='python_sensor',
        timeout=0.01,
        poke_interval=0.3,
        python_callable=build_recording_function(captured_calls),
        op_args=positional_arguments,
        dag=self.dag,
    )

    run_id = 'manual__' + DEFAULT_DATE.isoformat()
    self.dag.create_dagrun(
        run_id=run_id,
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
        state=State.RUNNING,
    )

    with self.assertRaises(AirflowSensorTimeout):
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    rendered_ds = DEFAULT_DATE.date().isoformat()

    # 2 calls: first: at start, second: before timeout
    self.assertEqual(2, len(captured_calls))

    rendered_string = "dag {} ran on {}.".format(self.dag.dag_id, rendered_ds)
    expected_call = Call(
        4,
        date(2019, 1, 1),
        rendered_string,
        Named(rendered_ds, 'unchanged'),
    )
    self._assert_calls_equal(captured_calls[0], expected_call)
data=json.dumps({ "ExperimentName": "Face_detection_Haar_cascade_pipeline_REST", "RunSource": "SDK", "ParameterAssignments": { "sample_num": "1" } }), log_response=True, xcom_push=True) wait_face_detection_pipeline = PythonSensor( task_id='sense_face_detection_pipeline_end', poke_interval=10, timeout=60 * 10, # 10 minutes python_callable=wait_till_pipeline_end, op_kwargs={ 'experiment_name': 'Face_detection_Haar_cascade_pipeline_REST', 'task_xcom': "{{ ti.xcom_pull(task_ids='face_detection_haar_cascade', key='return_value') }}" }) blur_face = SimpleHttpOperator( task_id='face_blurring', endpoint=face_blurring_pipeline_endpoint, http_conn_id='azure_pipelines_http_endpoint', method='POST', headers={ 'Authorization': 'Bearer ' + pipeline_token, 'Content-Type': 'application/json' },
def test_reschedule_handling(self, mock_pool_full):
    """
    Test that task reschedules are handled properly
    """
    # The pool doesn't actually exist, so make the mock report open slots.
    mock_pool_full.return_value = False

    # Outcome of the sensor callable. These names are rebound before every run
    # below; the nested functions look them up in this scope at call time.
    done = False
    fail = False

    def poke_outcome():
        if fail:
            raise AirflowException()
        return done

    dag = models.DAG(dag_id='test_reschedule_handling')
    task = PythonSensor(
        task_id='test_reschedule_handling_sensor',
        poke_interval=0,
        mode='reschedule',
        python_callable=poke_outcome,
        retries=1,
        retry_delay=datetime.timedelta(seconds=0),
        dag=dag,
        owner='airflow',
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0),
    )

    ti = TI(task=task, execution_date=timezone.utcnow())
    self.assertEqual(ti._try_number, 0)
    self.assertEqual(ti.try_number, 1)

    def run_ti_and_assert(run_date, expected_start_date, expected_end_date,
                          expected_duration, expected_state, expected_try_number,
                          expected_task_reschedule_count):
        """Run ``ti`` with time frozen at ``run_date`` and check its persisted state."""
        with freeze_time(run_date):
            try:
                ti.run()
            except AirflowException:
                # Only tolerate the exception when this run was told to fail.
                if not fail:
                    raise
        ti.refresh_from_db()
        self.assertEqual(ti.state, expected_state)
        self.assertEqual(ti._try_number, expected_try_number)
        self.assertEqual(ti.try_number, expected_try_number + 1)
        self.assertEqual(ti.start_date, expected_start_date)
        self.assertEqual(ti.end_date, expected_end_date)
        self.assertEqual(ti.duration, expected_duration)
        reschedules = TaskReschedule.find_for_task_instance(ti)
        self.assertEqual(len(reschedules), expected_task_reschedule_count)

    one_minute = datetime.timedelta(minutes=1)
    date1 = timezone.utcnow()
    date2 = date1 + one_minute
    date3 = date2 + one_minute
    date4 = date3 + one_minute

    # Run with multiple reschedules.
    # During reschedule the try number remains the same, but each reschedule is recorded.
    # The start date is expected to remain the initial date, hence the duration increases.
    # When finished the try number is incremented and there is no reschedule expected
    # for this try.
    # Each row: (done, fail, arguments for run_ti_and_assert).
    first_pass = (
        (False, False, (date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 0, 1)),
        (False, False, (date2, date1, date2, 60, State.UP_FOR_RESCHEDULE, 0, 2)),
        (False, False, (date3, date1, date3, 120, State.UP_FOR_RESCHEDULE, 0, 3)),
        (True, False, (date4, date1, date4, 180, State.SUCCESS, 1, 0)),
    )
    # The loop targets rebind the enclosing ``done``/``fail`` seen by the closures.
    for done, fail, expectations in first_pass:
        run_ti_and_assert(*expectations)

    # Clear the task instance.
    dag.clear()
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.NONE)
    self.assertEqual(ti._try_number, 1)

    # Run again after clearing with reschedules and a retry.
    # The retry increments the try number, and for that try no reschedule is expected.
    # After the retry the start date is reset, hence the duration is also reset.
    second_pass = (
        (False, False, (date1, date1, date1, 0, State.UP_FOR_RESCHEDULE, 1, 1)),
        (False, True, (date2, date1, date2, 60, State.UP_FOR_RETRY, 2, 0)),
        (False, False, (date3, date3, date3, 0, State.UP_FOR_RESCHEDULE, 2, 1)),
        (True, False, (date4, date3, date4, 60, State.SUCCESS, 3, 0)),
    )
    for done, fail, expectations in second_pass:
        run_ti_and_assert(*expectations)
schedule_interval="0 16 * * *", description= "A batch workflow for ingesting supermarket promotions data, demonstrating the PythonSensor.", default_args={"depends_on_past": True}, ) create_metrics = DummyOperator(task_id="create_metrics", dag=dag) def _wait_for_supermarket(supermarket_id_): supermarket_path = Path("/opt/airflow/data/" + supermarket_id_) data_files = supermarket_path.glob("data-*.csv") success_file = supermarket_path / "_SUCCESS" return data_files and success_file.exists() for supermarket_id in range(1, 5): wait = PythonSensor( task_id=f"wait_for_supermarket_{supermarket_id}", python_callable=_wait_for_supermarket, op_kwargs={"supermarket_id_": f"supermarket{supermarket_id}"}, timeout=600, mode="reschedule", dag=dag, ) copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}", dag=dag) process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}", dag=dag) wait >> copy >> process >> create_metrics
default_args={"depends_on_past": True}, ) def _wait_for_supermarket(supermarket_id_): supermarket_path = Path("/data/" + supermarket_id_) data_files = supermarket_path.glob("data-*.csv") success_file = supermarket_path / "_SUCCESS" return data_files and success_file.exists() for supermarket_id in [1, 2, 3, 4]: wait = PythonSensor( task_id=f"wait_for_supermarket_{supermarket_id}", python_callable=_wait_for_supermarket, op_kwargs={"supermarket_id": f"supermarket{supermarket_id}"}, provide_context=True, dag=dag, ) copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}", dag=dag) process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}", dag=dag) generate_metrics = DummyOperator( task_id=f"generate_metrics_supermarket_{supermarket_id}", dag=dag ) compute_differences = DummyOperator( task_id=f"compute_differences_supermarket_{supermarket_id}", dag=dag ) update_dashboard = DummyOperator( task_id=f"update_dashboard_supermarket_{supermarket_id}", dag=dag ) notify_new_data = DummyOperator(
from pathlib import Path

import airflow.utils.dates
from airflow import DAG
from airflow.contrib.sensors.python_sensor import PythonSensor

dag = DAG(
    dag_id="06_listing_6_2",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 16 * * *",
    description="A batch workflow for ingesting supermarket promotions data.",
    default_args={"depends_on_past": True},
)


def _wait_for_supermarket(supermarket_id_):
    """Return True once the data for one supermarket is ready.

    Ready means that the directory ``/opt/airflow/data/<supermarket_id_>``
    holds at least one ``data-*.csv`` file and a ``_SUCCESS`` marker file.

    :param supermarket_id_: name of the supermarket's data directory
    :return: True when both conditions hold, False otherwise
    """
    supermarket_path = Path("/opt/airflow/data/" + supermarket_id_)
    # Path.glob() returns a generator, which is truthy even when it yields
    # nothing; any() is what actually checks for at least one match.
    has_data_files = any(supermarket_path.glob("data-*.csv"))
    success_file = supermarket_path / "_SUCCESS"
    return has_data_files and success_file.exists()


wait_for_supermarket_1 = PythonSensor(
    task_id="wait_for_supermarket_1",
    python_callable=_wait_for_supermarket,
    # The key must match the callable's parameter name (supermarket_id_),
    # otherwise the call fails with an unexpected-keyword TypeError.
    op_kwargs={"supermarket_id_": "supermarket1"},
    dag=dag,
)
start_date=airflow.utils.dates.days_ago(3), schedule_interval=None, ) def _wait_for_supermarket(supermarket_id_): supermarket_path = Path("/data/" + supermarket_id_) data_files = supermarket_path.glob("data-*.csv") success_file = supermarket_path / "_SUCCESS" return data_files and success_file.exists() for supermarket_id in range(1, 5): wait = PythonSensor( task_id=f"wait_for_supermarket_{supermarket_id}", python_callable=_wait_for_supermarket, op_kwargs={"supermarket_id_": f"supermarket{supermarket_id}"}, dag=dag1, ) copy = DummyOperator(task_id=f"copy_to_raw_supermarket_{supermarket_id}", dag=dag1) process = DummyOperator(task_id=f"process_supermarket_{supermarket_id}", dag=dag1) trigger_create_metrics_dag = TriggerDagRunOperator( task_id=f"trigger_create_metrics_dag_supermarket_{supermarket_id}", trigger_dag_id="create_metrics", dag=dag1, ) wait >> copy >> process >> trigger_create_metrics_dag compute_differences = DummyOperator(task_id=f"compute_differences", dag=dag2) update_dashboard = DummyOperator(task_id=f"update_dashboard", dag=dag2)
) unzip = BashOperator( task_id='unzip', bash_command='gunzip {{ ti.xcom_pull(task_ids="download") }}', ) extract_release = PythonOperator( task_id='extract_release', python_callable=extract_release_events, op_kwargs={'gz_file': '{{ ti.xcom_pull(task_ids="download") }}'}, ) check_release_present = PythonSensor( task_id="check_release_present", python_callable=check_if_file_is_empty, provide_context=True, op_kwargs={'task_id_to_get_full_path': 'extract_release'}, poke_interval=30, ) send_releases = PythonOperator( task_id="send_release_to_slack", python_callable=send_release_to_slack, op_kwargs={ 'release_file': '{{ ti.xcom_pull(task_ids="extract_release") }}' }, ) sensor >> get_gh_archive >> unzip >> extract_release >> check_release_present >> send_releases
def test_python_sensor_true(self):
    """Run a PythonSensor whose callable returns True; the run must not raise."""
    succeeding_sensor = PythonSensor(
        task_id='python_sensor_check_true',
        python_callable=lambda: True,
        dag=self.dag,
    )
    run_window = dict(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    succeeding_sensor.run(ignore_ti_state=True, **run_window)
default_args = dict(
    owner="godatadriven",
    start_date=airflow.utils.dates.days_ago(14),
)

dag = DAG(
    dag_id="b_pythonsensor",
    default_args=default_args,
    schedule_interval="0 0 * * *",
    description="Example PythonSensor",
)


def _time_for_coffee():
    """I drink coffee between 6 and 12

    Returns True when the hour reported by ``datetime.now()`` is at least 6
    and below 12.
    """
    current_hour = datetime.now().hour
    return current_hour >= 6 and current_hour < 12


# The sensor gates the coffee task on _time_for_coffee returning True.
time_for_coffee = PythonSensor(
    task_id="time_for_coffee",
    python_callable=_time_for_coffee,
    mode="reschedule",
    dag=dag,
)

make_coffee = BashOperator(
    task_id="make_coffee",
    bash_command="echo 'Time for coffee!'",
    dag=dag,
)

time_for_coffee >> make_coffee