def assert_tasks_on_executor(self, executor):
    """
    Run one succeeding and one failing shell command on ``executor`` and
    verify the state of the resulting futures.

    :param executor: executor under test; it must provide ``start()``,
        ``execute_async(key=..., command=...)`` and a ``futures`` mapping
        whose keys are futures and whose values are the task keys.
    :raises ValueError: if both futures have not finished within 30 seconds.
    """
    # Imported locally so this helper does not depend on a module-level
    # ``import time`` being present in the file.
    import time

    # start the executor
    executor.start()

    success_command = 'echo 1'
    fail_command = 'exit 1'

    executor.execute_async(key='success', command=success_command)
    executor.execute_async(key='fail', command=fail_command)

    # ``executor.futures`` is keyed by future, so find each future by its
    # task key (the mapping's value).
    success_future = next(
        k for k, v in executor.futures.items() if v == 'success')
    fail_future = next(
        k for k, v in executor.futures.items() if v == 'fail')

    # wait for the futures to execute, with a timeout
    timeout = timezone.utcnow() + timedelta(seconds=30)
    while not (success_future.done() and fail_future.done()):
        if timezone.utcnow() > timeout:
            raise ValueError(
                'The futures should have finished; there is probably '
                'an error communicating with the Dask cluster.')
        # Pause briefly between polls instead of spinning in a tight loop
        # for up to the whole timeout.
        time.sleep(0.1)

    # both tasks should have finished
    self.assertTrue(success_future.done())
    self.assertTrue(fail_future.done())

    # check task exceptions
    self.assertIsNone(success_future.exception())
    self.assertIsNotNone(fail_future.exception())
def execute(self, context):
    """
    Call ``self.poke(context)`` repeatedly until it returns a truthy value.

    After every unsuccessful poke the time elapsed since the recorded start
    is compared with ``self.timeout``. Once it is exceeded,
    ``AirflowSkipException`` is raised when ``self.soft_fail`` is truthy and
    ``AirflowSensorTimeout`` otherwise. Between pokes the method sleeps for
    ``self.poke_interval`` seconds.

    :param context: passed through unchanged to ``self.poke``.
    """
    started_at = timezone.utcnow()
    # When params carries a truthy 'time_jump', it is subtracted from the
    # recorded start after each failed poke, so the elapsed time grows
    # faster than the wall clock (presumably a testing aid -- confirm).
    time_jump = self.params.get('time_jump')

    while True:
        if self.poke(context):
            break
        if time_jump:
            started_at -= time_jump
        elapsed_seconds = (timezone.utcnow() - started_at).total_seconds()
        if elapsed_seconds > self.timeout:
            if self.soft_fail:
                error_class = AirflowSkipException
            else:
                error_class = AirflowSensorTimeout
            raise error_class('Snap. Time is OUT.')
        time.sleep(self.poke_interval)

    self.log.info("Success criteria met. Exiting.")
def test_trigger_dag_for_date(self):
    """
    POST an ``execution_date`` to the experimental dag_runs endpoint and
    check the response codes for a valid request (200), an unknown DAG
    (404) and an unparseable date (400).
    """
    dag_id = 'example_bash_operator'
    url_template = '/api/experimental/dags/{}/dag_runs'

    def post_execution_date(target_dag_id, raw_date):
        # Every request in this test has the same shape; only the DAG id
        # and the execution_date payload vary.
        payload = json.dumps({'execution_date': raw_date})
        return self.app.post(url_template.format(target_dag_id),
                             data=payload,
                             content_type="application/json")

    # Execution date: one hour from now, truncated to the whole hour.
    hour_from_now = utcnow() + timedelta(hours=1)
    execution_date = datetime(hour_from_now.year,
                              hour_from_now.month,
                              hour_from_now.day,
                              hour_from_now.hour)
    datetime_string = execution_date.isoformat()

    # Test Correct execution
    response = post_execution_date(dag_id, datetime_string)
    self.assertEqual(200, response.status_code)

    # The triggered run must be retrievable through a DagBag.
    dag = DagBag().get_dag(dag_id)
    dag_run = dag.get_dagrun(execution_date)
    self.assertTrue(
        dag_run,
        'Dag Run not found for execution date {}'.format(execution_date))

    # Test error for nonexistent dag
    response = post_execution_date('does_not_exist_dag',
                                   execution_date.isoformat())
    self.assertEqual(404, response.status_code)

    # Test error for bad datetime format
    response = post_execution_date(dag_id, 'not_a_datetime')
    self.assertEqual(400, response.status_code)
def test_utc_transformations(self):
    """
    Test whether what we are storing is what we are retrieving
    for datetimes
    """
    start_date = utcnow()
    # The ISO form of the start date doubles as the run id.
    iso_date = start_date.isoformat()
    execution_date = start_date + datetime.timedelta(hours=1, days=1)

    dag = DAG(dag_id='test_utc_transformations', start_date=start_date)
    dag.clear()

    dagrun_kwargs = {
        'run_id': iso_date,
        'state': State.NONE,
        'execution_date': execution_date,
        'start_date': start_date,
        'session': self.session,
    }
    run = dag.create_dagrun(**dagrun_kwargs)

    # Values read back from the run equal the ones passed in.
    self.assertEqual(execution_date, run.execution_date)
    self.assertEqual(start_date, run.start_date)

    # Both local datetimes have a zero UTC offset.
    for moment in (execution_date, start_date):
        self.assertEqual(moment.utcoffset().total_seconds(), 0.0)

    # The run id matches the ISO string of both the local and the
    # retrieved start date.
    self.assertEqual(iso_date, run.run_id)
    self.assertEqual(run.start_date.isoformat(), run.run_id)

    dag.clear()
def test_dagrun_status(self):
    """
    GET the experimental dag_runs/<execution_date> endpoint and check the
    status code and body for an existing run (200), an unknown DAG (404),
    an unknown execution date (404) and an unparseable date (400).
    """
    dag_id = 'example_bash_operator'
    url_template = '/api/experimental/dags/{}/dag_runs/{}'

    def fetch(target_dag_id, date_segment):
        return self.app.get(url_template.format(target_dag_id, date_segment))

    def body_of(resp):
        return resp.data.decode('utf-8')

    execution_date = utcnow().replace(microsecond=0)
    datetime_string = quote_plus(execution_date.isoformat())
    wrong_datetime_string = quote_plus(
        datetime(1990, 1, 1, 1, 1, 1).isoformat())

    # Create DagRun
    trigger_dag(dag_id=dag_id,
                run_id='test_task_instance_info_run',
                execution_date=execution_date)

    # Test Correct execution
    response = fetch(dag_id, datetime_string)
    self.assertEqual(200, response.status_code)
    text = body_of(response)
    self.assertIn('state', text)
    self.assertNotIn('error', text)

    # Test error for nonexistent dag
    response = fetch('does_not_exist_dag', datetime_string)
    self.assertEqual(404, response.status_code)
    self.assertIn('error', body_of(response))

    # Test error for nonexistent dag run (wrong execution_date)
    response = fetch(dag_id, wrong_datetime_string)
    self.assertEqual(404, response.status_code)
    self.assertIn('error', body_of(response))

    # Test error for bad datetime format
    response = fetch(dag_id, 'not_a_datetime')
    self.assertEqual(400, response.status_code)
    self.assertIn('error', body_of(response))
def prepare_dagruns(self):
    """
    Load the example DAGs and create one RUNNING dag run for each of
    'example_bash_operator' and 'example_subdag_operator'.

    The DAGs are stored on ``self.bash_dag`` / ``self.sub_dag`` and the
    created runs on ``self.bash_dagrun`` / ``self.sub_dagrun``. Both runs
    use ``self.run_id`` and ``self.default_date``.
    """
    example_dags = models.DagBag(include_examples=True).dags
    self.bash_dag = example_dags['example_bash_operator']
    self.sub_dag = example_dags['example_subdag_operator']

    def start_run(dag):
        # The start date is taken afresh for every run that is created.
        return dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

    self.bash_dagrun = start_run(self.bash_dag)
    self.sub_dagrun = start_run(self.sub_dag)
def test_with_dag_run(self):
    """
    Run a ShortCircuitOperator inside a dag run twice: first with a
    callable returning False, then with it returning True.

    On the first pass the tasks downstream of the operator ('branch_1',
    'branch_2') are expected to be SKIPPED; after clearing the DAG and
    re-running with True their state is expected to be ``State.NONE``.
    'upstream' and 'make_choice' are expected to be SUCCESS on both passes.
    """
    value = False
    dag = DAG('shortcircuit_operator_test_with_dag_run',
              default_args={
                  'owner': 'airflow',
                  'start_date': DEFAULT_DATE
              },
              schedule_interval=INTERVAL)
    # The lambda closes over ``value`` on purpose: rebinding ``value``
    # further down changes what the operator's callable returns.
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    dag=dag,
                                    python_callable=lambda: value)
    branch_1 = DummyOperator(task_id='branch_1', dag=dag)
    branch_1.set_upstream(short_op)
    branch_2 = DummyOperator(task_id='branch_2', dag=dag)
    branch_2.set_upstream(branch_1)
    upstream = DummyOperator(task_id='upstream', dag=dag)
    upstream.set_downstream(short_op)
    dag.clear()

    def run_and_check(dag_run, downstream_state):
        # Run the two leading tasks, then compare every task instance of
        # the dag run against its expected state.
        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        expected_states = {
            'make_choice': State.SUCCESS,
            'upstream': State.SUCCESS,
            'branch_1': downstream_state,
            'branch_2': downstream_state,
        }
        tis = dag_run.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id not in expected_states:
                # Fail with a readable message; a bare ``raise`` here would
                # only produce "No active exception to reraise".
                self.fail('Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    logging.error("Tasks {}".format(dag.tasks))
    dr = dag.create_dagrun(run_id="manual__",
                           start_date=timezone.utcnow(),
                           execution_date=DEFAULT_DATE,
                           state=State.RUNNING)

    # First pass: the callable returns False.
    run_and_check(dr, State.SKIPPED)

    # Second pass: flip the callable's result, reset the task instances
    # and re-verify the dag run before running again.
    value = True
    dag.clear()
    dr.verify_integrity()
    run_and_check(dr, State.NONE)
def days_ago(n, hour=0, minute=0, second=0, microsecond=0):
    """
    Return the current UTC time moved back by `n` days, with its
    time-of-day fields replaced by the given values.

    With the default arguments the time of day is midnight.

    :param n: number of days to subtract
    :param hour: hour to set on the result
    :param minute: minute to set on the result
    :param second: second to set on the result
    :param microsecond: microsecond to set on the result
    """
    time_of_day = {
        'hour': hour,
        'minute': minute,
        'second': second,
        'microsecond': microsecond,
    }
    return timezone.utcnow().replace(**time_of_day) - timedelta(days=n)
def delete_remote_resource(self):
    """
    Run an SSHOperator that executes ``rm`` on ``self.test_remote_filepath``
    through ``self.hook``.
    """
    # remove the remote test file
    removal_command = "rm {0}".format(self.test_remote_filepath)
    remove_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command=removal_command,
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(remove_file_task)

    task_instance = TaskInstance(task=remove_file_task,
                                 execution_date=timezone.utcnow())
    task_instance.run()
def test_trigger_dag(self):
    """
    POST to the experimental dag_runs endpoint: an existing DAG with a
    unique run id is expected to answer 200, an unknown DAG 404.
    """
    url_template = '/api/experimental/dags/{}/dag_runs'

    # Existing DAG; the current UTC timestamp is appended to the run id.
    run_id = 'my_run' + utcnow().isoformat()
    response = self.app.post(
        url_template.format('example_bash_operator'),
        data=json.dumps({'run_id': run_id}),
        content_type="application/json")
    self.assertEqual(200, response.status_code)

    # Unknown DAG with an empty JSON payload.
    response = self.app.post(
        url_template.format('does_not_exist_dag'),
        data=json.dumps({}),
        content_type="application/json")
    self.assertEqual(404, response.status_code)
def test_json_file_transfer_get(self):
    """
    With XCom pickling disabled, write a file on the remote side over SSH,
    fetch it with an SFTP GET and compare the local copy's content with
    what was written.
    """
    configuration.conf.set("core", "enable_xcom_pickling", "False")
    test_remote_file_content = (
        "This is remote file content \n which is also multiline "
        "another line here \n this is last line. EOF"
    )

    # create a test file remotely
    create_command = "echo '{0}' > {1}".format(test_remote_file_content,
                                               self.test_remote_filepath)
    create_file_task = SSHOperator(
        task_id="test_create_file",
        ssh_hook=self.hook,
        command=create_command,
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(create_file_task)
    TaskInstance(task=create_file_task,
                 execution_date=timezone.utcnow()).run()

    # get remote file to local
    get_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.GET,
        dag=self.dag
    )
    self.assertIsNotNone(get_test_task)
    TaskInstance(task=get_test_task,
                 execution_date=timezone.utcnow()).run()

    # test the received content
    with open(self.test_local_filepath, 'r') as local_file:
        content_received = local_file.read()
    expected_content = test_remote_file_content.encode('utf-8').decode('utf-8')
    self.assertEqual(content_received.strip(), expected_content)
def test_pickle_file_transfer_put(self):
    """
    With XCom pickling enabled, write a local file, upload it with an SFTP
    PUT, then ``cat`` the remote file over SSH and compare the value pulled
    from XCom with the original bytes.
    """
    configuration.conf.set("core", "enable_xcom_pickling", "True")
    test_local_file_content = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )

    # create a test file locally
    with open(self.test_local_filepath, 'wb') as local_file:
        local_file.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        dag=self.dag
    )
    self.assertIsNotNone(put_test_task)
    TaskInstance(task=put_test_task,
                 execution_date=timezone.utcnow()).run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="cat {0}".format(self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(check_file_task)
    check_instance = TaskInstance(task=check_file_task,
                                  execution_date=timezone.utcnow())
    check_instance.run()

    remote_content = check_instance.xcom_pull(task_ids='test_check_file',
                                              key='return_value')
    self.assertEqual(remote_content.strip(), test_local_file_content)
def test_command_execution_with_env(self):
    """
    With XCom pickling enabled, run ``echo -n airflow`` through an
    SSHOperator and check that the task instance records a duration and
    that the value pulled from XCom is ``b'airflow'``.
    """
    configuration.conf.set("core", "enable_xcom_pickling", "True")

    operator_kwargs = dict(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
    )
    task = SSHOperator(**operator_kwargs)
    self.assertIsNotNone(task)

    task_instance = TaskInstance(task=task, execution_date=timezone.utcnow())
    task_instance.run()

    self.assertIsNotNone(task_instance.duration)
    pushed_value = task_instance.xcom_pull(task_ids='test', key='return_value')
    self.assertEqual(pushed_value, b'airflow')
def test_with_dag_run(self):
    """
    Run ``self.branch_op`` inside a RUNNING dag run and check the state of
    every task instance of that run.

    Expected states: 'make_choice' is SUCCESS, 'branch_1' is
    ``State.NONE`` and 'branch_2' is SKIPPED. Any other task id fails the
    test.
    """
    dr = self.dag.create_dagrun(run_id="manual__",
                                start_date=timezone.utcnow(),
                                execution_date=DEFAULT_DATE,
                                state=State.RUNNING)

    self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    expected_states = {
        'make_choice': State.SUCCESS,
        'branch_1': State.NONE,
        'branch_2': State.SKIPPED,
    }
    for ti in dr.get_task_instances():
        if ti.task_id not in expected_states:
            # Fail with a readable message; a bare ``raise`` here would
            # only produce "No active exception to reraise".
            self.fail('Invalid task id {} found!'.format(ti.task_id))
        self.assertEqual(ti.state, expected_states[ti.task_id])
def test_utcnow(self):
    """
    Check that ``timezone.utcnow()`` returns a localized datetime whose
    wall-clock fields are unchanged by converting it to UTC.
    """
    now = timezone.utcnow()
    self.assertTrue(timezone.is_localized(now))
    # Dropping tzinfo before and after the UTC conversion must give the
    # same naive value. ``assertEqual`` replaces the deprecated
    # ``assertEquals`` alias.
    self.assertEqual(now.replace(tzinfo=None),
                     now.astimezone(UTC).replace(tzinfo=None))
def test_skipping_dagrun(self):
    """
    Chain latest -> downstream -> downstream_2, create three RUNNING dag
    runs, run all three tasks over DEFAULT_DATE..END_DATE and compare the
    per-execution-date states of each task with the expected values.

    Expected: 'latest' is 'success' for all three dates; both downstream
    tasks are 'skipped' for 2016-01-01 00:00 and 12:00 and 'success' for
    2016-01-02.
    """
    latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
    downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
    downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
    downstream_task.set_upstream(latest_task)
    downstream_task2.set_upstream(downstream_task)

    scheduled_runs = (
        ("manual__1", DEFAULT_DATE),
        ("manual__2", timezone.datetime(2016, 1, 1, 12)),
        ("manual__3", END_DATE),
    )
    for run_id, run_execution_date in scheduled_runs:
        self.dag.create_dagrun(run_id=run_id,
                               start_date=timezone.utcnow(),
                               execution_date=run_execution_date,
                               state=State.RUNNING)

    for task in (latest_task, downstream_task, downstream_task2):
        task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_execution_date(task_id):
        # Map each task instance's execution date to its state.
        return {
            ti.execution_date: ti.state
            for ti in get_task_instances(task_id)
        }

    self.assertEqual(
        {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success',
        },
        states_by_execution_date('latest'))

    # Both downstream tasks are expected to show the same pattern.
    for downstream_id in ('downstream', 'downstream_2'):
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            },
            states_by_execution_date(downstream_id))
def test_clear(self):
    """
    Call ``self.dag.clear`` for the window from DEFAULT_DATE up to the
    current UTC time; the test has no assertions, so it passes when the
    call does not raise.
    """
    clear_window = {
        'start_date': DEFAULT_DATE,
        'end_date': timezone.utcnow(),
    }
    self.dag.clear(**clear_window)
def _make_dag_run(self):
    """
    Create and return a RUNNING dag run of ``self.dag`` with run id
    'manual__', execution date DEFAULT_DATE and the current UTC time as
    start date.
    """
    dag_run = self.dag.create_dagrun(
        run_id='manual__',
        start_date=timezone.utcnow(),
        execution_date=DEFAULT_DATE,
        state=State.RUNNING,
    )
    return dag_run