def test_schedule_dag_no_end_date_up_to_today_only(self):
    """
    A DAG created without an end_date must only be schedulable up to (and
    including) the current datetime.

    For example, if today is 2016-01-01 and we schedule from a start_date
    of 2015-01-01, only runs up to, but not including, 2016-01-01 should
    be created.
    """
    session = settings.Session()
    interval = datetime.timedelta(days=1)
    today = utcnow()
    start_date = today.subtract(weeks=1)
    expected_runs = (today - start_date).days

    dag_id = "test_schedule_dag_no_end_date_up_to_today_only"
    dag = DAG(dag_id=dag_id, start_date=start_date, schedule_interval=interval)
    dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    created_runs = []
    for _ in range(expected_runs):
        run = processor.create_dag_run(dag)
        created_runs.append(run)
        # Mark the DagRun as complete so the scheduler advances to the
        # next interval on the following iteration.
        run.state = State.SUCCESS
        session.merge(run)
        session.commit()

    # One more attempt would land past "today" and must be refused.
    extra_run = processor.create_dag_run(dag)

    for run in created_runs:
        self.assertIsNotNone(run)
    self.assertIsNone(extra_run)
    self._clean_up(dag_id)
def test_schedule_dag_relativedelta(self):
    """
    Tests scheduling a dag with a relativedelta schedule_interval
    """
    dag_id = "test_schedule_dag_relativedelta"
    interval = relativedelta(hours=+1)
    dag = DAG(dag_id=dag_id, schedule_interval=interval)
    dag.add_task(BaseOperator(
        task_id="faketastic",
        owner='Also fake',
        start_date=datetime_tz(2015, 1, 2, 0, 0)))

    processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())

    # The first run is anchored on the task's start_date.
    first_run = processor.create_dag_run(dag)
    self.assertIsNotNone(first_run)
    self.assertEqual(dag.dag_id, first_run.dag_id)
    self.assertIsNotNone(first_run.run_id)
    self.assertNotEqual('', first_run.run_id)
    self.assertEqual(
        datetime_tz(2015, 1, 2, 0, 0),
        first_run.execution_date,
        msg='dag_run.execution_date did not match expectation: {0}'.format(
            first_run.execution_date))
    self.assertEqual(State.RUNNING, first_run.state)
    self.assertFalse(first_run.external_trigger)

    # The second run is exactly one relativedelta interval later.
    second_run = processor.create_dag_run(dag)
    self.assertIsNotNone(second_run)
    self.assertEqual(dag.dag_id, second_run.dag_id)
    self.assertIsNotNone(second_run.run_id)
    self.assertNotEqual('', second_run.run_id)
    self.assertEqual(
        datetime_tz(2015, 1, 2, 0, 0) + interval,
        second_run.execution_date,
        msg='dag_run2.execution_date did not match expectation: {0}'.format(
            second_run.execution_date))
    self.assertEqual(State.RUNNING, second_run.state)
    self.assertFalse(second_run.external_trigger)

    dag.clear()
    self._clean_up(dag_id)
def test_removed_task_instances_can_be_restored(self):
    """A TI whose task disappears from the DAG keeps its state and is
    picked up again once the task is re-added."""

    def _without_tasks(dag):
        # Fresh DAG with the same identity but no tasks at all.
        return DAG(dag_id=dag.dag_id, start_date=dag.start_date)

    dag = DAG('test_task_restoration', start_date=DEFAULT_DATE)
    dag.add_task(DummyOperator(task_id='flaky_task', owner='test'))

    dagrun = self.create_dag_run(dag)
    flaky_ti = dagrun.get_task_instances()[0]
    self.assertEqual('flaky_task', flaky_ti.task_id)
    self.assertEqual(State.NONE, flaky_ti.state)

    # Remove the task: verify_integrity must not wipe the TI's state.
    dagrun.dag = _without_tasks(dag)
    dagrun.verify_integrity()
    flaky_ti.refresh_from_db()
    self.assertEqual(State.NONE, flaky_ti.state)

    # Restore the task: the TI is restored with its state intact.
    dagrun.dag.add_task(DummyOperator(task_id='flaky_task', owner='test'))
    dagrun.verify_integrity()
    flaky_ti.refresh_from_db()
    self.assertEqual(State.NONE, flaky_ti.state)
def test_task_instance_mutation_hook(self, state, mock_hook):
    """verify_integrity re-applies the task_instance_mutation_hook to
    existing task instances."""

    def _toggle_queue(task_instance):
        # Flip between the two queues on every invocation of the hook.
        if task_instance.queue == 'queue1':
            task_instance.queue = 'queue2'
        else:
            task_instance.queue = 'queue1'

    mock_hook.side_effect = _toggle_queue

    dag = DAG('test_task_instance_mutation_hook', start_date=DEFAULT_DATE)
    dag.add_task(
        DummyOperator(task_id='task_to_mutate', owner='test', queue='queue1'))

    dagrun = self.create_dag_run(dag)
    ti = dagrun.get_task_instances()[0]
    session = settings.Session()
    ti.state = state
    session.merge(ti)
    session.commit()

    # Creation already ran the hook once: queue1 -> queue2.
    assert ti.queue == 'queue2'

    # verify_integrity runs the hook again: queue2 -> queue1.
    dagrun.verify_integrity()
    ti = dagrun.get_task_instances()[0]
    assert ti.queue == 'queue1'
def test_timezone_awareness(self):
    """Naive execution dates are coerced to the default timezone; aware
    ones are converted to UTC."""
    naive = DEFAULT_DATE.replace(tzinfo=None)

    # A TI without a dag (backwards-compat path) still localizes.
    standalone_op = DummyOperator(task_id='op_no_dag')
    ti = TI(task=standalone_op, execution_date=naive)
    self.assertEqual(DEFAULT_DATE, ti.execution_date)

    # A TI whose task belongs to a dag, given a naive execution_date.
    dag = DAG('dag', start_date=DEFAULT_DATE)
    op1 = DummyOperator(task_id='op_1')
    dag.add_task(op1)
    ti = TI(task=op1, execution_date=naive)
    self.assertEqual(DEFAULT_DATE, ti.execution_date)

    # An already-localized execution_date is normalized to UTC.
    amsterdam = pendulum.timezone("Europe/Amsterdam")
    local_date = timezone.datetime(2016, 1, 1, 1, 0, 0, tzinfo=amsterdam)
    ti = TI(task=op1, execution_date=local_date)
    self.assertEqual(timezone.convert_to_utc(local_date), ti.execution_date)
def test_set_task_dates(self):
    """
    Test that tasks properly take start/end dates from DAGs
    """
    day = datetime.timedelta(days=1)
    dag = DAG('dag',
              start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + 10 * day)

    # A task with no dates inherits both from the dag.
    op1 = DummyOperator(task_id='op_1', owner='test')
    self.assertTrue(op1.start_date is None and op1.end_date is None)
    dag.add_task(op1)
    self.assertTrue(op1.start_date == dag.start_date
                    and op1.end_date == dag.end_date)

    # Dates wider than the dag's are clamped to the dag's window.
    op2 = DummyOperator(
        task_id='op_2',
        owner='test',
        start_date=DEFAULT_DATE - day,
        end_date=DEFAULT_DATE + 11 * day)
    dag.add_task(op2)
    self.assertTrue(op2.start_date == dag.start_date
                    and op2.end_date == dag.end_date)

    # Dates narrower than the dag's are preserved.
    op3 = DummyOperator(task_id='op_3',
                        owner='test',
                        start_date=DEFAULT_DATE + day,
                        end_date=DEFAULT_DATE + 9 * day)
    dag.add_task(op3)
    self.assertTrue(op3.start_date == DEFAULT_DATE + day)
    self.assertTrue(op3.end_date == DEFAULT_DATE + 9 * day)
def test_set_task_dates(self):
    """
    Test that tasks properly take start/end dates from DAGs
    """
    dag_start = DEFAULT_DATE
    dag_end = DEFAULT_DATE + datetime.timedelta(days=10)
    dag = DAG('dag', start_date=dag_start, end_date=dag_end)

    op1 = DummyOperator(task_id='op_1', owner='test')
    self.assertTrue(op1.start_date is None and op1.end_date is None)
    # The dag supplies both dates because op1 declared neither.
    dag.add_task(op1)
    self.assertTrue(op1.start_date == dag.start_date)
    self.assertTrue(op1.end_date == dag.end_date)

    op2 = DummyOperator(
        task_id='op_2',
        owner='test',
        start_date=DEFAULT_DATE - datetime.timedelta(days=1),
        end_date=DEFAULT_DATE + datetime.timedelta(days=11))
    # The dag's window is narrower, so it overrides op2's dates.
    dag.add_task(op2)
    self.assertTrue(op2.start_date == dag.start_date)
    self.assertTrue(op2.end_date == dag.end_date)

    op3 = DummyOperator(
        task_id='op_3',
        owner='test',
        start_date=DEFAULT_DATE + datetime.timedelta(days=1),
        end_date=DEFAULT_DATE + datetime.timedelta(days=9))
    # op3's own window is narrower, so it is kept as declared.
    dag.add_task(op3)
    self.assertTrue(
        op3.start_date == DEFAULT_DATE + datetime.timedelta(days=1))
    self.assertTrue(
        op3.end_date == DEFAULT_DATE + datetime.timedelta(days=9))
from airflow.models import DAG
# BashOperator/DummyOperator were referenced below without being imported,
# which raises NameError as soon as this file is parsed by the scheduler.
from airflow.operators import BashOperator, DummyOperator
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 11, 1),
}

dag = DAG(dag_id='example_1')
# dag = DAG(dag_id='example_1', executor=SequentialExecutor())

# Terminal no-op task that everything funnels into.
run_this_last = DummyOperator(
    task_id='run_this_last',
    default_args=default_args)
dag.add_task(run_this_last)

run_this = BashOperator(
    task_id='run_after_loop',
    bash_command='echo 1',
    default_args=default_args)
dag.add_task(run_this)
run_this.set_downstream(run_this_last)

# Fan-in: nine parallel sleeps all feed run_after_loop.
for i in range(9):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='sleep 5',
        default_args=default_args)
    task.set_downstream(run_this)
    dag.add_task(task)
from airflow.operators import BashOperator, MySqlOperator
from airflow.models import DAG
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 9, 1),
    'mysql_dbid': 'local_mysql',
}

dag = DAG(dag_id='example_3')

# Root task: every generated task depends on this one.
run_this = BashOperator(
    task_id='also_run_this', bash_command='ls -l', **default_args)
dag.add_task(run_this)

# Five sleeps with a templated, randomized duration.
for idx in range(5):
    suffix = str(idx)
    task = BashOperator(
        task_id='runme_' + suffix,
        bash_command='sleep {{ 10 + macros.random() * 10 }}',
        **default_args)
    task.set_upstream(run_this)
    dag.add_task(task)
from airflow.models import DAG
# BashOperator was used below without being imported, which raises
# NameError as soon as this file is parsed by the scheduler.
from airflow.operators import BashOperator
from datetime import datetime

default_args = {
    'owner': 'mistercrunch',
    'start_date': datetime(2014, 10, 1),
    'depends_on_past': True,
}

dag = DAG(dag_id='example_2')

# Terminal task that the whole graph funnels into.
run_this_last = BashOperator(
    task_id='run_this_last', bash_command='echo 1', **default_args)
dag.add_task(run_this_last)

run_this = BashOperator(
    task_id='run_this', bash_command='echo 1', **default_args)
dag.add_task(run_this)
run_this.set_downstream(run_this_last)

# Fan-in: ten parallel sleeps all feed run_this.
for i in range(10):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='sleep 10',
        default_args=default_args)
    task.set_downstream(run_this)
    dag.add_task(task)
from airflow.operators import BashOperator, MySqlOperator
from airflow.models import DAG
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 9, 1),
    'mysql_dbid': 'local_mysql',
}

dag = DAG(dag_id='example_3')

# Root task; the loop below hangs five children off of it.
run_this = BashOperator(
    task_id='also_run_this',
    bash_command='ls -l',
    **default_args)
dag.add_task(run_this)

for n in range(5):
    child = BashOperator(
        task_id='runme_' + str(n),
        bash_command='sleep {{ 10 + macros.random() * 10 }}',
        **default_args)
    child.set_upstream(run_this)
    dag.add_task(child)