def test_schedule_dag_once(self): """ Tests scheduling a dag scheduled for @once - should be scheduled the first time it is called, and not scheduled the second. """ dag = DAG(TEST_DAG_ID+'test_schedule_dag_once') dag.schedule_interval = '@once' dag.tasks = [models.BaseOperator(task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))] dag_run = jobs.SchedulerJob(test_mode=True).schedule_dag(dag) dag_run2 = jobs.SchedulerJob(test_mode=True).schedule_dag(dag) assert dag_run is not None assert dag_run2 is None
def scheduler(args): print(settings.HEADER) job = jobs.SchedulerJob( dag_id=args.dag_id, subdir=process_subdir(args.subdir), num_runs=args.num_runs, do_pickle=args.do_pickle) if args.daemon: pid, stdout, stderr, log_file = setup_locations("scheduler", args.pid, args.stdout, args.stderr, args.log_file) handle = setup_logging(log_file) stdout = open(stdout, 'w+') stderr = open(stderr, 'w+') ctx = daemon.DaemonContext( pidfile=TimeoutPIDLockFile(pid, -1), files_preserve=[handle], stdout=stdout, stderr=stderr, ) with ctx: job.run() stdout.close() stderr.close() else: signal.signal(signal.SIGINT, sigint_handler) signal.signal(signal.SIGTERM, sigint_handler) job.run()
def test_schedule_dag_fake_scheduled_previous(self): """ Test scheduling a dag where there is a prior DagRun which has the same run_id as the next run should have """ delta = timedelta(hours=1) dag = DAG(TEST_DAG_ID + 'test_schedule_dag_fake_scheduled_previous', schedule_interval=delta, start_date=DEFAULT_DATE) dag.tasks = [ models.BaseOperator(task_id="faketastic", owner='Also fake', start_date=DEFAULT_DATE) ] scheduler = jobs.SchedulerJob(test_mode=True) trigger = models.DagRun(dag_id=dag.dag_id, run_id=models.DagRun.id_for_date(DEFAULT_DATE), execution_date=DEFAULT_DATE, state=utils.State.SUCCESS, external_trigger=True) settings.Session().add(trigger) settings.Session().commit() dag_run = scheduler.schedule_dag(dag) assert dag_run is not None assert dag_run.dag_id == dag.dag_id assert dag_run.run_id is not None assert dag_run.run_id != '' assert dag_run.execution_date == DEFAULT_DATE + delta, ( 'dag_run.execution_date did not match expectation: {0}'.format( dag_run.execution_date)) assert dag_run.state == models.State.RUNNING assert dag_run.external_trigger == False
def scheduler(args): print(settings.HEADER) job = jobs.SchedulerJob(dag_id=args.dag_id, subdir=process_subdir(args.subdir), num_runs=args.num_runs, do_pickle=args.do_pickle) job.run()
def test_schedule_dag_start_end_dates(self): """ Tests that an attempt to schedule a task after the Dag's end_date does not succeed. """ delta = timedelta(hours=1) runs = 3 start_date = DEFAULT_DATE end_date = start_date + (runs - 1) * delta dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID, start_date=start_date, end_date=end_date, schedule_interval=delta) dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake')) # Create and schedule the dag runs dag_runs = [] scheduler = jobs.SchedulerJob(**self.default_scheduler_args) for _ in range(runs): dag_runs.append(scheduler.create_dag_run(dag)) additional_dag_run = scheduler.create_dag_run(dag) for dag_run in dag_runs: self.assertIsNotNone(dag_run) self.assertIsNone(additional_dag_run)
def test_schedule_dag_fake_scheduled_previous(self): """ Test scheduling a dag where there is a prior DagRun which has the same run_id as the next run should have """ delta = timedelta(hours=1) dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID, schedule_interval=delta, start_date=DEFAULT_DATE) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=DEFAULT_DATE)) scheduler = jobs.SchedulerJob(**self.default_scheduler_args) dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE), execution_date=DEFAULT_DATE, state=State.SUCCESS, external_trigger=True) dag_run = scheduler.create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( DEFAULT_DATE + delta, dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger)
def scheduler(args): print(settings.HEADER) log_to_stdout() job = jobs.SchedulerJob(dag_id=args.dag_id, subdir=args.subdir, num_runs=args.num_runs) job.run()
def test_schedule_dag_once(self): """ Tests scheduling a dag scheduled for @once - should be scheduled the first time it is called, and not scheduled the second. """ dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID) dag.schedule_interval = '@once' dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) dag_run2 = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertIsNone(dag_run2) dag.clear()
def test_schedule_dag_relativedelta(self): """ Tests scheduling a dag with a relativedelta schedule_interval """ delta = relativedelta(hours=+1) dag = DAG(self.TEST_SCHEDULE_RELATIVEDELTA_DAG_ID, schedule_interval=delta) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0), dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger) dag_run2 = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run2) self.assertEqual(dag.dag_id, dag_run2.dag_id) self.assertIsNotNone(dag_run2.run_id) self.assertNotEqual('', dag_run2.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0) + delta, dag_run2.execution_date, msg='dag_run2.execution_date did not match expectation: {0}' .format(dag_run2.execution_date) ) self.assertEqual(State.RUNNING, dag_run2.state) self.assertFalse(dag_run2.external_trigger) dag.clear()
def test_schedule_dag_no_previous_runs(self): """ Tests scheduling a dag with no previous runs """ dag = DAG(TEST_DAG_ID+'test_schedule_dag_no_previous_runs') dag.tasks = [models.BaseOperator(task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))] dag_run = jobs.SchedulerJob(test_mode=True).schedule_dag(dag) assert dag_run is not None assert dag_run.dag_id == dag.dag_id assert dag_run.run_id is not None assert dag_run.run_id != '' assert dag_run.execution_date == datetime(2015, 1, 2, 0, 0), ( 'dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date)) assert dag_run.state == models.State.RUNNING assert dag_run.external_trigger == False
def test_schedule_dag_no_end_date_up_to_today_only(self): """ Tests that a Dag created without an end_date can only be scheduled up to and including the current datetime. For example, if today is 2016-01-01 and we are scheduling from a start_date of 2015-01-01, only jobs up to, but not including 2016-01-01 should be scheduled. """ session = settings.Session() delta = timedelta(days=1) now = utcnow() start_date = now.subtract(weeks=1) runs = (now - start_date).days dag = DAG(self.TEST_SCHEDULE_DAG_NO_END_DATE_UP_TO_TODAY_ONLY_DAG_ID, start_date=start_date, schedule_interval=delta) dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake')) dag_runs = [] scheduler = jobs.SchedulerJob(**self.default_scheduler_args) for _ in range(runs): dag_run = scheduler.create_dag_run(dag) dag_runs.append(dag_run) # Mark the DagRun as complete dag_run.state = State.SUCCESS session.merge(dag_run) session.commit() # Attempt to schedule an additional dag run (for 2016-01-01) additional_dag_run = scheduler.create_dag_run(dag) for dag_run in dag_runs: self.assertIsNotNone(dag_run) self.assertIsNone(additional_dag_run)
def test_schedule_dag_no_previous_runs(self): """ Tests scheduling a dag with no previous runs """ dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0), dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger) dag.clear()
def test_scheduler_job(self): job = jobs.SchedulerJob(dag_id='example_bash_operator', test_mode=True) job.run()
def scheduler(args): print(settings.HEADER) log_to_stdout() job = jobs.SchedulerJob(args.dag_id, args.subdir) job.run()