def test_run_scheduled_on_time_boundary(external_repo_context):
    """Starting a schedule exactly on its cron boundary produces one run and one SUCCESS tick."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()

        boundary_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(boundary_time):
            # Start schedule exactly at midnight
            instance.start_schedule_and_update_storage_state(external_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 1

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS
def test_launch_failure(external_repo_context):
    """With an exploding run launcher, the run is still created and the tick still succeeds.

    Launch failure is recorded on the run itself (expected_success=False), not on the tick.
    """
    with instance_with_schedules(
        external_repo_context,
        overrides={
            "run_launcher": {
                "module": "dagster.core.test_utils",
                "class": "ExplodingRunLauncher",
            },
        },
    ) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            validate_run_started(run, frozen_now, "2019-02-26", expected_success=False)

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )
def test_bad_should_execute(external_repo_context):
    """A should_execute that raises produces no run and a FAILURE tick with the error message."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            # The tick fails before any run can be created
            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )
def launch_scheduled_runs_for_schedule(
    instance, schedule_state, end_datetime_utc, max_catchup_runs, debug_crash_flags=None
):
    """Launch any runs for a single schedule that are due between its last tick and now.

    Args:
        instance (DagsterInstance): the instance to create ticks/runs against.
        schedule_state (ScheduleState): stored state for the schedule being evaluated.
        end_datetime_utc (datetime.datetime): evaluate cron ticks up to this time (inclusive).
        max_catchup_runs (int): cap on how many missed ticks to launch in one pass.
        debug_crash_flags: test-only hooks that crash the process at named checkpoints
            (see _check_for_debug_crash) to exercise failure recovery.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", ScheduleState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)

    latest_tick = instance.get_latest_tick(schedule_state.schedule_origin_id)

    # Decide where to resume: from schedule start if never ticked, from the
    # interrupted tick itself if it was left STARTED, otherwise just after the
    # last completed tick (+1 second so croniter_range excludes it).
    if not latest_tick:
        start_timestamp_utc = schedule_state.start_timestamp
    elif latest_tick.status == ScheduleTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        start_timestamp_utc = latest_tick.timestamp + 1

    start_datetime_utc = datetime.datetime.fromtimestamp(start_timestamp_utc, tz=get_utc_timezone())

    tick_times = list(
        croniter_range(start_datetime_utc, end_datetime_utc, schedule_state.cron_schedule)
    )

    # Only launch the most recent max_catchup_runs ticks; older missed ticks are dropped.
    # NOTE(review): if max_catchup_runs is 0, tick_times[-0:] is the WHOLE list
    # (i.e. unlimited catchup) — confirm that is the intended semantics for 0.
    for schedule_time_utc in tick_times[-max_catchup_runs:]:
        if latest_tick and latest_tick.timestamp == schedule_time_utc.timestamp():
            # Resume the interrupted STARTED tick rather than creating a duplicate
            tick = latest_tick
        else:
            tick = instance.create_schedule_tick(
                ScheduleTickData(
                    schedule_origin_id=schedule_state.schedule_origin_id,
                    schedule_name=schedule_state.name,
                    timestamp=schedule_time_utc.timestamp(),
                    cron_schedule=schedule_state.cron_schedule,
                    status=ScheduleTickStatus.STARTED,
                )
            )

        _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        # ScheduleTickHolder persists the tick's final status (including on error)
        with ScheduleTickHolder(tick, instance) as tick_holder:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")

            with RepositoryLocationHandle.create_from_repository_origin(
                schedule_state.origin.repository_origin, instance
            ) as repo_location_handle:
                repo_location = RepositoryLocation.from_handle(repo_location_handle)
                _schedule_run_at_time(
                    instance,
                    repo_location,
                    schedule_state,
                    schedule_time_utc,
                    tick_holder,
                    debug_crash_flags,
                )
def test_launch_failure(external_repo_context, capfd):
    """Exploding launcher: run created, tick SUCCESS, and an ERROR line in scheduler output."""
    with central_timezone():
        with instance_with_schedules(
            external_repo_context,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
        ) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()

            frozen_now = datetime(
                year=2019,
                month=2,
                day=27,
                hour=0,
                minute=0,
                second=0,
                tzinfo=get_utc_timezone(),
            )
            with freeze_time(frozen_now):
                instance.start_schedule_and_update_storage_state(external_schedule)
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 1

                run = instance.get_runs()[0]
                validate_run_started(run, frozen_now, "2019-02-26", expected_success=False)

                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                validate_tick(
                    ticks[0],
                    external_schedule,
                    frozen_now,
                    ScheduleTickStatus.SUCCESS,
                    run.run_id,
                )

                # The launch failure is surfaced in the scheduler log output
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - ERROR - Run {run_id} created successfully but failed to launch.
""".format(run_id=instance.get_runs()[0].run_id)
                )
def test_max_catchup_runs():
    """After missing 5 days of daily ticks, max_catchup_runs=2 launches only the 2 newest."""
    start_time = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with instance_with_schedules(grpc_repo) as (instance, external_repo):
        with freeze_time(start_time) as frozen_datetime:
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Day is now March 4 at 11:59PM
            frozen_datetime.tick(delta=timedelta(days=5))

            launch_scheduled_runs(instance, get_current_datetime_in_utc(), max_catchup_runs=2)

            # Only the two most recent missed ticks were launched
            assert instance.get_runs_count() == 2
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 2

            first_datetime = datetime(year=2019, month=3, day=4, tzinfo=get_utc_timezone())

            wait_for_all_runs_to_start(instance)

            validate_tick(
                ticks[0],
                external_schedule,
                first_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
            validate_run_started(instance.get_runs()[0], first_datetime, "2019-03-03")

            second_datetime = datetime(year=2019, month=3, day=3, tzinfo=get_utc_timezone())

            validate_tick(
                ticks[1],
                external_schedule,
                second_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[1].run_id,
            )
            validate_run_started(instance.get_runs()[1], second_datetime, "2019-03-02")
def test_wrong_config(external_repo_context, capfd):
    """Bad run config: run is created (and fails), tick is SUCCESS, error is logged."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_started(run, frozen_now, "2019-02-26", expected_success=False)

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )

            # The config error surfaces as an engine event on the run itself
            run_logs = instance.all_logs(run.run_id)
            config_error_events = [
                event
                for event in run_logs
                if (
                    "DagsterInvalidConfigError" in event.dagster_event.message
                    and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                )
            ]
            assert len(config_error_events) > 0

            captured = capfd.readouterr()
            assert "Failed to fetch execution plan for wrong_config_schedule" in captured.out
            assert "Error in config for pipeline the_pipeline" in captured.out
            assert 'Missing required field "solids" at the root.' in captured.out
def test_bad_load():
    """An unloadable schedule target yields a FAILURE tick (and no runs) on every pass."""
    with schedule_instance() as instance:
        working_directory = os.path.dirname(__file__)
        recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
        schedule = recon_repo.get_reconstructable_schedule("also_doesnt_exist")
        fake_origin = schedule.get_origin()

        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as frozen_datetime:
            schedule_state = ScheduleState(
                fake_origin,
                ScheduleStatus.RUNNING,
                "0 0 * * *",
                get_timestamp_from_utc_datetime(get_current_datetime_in_utc()),
            )
            instance.add_schedule_state(schedule_state)

            frozen_datetime.tick(delta=timedelta(seconds=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message

            # A day later the next tick fails the same way
            frozen_datetime.tick(delta=timedelta(days=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(fake_origin.get_id())
            assert len(ticks) == 2
            assert ticks[0].status == ScheduleTickStatus.FAILURE
            assert ticks[0].timestamp == get_timestamp_from_utc_datetime(
                get_current_datetime_in_utc()
            )
            assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
def test_wrong_config(external_repo_context):
    """Bad run config: the run is created but fails; the tick itself still succeeds."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("wrong_config_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1

            wait_for_all_runs_to_start(instance)

            run = instance.get_runs()[0]
            validate_run_started(run, frozen_now, "2019-02-26", expected_success=False)

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.SUCCESS,
                run.run_id,
            )

            # The config error surfaces as an engine event in the run's event log
            run_logs = instance.all_logs(run.run_id)
            config_error_events = [
                event
                for event in run_logs
                if (
                    "DagsterInvalidConfigError" in event.dagster_event.message
                    and event.dagster_event_type == DagsterEventType.ENGINE_EVENT
                )
            ]
            assert len(config_error_events) > 0
def test_skip(external_repo_context):
    """A should_execute returning False produces a SKIPPED tick and no runs."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("skip_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.SKIPPED,
                None,
            )
def test_bad_should_execute(external_repo_context, capfd):
    """should_execute raising: FAILURE tick, no run, and the error is in scheduler output."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("bad_should_execute_schedule")
        schedule_origin = external_schedule.get_origin()

        frozen_now = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(frozen_now):
            instance.start_schedule_and_update_storage_state(external_schedule)
            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 0

            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                frozen_now,
                ScheduleTickStatus.FAILURE,
                None,
                "Error occurred during the execution of should_execute for "
                "schedule bad_should_execute_schedule",
            )

            captured = capfd.readouterr()
            assert (
                "Failed to fetch schedule data for bad_should_execute_schedule: "
            ) in captured.out
            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in captured.out
            )
            assert "Exception: bananas" in captured.out
def test_skip(external_repo_context, capfd):
    """should_execute returning False: SKIPPED tick, no run, and a skip line in the log."""
    with central_timezone():
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule("skip_schedule")
            schedule_origin = external_schedule.get_origin()

            frozen_now = datetime(
                year=2019,
                month=2,
                day=27,
                hour=0,
                minute=0,
                second=0,
                tzinfo=get_utc_timezone(),
            )
            with freeze_time(frozen_now):
                instance.start_schedule_and_update_storage_state(external_schedule)
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 0

                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                validate_tick(
                    ticks[0],
                    external_schedule,
                    frozen_now,
                    ScheduleTickStatus.SKIPPED,
                    None,
                )

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: skip_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for skip_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:00:00 - dagster-scheduler - INFO - should_execute returned False for skip_schedule, skipping
"""
                )
def test_multiple_schedules_on_different_time_ranges(external_repo_context):
    """Daily and hourly schedules tick independently as time advances."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        external_hourly_schedule = external_repo.get_external_schedule("simple_hourly_schedule")

        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)
            instance.start_schedule_and_update_storage_state(external_hourly_schedule)

            # Cross midnight: both the daily and hourly schedules fire once
            frozen_datetime.tick(delta=timedelta(seconds=2))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 2

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 1
            assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

            # One hour later: only the hourly schedule fires again
            frozen_datetime.tick(delta=timedelta(hours=1))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 3

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
            assert len(hourly_ticks) == 2
            successful_hourly = [
                tick for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS
            ]
            assert len(successful_hourly) == 2
def test_simple_schedule(external_repo_context, capfd):
    """End-to-end happy path for a daily schedule, checking runs, ticks, and log output.

    Covers: no-op before the first tick, a single launch after crossing midnight,
    idempotent re-invocation, and multi-day catchup launching one run per missed tick.
    """
    with central_timezone():
        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tzinfo=get_utc_timezone(),
        )
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            with freeze_time(start_time) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(external_schedule)

                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0

                # launch_scheduled_runs does nothing before the first tick
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )
                assert instance.get_runs_count() == 0
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 0

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-27 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 17:59:59 - dagster-scheduler - INFO - No new runs for simple_schedule
"""
                )

                # Move forward in time so we're past a tick
                frozen_datetime.tick(delta=timedelta(seconds=2))

                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1

                expected_datetime = datetime(year=2019, month=2, day=28, tzinfo=get_utc_timezone())

                validate_tick(
                    ticks[0],
                    external_schedule,
                    expected_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

                wait_for_all_runs_to_start(instance)
                validate_run_started(instance.get_runs()[0], expected_datetime, "2019-02-27")

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id)
                )

                # Verify idempotence
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                # Verify advancing in time but not going past a tick doesn't add any new runs
                frozen_datetime.tick(delta=timedelta(seconds=2))
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )
                assert instance.get_runs_count() == 1
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                capfd.readouterr()

                # Traveling two more days in the future before running results in two new ticks
                frozen_datetime.tick(delta=timedelta(days=2))
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3

                successful = [
                    tick for tick in ticks if tick.status == ScheduleTickStatus.SUCCESS
                ]
                assert len(successful) == 3

                runs_by_partition = {
                    run.tags[PARTITION_NAME_TAG]: run for run in instance.get_runs()
                }
                assert "2019-02-28" in runs_by_partition
                assert "2019-03-01" in runs_by_partition

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-03-01 18:00:03 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-01 00:00:00+0000, 2019-03-02 00:00:00+0000
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-01 18:00:03 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    )
                )

                # Check idempotence again
                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )
                assert instance.get_runs_count() == 3
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 3
def test_simple_schedule(external_repo_context):
    """End-to-end happy path for a daily schedule: first tick, idempotence, and catchup."""
    start_time = datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
        tzinfo=get_utc_timezone(),
    )
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        with freeze_time(start_time) as frozen_datetime:
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            schedule_origin = external_schedule.get_origin()
            instance.start_schedule_and_update_storage_state(external_schedule)

            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            # launch_scheduled_runs does nothing before the first tick
            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 0
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 0

            # Move forward in time so we're past a tick
            frozen_datetime.tick(delta=timedelta(seconds=2))

            launch_scheduled_runs(instance, get_current_datetime_in_utc())

            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1

            expected_datetime = datetime(year=2019, month=2, day=28, tzinfo=get_utc_timezone())

            validate_tick(
                ticks[0],
                external_schedule,
                expected_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], expected_datetime, "2019-02-27")

            # Verify idempotence
            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            # Verify advancing in time but not going past a tick doesn't add any new runs
            frozen_datetime.tick(delta=timedelta(seconds=2))
            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 1
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.SUCCESS

            # Traveling two more days in the future before running results in two new ticks
            frozen_datetime.tick(delta=timedelta(days=2))
            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3

            successful = [tick for tick in ticks if tick.status == ScheduleTickStatus.SUCCESS]
            assert len(successful) == 3

            runs_by_partition = {run.tags[PARTITION_NAME_TAG]: run for run in instance.get_runs()}
            assert "2019-02-28" in runs_by_partition
            assert "2019-03-01" in runs_by_partition

            # Check idempotence again
            launch_scheduled_runs(instance, get_current_datetime_in_utc())
            assert instance.get_runs_count() == 3
            ticks = instance.get_schedule_ticks(schedule_origin.get_id())
            assert len(ticks) == 3
def get_external_schedule_execution(recon_repo, external_schedule_execution_args):
    """Evaluate a schedule's user-code functions and return serializable execution data.

    Runs should_execute (in LAUNCH_SCHEDULED_EXECUTION mode only), then run_config_fn,
    then tags_fn, each inside a user_code_error_boundary. Any ScheduleExecutionError
    raised by one of those boundaries is converted into an
    ExternalScheduleExecutionErrorData instead of propagating.
    """
    check.inst_param(
        recon_repo,
        "recon_repo",
        ReconstructableRepository,
    )
    check.inst_param(
        external_schedule_execution_args,
        "external_schedule_execution_args",
        ExternalScheduleExecutionArgs,
    )

    definition = recon_repo.get_definition()
    schedule_def = definition.get_schedule_def(external_schedule_execution_args.schedule_name)
    with DagsterInstance.from_ref(external_schedule_execution_args.instance_ref) as instance:
        # The scheduled execution time is optional; when absent the context gets None
        scheduled_execution_time_utc = (
            datetime.fromtimestamp(
                external_schedule_execution_args.scheduled_execution_timestamp_utc,
                tz=get_utc_timezone(),
            )
            if external_schedule_execution_args.scheduled_execution_timestamp_utc
            else None
        )
        schedule_context = ScheduleExecutionContext(instance, scheduled_execution_time_utc)

        schedule_execution_data_mode = external_schedule_execution_args.schedule_execution_data_mode
        try:
            with user_code_error_boundary(
                ScheduleExecutionError,
                lambda: "Error occurred during the execution of should_execute for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name),
            ):
                # should_execute is only consulted when actually launching; in other
                # modes it stays None and run_config/tags are still computed below
                should_execute = None
                if (
                    schedule_execution_data_mode
                    == ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION
                ):
                    should_execute = schedule_def.should_execute(schedule_context)
                    if not should_execute:
                        # Early out: a skipped schedule needs no run_config or tags
                        return ExternalScheduleExecutionData(
                            should_execute=False, run_config=None, tags=None
                        )

            with user_code_error_boundary(
                ScheduleExecutionError,
                lambda: "Error occurred during the execution of run_config_fn for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name),
            ):
                run_config = schedule_def.get_run_config(schedule_context)

            with user_code_error_boundary(
                ScheduleExecutionError,
                lambda: "Error occurred during the execution of tags_fn for schedule "
                "{schedule_name}".format(schedule_name=schedule_def.name),
            ):
                tags = schedule_def.get_tags(schedule_context)

            return ExternalScheduleExecutionData(
                run_config=run_config, tags=tags, should_execute=should_execute
            )
        except ScheduleExecutionError:
            # User-code failure: ship the error info back across the serialization boundary
            return ExternalScheduleExecutionErrorData(
                serializable_error_info_from_exc_info(sys.exc_info())
            )
def test_bad_schedule_mixed_with_good_schedule(external_repo_context):
    """A failing schedule must not block a healthy one; the bad one recovers on even days."""
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule_on_odd_days"
        )

        good_origin = good_schedule.get_origin()
        bad_origin = bad_schedule.get_origin()

        start_time = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        with freeze_time(start_time) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            # Only the good schedule launched; the bad one failed on the odd day
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], start_time, "2019-02-26")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                start_time,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1
            assert bad_ticks[0].status == ScheduleTickStatus.FAILURE
            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in bad_ticks[0].error.message
            )

            # Next day (even): both schedules should launch successfully
            frozen_datetime.tick(delta=timedelta(days=1))
            new_now = get_current_datetime_in_utc()

            launch_scheduled_runs(
                instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
            )

            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule)
            )
            assert len(good_schedule_runs) == 2
            validate_run_started(good_schedule_runs[0], new_now, "2019-02-27")

            good_ticks = instance.get_schedule_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule)
            )
            assert len(bad_schedule_runs) == 1
            validate_run_started(bad_schedule_runs[0], new_now, "2019-02-27")

            bad_ticks = instance.get_schedule_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                ScheduleTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )
def test_failure_recovery_after_run_created(external_repo_context, crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted after a run is created,
    # it will just re-launch the already-created run when it runs again
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        initial_datetime = datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
            tzinfo=get_utc_timezone(),
        )
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Crash the subprocess scheduler at the parametrized checkpoint/signal
            debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            # The injected crash should make the subprocess exit abnormally
            assert scheduler_process.exitcode != 0

            # The tick was left in STARTED state by the crash, but the run exists
            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == ScheduleTickStatus.STARTED

            assert instance.get_runs_count() == 1

            if crash_location == "RUN_CREATED":
                run = instance.get_runs()[0]
                # Run was created, but hasn't launched yet
                assert run.tags[SCHEDULED_EXECUTION_TIME_TAG] == initial_datetime.isoformat()
                assert run.tags[PARTITION_NAME_TAG] == "2019-02-26"
                assert run.status == PipelineRunStatus.NOT_STARTED
            else:
                # The run was created and launched - running again should do nothing other than
                # moving the tick to success state.

                # The fact that we need to add this line indicates that there is still a theoretical
                # possible race condition - if the scheduler fails after launching a run
                # and then runs again between when the run was launched and when its status is
                # changed to STARTED by the executor, we could end up launching the same run twice.
                # Run queueing or some other way to immediately
                # identify that a run was launched would help eliminate this race condition. For now,
                # eliminate the possibility by waiting for the run to start before running the
                # scheduler again.
                wait_for_all_runs_to_start(instance)

                run = instance.get_runs()[0]
                validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")
                assert run.status in [PipelineRunStatus.STARTED, PipelineRunStatus.SUCCESS]

            frozen_datetime.tick(delta=timedelta(minutes=5))

            # Running again just launches the existing run and marks the tick as success
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            # Still exactly one run (no duplicate launch), and the tick is now SUCCESS
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def test_multiple_schedules_on_different_time_ranges(external_repo_context, capfd):
    # Verify that a daily and an hourly schedule running side by side each get their
    # own ticks/runs, and that the scheduler logs what it launched.
    # NOTE(review): log timestamps below (18:00 / 19:00) appear to be the frozen UTC
    # times rendered in US Central via central_timezone() - confirm against the logger.
    with central_timezone():
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            external_hourly_schedule = external_repo.get_external_schedule("simple_hourly_schedule")
            # One second before the day boundary, so ticking forward crosses both the
            # hourly and the daily cron boundaries at once
            initial_datetime = datetime(
                year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
            )
            with freeze_time(initial_datetime) as frozen_datetime:
                instance.start_schedule_and_update_storage_state(external_schedule)
                instance.start_schedule_and_update_storage_state(external_hourly_schedule)

                frozen_datetime.tick(delta=timedelta(seconds=2))

                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                # One run for each schedule
                assert instance.get_runs_count() == 2

                ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
                assert len(hourly_ticks) == 1
                assert hourly_ticks[0].status == ScheduleTickStatus.SUCCESS

                captured = capfd.readouterr()

                assert (
                    captured.out
                    == """2019-02-27 18:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_hourly_schedule
2019-02-27 18:00:01 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-28 00:00:00+0000
2019-02-27 18:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    ))

                # An hour later, only the hourly schedule is due again
                frozen_datetime.tick(delta=timedelta(hours=1))

                launch_scheduled_runs(
                    instance, get_default_scheduler_logger(), get_current_datetime_in_utc()
                )

                assert instance.get_runs_count() == 3

                # Daily schedule: still just the one tick
                ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
                assert len(ticks) == 1
                assert ticks[0].status == ScheduleTickStatus.SUCCESS

                # Hourly schedule: a second successful tick
                hourly_ticks = instance.get_schedule_ticks(external_hourly_schedule.get_origin_id())
                assert len(hourly_ticks) == 2
                assert (len([
                    tick for tick in hourly_ticks if tick.status == ScheduleTickStatus.SUCCESS
                ]) == 2)

                captured = capfd.readouterr()

                assert (
                    captured.out
                    == """2019-02-27 19:00:01 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_hourly_schedule, simple_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - Launching run for simple_hourly_schedule at 2019-02-28 01:00:00+0000
2019-02-27 19:00:01 - dagster-scheduler - INFO - Completed scheduled launch of run {third_run_id} for simple_hourly_schedule
2019-02-27 19:00:01 - dagster-scheduler - INFO - No new runs for simple_schedule
""".format(third_run_id=instance.get_runs()[0].run_id))
def test_failure_recovery_after_tick_success(external_repo_context, crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted after a run is created,
    # it will just re-launch the already-created run when it runs again
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        initial_datetime = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Instruct the scheduler subprocess to crash with crash_signal when it
            # reaches crash_location while processing this schedule
            debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            assert scheduler_process.exitcode != 0

            # As above there's a possible race condition here if the scheduler crashes
            # and launches the same run twice if we crash right after the launch and re-run
            # before the run actually starts
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1

            if crash_signal == signal.SIGKILL:
                # SIGKILL cannot be trapped, so the tick never reached its final state
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.STARTED,
                    None,
                )
            else:
                # Other signals allow the tick to be finalized before the process dies
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

            frozen_datetime.tick(delta=timedelta(minutes=5))

            # Running again just marks the tick as success since the run has already started
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            # Still exactly one run; the recovery pass must not launch a duplicate
            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def test_max_catchup_runs(capfd):
    # Verify that when a schedule has fallen many days behind, the scheduler only
    # launches the most recent max_catchup_runs ticks and logs a warning.
    initial_datetime = datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59, tzinfo=get_utc_timezone(),
    )
    with central_timezone():
        with instance_with_schedules(grpc_repo) as (instance, external_repo):
            with freeze_time(initial_datetime) as frozen_datetime:
                external_schedule = external_repo.get_external_schedule("simple_schedule")
                schedule_origin = external_schedule.get_origin()
                instance.start_schedule_and_update_storage_state(external_schedule)

                # Day is now March 4 at 11:59PM
                frozen_datetime.tick(delta=timedelta(days=5))

                launch_scheduled_runs(
                    instance,
                    get_default_scheduler_logger(),
                    get_current_datetime_in_utc(),
                    max_catchup_runs=2,
                )

                # Five days were missed, but only the 2 most recent ticks are launched
                assert instance.get_runs_count() == 2
                ticks = instance.get_schedule_ticks(schedule_origin.get_id())
                assert len(ticks) == 2

                # Most recent tick first (March 4), then the earlier one (March 3)
                first_datetime = datetime(year=2019, month=3, day=4, tzinfo=get_utc_timezone())

                wait_for_all_runs_to_start(instance)

                validate_tick(
                    ticks[0],
                    external_schedule,
                    first_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )
                validate_run_started(instance.get_runs()[0], first_datetime, "2019-03-03")

                second_datetime = datetime(year=2019, month=3, day=3, tzinfo=get_utc_timezone())

                validate_tick(
                    ticks[1],
                    external_schedule,
                    second_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[1].run_id,
                )
                validate_run_started(instance.get_runs()[1], second_datetime, "2019-03-02")

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-03-04 17:59:59 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - WARNING - simple_schedule has fallen behind, only launching 2 runs
2019-03-04 17:59:59 - dagster-scheduler - INFO - Launching 2 runs for simple_schedule at the following times: 2019-03-03 00:00:00+0000, 2019-03-04 00:00:00+0000
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {first_run_id} for simple_schedule
2019-03-04 17:59:59 - dagster-scheduler - INFO - Completed scheduled launch of run {second_run_id} for simple_schedule
""".format(
                        first_run_id=instance.get_runs()[1].run_id,
                        second_run_id=instance.get_runs()[0].run_id,
                    ))
def test_failure_recovery_before_run_created(external_repo_context, crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted before a run is created,
    # it will create exactly one tick/run when it is re-launched
    # NOTE(review): a later function in this chunk has the same name (with a capfd
    # parameter); if both live in the same module the later definition shadows this
    # one and pytest will only collect the capfd variant - confirm and rename one.
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        initial_datetime = datetime(
            year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
        )
        external_schedule = external_repo.get_external_schedule("simple_schedule")
        with freeze_time(initial_datetime) as frozen_datetime:
            instance.start_schedule_and_update_storage_state(external_schedule)

            # Instruct the scheduler subprocess to crash with crash_signal when it
            # reaches crash_location while processing this schedule
            debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            assert scheduler_process.exitcode != 0

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            # Tick was recorded as STARTED but the crash happened before run creation
            assert ticks[0].status == ScheduleTickStatus.STARTED

            assert instance.get_runs_count() == 0

            frozen_datetime.tick(delta=timedelta(minutes=5))

            # Re-run the scheduler without crash flags; it should resume the
            # interrupted tick and create exactly one run
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), get_current_datetime_in_utc(), None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

            ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                ScheduleTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )
def launch_scheduled_runs_for_schedule(
    instance, logger, schedule_state, end_datetime_utc, max_catchup_runs, debug_crash_flags=None
):
    """Launch runs for a single schedule for every cron time due up to end_datetime_utc.

    Walks the cron schedule forward from the last completed tick (or from the
    schedule's start time if it has never ticked). An interrupted tick (left in
    STARTED state) is re-done rather than skipped, and at most ``max_catchup_runs``
    of the most recent due times are launched if the schedule has fallen behind.

    Args:
        instance (DagsterInstance): instance storing runs and schedule ticks.
        logger: scheduler logger used for progress/warning messages.
        schedule_state (ScheduleState): stored state of the schedule to evaluate.
        end_datetime_utc (datetime.datetime): launch runs for all cron times up to this moment.
        max_catchup_runs (int): cap on how many missed runs to launch in one pass.
        debug_crash_flags (Optional[dict]): test-only hooks that crash the process at
            named checkpoints (e.g. "TICK_CREATED", "TICK_HELD").
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", ScheduleState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)

    latest_tick = instance.get_latest_tick(schedule_state.schedule_origin_id)

    if not latest_tick:
        # Never ticked before - start evaluating from the schedule's start time
        start_timestamp_utc = schedule_state.start_timestamp
    elif latest_tick.status == ScheduleTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        # Advance one second past the completed tick so it is not re-launched
        start_timestamp_utc = latest_tick.timestamp + 1

    start_datetime_utc = datetime.datetime.fromtimestamp(start_timestamp_utc, tz=get_utc_timezone())

    tick_times = list(
        croniter_range(start_datetime_utc, end_datetime_utc, schedule_state.cron_schedule)
    )

    if not tick_times:
        logger.info("No new runs for {schedule_name}".format(schedule_name=schedule_state.name))
        return

    if len(tick_times) > max_catchup_runs:
        # Fix: logger.warn is a deprecated alias of logger.warning
        logger.warning(
            "{schedule_name} has fallen behind, only launching {max_catchup_runs} runs".format(
                schedule_name=schedule_state.name, max_catchup_runs=max_catchup_runs
            )
        )
        # Keep only the most recent due times
        tick_times = tick_times[-max_catchup_runs:]

    if len(tick_times) == 1:
        logger.info(
            "Launching run for {schedule_name} at {time}".format(
                schedule_name=schedule_state.name,
                time=tick_times[0].strftime(_SCHEDULER_DATETIME_FORMAT),
            )
        )
    else:
        logger.info(
            "Launching {num_runs} runs for {schedule_name} at the following times: {times}".format(
                num_runs=len(tick_times),
                schedule_name=schedule_state.name,
                times=", ".join([time.strftime(_SCHEDULER_DATETIME_FORMAT) for time in tick_times]),
            )
        )

    for schedule_time_utc in tick_times:
        schedule_timestamp = get_timestamp_from_utc_datetime(schedule_time_utc)
        if latest_tick and latest_tick.timestamp == schedule_timestamp:
            # Re-use the interrupted tick instead of creating a duplicate
            tick = latest_tick
            logger.info("Resuming previously interrupted schedule execution")
        else:
            tick = instance.create_schedule_tick(
                ScheduleTickData(
                    schedule_origin_id=schedule_state.schedule_origin_id,
                    schedule_name=schedule_state.name,
                    timestamp=schedule_timestamp,
                    cron_schedule=schedule_state.cron_schedule,
                    status=ScheduleTickStatus.STARTED,
                )
            )

        _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        # The holder finalizes the tick's status (and records errors) on exit
        with ScheduleTickHolder(tick, instance, logger) as tick_holder:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")

            with RepositoryLocationHandle.create_from_repository_origin(
                schedule_state.origin.repository_origin, instance
            ) as repo_location_handle:
                repo_location = RepositoryLocation.from_handle(repo_location_handle)
                _schedule_run_at_time(
                    instance,
                    logger,
                    repo_location,
                    schedule_state,
                    schedule_time_utc,
                    tick_holder,
                    debug_crash_flags,
                )
def test_failure_recovery_before_run_created(external_repo_context, crash_location, crash_signal, capfd):
    with central_timezone():
        # Verify that if the scheduler crashes or is interrupted before a run is created,
        # it will create exactly one tick/run when it is re-launched
        with instance_with_schedules(external_repo_context) as (instance, external_repo):
            initial_datetime = datetime(
                year=2019, month=2, day=27, hour=0, minute=0, second=0, tzinfo=get_utc_timezone(),
            )
            external_schedule = external_repo.get_external_schedule("simple_schedule")
            with freeze_time(initial_datetime) as frozen_datetime:
                instance.start_schedule_and_update_storage_state(external_schedule)

                # Instruct the scheduler subprocess to crash with crash_signal when it
                # reaches crash_location while processing this schedule
                debug_crash_flags = {external_schedule.name: {crash_location: crash_signal}}

                scheduler_process = multiprocessing.Process(
                    target=_test_launch_scheduled_runs_in_subprocess,
                    args=[instance.get_ref(), get_current_datetime_in_utc(), debug_crash_flags],
                )
                scheduler_process.start()
                scheduler_process.join(timeout=60)

                assert scheduler_process.exitcode != 0

                # Output stops after the "Launching run" line - the crash happened
                # before the run launch completed
                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:00:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
""")

                ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
                assert len(ticks) == 1
                # Tick was recorded as STARTED but no run exists yet
                assert ticks[0].status == ScheduleTickStatus.STARTED

                assert instance.get_runs_count() == 0

                frozen_datetime.tick(delta=timedelta(minutes=5))

                # Re-run the scheduler without crash flags; it should resume the
                # interrupted tick and create exactly one run
                scheduler_process = multiprocessing.Process(
                    target=_test_launch_scheduled_runs_in_subprocess,
                    args=[instance.get_ref(), get_current_datetime_in_utc(), None],
                )
                scheduler_process.start()
                scheduler_process.join(timeout=60)
                assert scheduler_process.exitcode == 0

                assert instance.get_runs_count() == 1
                wait_for_all_runs_to_start(instance)
                validate_run_started(instance.get_runs()[0], initial_datetime, "2019-02-26")

                ticks = instance.get_schedule_ticks(external_schedule.get_origin_id())
                assert len(ticks) == 1
                validate_tick(
                    ticks[0],
                    external_schedule,
                    initial_datetime,
                    ScheduleTickStatus.SUCCESS,
                    instance.get_runs()[0].run_id,
                )

                captured = capfd.readouterr()
                assert (
                    captured.out
                    == """2019-02-26 18:05:00 - dagster-scheduler - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:05:00 - dagster-scheduler - INFO - Launching run for simple_schedule at 2019-02-27 00:00:00+0000
2019-02-26 18:05:00 - dagster-scheduler - INFO - Resuming previously interrupted schedule execution
2019-02-26 18:05:00 - dagster-scheduler - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id))