def test_get_unloadable_job(graphql_context):
    """Of two unloadable schedule states, only the RUNNING one is returned by the query."""
    instance = graphql_context.instance
    frozen_now = pendulum.datetime(year=2019, month=2, day=27, hour=23, minute=59, second=59)

    with pendulum.test(frozen_now):
        # Register one running and one stopped schedule state, both with unloadable origins.
        for schedule_name, status in (
            ("unloadable_running", JobStatus.RUNNING),
            ("unloadable_stopped", JobStatus.STOPPED),
        ):
            instance.add_job_state(
                JobState(
                    _get_unloadable_schedule_origin(schedule_name),
                    JobType.SCHEDULE,
                    status,
                    ScheduleJobData("0 0 * * *", pendulum.now("UTC").timestamp()),
                )
            )

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    unloadable = result.data["unloadableJobStatesOrError"]["results"]
    assert len(unloadable) == 1
    assert unloadable[0]["name"] == "unloadable_running"
def test_reconcile_schedule_without_start_time():
    """Reconciling a RUNNING schedule state stored without a start timestamp sets one."""
    with TemporaryDirectory() as tempdir:
        instance = define_scheduler_instance(tempdir)
        with get_test_external_repo() as external_repo:
            schedule = external_repo.get_external_schedule("no_config_pipeline_daily_schedule")

            # Legacy-style state: running, but persisted with start_timestamp=None.
            instance.add_job_state(
                JobState(
                    schedule.get_external_origin(),
                    JobType.SCHEDULE,
                    JobStatus.RUNNING,
                    ScheduleJobData(schedule.cron_schedule, None),
                )
            )

            instance.reconcile_scheduler_state(external_repository=external_repo)

            reconciled = instance.get_job_state(schedule.get_external_origin_id())
            assert reconciled.status == JobStatus.RUNNING

            actual_start = reconciled.job_specific_data.start_timestamp
            expected_start = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())
            assert actual_start == expected_start
def test_launch_failure(external_repo_context, capfd):
    """A run is created and the tick is SUCCESS even though launching the run fails."""
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")
    # Configure the instance with the ExplodingRunLauncher from dagster.core.test_utils.
    launcher_overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "ExplodingRunLauncher"},
    }

    with instance_with_sensors(external_repo_context, overrides=launcher_overrides) as (
        instance,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("always_on_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            # Nothing has happened before the first iteration.
            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(ticks[0], sensor, frozen_now, JobTickStatus.SUCCESS, [run.run_id])

            log_output = capfd.readouterr().out
            expected_message = "Run {run_id} created successfully but failed to launch.".format(
                run_id=run.run_id
            )
            assert expected_message in log_output
def test_error_sensor(external_repo_context, capfd):
    """A sensor whose evaluation function errors yields a FAILURE tick and no runs."""
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")
    evaluation_error = (
        "Error occurred during the execution of evaluation_fn for sensor error_sensor"
    )

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("error_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(
                ticks[0], sensor, frozen_now, JobTickStatus.FAILURE, [], evaluation_error
            )

            # Both the daemon-level message and the underlying error must be logged.
            log_output = capfd.readouterr().out
            assert "Failed to resolve sensor for error_sensor : " in log_output
            assert evaluation_error in log_output
def test_custom_interval_sensor(external_repo_context):
    """A second tick for "custom_interval_sensor" appears after 60 seconds, not after 30."""
    start = pendulum.datetime(year=2019, month=2, day=28).in_tz("US/Central")
    after_30s = start.add(seconds=30)
    after_60s = after_30s.add(seconds=30)

    def run_iteration(instance):
        list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(start):
            sensor = external_repo.get_external_sensor("custom_interval_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()
            assert len(instance.get_job_ticks(origin_id)) == 0

            run_iteration(instance)
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(ticks[0], sensor, start, JobTickStatus.SKIPPED)

        with pendulum.test(after_30s):
            run_iteration(instance)
            # no additional tick created after 30 seconds
            assert len(instance.get_job_ticks(origin_id)) == 1

        with pendulum.test(after_60s):
            run_iteration(instance)
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 2

            expected_datetime = pendulum.datetime(year=2019, month=2, day=28, hour=0, minute=1)
            validate_tick(ticks[0], sensor, expected_datetime, JobTickStatus.SKIPPED)
def test_custom_interval_sensor_with_offset(external_repo_context, monkeypatch):
    """The sensor iteration loop sleeps (via a patched ``time.sleep``) until ``until``.

    ``time.sleep`` is replaced with a fake that records the requested duration and moves
    pendulum's test clock forward by that amount, so the loop runs without real waiting.
    """
    start = to_timezone(create_pendulum_time(year=2019, month=2, day=28, tz="UTC"), "US/Central")
    deadline = start.add(seconds=65)
    sleeps = []

    def fake_sleep(s):
        sleeps.append(s)
        pendulum.set_test_now(pendulum.now().add(seconds=s))

    monkeypatch.setattr(time, "sleep", fake_sleep)

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(start):
            # 60 second custom interval
            sensor = external_repo.get_external_sensor("custom_interval_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            # The first evaluation creates a tick.
            evaluate_sensors(instance, grpc_server_registry)
            assert len(instance.get_job_ticks(origin_id)) == 1

            # Evaluating again without advancing time must not add another tick.
            evaluate_sensors(instance, grpc_server_registry)
            assert len(instance.get_job_ticks(origin_id)) == 1

            # Drain the iteration loop; it advances the clock through the patched sleep.
            list(
                execute_sensor_iteration_loop(
                    instance,
                    grpc_server_registry,
                    get_default_daemon_logger("SensorDaemon"),
                    daemon_shutdown_event=None,
                    until=deadline.timestamp(),
                )
            )

            assert pendulum.now() == start.add(seconds=65)
            assert len(instance.get_job_ticks(origin_id)) == 2
            assert sum(sleeps) == 65
def get_default_job_state(self, _instance):
    """Return a STOPPED sensor ``JobState`` for this object's external origin.

    The state carries ``SensorJobData`` with ``min_interval`` taken from
    ``self.min_interval_seconds``. ``_instance`` is not used.
    """
    from dagster.core.scheduler.job import JobState, JobStatus, SensorJobData

    origin = self.get_external_origin()
    sensor_data = SensorJobData(min_interval=self.min_interval_seconds)
    return JobState(origin, JobType.SENSOR, JobStatus.STOPPED, sensor_data)
def build_schedule(
    cls,
    schedule_name,
    cron_schedule,
    status=JobStatus.STOPPED,
):
    """Build a schedule ``JobState`` against the fake repo target, with no start timestamp.

    Args:
        schedule_name: Name passed to ``get_job_origin`` on ``cls.fake_repo_target()``.
        cron_schedule: Cron string stored in the ``ScheduleJobData``.
        status: Job status for the state; defaults to ``JobStatus.STOPPED``.
    """
    origin = cls.fake_repo_target().get_job_origin(schedule_name)
    schedule_data = ScheduleJobData(cron_schedule, start_timestamp=None)
    return JobState(origin, JobType.SCHEDULE, status, schedule_data)
def get_default_job_state(self):
    """Return a STOPPED schedule ``JobState`` for this object's external origin.

    The state carries ``ScheduleJobData`` built from ``self.cron_schedule`` with
    ``start_timestamp=None``.
    """
    from dagster.core.scheduler.job import JobState, JobStatus, ScheduleJobData

    origin = self.get_external_origin()
    schedule_data = ScheduleJobData(self.cron_schedule, start_timestamp=None)
    return JobState(origin, JobType.SCHEDULE, JobStatus.STOPPED, schedule_data)
def test_simple_sensor(external_repo_context, capfd):
    """Drive "simple_sensor" through two sensor iterations one second apart.

    The first iteration is expected to record a SKIPPED tick with no runs; the second
    is expected to start one run and record a SUCCESS tick for it. After each iteration
    the captured stdout is compared for exact equality with the expected log text.
    """
    freeze_datetime = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59,
    ).in_tz("US/Central")
    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            # Baseline: no runs and no ticks before the first iteration.
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0], external_sensor, freeze_datetime, JobTickStatus.SKIPPED,
            )

            captured = capfd.readouterr()
            # NOTE(review): this is an exact-equality check, and the log records in the
            # literal below are space-separated in this copy of the file — confirm the
            # separators against the daemon's real output before relying on it.
            assert (
                captured.out
                == """2019-02-27 17:59:59 - SensorDaemon - INFO - Checking for new runs for the following sensors: simple_sensor 2019-02-27 17:59:59 - SensorDaemon - INFO - Sensor returned false for simple_sensor, skipping """
            )

            # Advance the frozen clock by one second for the second iteration.
            freeze_datetime = freeze_datetime.add(seconds=1)

        with pendulum.test(freeze_datetime):
            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            validate_run_started(run)
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            expected_datetime = pendulum.datetime(year=2019, month=2, day=28)
            validate_tick(
                ticks[0], external_sensor, expected_datetime, JobTickStatus.SUCCESS, [run.run_id],
            )

            captured = capfd.readouterr()
            # The run id is only known at runtime, so it is substituted into the template.
            assert (
                captured.out
                == """2019-02-27 18:00:00 - SensorDaemon - INFO - Checking for new runs for the following sensors: simple_sensor 2019-02-27 18:00:00 - SensorDaemon - INFO - Launching run for simple_sensor 2019-02-27 18:00:00 - 
SensorDaemon - INFO - Completed launch of run {run_id} for simple_sensor """.format(
                    run_id=run.run_id
                )
            )
def _create_new_schedule_state(self, instance, external_schedule):
    """Store and return a new STOPPED ``JobState`` for ``external_schedule``.

    The state is built from the schedule's external origin and cron schedule and is
    added to ``instance`` before being returned.
    """
    origin = external_schedule.get_external_origin()
    schedule_data = ScheduleJobData(external_schedule.cron_schedule)
    new_state = JobState(origin, JobType.SCHEDULE, JobStatus.STOPPED, schedule_data)
    instance.add_job_state(new_state)
    return new_state
def test_launch_once(external_repo_context, capfd):
    """A run request carrying a run key launches a run only once.

    The first sensor iteration must create one run and record a SUCCESS tick for it.
    A second iteration must leave the run count at one, record a SKIPPED tick, and log
    that the run for the ``only_once`` run key already completed.
    """
    freeze_datetime = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59,
    ).in_tz("US/Central")
    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            # Baseline: no runs and no ticks before the first iteration.
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            # NOTE(review): other tests drain this call with list(...). If
            # execute_sensor_iteration is a generator in this version it must be
            # consumed here as well — confirm against its definition.
            execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon"))
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

            # run again, to ensure the run key prevents a second launch
            execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon"))
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            validate_tick(
                ticks[0], external_sensor, freeze_datetime, JobTickStatus.SKIPPED,
            )

            captured = capfd.readouterr()
            # The message has to be looked up in the captured output: asserting the bare
            # (non-empty) string is always true and checks nothing.
            assert (
                f"Run {run.run_id} already completed with the run key `only_once` for run_key_sensor"
                in captured.out
            )
def test_wrong_config_sensor(external_repo_context, capfd):
    """A sensor requesting a run with bad config records a FAILURE tick on every iteration."""
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("wrong_config_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            # The error repeats on subsequent ticks: each iteration adds one FAILURE tick.
            for expected_tick_count in (1, 2):
                list(
                    execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon"))
                )
                assert instance.get_runs_count() == 0
                ticks = instance.get_job_ticks(origin_id)
                assert len(ticks) == expected_tick_count
                validate_tick(
                    ticks[0],
                    sensor,
                    frozen_now,
                    JobTickStatus.FAILURE,
                    [],
                    "Error in config for pipeline the_pipeline",
                )

                log_output = capfd.readouterr().out
                assert "Error in config for pipeline the_pipeline" in log_output
def test_bad_load_sensor_repository(external_repo_context, capfd):
    """A sensor state pointing at an unknown repository name logs an error and adds no tick."""
    frozen_now = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            valid_origin = external_repo.get_external_sensor("simple_sensor").get_external_origin()

            # Swap out a new repository name, keeping the location origin and job name.
            bad_repo_origin = ExternalRepositoryOrigin(
                valid_origin.external_repository_origin.repository_location_origin,
                "invalid_repo_name",
            )
            invalid_origin = ExternalJobOrigin(bad_repo_origin, valid_origin.job_name)

            instance.add_job_state(JobState(invalid_origin, JobType.SENSOR, JobStatus.RUNNING))

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(invalid_origin.get_id())) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(invalid_origin.get_id())) == 0

            log_output = capfd.readouterr().out
            assert "Sensor daemon caught an error for sensor simple_sensor" in log_output
            assert (
                "Could not find repository invalid_repo_name in location test_location to run sensor simple_sensor"
                in log_output
            )
def test_custom_interval_sensor(external_repo_context):
    """A second tick for "custom_interval_sensor" appears after 60 seconds, not after 30."""
    start = to_timezone(create_pendulum_time(year=2019, month=2, day=28, tz="UTC"), "US/Central")
    after_30s = start.add(seconds=30)
    after_60s = after_30s.add(seconds=30)

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(start):
            sensor = external_repo.get_external_sensor("custom_interval_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()
            assert len(instance.get_job_ticks(origin_id)) == 0

            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(ticks[0], sensor, start, JobTickStatus.SKIPPED)

        with pendulum.test(after_30s):
            evaluate_sensors(instance, grpc_server_registry)
            # no additional tick created after 30 seconds
            assert len(instance.get_job_ticks(origin_id)) == 1

        with pendulum.test(after_60s):
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 2

            expected_datetime = create_pendulum_time(
                year=2019, month=2, day=28, hour=0, minute=1
            )
            validate_tick(ticks[0], sensor, expected_datetime, JobTickStatus.SKIPPED)
def test_error_sensor(external_repo_context, capfd):
    """A sensor whose evaluation function errors yields a FAILURE tick and no runs."""
    frozen_now = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    evaluation_error = (
        "Error occurred during the execution of evaluation_fn for sensor error_sensor"
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("error_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(
                ticks[0], sensor, frozen_now, JobTickStatus.FAILURE, [], evaluation_error
            )

            # Both the daemon-level message and the underlying error must be logged.
            log_output = capfd.readouterr().out
            assert "Failed to resolve sensor for error_sensor : " in log_output
            assert evaluation_error in log_output
def test_launch_failure(external_repo_context, capfd):
    """A run is created and the tick is SUCCESS even though launching the run fails."""
    frozen_now = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    # Configure the instance with the ExplodingRunLauncher from dagster.core.test_utils.
    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.test_utils",
            "class": "ExplodingRunLauncher",
        },
    }

    with instance_with_sensors(external_repo_context, overrides=launcher_overrides) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("always_on_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(ticks[0], sensor, frozen_now, JobTickStatus.SUCCESS, [run.run_id])

            log_output = capfd.readouterr().out
            expected_message = "Run {run_id} created successfully but failed to launch:".format(
                run_id=run.run_id
            )
            assert expected_message in log_output
            assert "The entire purpose of this is to throw on launch" in log_output
def test_error_sensor_daemon(external_repo_context, monkeypatch):
    """Errors raised in the sensor daemon loop are recorded on its heartbeat, up to the limit.

    ``time.sleep`` is replaced with a fake that advances pendulum's test clock, so the
    daemon loop runs until ``until`` without real waiting.
    """
    start = to_timezone(create_pendulum_time(year=2019, month=2, day=28, tz="UTC"), "US/Central")
    sleeps = []

    def fake_sleep(s):
        sleeps.append(s)
        pendulum.set_test_now(pendulum.now().add(seconds=s))

    monkeypatch.setattr(time, "sleep", fake_sleep)

    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.test_utils",
            "class": "ExplodingRunLauncher",
        },
    }

    with instance_with_sensors(external_repo_context, overrides=launcher_overrides) as (
        instance,
        workspace,
        _external_repo,
    ):

        @contextmanager
        def _gen_workspace(_instance):
            # Hand the daemon the workspace that the fixture already created.
            yield workspace

        with pendulum.test(start):
            # A running sensor whose origin cannot be loaded.
            instance.add_job_state(
                JobState(_get_unloadable_sensor_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )

            daemon = SensorDaemon.create_from_instance(instance)
            shutdown_event = threading.Event()
            daemon.run_loop(
                "my_uuid",
                shutdown_event,
                _gen_workspace,
                heartbeat_interval_seconds=DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
                error_interval_seconds=DEFAULT_DAEMON_ERROR_INTERVAL_SECONDS,
                until=start.add(seconds=65),
            )

            sensor_heartbeat = instance.get_daemon_heartbeats()["SENSOR"]
            assert sensor_heartbeat
            assert sensor_heartbeat.errors
            assert len(sensor_heartbeat.errors) == DAEMON_HEARTBEAT_ERROR_LIMIT
def test_sensor_next_ticks(graphql_context):
    """``nextTick`` is only populated once a running sensor has recorded a tick."""
    location = graphql_context.get_repository_location(main_repo_location_name())
    external_repository = location.get_repository(main_repo_name())
    graphql_context.instance.reconcile_scheduler_state(external_repository)

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def query_sensor():
        return execute_dagster_graphql(
            graphql_context, GET_SENSOR_QUERY, variables={"sensorSelector": sensor_selector}
        )

    # test default sensor off
    result = query_sensor()
    assert result.data
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert not result.data["sensorOrError"]["nextTick"]

    # test default sensor with no tick
    graphql_context.instance.add_job_state(
        JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
    )
    result = query_sensor()
    assert result.data
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 0
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert not result.data["sensorOrError"]["nextTick"]

    # test default sensor with last tick
    list(
        execute_sensor_iteration(graphql_context.instance, get_default_daemon_logger("SensorDaemon"))
    )
    result = query_sensor()
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 1
    assert result.data
    assert result.data["sensorOrError"]["__typename"] == "Sensor"
    assert result.data["sensorOrError"]["nextTick"]
def execute_cursor_command(sensor_name, cli_args, print_fn):
    """Set or clear the stored cursor for the named sensor.

    Args:
        sensor_name: Name of the sensor looked up in the resolved external repository.
        cli_args: Mapping of CLI options; exactly one of ``"set"`` / ``"delete"`` must be
            truthy. ``"repository"`` selects the repository within the location.
        print_fn: Callable invoked with a one-line message describing what was done.

    Raises:
        click.UsageError: If both or neither of ``--set`` / ``--delete`` were given.
    """
    with DagsterInstance.get() as instance:
        with get_repository_location_from_kwargs(
            instance, version=dagster_version, kwargs=cli_args
        ) as repo_location:
            wants_delete = bool(cli_args.get("delete"))
            wants_set = bool(cli_args.get("set"))
            if wants_delete == wants_set:
                # must use one of delete/set
                raise click.UsageError("Must set cursor using `--set <value>` or use `--delete`")

            cursor_value = cli_args.get("set")

            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository")
            )
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)
            job_state = instance.get_job_state(external_sensor.get_external_origin_id())

            if job_state:
                # Keep the existing tick/run-key bookkeeping; only the cursor (and the
                # sensor's current minimum interval) is replaced.
                previous_data = job_state.job_specific_data
                updated_data = SensorJobData(
                    last_tick_timestamp=previous_data.last_tick_timestamp,
                    last_run_key=previous_data.last_run_key,
                    min_interval=external_sensor.min_interval_seconds,
                    cursor=cursor_value,
                )
                instance.update_job_state(job_state.with_data(updated_data))
            else:
                # No stored state yet: create one, STOPPED, carrying the cursor.
                new_data = SensorJobData(
                    min_interval=external_sensor.min_interval_seconds,
                    cursor=cursor_value,
                )
                instance.add_job_state(
                    JobState(
                        external_sensor.get_external_origin(),
                        JobType.SENSOR,
                        JobStatus.STOPPED,
                        new_data,
                    )
                )

            if not cursor_value:
                print_fn(f"Cleared cursor state for sensor {external_sensor.name}")
            else:
                print_fn(f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"')
def test_bad_load(capfd):
    """A running schedule whose origin cannot be loaded produces no runs and no ticks."""
    with schedule_instance() as instance:
        fake_origin = _get_unloadable_schedule_origin()
        start = pendulum.datetime(year=2019, month=2, day=27, hour=23, minute=59, second=59)
        one_second_later = start.add(seconds=1)
        one_day_later = one_second_later.add(days=1)

        with pendulum.test(start):
            instance.add_job_state(
                JobState(
                    fake_origin,
                    JobType.SCHEDULE,
                    JobStatus.RUNNING,
                    ScheduleJobData("0 0 * * *", pendulum.now("UTC").timestamp()),
                )
            )

        with pendulum.test(one_second_later):
            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(fake_origin.get_id())) == 0

            log_output = capfd.readouterr().out
            assert "Scheduler failed for doesnt_exist" in log_output
            assert "doesnt_exist not found at module scope" in log_output

        # A day later the schedule still cannot be loaded, so nothing is recorded.
        with pendulum.test(one_day_later):
            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(fake_origin.get_id())) == 0
def _update_sensor_state(graphene_info, sensor_selector, job_status):
    """Persist ``job_status`` for the selected sensor and return its GraphQL ``Sensor``.

    Raises:
        UserFacingGraphQLError: Wrapping ``SensorNotFoundError`` when the selected job
            is not an ``ExternalSensor``.
    """
    context = graphene_info.context
    instance = context.instance
    repository = context.get_repository_location(sensor_selector.location_name).get_repository(
        sensor_selector.repository_name
    )
    external_sensor = repository.get_external_job(sensor_selector.sensor_name)

    if not isinstance(external_sensor, ExternalSensor):
        not_found_type = graphene_info.schema.type_named("SensorNotFoundError")
        raise UserFacingGraphQLError(not_found_type(sensor_selector.sensor_name))

    stored_state = instance.get_job_state(external_sensor.get_external_origin_id())
    if stored_state:
        instance.update_job_state(stored_state.with_status(job_status))
    else:
        # No state stored yet for this sensor: create one with the requested status.
        instance.add_job_state(
            JobState(external_sensor.get_external_origin(), JobType.SENSOR, job_status)
        )

    sensor_type = graphene_info.schema.type_named("Sensor")
    return sensor_type(graphene_info, external_sensor)
def test_launch_once(external_repo_context, capfd):
    """A run request carrying a run key launches a run only once across evaluations."""
    start = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    after_30s = start.add(seconds=30)
    after_60s = after_30s.add(seconds=30)

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(start):
            sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                sensor,
                start,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

        # run again (after 30 seconds), to ensure that the run key maintains idempotence
        with pendulum.test(after_30s):
            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 2
            validate_tick(ticks[0], sensor, after_30s, JobTickStatus.SKIPPED)

            log_output = capfd.readouterr().out
            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in log_output
            )

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

        # Sensor loop still executes
        with pendulum.test(after_60s):
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(origin_id)
            assert len(ticks) == 3
            validate_tick(ticks[0], sensor, after_60s, JobTickStatus.SKIPPED)
def test_wrong_config_sensor(external_repo_context, capfd):
    """A sensor requesting a run with bad config records a FAILURE tick on every evaluation."""
    frozen_now = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59),
        "US/Central",
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            sensor = external_repo.get_external_sensor("wrong_config_sensor")
            instance.add_job_state(
                JobState(sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            origin_id = sensor.get_external_origin_id()

            assert instance.get_runs_count() == 0
            assert len(instance.get_job_ticks(origin_id)) == 0

            # The error repeats on subsequent ticks: each evaluation adds one FAILURE tick.
            for expected_tick_count in (1, 2):
                evaluate_sensors(instance, grpc_server_registry)

                assert instance.get_runs_count() == 0
                ticks = instance.get_job_ticks(origin_id)
                assert len(ticks) == expected_tick_count
                validate_tick(
                    ticks[0],
                    sensor,
                    frozen_now,
                    JobTickStatus.FAILURE,
                    [],
                    "Error in config for pipeline the_pipeline",
                )

                log_output = capfd.readouterr().out
                assert "Error in config for pipeline the_pipeline" in log_output
def test_simple_sensor(external_repo_context, capfd):
    """Drive "simple_sensor" through two sensor evaluations 30 seconds apart.

    The first evaluation is expected to record a SKIPPED tick with no runs; the second
    is expected to start one run and record a SUCCESS tick for it. After each evaluation
    the captured stdout is compared for exact equality with the expected log text.
    """
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance, grpc_server_registry, external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING))
            # Baseline: no runs and no ticks before the first evaluation.
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0], external_sensor, freeze_datetime, JobTickStatus.SKIPPED,
            )

            captured = capfd.readouterr()
            # NOTE(review): this is an exact-equality check, and the log records in the
            # literal below are space-separated in this copy of the file — confirm the
            # separators against the daemon's real output before relying on it.
            assert (
                captured.out
                == """2019-02-27 17:59:59 - SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor 2019-02-27 17:59:59 - SensorDaemon - INFO - Sensor returned false for simple_sensor, skipping """)

            # Advance the frozen clock by 30 seconds for the second evaluation.
            freeze_datetime = freeze_datetime.add(seconds=30)

        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            validate_run_started(run)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            expected_datetime = create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=29)
            validate_tick(
                ticks[0], external_sensor, expected_datetime, JobTickStatus.SUCCESS, [run.run_id],
            )

            captured = capfd.readouterr()
            # The run id is only known at runtime, so it is substituted into the template.
            assert (
                captured.out
                == """2019-02-27 18:00:29 - SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor 2019-02-27 18:00:29 - SensorDaemon - INFO - Launching run for simple_sensor 2019-02-27 18:00:29 - SensorDaemon - INFO - 
Completed launch of run {run_id} for simple_sensor """.format(run_id=run.run_id))
def reconcile_scheduler_state(self, instance, external_repository):
    """Reconcile a repository's ExternalSchedules with the schedule JobStates in storage.

    The ExternalSchedule list from ``external_repository`` is the source of truth. After
    reconciliation there should be a 1-1 correlation between ExternalSchedules and stored
    JobStates of type ``JobType.SCHEDULE``:

    * A schedule with no stored state is handed to ``_create_new_schedule_state``
      (documented as adding a new JobState with status ``JobStatus.STOPPED``).
    * A schedule with stored state keeps its status, while its origin, cron schedule and
      scheduler class name are rewritten. If it is RUNNING but has no start timestamp,
      the current UTC time is recorded as its start timestamp. It is then refreshed (if
      RUNNING) or stopped (if STOPPED) so that external artifacts, such as a cron job,
      are up to date.
    * A stored schedule state whose origin id is no longer present in the repository is
      stopped and deleted from storage.

    Args:
        instance: The instance whose job-state storage is reconciled.
        external_repository: The repository providing the ExternalSchedules.

    Raises:
        DagsterScheduleReconciliationError: If any refresh, stop, or delete operation
            raised a ``DagsterSchedulerError``. Every operation is attempted first; the
            individual errors are attached to the raised error.
    """
    schedules_to_restart = []
    # Collected while iterating so the repository's schedule list is only fetched once.
    external_schedule_origin_ids = set()

    for external_schedule in external_repository.get_external_schedules():
        origin_id = external_schedule.get_external_origin_id()
        external_schedule_origin_ids.add(origin_id)

        existing_schedule_state = instance.get_job_state(origin_id)
        if not existing_schedule_state:
            self._create_new_schedule_state(instance, external_schedule)
            continue

        # Backfill a start timestamp for running schedules that were stored without one.
        new_timestamp = existing_schedule_state.job_specific_data.start_timestamp
        if not new_timestamp and existing_schedule_state.status == JobStatus.RUNNING:
            new_timestamp = get_timestamp_from_utc_datetime(get_current_datetime_in_utc())

        # Keep the status, update target and cron schedule
        instance.update_job_state(
            JobState(
                external_schedule.get_external_origin(),
                JobType.SCHEDULE,
                existing_schedule_state.status,
                ScheduleJobData(
                    external_schedule.cron_schedule,
                    new_timestamp,
                    scheduler=self.__class__.__name__,
                ),
            )
        )
        schedules_to_restart.append((existing_schedule_state, external_schedule))

    # Delete all existing schedules that are not in external schedules
    existing_schedule_origin_ids = {
        job.job_origin_id
        for job in instance.all_stored_job_state(external_repository.get_external_origin_id())
        if job.job_type == JobType.SCHEDULE
    }
    schedule_origin_ids_to_delete = existing_schedule_origin_ids - external_schedule_origin_ids

    # Errors are accumulated rather than raised immediately so that one failing schedule
    # does not prevent the remaining schedules from being reconciled.
    schedule_reconciliation_errors = []

    for schedule_state, external_schedule in schedules_to_restart:
        try:
            # Restart is only needed if the schedule was previously running
            if schedule_state.status == JobStatus.RUNNING:
                self.refresh_schedule(instance, external_schedule)
            elif schedule_state.status == JobStatus.STOPPED:
                self.stop_schedule(instance, external_schedule.get_external_origin_id())
        except DagsterSchedulerError as e:
            schedule_reconciliation_errors.append(e)

    for schedule_origin_id in schedule_origin_ids_to_delete:
        try:
            instance.stop_schedule_and_delete_from_storage(schedule_origin_id)
        except DagsterSchedulerError as e:
            schedule_reconciliation_errors.append(e)

    if schedule_reconciliation_errors:
        raise DagsterScheduleReconciliationError(
            "One or more errors were encountered by the Scheduler while starting or stopping schedules. "
            "Individual error messages follow:",
            errors=schedule_reconciliation_errors,
        )
def test_sensor_tick_range(graphql_context):
    """Check that the sensor tick query honors its ``dayRange`` / ``dayOffset`` variables.

    Three ticks are created roughly 49, 25 and 1 hour(s) in the past, then queried with
    different day windows.
    """
    repository_location = graphql_context.get_repository_location(main_repo_location_name())
    external_repository = repository_location.get_repository(main_repo_name())
    graphql_context.instance.reconcile_scheduler_state(external_repository)

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def _fetch_ticks(day_range, day_offset):
        # Run the tick-range query and return only the list of ticks from the response.
        response = execute_dagster_graphql(
            graphql_context,
            GET_SENSOR_TICK_RANGE_QUERY,
            variables={
                "sensorSelector": sensor_selector,
                "dayRange": day_range,
                "dayOffset": day_offset,
            },
        )
        return response.data["sensorOrError"]["sensorState"]["ticks"]

    # test with no job state
    assert len(_fetch_ticks(None, None)) == 0

    # turn the sensor on
    graphql_context.instance.add_job_state(
        JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
    )

    now = pendulum.now().in_tz("US/Central")
    one = now.subtract(days=2).subtract(hours=1)
    two = now.subtract(days=1).subtract(hours=1)
    three = now.subtract(hours=1)

    # Create one tick at each frozen time, oldest first.
    for tick_time in (one, two, three):
        with pendulum.test(tick_time):
            _create_tick(graphql_context.instance)

    # No window: every tick is returned.
    assert len(_fetch_ticks(None, None)) == 3

    # One-day window ending now: only the most recent tick.
    ticks = _fetch_ticks(1, None)
    assert len(ticks) == 1
    assert ticks[0]["timestamp"] == three.timestamp()

    # One-day window shifted back by a day: only the middle tick.
    ticks = _fetch_ticks(1, 1)
    assert len(ticks) == 1
    assert ticks[0]["timestamp"] == two.timestamp()

    # Two-day window ending now: the two most recent ticks.
    assert len(_fetch_ticks(2, None)) == 2
def test_failure_before_run_created(external_repo_context, crash_location, crash_signal, capfd):
    """Crash a sensor evaluation before any run exists and check that the next one recovers.

    Three evaluations run in subprocesses: one that is skipped, one that crashes (leaving
    a STARTED tick and no run), and a final one that is expected to launch exactly one
    run and log the launch.
    """
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, hour=0, minute=0, second=1, tz="UTC"),
        "US/Central",
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):

        def _evaluate_in_subprocess(evaluation_time, crash_flags):
            # Run one sensor evaluation in a child process so that a deliberate crash
            # does not take the test process down; returns the joined process.
            process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), evaluation_time, crash_flags],
            )
            process.start()
            process.join(timeout=60)
            return process

        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )

            # create a tick
            _evaluate_in_subprocess(frozen_datetime, None)
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.SKIPPED
            # Drain captured output so later assertions only see later evaluations.
            capfd.readouterr()

            # create a starting tick, but crash
            debug_crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            launch_process = _evaluate_in_subprocess(
                frozen_datetime.add(seconds=31), debug_crash_flags
            )
            assert launch_process.exitcode != 0

            capfd.readouterr()

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == JobTickStatus.STARTED
            assert not int(ticks[0].timestamp) % 2  # skip condition for simple_sensor
            assert instance.get_runs_count() == 0

            # create another tick, but ensure that the last evaluation time used is from
            # the first, successful tick rather than the failed tick
            launch_process = _evaluate_in_subprocess(frozen_datetime.add(seconds=62), None)
            assert launch_process.exitcode == 0

            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]

            # One newline-terminated record per log call, written with explicit "\n" so
            # the exact comparison does not depend on how this file is wrapped.
            captured = capfd.readouterr()
            expected_output = (
                "2019-02-27 18:01:03 - SensorDaemon - INFO - "
                "Checking for new runs for sensor: simple_sensor\n"
                "2019-02-27 18:01:03 - SensorDaemon - INFO - "
                "Launching run for simple_sensor\n"
                "2019-02-27 18:01:03 - SensorDaemon - INFO - "
                f"Completed launch of run {run.run_id} for simple_sensor\n"
            )
            assert captured.out.replace("\r\n", "\n") == expected_output

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 3
            assert ticks[0].status == JobTickStatus.SUCCESS
def test_failure_after_run_launched(external_repo_context, crash_location, crash_signal, capfd):
    """Crash a sensor evaluation after its run was launched and check nothing is relaunched.

    The first evaluation crashes, leaving a STARTED tick and one run tagged with the
    ``only_once`` run key. The follow-up evaluation is expected to skip that run key,
    keep the run count at one, and record a SKIPPED tick.
    """
    frozen_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=28,
            hour=0,
            minute=0,
            second=0,
            tz="UTC",
        ),
        "US/Central",
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        _grpc_server_registry,
        external_repo,
    ):

        def _evaluate_in_subprocess(evaluation_time, crash_flags):
            # Evaluate the sensor once in a child process and hand back the joined process.
            child = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), evaluation_time, crash_flags],
            )
            child.start()
            child.join(timeout=60)
            return child

        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            sensor_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(sensor_state)

            # create a run, launch but crash
            crash_flags = {external_sensor.name: {crash_location: crash_signal}}
            crashed = _evaluate_in_subprocess(frozen_datetime, crash_flags)
            assert crashed.exitcode != 0

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.STARTED

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            wait_for_all_runs_to_start(instance)
            assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
            assert run.tags.get(RUN_KEY_TAG) == "only_once"

            # Discard output of the crashed evaluation before the follow-up one.
            capfd.readouterr()

            follow_up = _evaluate_in_subprocess(frozen_datetime.add(seconds=1), None)
            assert follow_up.exitcode == 0

            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]

            output = capfd.readouterr().out
            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in output
            )

            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == JobTickStatus.SKIPPED
def test_bad_schedules_mixed_with_good_schedule(external_repo_context, capfd):
    """Run the scheduler with a good, a failing, and an unloadable schedule side by side.

    On the first frozen day only the good schedule is expected to launch a run; the bad
    schedule records a FAILURE tick and the unloadable one records no tick at all. One
    day later both loadable schedules are expected to launch runs, while the unloadable
    schedule still only produces log output.
    """
    with instance_with_schedules(external_repo_context) as (instance, external_repo):
        good_schedule = external_repo.get_external_schedule("simple_schedule")
        bad_schedule = external_repo.get_external_schedule(
            "bad_should_execute_schedule_on_odd_days"
        )

        good_origin = good_schedule.get_external_origin()
        bad_origin = bad_schedule.get_external_origin()
        unloadable_origin = _get_unloadable_schedule_origin()

        initial_datetime = pendulum.datetime(
            year=2019,
            month=2,
            day=27,
            hour=0,
            minute=0,
            second=0,
        )

        with pendulum.test(initial_datetime):
            instance.start_schedule_and_update_storage_state(good_schedule)
            instance.start_schedule_and_update_storage_state(bad_schedule)

            # Store a running state for a schedule origin that cannot be loaded.
            instance.add_job_state(
                JobState(
                    unloadable_origin,
                    JobType.SCHEDULE,
                    JobStatus.RUNNING,
                    ScheduleJobData("0 0 * * *", pendulum.now("UTC").timestamp()),
                )
            )

            launch_scheduled_runs(instance, logger(), pendulum.now("UTC"))

            # Only the good schedule launches a run on the first day.
            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=initial_datetime,
                partition_time=pendulum.datetime(2019, 2, 26),
            )

            good_ticks = instance.get_job_ticks(good_origin.get_id())
            assert len(good_ticks) == 1
            validate_tick(
                good_ticks[0],
                good_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                instance.get_runs()[0].run_id,
            )

            bad_ticks = instance.get_job_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 1
            failed_tick = bad_ticks[0]
            assert failed_tick.status == JobTickStatus.FAILURE
            assert (
                "Error occurred during the execution of should_execute "
                "for schedule bad_should_execute_schedule" in failed_tick.error.message
            )

            # The unloadable schedule gets no tick; its failure only shows up in the log.
            unloadable_ticks = instance.get_job_ticks(unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            output = capfd.readouterr().out
            assert "Scheduler failed for doesnt_exist" in output
            assert "doesnt_exist not found at module scope" in output

        initial_datetime = initial_datetime.add(days=1)

        with pendulum.test(initial_datetime):
            new_now = pendulum.now("UTC")
            launch_scheduled_runs(instance, logger(), new_now)

            # One earlier run plus one new run from each loadable schedule.
            assert instance.get_runs_count() == 3
            wait_for_all_runs_to_start(instance)

            good_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(good_schedule)
            )
            assert len(good_schedule_runs) == 2
            validate_run_started(
                good_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            good_ticks = instance.get_job_ticks(good_origin.get_id())
            assert len(good_ticks) == 2
            validate_tick(
                good_ticks[0],
                good_schedule,
                new_now,
                JobTickStatus.SUCCESS,
                good_schedule_runs[0].run_id,
            )

            bad_schedule_runs = instance.get_runs(
                filters=PipelineRunsFilter.for_schedule(bad_schedule)
            )
            assert len(bad_schedule_runs) == 1
            validate_run_started(
                bad_schedule_runs[0],
                execution_time=new_now,
                partition_time=pendulum.datetime(2019, 2, 27),
            )

            bad_ticks = instance.get_job_ticks(bad_origin.get_id())
            assert len(bad_ticks) == 2
            validate_tick(
                bad_ticks[0],
                bad_schedule,
                new_now,
                JobTickStatus.SUCCESS,
                bad_schedule_runs[0].run_id,
            )

            unloadable_ticks = instance.get_job_ticks(unloadable_origin.get_id())
            assert len(unloadable_ticks) == 0

            output = capfd.readouterr().out
            assert "Scheduler failed for doesnt_exist" in output
            assert "doesnt_exist not found at module scope" in output