def get_current_instigator_state(self, stored_state: Optional["InstigatorState"]):
    """Resolve the effective instigator state for this schedule.

    Combines the state stored in the database (if any) with the schedule's
    ``default_status`` declared in code.

    Args:
        stored_state: The persisted state for this schedule, or None if
            nothing has been stored.

    Returns:
        ``stored_state`` when it should be honored, otherwise a freshly built
        ``InstigatorState`` reflecting the default status.
    """
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        ScheduleInstigatorData,
    )

    if self.default_status == DefaultScheduleStatus.RUNNING:
        # Any stored row wins over the code default.
        if stored_state:
            return stored_state
        fallback_status = InstigatorStatus.AUTOMATICALLY_RUNNING
    else:
        # Ignore AUTOMATICALLY_RUNNING states in the DB if the default status
        # isn't DefaultScheduleStatus.RUNNING - this would indicate that the
        # schedule's default has been changed in code but there's still a
        # lingering AUTOMATICALLY_RUNNING row in the database that can be ignored
        if stored_state and stored_state.status != InstigatorStatus.AUTOMATICALLY_RUNNING:
            return stored_state
        fallback_status = InstigatorStatus.STOPPED

    return InstigatorState(
        self.get_external_origin(),
        InstigatorType.SCHEDULE,
        fallback_status,
        ScheduleInstigatorData(self.cron_schedule, start_timestamp=None),
    )
def test_unloadable_schedule(graphql_context):
    """Only the running unloadable schedule is reported, and it can be stopped."""
    instance = graphql_context.instance
    frozen_time = create_pendulum_time(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    )

    running_origin = _get_unloadable_schedule_origin("unloadable_running")
    # Built before time is frozen below, so this timestamp is the real "now".
    running_state = InstigatorState(
        running_origin,
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        ScheduleInstigatorData("0 0 * * *", pendulum.now("UTC").timestamp()),
    )

    stopped_origin = _get_unloadable_schedule_origin("unloadable_stopped")

    with pendulum.test(frozen_time):
        instance.add_instigator_state(running_state)

        stopped_state = InstigatorState(
            stopped_origin,
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData("0 0 * * *", pendulum.now("UTC").timestamp()),
        )
        instance.add_instigator_state(stopped_state)

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    unloadable = result.data["unloadableInstigationStatesOrError"]["results"]
    assert len(unloadable) == 1
    assert unloadable[0]["name"] == "unloadable_running"

    # Verify that we can stop the unloadable schedule
    stop_variables = {
        "scheduleOriginId": running_state.instigator_origin_id,
        "scheduleSelectorId": running_state.selector_id,
    }
    stop_result = execute_dagster_graphql(
        graphql_context,
        STOP_SCHEDULES_QUERY,
        variables=stop_variables,
    )
    reported_status = stop_result.data["stopRunningSchedule"]["scheduleState"]["status"]
    assert reported_status == InstigatorStatus.STOPPED.value
def test_update_schedule(self, storage):
    """Updating a stored schedule state replaces its status and data in place.

    Adds a schedule state, updates it to RUNNING with a start timestamp, then
    updates it to STOPPED without one, checking after each update that storage
    holds exactly one row with the expected status and timestamp.
    """
    assert storage

    schedule = self.build_schedule("my_schedule", "* * * * *")
    storage.add_instigator_state(schedule)

    now_time = get_current_datetime_in_utc().timestamp()

    new_schedule = schedule.with_status(InstigatorStatus.RUNNING).with_data(
        ScheduleInstigatorData(
            cron_schedule=schedule.instigator_data.cron_schedule,
            start_timestamp=now_time,
        )
    )
    storage.update_instigator_state(new_schedule)

    schedules = storage.all_instigator_state(
        self.fake_repo_target().get_id(),
        self.fake_repo_target().get_selector_id(),
        InstigatorType.SCHEDULE,
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.instigator_name == "my_schedule"
    assert schedule.status == InstigatorStatus.RUNNING
    assert schedule.instigator_data.start_timestamp == now_time

    # Stopping rebuilds the data without a start timestamp.
    stopped_schedule = schedule.with_status(InstigatorStatus.STOPPED).with_data(
        ScheduleInstigatorData(schedule.instigator_data.cron_schedule)
    )
    storage.update_instigator_state(stopped_schedule)

    schedules = storage.all_instigator_state(
        self.fake_repo_target().get_id(),
        self.fake_repo_target().get_selector_id(),
        InstigatorType.SCHEDULE,
    )
    assert len(schedules) == 1

    schedule = schedules[0]
    assert schedule.instigator_name == "my_schedule"
    assert schedule.status == InstigatorStatus.STOPPED
    # Identity check: None is a singleton and `==` can be fooled by custom __eq__.
    assert schedule.instigator_data.start_timestamp is None
def _create_new_schedule_state(self, instance, external_schedule):
    """Build a STOPPED state for ``external_schedule``, store it, and return it.

    Args:
        instance: The instance the new state is added to via ``add_job_state``.
        external_schedule: Supplies the origin and cron schedule for the state.

    Returns:
        The newly created ``InstigatorState``.
    """
    origin = external_schedule.get_external_origin()
    instigator_data = ScheduleInstigatorData(external_schedule.cron_schedule)
    new_state = InstigatorState(
        origin,
        InstigatorType.SCHEDULE,
        InstigatorStatus.STOPPED,
        instigator_data,
    )
    instance.add_job_state(new_state)
    return new_state
def test_get_unloadable_job(graphql_context):
    """Of a running and a stopped unloadable schedule, only the running one is listed."""
    instance = graphql_context.instance
    frozen_time = create_pendulum_time(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    )

    with pendulum.test(frozen_time):
        running_state = InstigatorState(
            _get_unloadable_schedule_origin("unloadable_running"),
            InstigatorType.SCHEDULE,
            InstigatorStatus.RUNNING,
            ScheduleInstigatorData("0 0 * * *", pendulum.now("UTC").timestamp()),
        )
        instance.add_job_state(running_state)

        stopped_state = InstigatorState(
            _get_unloadable_schedule_origin("unloadable_stopped"),
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData("0 0 * * *", pendulum.now("UTC").timestamp()),
        )
        instance.add_job_state(stopped_state)

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    unloadable = result.data["unloadableInstigationStatesOrError"]["results"]
    assert len(unloadable) == 1
    assert unloadable[0]["name"] == "unloadable_running"
def stop_schedule(self, instance, schedule_origin_id, schedule_selector_id, external_schedule):
    """
    Updates the status of the given schedule to `InstigatorStatus.STOPPED` in schedule
    storage.

    This should not be overridden by subclasses.

    Args:
        instance: The instance whose instigator state is read and written.
        schedule_origin_id (string): The id of the schedule target to stop running.
        schedule_selector_id: Selector id passed with the origin id to look up the
            stored state.
        external_schedule (Optional[ExternalSchedule]): The schedule to stop, or None
            when only the stored state is available.

    Returns:
        The STOPPED InstigatorState that was added to, or updated in, storage.

    Raises:
        DagsterSchedulerError: If the schedule is already stopped.
    """
    check.str_param(schedule_origin_id, "schedule_origin_id")
    check.opt_inst_param(external_schedule, "external_schedule", ExternalSchedule)

    schedule_state = instance.get_instigator_state(schedule_origin_id, schedule_selector_id)

    if (
        external_schedule
        and not external_schedule.get_current_instigator_state(schedule_state).is_running
    ) or (schedule_state and not schedule_state.is_running):
        # external_schedule is optional; when it is None this branch can only have
        # been reached through the stored state, so take the name from there rather
        # than dereferencing None.
        schedule_name = (
            external_schedule.name
            if external_schedule is not None
            else schedule_state.instigator_name
        )
        raise DagsterSchedulerError(
            "You have attempted to stop schedule {name}, but it is already stopped".format(
                name=schedule_name
            )
        )

    if not schedule_state:
        # Nothing stored yet: the state has to be built from the schedule itself.
        assert external_schedule, "external_schedule is required when no state is stored"
        stopped_schedule = InstigatorState(
            external_schedule.get_external_origin(),
            InstigatorType.SCHEDULE,
            InstigatorStatus.STOPPED,
            ScheduleInstigatorData(external_schedule.cron_schedule),
        )
        instance.add_instigator_state(stopped_schedule)
    else:
        # Rebuild the data with only the cron schedule (no start timestamp is passed).
        stopped_schedule = schedule_state.with_status(InstigatorStatus.STOPPED).with_data(
            ScheduleInstigatorData(cron_schedule=schedule_state.instigator_data.cron_schedule)
        )
        instance.update_instigator_state(stopped_schedule)

    return stopped_schedule
def build_schedule(
    cls,
    schedule_name,
    cron_schedule,
    status=InstigatorStatus.STOPPED,
):
    """Create a schedule ``InstigatorState`` against the fake repo target.

    Args:
        schedule_name: Name used to derive the instigator origin.
        cron_schedule: Cron string stored in the schedule data.
        status: Status for the state; defaults to ``InstigatorStatus.STOPPED``.

    Returns:
        An ``InstigatorState`` with no start timestamp.
    """
    origin = cls.fake_repo_target().get_instigator_origin(schedule_name)
    schedule_data = ScheduleInstigatorData(cron_schedule, start_timestamp=None)
    return InstigatorState(origin, InstigatorType.SCHEDULE, status, schedule_data)
def get_default_instigation_state(self):
    """Return a STOPPED ``InstigatorState`` for this schedule with no start timestamp."""
    from dagster.core.scheduler.instigation import (
        InstigatorState,
        InstigatorStatus,
        ScheduleInstigatorData,
    )

    origin = self.get_external_origin()
    default_data = ScheduleInstigatorData(self.cron_schedule, start_timestamp=None)
    return InstigatorState(
        origin,
        InstigatorType.SCHEDULE,
        InstigatorStatus.STOPPED,
        default_data,
    )
def start_schedule(self, instance, external_schedule):
    """
    Mark the given schedule as `InstigatorStatus.RUNNING` in schedule storage.

    This should not be overridden by subclasses.

    Args:
        instance (DagsterInstance): The current instance.
        external_schedule (ExternalSchedule): The schedule to start

    Returns:
        The RUNNING InstigatorState that was added to, or updated in, storage.

    Raises:
        DagsterSchedulerError: If the schedule is already running.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_schedule, "external_schedule", ExternalSchedule)

    stored_state = instance.get_instigator_state(
        external_schedule.get_external_origin_id(),
        external_schedule.selector_id,
    )

    effective_state = external_schedule.get_current_instigator_state(stored_state)
    if effective_state.is_running:
        raise DagsterSchedulerError(
            "You have attempted to start schedule {name}, but it is already running".format(
                name=external_schedule.name
            )
        )

    # The start timestamp is captured once and shared by both branches below.
    running_data = ScheduleInstigatorData(
        external_schedule.cron_schedule,
        get_current_datetime_in_utc().timestamp(),
    )

    if stored_state:
        started_schedule = stored_state.with_status(InstigatorStatus.RUNNING).with_data(
            running_data
        )
        instance.update_instigator_state(started_schedule)
        return started_schedule

    started_schedule = InstigatorState(
        external_schedule.get_external_origin(),
        InstigatorType.SCHEDULE,
        InstigatorStatus.RUNNING,
        running_data,
    )
    instance.add_instigator_state(started_schedule)
    return started_schedule
def start_schedule_and_update_storage_state(self, instance, external_schedule):
    """
    Start the schedule via `start_schedule`, then record it as
    `InstigatorStatus.RUNNING` in schedule storage.

    This should not be overridden by subclasses.

    Args:
        instance (DagsterInstance): The current instance.
        external_schedule (ExternalSchedule): The schedule to start

    Returns:
        The RUNNING state written to storage.

    Raises:
        DagsterSchedulerError: If the stored state is already RUNNING.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(external_schedule, "external_schedule", ExternalSchedule)

    current_state = instance.get_job_state(external_schedule.get_external_origin_id())
    if not current_state:
        # No row yet: create (and persist) a stopped one to transition from.
        current_state = self._create_new_schedule_state(instance, external_schedule)

    if current_state.status == InstigatorStatus.RUNNING:
        raise DagsterSchedulerError(
            "You have attempted to start schedule {name}, but it is already running".format(
                name=external_schedule.name
            )
        )

    self.start_schedule(instance, external_schedule)

    running_data = ScheduleInstigatorData(
        external_schedule.cron_schedule,
        get_current_datetime_in_utc().timestamp(),
    )
    started_schedule = current_state.with_status(InstigatorStatus.RUNNING).with_data(
        running_data
    )
    instance.update_job_state(started_schedule)
    return started_schedule
def stop_schedule_and_update_storage_state(self, instance, schedule_origin_id):
    """
    Stop the schedule via `stop_schedule`, then record it as
    `InstigatorStatus.STOPPED` in schedule storage.

    This should not be overridden by subclasses.

    Args:
        instance: The instance whose stored job state is updated.
        schedule_origin_id (string): The id of the schedule target to stop running.

    Returns:
        The STOPPED state written to storage.
    """
    check.str_param(schedule_origin_id, "schedule_origin_id")

    current_state = self._get_schedule_state(instance, schedule_origin_id)

    self.stop_schedule(instance, schedule_origin_id)

    # Keep only the cron schedule; no start timestamp is passed for a stopped schedule.
    stopped_data = ScheduleInstigatorData(
        cron_schedule=current_state.job_specific_data.cron_schedule,
    )
    stopped_schedule = current_state.with_status(InstigatorStatus.STOPPED).with_data(
        stopped_data
    )
    instance.update_job_state(stopped_schedule)
    return stopped_schedule
def launch_scheduled_runs(
    instance,
    workspace,
    logger,
    end_datetime_utc,
    max_catchup_runs=DEFAULT_MAX_CATCHUP_RUNS,
    max_tick_retries=0,
    debug_crash_flags=None,
    log_verbose_checks=True,
):
    """Generator that launches runs for every currently running schedule.

    Steps, in order:
      1. Collect the schedules in the workspace whose current instigator state
         is running.
      2. Delete stored AUTOMATICALLY_RUNNING states whose schedule was not
         collected in step 1.
      3. When ``log_verbose_checks`` is set, warn about stored RUNNING states
         whose schedule was not collected in step 1.
      4. For each collected schedule, delegate to
         ``launch_scheduled_runs_for_schedule``.

    Args:
        instance (DagsterInstance): Instance providing schedule storage.
        workspace (IWorkspace): Workspace whose snapshot is scanned for schedules.
        logger: Logger used for debug/info/warning/error output.
        end_datetime_utc: Passed through to the per-schedule launcher; its
            timestamp also seeds newly created AUTOMATICALLY_RUNNING states.
        max_catchup_runs: Passed through to the per-schedule launcher.
        max_tick_retries: Passed through to the per-schedule launcher.
        debug_crash_flags: Optional mapping looked up by instigator name; the
            matching entry (or None) is passed to the per-schedule launcher.
        log_verbose_checks: Enables the informational and warning log output.

    Yields:
        A single None when there are no running schedules. Otherwise, for each
        schedule, whatever the per-schedule launcher yields, followed by either
        None or the serialized error caught while processing that schedule.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(workspace, "workspace", IWorkspace)

    workspace_snapshot = {
        entry.origin: entry for entry in workspace.get_workspace_snapshot().values()
    }

    all_schedule_states = {
        stored.origin.get_id(): stored
        for stored in instance.all_instigator_state(instigator_type=InstigatorType.SCHEDULE)
    }

    # origin id -> external schedule, for every schedule that should be ticking
    schedules = {}
    for location_entry in workspace_snapshot.values():
        repo_location = location_entry.repository_location
        if not repo_location:
            if location_entry.load_error and log_verbose_checks:
                logger.warning(
                    f"Could not load location {location_entry.origin.location_name} to check for schedules due to the following error: {location_entry.load_error}"
                )
            continue

        for repo in repo_location.get_repositories().values():
            for schedule in repo.get_external_schedules():
                origin_id = schedule.get_external_origin().get_id()
                effective_state = schedule.get_current_instigator_state(
                    all_schedule_states.get(origin_id)
                )
                if effective_state.is_running:
                    schedules[origin_id] = schedule

    # Remove any schedule states that were previously created with AUTOMATICALLY_RUNNING
    # and can no longer be found in the workspace (so that if they are later added
    # back again, their timestamps will start at the correct place)
    states_to_delete = {
        stored
        for stored_origin_id, stored in all_schedule_states.items()
        if stored_origin_id not in schedules
        and stored.status == InstigatorStatus.AUTOMATICALLY_RUNNING
    }
    for stale_state in states_to_delete:
        instance.schedule_storage.delete_instigator_state(
            stale_state.instigator_origin_id, stale_state.selector_id
        )

    if log_verbose_checks:
        for stored_origin_id, schedule_state in all_schedule_states.items():
            # Only explicitly started schedules that were not collected above are reported.
            if stored_origin_id in schedules:
                continue
            if schedule_state.status != InstigatorStatus.RUNNING:
                continue

            schedule_name = schedule_state.origin.instigator_name
            repository_origin = schedule_state.origin.external_repository_origin
            repo_location_origin = repository_origin.repository_location_origin
            repo_location_name = repo_location_origin.location_name
            repo_name = repository_origin.repository_name

            location_missing = (
                repo_location_origin not in workspace_snapshot
                or not workspace_snapshot[repo_location_origin].repository_location
            )
            if location_missing:
                logger.warning(
                    f"Schedule {schedule_name} was started from a location "
                    f"{repo_location_name} that can no longer be found in the workspace, or has "
                    "metadata that has changed since the schedule was started. You can turn off "
                    "this schedule in the Dagit UI from the Status tab."
                )
            elif not workspace_snapshot[repo_location_origin].repository_location.has_repository(
                repo_name
            ):
                logger.warning(
                    f"Could not find repository {repo_name} in location {repo_location_name} to "
                    + f"run schedule {schedule_name}. If this repository no longer exists, you can "
                    + "turn off the schedule in the Dagit UI from the Status tab.",
                )
            else:
                logger.warning(
                    f"Could not find schedule {schedule_name} in repository {repo_name}. If this "
                    "schedule no longer exists, you can turn it off in the Dagit UI from the "
                    "Status tab.",
                )

    if not schedules:
        logger.debug("Not checking for any runs since no schedules have been started.")
        yield
        return

    if log_verbose_checks:
        schedule_names = ", ".join([schedule.name for schedule in schedules.values()])
        logger.info(f"Checking for new runs for the following schedules: {schedule_names}")

    for external_schedule in schedules.values():
        error_info = None
        try:
            schedule_state = all_schedule_states.get(
                external_schedule.get_external_origin().get_id()
            )
            if not schedule_state:
                # A running schedule with no stored row can only be running by default;
                # persist a row for it so later ticks have a starting timestamp.
                assert external_schedule.default_status == DefaultScheduleStatus.RUNNING
                schedule_state = InstigatorState(
                    external_schedule.get_external_origin(),
                    InstigatorType.SCHEDULE,
                    InstigatorStatus.AUTOMATICALLY_RUNNING,
                    ScheduleInstigatorData(
                        external_schedule.cron_schedule,
                        end_datetime_utc.timestamp(),
                    ),
                )
                instance.add_instigator_state(schedule_state)

            yield from launch_scheduled_runs_for_schedule(
                instance,
                logger,
                external_schedule,
                schedule_state,
                workspace,
                end_datetime_utc,
                max_catchup_runs,
                max_tick_retries,
                (
                    debug_crash_flags.get(schedule_state.instigator_name)
                    if debug_crash_flags
                    else None
                ),
                log_verbose_checks=log_verbose_checks,
            )
        except Exception:
            # Boundary handler: one failing schedule must not stop the others.
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                f"Scheduler caught an error for schedule {external_schedule.name} : {error_info.to_string()}"
            )
        yield error_info