def test_scheduler_call_target_method_with_correct_auth(self, method):
    method.side_effect = self.target_check_context_method

    default_context = base.get_context(default=True)

    auth_context.set_ctx(default_context)

    default_project_id = default_context.project_id

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'expected_project_id': default_project_id}
    )

    self.scheduler.schedule(job)

    second_context = base.get_context(default=False)

    auth_context.set_ctx(second_context)

    second_project_id = second_context.project_id

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'expected_project_id': second_project_id}
    )

    self.scheduler.schedule(job)

    self.assertNotEqual(default_project_id, second_project_id)

    for _ in range(2):
        self.assertTrue(self.queue.get())
def test_scheduler_multi_instance(self, method):
    method.side_effect = self.target_method

    second_scheduler = legacy_scheduler.LegacyScheduler(CONF.scheduler)
    second_scheduler.start()

    self.addCleanup(second_scheduler.stop, True)

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'name': 'task', 'id': '321'},
    )

    second_scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self._assert_single_item(calls, target_method_name=TARGET_METHOD_PATH)

    self.queue.get()

    method.assert_called_once_with(name='task', id='321')

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self.assertEqual(0, len(calls))
def schedule_on_action_update(action_ex, delay=0):
    """Schedules task update check.

    This method provides transactional decoupling of action update from
    task update check. It's needed in the non-locking model in order to
    avoid the 'phantom read' phenomenon when reading the state of multiple
    actions to see if a task is updated. Just starting a separate
    transaction without using the scheduler is not safe due to the
    concurrency window that we'd have in this case (the time between
    transactions), whereas the scheduler is a special component that is
    designed to be resistant to failures.

    :param action_ex: Action execution.
    :param delay: Minimum amount of time before the task update check
        should be made.
    """

    # Optimization to avoid opening a new transaction if it's not needed.
    if not action_ex.task_execution.spec.get('with-items'):
        _on_action_update(action_ex)

        return

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=delay,
        func_name=_SCHEDULED_ON_ACTION_UPDATE_PATH,
        func_args={
            'action_ex_id': action_ex.id,
            'wf_action': isinstance(action_ex, models.WorkflowExecution)
        },
        key='th_on_a_u-%s' % action_ex.task_execution_id
    )

    sched.schedule(job)
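# --- Illustrative sketch (not part of Mistral) ---------------------------
# The 'key' argument above ('th_on_a_u-<task_execution_id>') is what lets
# the scheduler collapse repeated checks for the same task. A minimal
# in-memory sketch of that keyed-deduplication idea, assuming one pending
# job per key (Mistral's real scheduler persists jobs in the database and
# its key semantics may differ):
class KeyedSchedulerSketch(object):
    def __init__(self):
        self._pending = {}  # key -> callback; at most one job per key.

    def schedule(self, key, callback):
        # Re-scheduling under an existing key replaces the pending job,
        # so N action updates produce at most one task-update check.
        self._pending[key] = callback

    def run_due(self):
        due = list(self._pending.values())

        self._pending.clear()

        for callback in due:
            callback()


_sched_sketch = KeyedSchedulerSketch()

for _ in range(3):  # Three updates of actions under the same task...
    _sched_sketch.schedule('th_on_a_u-42', lambda: None)

assert len(_sched_sketch._pending) == 1  # ...yield one pending check.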
def test_scheduler_with_factory(self, factory):
    target_method_name = 'run_something'

    factory.return_value = type(
        'something',
        (object,),
        {
            target_method_name: mock.MagicMock(
                side_effect=self.target_method
            )
        }
    )

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        target_factory_func_name=TARGET_METHOD_PATH,
        func_name=target_method_name,
        func_args={'name': 'task', 'id': '123'}
    )

    self.scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    call = self._assert_single_item(
        calls,
        target_method_name=target_method_name
    )

    self.assertIn('name', call['method_arguments'])

    self.queue.get()

    factory().run_something.assert_called_once_with(name='task', id='123')

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self.assertEqual(0, len(calls))
def _schedule_refresh_task_state(task_ex_id, delay=0):
    """Schedules task preconditions check.

    This method provides transactional decoupling of task preconditions
    check from events that can potentially satisfy those preconditions.
    It's needed in the non-locking model in order to avoid the 'phantom
    read' phenomenon when reading the state of multiple tasks to see if
    a task that depends on them can start. Just starting a separate
    transaction without using the scheduler is not safe due to the
    concurrency window that we'd have in this case (the time between
    transactions), whereas the scheduler is a special component that is
    designed to be resistant to failures.

    :param task_ex_id: Task execution ID.
    :param delay: Delay.
    """
    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=delay,
        func_name=_REFRESH_TASK_STATE_PATH,
        func_args={'task_ex_id': task_ex_id},
        key=_get_refresh_state_job_key(task_ex_id)
    )

    sched.schedule(job)
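# --- Illustrative sketch (not part of Mistral) ---------------------------
# Note that SchedulerJob is given its target as a dotted-path string
# (_REFRESH_TASK_STATE_PATH) rather than a function object, so the job can
# be serialized, persisted and executed later by any scheduler instance.
# A minimal sketch of resolving such a path, assuming a plain
# 'module.function' layout (Mistral's actual resolution helper may differ):
import importlib


def resolve_dotted_path_sketch(path):
    module_name, _, func_name = path.rpartition('.')

    return getattr(importlib.import_module(module_name), func_name)


# Usage: resolve and call a stdlib function by its dotted path.
assert resolve_dotted_path_sketch('math.sqrt')(4.0) == 2.0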
def test_schedule_called_once(self, method):
    # Delegate from the module function to the method of the test class.
    method.side_effect = self.target_method

    job = scheduler_base.SchedulerJob(
        run_after=1,
        func_name=TARGET_METHOD_PATH,
        func_args={'name': 'task', 'id': '321'}
    )

    self.scheduler.schedule(job)

    self._wait_target_method_start()

    # Check that the persistent job has been created and captured.
    scheduled_jobs = db_api.get_scheduled_jobs()

    self.assertEqual(1, len(scheduled_jobs))

    captured_at = scheduled_jobs[0].captured_at

    self.assertIsNotNone(captured_at)
    self.assertTrue(
        datetime.datetime.utcnow() - captured_at <
        datetime.timedelta(seconds=3)
    )

    self._unlock_target_method()
    self._wait_target_method_end()

    method.assert_called_once_with(name='task', id='321')

    # After the job is processed, the persistent object must be deleted.
    self._await(lambda: not db_api.get_scheduled_jobs())
def test_scheduler_without_factory(self, method):
    method.side_effect = self.target_method

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'name': 'task', 'id': '321'},
        key='my_job_key'
    )

    self.scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    call = self._assert_single_item(
        calls,
        target_method_name=TARGET_METHOD_PATH,
        key='my_job_key'
    )

    self.assertIn('name', call['method_arguments'])

    self.queue.get()

    method.assert_called_once_with(name='task', id='321')

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self.assertEqual(0, len(calls))
def before_task_start(self, task):
    super(WaitBeforePolicy, self).before_task_start(task)

    # No need to wait for a task if delay is 0.
    if self.delay == 0:
        return

    ctx_key = 'wait_before_policy'

    policy_ctx = task.get_policy_context(ctx_key)

    if policy_ctx.get('skip'):
        # Unset state 'RUNNING_DELAYED'.
        task.set_state(states.RUNNING, None)

        return

    if task.get_state() != states.IDLE:
        policy_ctx.update({'skip': True})

        task.set_state(
            states.RUNNING_DELAYED,
            "Delayed by 'wait-before' policy [delay=%s]" % self.delay
        )

        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(
            run_after=self.delay,
            func_name=_CONTINUE_TASK_PATH,
            func_args={'task_ex_id': task.get_id()}
        )

        sched.schedule(job)
def before_task_start(self, task_ex, task_spec):
    super(TimeoutPolicy, self).before_task_start(task_ex, task_spec)

    # No timeout if delay is 0.
    if self.delay == 0:
        return

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=self.delay,
        func_name=_FAIL_IF_INCOMPLETE_TASK_PATH,
        func_args={
            'task_ex_id': task_ex.id,
            'timeout': self.delay
        }
    )

    sched.schedule(job)

    wf_trace.info(
        task_ex,
        "Timeout check scheduled [task=%s, timeout(s)=%s]." %
        (task_ex.id, self.delay)
    )
def test_scheduler_doesnt_handle_calls_the_failed_on_update(
        self, update_delayed_call):
    def update_call_failed(id, values, query_filter):
        self.queue.put("item")

        return None, 0

    update_delayed_call.side_effect = update_call_failed

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'name': 'task', 'id': '321'},
    )

    self.scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self.queue.get()

    eventlet.sleep(1)

    update_delayed_call.assert_called_with(
        id=calls[0].id,
        values=mock.ANY,
        query_filter=mock.ANY
    )

    # If the scheduler did handle calls that failed on update,
    # DBEntityNotFoundException would be raised here.
    db_api.get_delayed_call(calls[0].id)

    db_api.delete_delayed_call(calls[0].id)
def after_task_complete(self, task_ex, task_spec):
    super(WaitAfterPolicy, self).after_task_complete(task_ex, task_spec)

    # No need to postpone a task if delay is 0.
    if self.delay == 0:
        return

    context_key = 'wait_after_policy'

    runtime_context = _ensure_context_has_key(
        task_ex.runtime_context,
        context_key
    )

    task_ex.runtime_context = runtime_context

    policy_context = runtime_context[context_key]

    if policy_context.get('skip'):
        # Skip, already processed.
        return

    policy_context.update({'skip': True})

    _log_task_delay(task_ex, self.delay)

    end_state = task_ex.state
    end_state_info = task_ex.state_info

    # TODO(rakhmerov): Policies probably need to have tasks.Task
    # interface in order to manage task state safely.
    # Set task state to 'RUNNING_DELAYED'.
    task_ex.state = states.RUNNING_DELAYED
    task_ex.state_info = (
        'Suspended by wait-after policy for %s seconds' % self.delay
    )

    # Schedule to change task state to RUNNING again.
    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=self.delay,
        func_name=_COMPLETE_TASK_PATH,
        func_args={
            'task_ex_id': task_ex.id,
            'state': end_state,
            'state_info': end_state_info
        }
    )

    sched.schedule(job)
def before_task_start(self, task_ex, task_spec):
    super(WaitBeforePolicy, self).before_task_start(task_ex, task_spec)

    # No need to wait for a task if delay is 0.
    if self.delay == 0:
        return

    context_key = 'wait_before_policy'

    runtime_context = _ensure_context_has_key(
        task_ex.runtime_context,
        context_key
    )

    task_ex.runtime_context = runtime_context

    policy_context = runtime_context[context_key]

    if policy_context.get('skip'):
        # Unset state 'RUNNING_DELAYED'.
        wf_trace.info(
            task_ex,
            "Task '%s' [%s -> %s]" %
            (task_ex.name, states.RUNNING_DELAYED, states.RUNNING)
        )

        task_ex.state = states.RUNNING

        return

    if task_ex.state != states.IDLE:
        policy_context.update({'skip': True})

        _log_task_delay(task_ex, self.delay)

        task_ex.state = states.RUNNING_DELAYED

        sched = sched_base.get_system_scheduler()

        job = sched_base.SchedulerJob(
            run_after=self.delay,
            func_name=_CONTINUE_TASK_PATH,
            func_args={'task_ex_id': task_ex.id}
        )

        sched.schedule(job)
def test_scheduler_with_custom_batch_size(self):
    self.scheduler.stop()

    number_delayed_calls = 5
    processed_calls_at_time = []

    real_delete_calls_method = \
        legacy_scheduler.LegacyScheduler.delete_calls

    @staticmethod
    def delete_calls_counter(delayed_calls):
        real_delete_calls_method(delayed_calls)

        for _ in range(len(delayed_calls)):
            self.queue.put("item")

        processed_calls_at_time.append(len(delayed_calls))

    legacy_scheduler.LegacyScheduler.delete_calls = delete_calls_counter

    # Create 5 delayed calls.
    for i in range(number_delayed_calls):
        job = sched_base.SchedulerJob(
            run_after=DELAY,
            func_name=TARGET_METHOD_PATH,
            func_args={'name': 'task', 'id': i},
        )

        self.scheduler.schedule(job)

    # Start the scheduler, which processes 2 calls at a time.
    self.override_config('batch_size', 2, 'scheduler')

    self.scheduler = legacy_scheduler.LegacyScheduler(CONF.scheduler)
    self.scheduler.start()

    # Wait until all of the calls have been processed.
    for _ in range(number_delayed_calls):
        self.queue.get()

    self.assertListEqual([1, 2, 2], sorted(processed_calls_at_time))
def _schedule_check_and_fix_integrity(wf_ex, delay=0):
    """Schedules workflow integrity check.

    :param wf_ex: Workflow execution.
    :param delay: Minimum amount of time before the check should be made.
    """
    if CONF.engine.execution_integrity_check_delay < 0:
        # Never check integrity if it's a negative value.
        return

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=delay,
        func_name=_CHECK_AND_FIX_INTEGRITY_PATH,
        func_args={'wf_ex_id': wf_ex.id},
        key=_get_integrity_check_key(wf_ex)
    )

    sched.schedule(job)
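# --- Illustrative sketch (not part of Mistral) ---------------------------
# Two rules bound the cost of integrity checks: a negative
# execution_integrity_check_delay disables them entirely (the guard above),
# and the per-workflow job key keeps at most one check pending per workflow
# execution. A tiny sketch of both rules; the key format here is
# hypothetical (the real one lives in _get_integrity_check_key):
def integrity_check_enabled_sketch(configured_delay):
    # Mirrors the guard: any negative value means "never check".
    return configured_delay >= 0


def integrity_check_key_sketch(wf_ex_id):
    # Hypothetical key; one pending integrity check per workflow execution.
    return 'wf_integrity_check-%s' % wf_ex_id


assert not integrity_check_enabled_sketch(-1)  # disabled
assert integrity_check_enabled_sketch(0)       # enabled
assert integrity_check_key_sketch('abc') == 'wf_integrity_check-abc'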
def test_scheduler_with_serializer(self, factory):
    target_method_name = 'run_something'

    factory.return_value = type(
        'something',
        (object,),
        {
            target_method_name: mock.MagicMock(
                side_effect=self.target_method
            )
        }
    )

    task_result = ml_actions.Result('data', 'error')

    method_args = {
        'name': 'task',
        'id': '123',
        'result': task_result
    }

    serializers = {
        'result': 'mistral.workflow.utils.ResultSerializer'
    }

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        target_factory_func_name=TARGET_METHOD_PATH,
        func_name=target_method_name,
        func_args=method_args,
        func_arg_serializers=serializers
    )

    self.scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    call = self._assert_single_item(
        calls,
        target_method_name=target_method_name
    )

    self.assertIn('name', call['method_arguments'])

    self.queue.get()

    result = factory().run_something.call_args[1].get('result')

    self.assertIsInstance(result, ml_actions.Result)
    self.assertEqual('data', result.data)
    self.assertEqual('error', result.error)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self.assertEqual(0, len(calls))
def after_task_complete(self, task):
    super(WaitAfterPolicy, self).after_task_complete(task)

    # No need to postpone a task if delay is 0.
    if self.delay == 0:
        return

    ctx_key = 'wait_after_policy'

    policy_ctx = task.get_policy_context(ctx_key)

    if policy_ctx.get('skip'):
        # Skip, already processed.
        return

    policy_ctx.update({'skip': True})

    end_state = task.get_state()
    end_state_info = task.get_state_info()

    # Set task state to 'RUNNING_DELAYED'.
    task.set_state(
        states.RUNNING_DELAYED,
        "Delayed by 'wait-after' policy [delay=%s]" % self.delay
    )

    # Schedule to change task state to RUNNING again.
    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=self.delay,
        func_name=_COMPLETE_TASK_PATH,
        func_args={
            'task_ex_id': task.get_id(),
            'state': end_state,
            'state_info': end_state_info
        }
    )

    sched.schedule(job)
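# --- Illustrative sketch (not part of Mistral) ---------------------------
# The policy above captures the task's real completion state, parks the
# task in RUNNING_DELAYED, and hands the captured state to a delayed job
# that completes the task later. A standalone sketch of that save/restore
# shape, using a plain dict instead of Mistral's Task API:
def park_and_restore_sketch(task, delay, schedule):
    end_state = task['state']
    end_state_info = task['state_info']

    task['state'] = 'RUNNING_DELAYED'
    task['state_info'] = 'Suspended for %s seconds' % delay

    def _complete():
        # The delayed callback re-applies the saved completion state.
        task['state'] = end_state
        task['state_info'] = end_state_info

    schedule(delay, _complete)


# Usage with an "immediate" scheduler stand-in that runs the job inline.
_task = {'state': 'SUCCESS', 'state_info': None}

park_and_restore_sketch(_task, 5, lambda delay, callback: callback())

assert _task['state'] == 'SUCCESS'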
def test_scheduler_delete_calls(self, method):
    method.side_effect = self.target_method

    job = sched_base.SchedulerJob(
        run_after=DELAY,
        func_name=TARGET_METHOD_PATH,
        func_args={'name': 'task', 'id': '321'},
    )

    self.scheduler.schedule(job)

    calls = db_api.get_delayed_calls_to_start(get_time_delay())

    self._assert_single_item(calls, target_method_name=TARGET_METHOD_PATH)

    self.queue.get()

    eventlet.sleep(0.1)

    self.assertRaises(
        exc.DBEntityNotFoundError,
        db_api.get_delayed_call,
        calls[0].id
    )
def after_task_complete(self, task_ex, task_spec):
    """Possible cases:

    1. state = SUCCESS
       If continue_on is not specified, there is no need to move to the
       next iteration. If current:count reaches retry:count, the policy
       breaks the loop (regardless of the continue-on condition);
       otherwise, check the continue-on condition and, if it is True,
       schedule the next iteration, otherwise the policy breaks the loop.
    2. retry:count = 5, current:count = 2, state = ERROR,
       state = IDLE/DELAYED, current:count = 3
    3. retry:count = 5, current:count = 4, state = ERROR
       Iterations complete, therefore state = #{state},
       current:count = 4.
    """
    super(RetryPolicy, self).after_task_complete(task_ex, task_spec)

    # There is nothing to repeat.
    if self.count == 0:
        return

    # TODO(m4dcoder): If the task_ex.action_executions and
    # task_ex.workflow_executions collection are not called,
    # then the retry_no in the runtime_context of the task_ex will not
    # be updated accurately. To be exact, the retry_no will be one
    # iteration behind.
    ex = task_ex.executions  # noqa

    context_key = 'retry_task_policy'

    runtime_context = _ensure_context_has_key(
        task_ex.runtime_context,
        context_key
    )

    wf_ex = task_ex.workflow_execution

    ctx_view = data_flow.ContextView(
        data_flow.get_current_task_dict(task_ex),
        data_flow.evaluate_task_outbound_context(task_ex),
        wf_ex.context,
        wf_ex.input
    )

    continue_on_evaluation = expressions.evaluate(
        self._continue_on_clause,
        ctx_view
    )

    break_on_evaluation = expressions.evaluate(
        self._break_on_clause,
        ctx_view
    )

    task_ex.runtime_context = runtime_context

    state = task_ex.state

    if not states.is_completed(state) or states.is_cancelled(state):
        return

    policy_context = runtime_context[context_key]

    retry_no = 0

    if 'retry_no' in policy_context:
        retry_no = policy_context['retry_no']

        del policy_context['retry_no']

    retries_remain = retry_no < self.count

    stop_continue_flag = (
        task_ex.state == states.SUCCESS and
        not self._continue_on_clause
    )
    stop_continue_flag = (
        stop_continue_flag or
        (self._continue_on_clause and not continue_on_evaluation)
    )

    break_triggered = (
        task_ex.state == states.ERROR and
        break_on_evaluation
    )

    if not retries_remain or break_triggered or stop_continue_flag:
        return

    data_flow.invalidate_task_execution_result(task_ex)

    policy_context['retry_no'] = retry_no + 1
    runtime_context[context_key] = policy_context

    # NOTE(vgvoleg): join tasks in direct workflows can't be
    # retried as-is, because these tasks can't start without
    # a correct logical state.
    if hasattr(task_spec, "get_join") and task_spec.get_join():
        from mistral.engine import task_handler as t_h

        _log_task_delay(task_ex, self.delay, states.WAITING)

        task_ex.state = states.WAITING

        t_h._schedule_refresh_task_state(task_ex.id, self.delay)

        return

    _log_task_delay(task_ex, self.delay)

    task_ex.state = states.RUNNING_DELAYED

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=self.delay,
        func_name=_CONTINUE_TASK_PATH,
        func_args={'task_ex_id': task_ex.id}
    )

    sched.schedule(job)
def after_task_complete(self, task):
    """Possible cases:

    1. state = SUCCESS
       If continue_on is not specified, there is no need to move to the
       next iteration. If current:count reaches retry:count, the policy
       breaks the loop (regardless of the continue-on condition);
       otherwise, check the continue-on condition and, if it is True,
       schedule the next iteration, otherwise the policy breaks the loop.
    2. retry:count = 5, current:count = 2, state = ERROR,
       state = IDLE/DELAYED, current:count = 3
    3. retry:count = 5, current:count = 4, state = ERROR
       Iterations complete, therefore state = #{state},
       current:count = 4.
    """
    super(RetryPolicy, self).after_task_complete(task)

    # There is nothing to repeat.
    if self.count == 0:
        return

    # TODO(m4dcoder): If the task_ex.action_executions and
    # task_ex.workflow_executions collection are not called,
    # then the retry_no in the runtime_context of the task_ex will not
    # be updated accurately. To be exact, the retry_no will be one
    # iteration behind.
    ex = task.task_ex.executions  # noqa

    ctx_key = 'retry_task_policy'

    expr_ctx = task.get_expression_context(
        ctx=data_flow.evaluate_task_outbound_context(task.task_ex)
    )

    continue_on_evaluation = expressions.evaluate(
        self._continue_on_clause,
        expr_ctx
    )

    break_on_evaluation = expressions.evaluate(
        self._break_on_clause,
        expr_ctx
    )

    state = task.get_state()

    if not states.is_completed(state) or states.is_cancelled(state):
        return

    policy_ctx = task.get_policy_context(ctx_key)

    retry_no = 0

    if 'retry_no' in policy_ctx:
        retry_no = policy_ctx['retry_no']

        del policy_ctx['retry_no']

    retries_remain = retry_no < self.count

    stop_continue_flag = (
        task.get_state() == states.SUCCESS and
        not self._continue_on_clause
    )
    stop_continue_flag = (
        stop_continue_flag or
        (self._continue_on_clause and not continue_on_evaluation)
    )

    break_triggered = (
        task.get_state() == states.ERROR and
        break_on_evaluation
    )

    if not retries_remain or break_triggered or stop_continue_flag:
        return

    task.invalidate_result()

    policy_ctx['retry_no'] = retry_no + 1

    task.touch_runtime_context()

    # NOTE(vgvoleg): join tasks in direct workflows can't be
    # retried as-is, because these tasks can't start without
    # a correct logical state.
    if hasattr(task.task_spec, "get_join") and task.task_spec.get_join():
        # TODO(rakhmerov): This is an example of broken encapsulation.
        # The control over such operations should belong to the class
        # Task. If it's done, from the outside of the class there will
        # be just one visible operation "continue_task()" or something
        # like that.
        from mistral.engine import task_handler as t_h

        task.set_state(
            states.WAITING,
            "Delayed by 'retry' policy [delay=%s]" % self.delay
        )

        t_h._schedule_refresh_task_state(task.get_id(), self.delay)

        return

    task.set_state(
        states.RUNNING_DELAYED,
        "Delayed by 'retry' policy [delay=%s]" % self.delay
    )

    sched = sched_base.get_system_scheduler()

    job = sched_base.SchedulerJob(
        run_after=self.delay,
        func_name=_CONTINUE_TASK_PATH,
        func_args={'task_ex_id': task.get_id()}
    )

    sched.schedule(job)
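# --- Illustrative sketch (not part of Mistral) ---------------------------
# The retry decision above reduces to three flags: retries_remain,
# stop_continue_flag and break_triggered. A pure-function sketch of the
# same rule, with states and clause results as plain Python stand-ins,
# handy for checking the docstring's cases by hand:
def should_retry_sketch(state, retry_no, count, continue_on_clause=None,
                        continue_on_result=False, break_on_result=False):
    retries_remain = retry_no < count

    # SUCCESS with no continue-on clause: the loop is over.
    stop_continue = state == 'SUCCESS' and not continue_on_clause

    # A continue-on clause that evaluates falsy also stops the loop.
    stop_continue = stop_continue or (
        continue_on_clause and not continue_on_result
    )

    # ERROR with a truthy break-on evaluation breaks the loop.
    break_triggered = state == 'ERROR' and break_on_result

    return bool(retries_remain and not break_triggered and not stop_continue)


# ERROR with retries left and no break-on clause -> retry.
assert should_retry_sketch('ERROR', retry_no=2, count=5)

# Retry budget exhausted -> stop.
assert not should_retry_sketch('ERROR', retry_no=5, count=5)

# SUCCESS without continue-on -> stop; with a truthy continue-on -> retry.
assert not should_retry_sketch('SUCCESS', retry_no=0, count=5)
assert should_retry_sketch(
    'SUCCESS',
    retry_no=0,
    count=5,
    continue_on_clause='expr',
    continue_on_result=True
)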