def history(self):
    # type: () -> Optional[History]
    """Return the stored history as a ``History`` instance.

    If ``self._history`` is already a ``History`` it is returned as is.
    Otherwise it is wrapped in a new ``History``, which is parsed before
    being returned. The wrapper is not written back to ``self._history``.
    """
    raw = self._history
    if isinstance(raw, History):
        return raw

    parsed = History(raw)
    parsed.parse()
    return parsed
def get_task(workflow_execution, task_id, details=False):
    """Describe one activity task of a workflow execution as a table.

    :param workflow_execution: object whose ``history()`` is parsed.
    :param task_id: key of the task in the parsed history's ``activities``.
    :param details: when true, add a trailing ``details`` column.
    :returns: ``(header, rows)`` where ``rows`` holds a single row.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()
    activity = parsed.activities[task_id]

    current_state = activity['state']
    header = ['type', 'id', 'name', 'version', 'state', 'timestamp',
              'input', 'result', 'reason']
    row = [
        activity['type'],
        activity['id'],
        activity['name'],
        activity['version'],
        current_state,
        # The timestamp key is derived from the task's current state.
        activity[current_state + '_timestamp'],
        activity['input'],
        activity.get('result'),  # Absent for failed tasks
        activity.get('reason'),
    ]

    if details:
        header.append('details')
        row.append(activity.get('details'))

    return header, [row]
def get_task(workflow_execution, task_id, details=False):
    """Return a ``(header, rows)`` table for a single activity task.

    The history of *workflow_execution* is parsed and the task stored under
    *task_id* in its ``activities`` mapping is rendered as one row. A
    ``details`` column is appended when *details* is true.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()
    entry = parsed.activities[task_id]
    state = entry["state"]

    # Mandatory fields are looked up by key; optional ones go through .get().
    row = [entry[key] for key in ("type", "id", "name", "version")]
    row.append(state)
    row.append(entry[state + "_timestamp"])
    row.append(entry["input"])
    row.append(entry.get("result"))  # Absent for failed tasks
    row.append(entry.get("reason"))

    header = [
        "type",
        "id",
        "name",
        "version",
        "state",
        "timestamp",
        "input",
        "result",
        "reason",
    ]
    if details:
        header.append("details")
        row.append(entry.get("details"))

    return header, [row]
def test_workflow_with_repair_and_force_activities():
    """A forced activity is scheduled again despite a completed previous run."""
    workflow = ATestDefinitionWithInput
    history = builder.History(workflow, input={'args': [4]})

    # Build the previous history that the executor is asked to repair from.
    previous_history = builder.History(workflow, input={'args': [4]})
    previous_decision_id = previous_history.last_id
    previous_history.add_activity_task(
        increment,
        decision_id=previous_decision_id,
        last_state='completed',
        activity_id='activity-tests.data.activities.increment-1',
        input={'args': 4},
        result=57,  # obviously wrong but helps see if things work
    )
    repair_history = History(previous_history)
    repair_history.parse()

    executor = Executor(
        DOMAIN,
        workflow,
        repair_with=repair_history,
        force_activities="increment|something_else",
    )

    # The assertions below expect exactly one real (non "FAKE-") schedule
    # decision for ``increment``.
    decisions, _ = executor.replay(Response(history=history))
    assert len(decisions) == 1

    decision = decisions[0]
    assert decision['decisionType'] == 'ScheduleActivityTask'
    attributes = decision['scheduleActivityTaskDecisionAttributes']
    assert not attributes['taskList']['name'].startswith("FAKE-")
    check_task_scheduled_decision(decision, increment)
def get_task(workflow_execution, task_id, details=False):
    """Render the activity task *task_id* as a ``(header, rows)`` pair.

    The task is read from the parsed history's ``_activities`` mapping.
    ``rows`` contains exactly one row; when *details* is true both the header
    and the row get an extra ``details`` entry.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()
    info = parsed._activities[task_id]
    state = info['state']

    columns = [
        ('type', info['type']),
        ('id', info['id']),
        ('name', info['name']),
        ('version', info['version']),
        ('state', state),
        ('timestamp', info[state + '_timestamp']),
        ('input', info['input']),
        ('result', info.get('result')),  # Absent for failed tasks
        ('reason', info.get('reason')),
    ]
    if details:
        columns.append(('details', info.get('details')))

    header = [label for label, _ in columns]
    row = [value for _, value in columns]
    return header, [row]
def status(workflow_execution, nb_tasks=None):
    """List the tasks of a workflow execution, last recorded task first.

    :param workflow_execution: object whose ``history()`` is parsed.
    :param nb_tasks: if truthy, keep only the first ``nb_tasks`` rows.
    :returns: ``(header, rows)``; each row is the task name followed by the
        tuple returned by ``get_timestamps(task)``.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()

    rows = []
    for task in parsed._tasks[::-1]:
        rows.append((task['name'],) + get_timestamps(task))

    if nb_tasks:
        rows = rows[:nb_tasks]

    header = ('Tasks', 'Last State', 'Last State Time', 'Scheduled Time')
    return header, rows
def status(workflow_execution, nb_tasks=None):
    """Return a ``(header, rows)`` table of tasks in reverse history order.

    Each row is ``(task name,) + get_timestamps(task)``. When *nb_tasks* is
    truthy the table is truncated to that many rows.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()

    newest_first = parsed.tasks[::-1]
    rows = [(entry["name"],) + get_timestamps(entry) for entry in newest_first]
    limited = rows[:nb_tasks] if nb_tasks else rows

    return ("Tasks", "Last State", "Last State Time", "Scheduled Time"), limited
def status(workflow_execution, nb_tasks=None):
    """Summarise the tasks of *workflow_execution*, latest entry first.

    Returns the column header and a list of rows built from each task's name
    and ``get_timestamps(task)``; a truthy *nb_tasks* caps the row count.
    """
    header = 'Tasks', 'Last State', 'Last State Time', 'Scheduled Time'

    parsed = History(workflow_execution.history())
    parsed.parse()

    def as_row(task):
        # One table row: the name, then the timestamp columns.
        return (task['name'],) + get_timestamps(task)

    rows = [as_row(task) for task in parsed._tasks[::-1]]
    if not nb_tasks:
        return header, rows
    return header, rows[:nb_tasks]
def activity_rerun(domain, workflow_id, run_id, input, scheduled_id, activity_id):
    """Re-run locally one activity taken from an existing workflow execution.

    The activity is located in the execution's history by *scheduled_id* or
    *activity_id* (at least one is required). A truthy *input* is decoded and
    passed to the lookup as an override. The process exits with status 1 when
    neither identifier is given or the execution cannot be found.
    """
    # Validate the parameters.
    if not (activity_id or scheduled_id):
        logger.error("Please supply --scheduled-id or --activity-id.")
        sys.exit(1)

    input_override = format.decode(input) if input else None

    # Locate the workflow execution.
    try:
        execution = helpers.get_workflow_execution(domain, workflow_id, run_id)
    except (swf.exceptions.DoesNotExistError, IndexError):
        logger.error("Couldn't find execution, exiting.")
        sys.exit(1)
    logger.info("Found execution: workflowId={} runId={}".format(
        execution.workflow_id, execution.run_id))

    # Look the activity up in the parsed history.
    parsed = History(execution.history())
    parsed.parse()
    task, args, kwargs, meta, params = helpers.find_activity(
        parsed,
        scheduled_id=scheduled_id,
        activity_id=activity_id,
        input=input_override,
    )

    # Add the identifiers of the execution to the activity's context.
    execution_ids = {
        "workflow_id": execution.workflow_id,
        "run_id": execution.run_id,
    }
    kwargs["context"].update(execution_ids)

    logger.debug("Found activity. Last execution:")
    pretty_params = json_dumps(params, pretty=True)
    for line in pretty_params.split("\n"):
        logger.debug(line)
    if input_override:
        logger.info("NB: input will be overriden with the passed one!")
    logger.info("Will re-run: {}(*{}, **{}) [+meta={}]".format(
        task, args, kwargs, meta))

    # Download binaries if needed.
    binaries = meta.get("binaries", {})
    download_binaries(binaries)

    # Execute the activity task with the recovered arguments.
    activity_task = ActivityTask(task, *args, **kwargs)
    outcome = activity_task.execute()
    if hasattr(activity_task, "post_execute"):
        activity_task.post_execute()
    logger.info("Result (JSON): {}".format(json_dumps(outcome, compact=False)))
def activity_rerun(domain, workflow_id, run_id, input, scheduled_id, activity_id):
    """Find an activity in a workflow execution's history and run it again.

    Either *scheduled_id* or *activity_id* must be provided; otherwise, or if
    the execution cannot be found, an error is logged and the process exits
    with status 1. A truthy *input* is decoded and used as an input override
    for the activity lookup.
    """
    # Validate the parameters.
    if not (activity_id or scheduled_id):
        logger.error("Please supply --scheduled-id or --activity-id.")
        sys.exit(1)

    input_override = format.decode(input) if input else None

    # Locate the workflow execution.
    try:
        execution = helpers.get_workflow_execution(domain, workflow_id, run_id)
    except (swf.exceptions.DoesNotExistError, IndexError):
        logger.error("Couldn't find execution, exiting.")
        sys.exit(1)
    found_message = "Found execution: workflowId={} runId={}".format(
        execution.workflow_id, execution.run_id)
    logger.info(found_message)

    # Look the activity up in the parsed history.
    parsed = History(execution.history())
    parsed.parse()
    lookup = helpers.find_activity(
        parsed,
        scheduled_id=scheduled_id,
        activity_id=activity_id,
        input=input_override,
    )
    task, args, kwargs, meta, params = lookup

    logger.debug("Found activity. Last execution:")
    for line in json_dumps(params, pretty=True).split("\n"):
        logger.debug(line)
    if input_override:
        logger.info("NB: input will be overriden with the passed one!")
    rerun_message = "Will re-run: {}(*{}, **{}) [+meta={}]".format(
        task, args, kwargs, meta)
    logger.info(rerun_message)

    # Download binaries if needed.
    download_binaries(meta.get("binaries", {}))

    # Execute the activity task with the recovered arguments.
    activity_task = ActivityTask(task, *args, **kwargs)
    outcome = activity_task.execute()
    if hasattr(activity_task, 'post_execute'):
        activity_task.post_execute()
    logger.info("Result (JSON): {}".format(json_dumps(outcome, compact=False)))
def get_task(workflow_execution, task_id):
    """Describe one activity task of a workflow execution as a table.

    :param workflow_execution: object whose ``history()`` is parsed.
    :param task_id: key of the task in the parsed history's ``_activities``.
    :returns: ``(header, rows)`` where ``rows`` holds a single tuple.
    :raises KeyError: if *task_id* is unknown or a mandatory field is missing.
    """
    history = History(workflow_execution.history())
    history.parse()
    task = history._activities[task_id]

    header = ('type', 'id', 'name', 'version', 'state', 'timestamp', 'input',
              'result')
    state = task['state']
    rows = [(
        task['type'],
        task['id'],
        task['name'],
        task['version'],
        state,
        # The timestamp key is derived from the task's current state.
        task[state + '_timestamp'],
        task['input'],
        # A task that did not complete (e.g. failed) carries no result; report
        # None instead of failing the whole lookup with a KeyError.
        task.get('result'),
    )]
    return header, rows
def info(workflow_execution):
    """Summarise a workflow execution as a one-row ``(header, rows)`` table.

    The execution time is the number of seconds between the first and last
    task of the parsed history, or between the first and last raw event when
    the history has no task. The reported input comes from that first
    task/event.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()

    tasks = parsed.tasks
    if tasks:
        first, last = tasks[0], tasks[-1]
        started_at = first[first['state'] + '_timestamp']
        # Prefer an explicit 'timestamp', else fall back to the state's one.
        ended_at = last.get('timestamp')
        if not ended_at:
            ended_at = last[last['state'] + '_timestamp']
        workflow_input = first['input']
    else:
        events = parsed.events
        first, last = events[0], events[-1]
        started_at = first.timestamp
        ended_at = last.timestamp
        workflow_input = first.input

    execution_time = (ended_at - started_at).total_seconds()

    ex = workflow_execution
    columns = (
        ('domain', ex.domain.name),
        ('workflow_type.name', ex.workflow_type.name),
        ('workflow_type.version', ex.workflow_type.version),
        ('task_list', ex.task_list),
        ('workflow_id', ex.workflow_id),
        ('run_id', ex.run_id),
        ('tag_list', ','.join(ex.tag_list)),
        ('execution_time', execution_time),
        ('input', workflow_input),
    )
    header = tuple(label for label, _ in columns)
    row = tuple(value for _, value in columns)
    return header, [row]
def info(workflow_execution):
    """Return general information about *workflow_execution* as a table.

    The single row holds identifying attributes of the execution, its
    duration in seconds and its input. Duration and input are taken from the
    parsed history's tasks when there are any, and from its raw events
    otherwise.
    """
    parsed = History(workflow_execution.history())
    parsed.parse()

    if not parsed.tasks:
        # No task recorded: fall back to the raw events.
        head = parsed.events[0]
        begin = head.timestamp
        end = parsed.events[-1].timestamp
        workflow_input = head.input
    else:
        head = parsed.tasks[0]
        begin = head[head["state"] + "_timestamp"]
        tail = parsed.tasks[-1]
        end = tail.get("timestamp") or tail[tail["state"] + "_timestamp"]
        workflow_input = head["input"]

    elapsed = end - begin
    execution_time = elapsed.total_seconds()

    header = (
        "domain",
        "workflow_type.name",
        "workflow_type.version",
        "task_list",
        "workflow_id",
        "run_id",
        "tag_list",
        "execution_time",
        "input",
    )
    row = (
        workflow_execution.domain.name,
        workflow_execution.workflow_type.name,
        workflow_execution.workflow_type.version,
        workflow_execution.task_list,
        workflow_execution.workflow_id,
        workflow_execution.run_id,
        ",".join(workflow_execution.tag_list),
        execution_time,
        workflow_input,
    )
    return header, [row]
def get_task(workflow_execution, task_id):
    """Return a one-row ``(header, rows)`` table for an activity task.

    :param workflow_execution: object whose ``history()`` is parsed.
    :param task_id: key of the task in the parsed history's ``_activities``.
    :returns: ``(header, rows)`` where ``rows`` holds a single tuple.
    :raises KeyError: if *task_id* is unknown or a mandatory field is missing.
    """
    history = History(workflow_execution.history())
    history.parse()
    task = history._activities[task_id]

    header = (
        'type', 'id', 'name', 'version', 'state', 'timestamp', 'input',
        'result',
    )
    state = task['state']
    rows = [(
        task['type'],
        task['id'],
        task['name'],
        task['version'],
        state,
        # The timestamp key is derived from the task's current state.
        task[state + '_timestamp'],
        task['input'],
        # Tasks that did not complete (e.g. failed ones) have no result;
        # use None for them rather than raising KeyError.
        task.get('result'),
    )]
    return header, rows
def info(workflow_execution):
    """Summarise a workflow execution as a one-row ``(header, rows)`` table.

    The execution time is the number of seconds between the first and the
    last task of the parsed history. When the history holds no task, the
    first and last raw events are used instead.

    :param workflow_execution: execution whose ``history()`` is parsed and
        whose identifying attributes are reported.
    :returns: ``(header, rows)`` with a single row.
    """
    history = History(workflow_execution.history())
    history.parse()

    if history._tasks:
        first_event = history._tasks[0]
        first_timestamp = first_event[first_event['state'] + '_timestamp']
        last_event = history._tasks[-1]
        last_timestamp = last_event[last_event['state'] + '_timestamp']
        workflow_input = first_event['input']
    else:
        first_event = history.events[0]
        first_timestamp = first_event.timestamp
        # Use the *last* event here: taking events[0] for both ends made the
        # execution time always 0 for executions without any task.
        last_event = history.events[-1]
        last_timestamp = last_event.timestamp
        workflow_input = first_event.input

    execution_time = (last_timestamp - first_timestamp).total_seconds()

    header = (
        'domain',
        'workflow_type.name',
        'workflow_type.version',
        'task_list',
        'workflow_id',
        'run_id',
        'tag_list',
        'execution_time',
        'input',
    )
    ex = workflow_execution
    rows = [(
        ex.domain.name,
        ex.workflow_type.name,
        ex.workflow_type.version,
        ex.task_list,
        ex.workflow_id,
        ex.run_id,
        ','.join(ex.tag_list),
        execution_time,
        workflow_input,
    )]
    return header, rows
def activity_rerun(domain, workflow_id, run_id, input, scheduled_id, activity_id):
    """Replay one activity function from a workflow execution's history.

    Either *scheduled_id* or *activity_id* must be provided; otherwise, or if
    the execution cannot be found, an error is logged and the process exits
    with status 1. A truthy *input* is parsed as JSON and used as an input
    override for the activity lookup.
    """
    # Validate the parameters.
    if not (activity_id or scheduled_id):
        logger.error("Please supply --scheduled-id or --activity-id.")
        sys.exit(1)

    input_override = json.loads(input) if input else None

    # Locate the workflow execution.
    try:
        execution = helpers.get_workflow_execution(domain, workflow_id, run_id)
    except (swf.exceptions.DoesNotExistError, IndexError):
        logger.error("Couldn't find execution, exiting.")
        sys.exit(1)
    logger.info("Found execution: workflowId={} runId={}".format(
        execution.workflow_id, execution.run_id))

    # Look the activity up in the parsed history.
    parsed = History(execution.history())
    parsed.parse()
    func, args, kwargs, params = helpers.find_activity(
        parsed,
        scheduled_id=scheduled_id,
        activity_id=activity_id,
        input=input_override,
    )

    logger.debug("Found activity. Last execution:")
    pretty_params = json_dumps(params, pretty=True)
    for line in pretty_params.split("\n"):
        logger.debug(line)
    if input_override:
        logger.info("NB: input will be overriden with the passed one!")
    logger.info("Will re-run: {}(*{}, **{})".format(func.__name__, args, kwargs))

    # Finally call the function again with the recovered arguments.
    outcome = func(*args, **kwargs)
    logger.info("Result (JSON): {}".format(json_dumps(outcome)))
def test_workflow_with_repair_if_task_failed():
    """An activity that failed in the repaired history is scheduled again."""
    workflow = ATestDefinitionWithInput
    history = builder.History(workflow, input={'args': [4]})

    # Build the previous history that the executor is asked to repair from.
    previous_history = builder.History(workflow, input={'args': [4]})
    previous_decision_id = previous_history.last_id
    previous_history.add_activity_task(
        increment,
        decision_id=previous_decision_id,
        last_state='failed',
        activity_id='activity-tests.data.activities.increment-1',
        input={'args': 4},
        result=57,  # obviously wrong but helps see if things work
    )
    repair_history = History(previous_history)
    repair_history.parse()

    executor = Executor(DOMAIN, workflow, repair_with=repair_history)

    # The first decision is expected to schedule ``increment``.
    decisions, _ = executor.replay(Response(history=history))
    check_task_scheduled_decision(decisions[0], increment)
def standalone(context,
               workflow,
               domain,
               workflow_id,
               execution_timeout,
               tags,
               decision_tasks_timeout,
               input,
               input_file,
               nb_workers,
               nb_deciders,
               heartbeat,
               display_status,
               repair,
               force_activities,
               ):
    """
    Spawn a decider and an activity worker to execute a workflow from a
    single main process.

    A decider process and a worker process are started on a freshly created
    task list, the workflow is started, and its status is polled every two
    seconds until the execution is closed. Both child processes are then sent
    SIGTERM and joined.

    :param context: not used in this function body.
    :param workflow: workflow reference resolved with ``get_workflow``.
    :param workflow_id: defaults to the workflow class name when falsy.
    :param input, input_file: sources for the workflow input; when both are
        falsy and ``repair`` is set, the input of the repaired execution's
        first event is reused.
    :param repair: workflow id of a previous execution to repair, optionally
        followed by a space and that execution's run id.
    :param force_activities: only accepted together with ``repair``.
    :raises ValueError: if ``force_activities`` is given without ``repair``.
    """
    disable_boto_connection_pooling()

    if force_activities and not repair:
        raise ValueError(
            "You should only use --force-activities with --repair."
        )

    workflow_class = get_workflow(workflow)
    if not workflow_id:
        workflow_id = workflow_class.name

    wf_input = {}
    if input or input_file:
        wf_input = get_or_load_input(input_file, input)

    if repair:
        repair_run_id = None
        # "<workflow_id> <run_id>" form: split off the run id.
        if " " in repair:
            repair, repair_run_id = repair.split(" ", 1)
        # get the previous execution history, it will serve as "default history"
        # for activities that succeeded in the previous execution
        logger.info(
            'retrieving history of previous execution: domain={} '
            'workflow_id={} run_id={}'.format(domain, repair, repair_run_id)
        )
        workflow_execution = get_workflow_execution(domain, repair, run_id=repair_run_id)
        previous_history = History(workflow_execution.history())
        # Use the run id of the execution that was actually retrieved.
        repair_run_id = workflow_execution.run_id
        previous_history.parse()
        # get the previous execution input if none passed
        if not input and not input_file:
            wf_input = previous_history.events[0].input
        if not tags:
            tags = workflow_execution.tag_list
    else:
        previous_history = None
        repair_run_id = None

    # Fall back to the tag list declared by the workflow class, if any.
    if not tags:
        get_tag_list = getattr(workflow_class, 'get_tag_list', None)
        if get_tag_list:
            tags = get_tag_list(workflow_class, *wf_input.get('args', ()), **wf_input.get('kwargs', {}))
        else:
            tags = getattr(workflow_class, 'tag_list', None)
        if tags == Workflow.INHERIT_TAG_LIST:
            tags = None

    task_list = create_unique_task_list(workflow_id)
    logger.info('using task list {}'.format(task_list))

    # The decider runs in its own process and receives the repair settings.
    decider_proc = multiprocessing.Process(
        target=decider.command.start,
        args=(
            [workflow],
            domain,
            task_list,
        ),
        kwargs={
            'nb_processes': nb_deciders,
            'repair_with': previous_history,
            'force_activities': force_activities,
            'is_standalone': True,
            'repair_workflow_id': repair or None,
            'repair_run_id': repair_run_id,
        },
    )
    decider_proc.start()

    # The activity worker runs in a second process on the same task list.
    worker_proc = multiprocessing.Process(
        target=worker.command.start,
        args=(
            domain,
            task_list,
        ),
        kwargs={
            'nb_processes': nb_workers,
            'heartbeat': heartbeat,
        },
    )
    worker_proc.start()

    print('starting workflow {}'.format(workflow), file=sys.stderr)
    ex = start_workflow.callback(
        workflow,
        domain,
        workflow_id,
        task_list,
        execution_timeout,
        tags,
        decision_tasks_timeout,
        format.input(wf_input),
        None,
        local=False,
    )

    # Poll the execution every two seconds until it is closed.
    while True:
        time.sleep(2)
        ex = helpers.get_workflow_execution(
            domain,
            ex.workflow_id,
            ex.run_id,
        )
        if display_status:
            print('status: {}'.format(ex.status), file=sys.stderr)
        if ex.status == ex.STATUS_CLOSED:
            print('execution {} finished'.format(ex.workflow_id), file=sys.stderr)
            break

    # Stop both child processes once the workflow is done.
    os.kill(worker_proc.pid, signal.SIGTERM)
    worker_proc.join()
    os.kill(decider_proc.pid, signal.SIGTERM)
    decider_proc.join()
class Executor(executor.Executor):
    """
    Manage a workflow's execution with Amazon SWF.

    It replays the workflow's definition from the start until it blocks (i.e.
    raises :py:class:`exceptions.ExecutionBlocked`).

    SWF stores the history of all events that occurred in the workflow and
    passes it to the executor. Only one executor handles a workflow at a time.
    It means the history is consistent and there is no concurrent
    modifications on the execution of the workflow.

    """
    def __init__(self, domain, workflow, task_list=None):
        """
        :param domain: SWF domain the tasks are scheduled in.
        :param workflow: workflow definition passed to the base executor.
        :param task_list: task list given to scheduled tasks.
        """
        super(Executor, self).__init__(workflow)
        self._tasks = TaskRegistry()
        self.domain = domain
        self.task_list = task_list

    def reset(self):
        """
        Clears the state of the execution.

        It is required to ensure the id of the tasks are assigned the same way
        on each replay.

        """
        self._open_activity_count = 0
        self._decisions = []
        self._tasks = TaskRegistry()

    def _make_task_id(self, task):
        """
        Assign a new ID to *task*.

        The ID is the task name followed by the index returned when the task
        is added to the registry.

        :returns:
            String with at most 256 characters.

        """
        index = self._tasks.add(task)
        task_id = '{name}-{idx}'.format(name=task.name, idx=index)
        return task_id

    def _get_future_from_activity_event(self, event):
        """Maps an activity event to a Future with the corresponding state.

        :param event: workflow event.
        :type  event: swf.event.Event.

        :returns: the future, or ``None`` when the activity could not be
            scheduled (the task then has to be scheduled again).

        """
        future = futures.Future()  # state is PENDING.
        state = event['state']

        if state == 'scheduled':
            future._state = futures.PENDING
        elif state == 'schedule_failed':
            if event['cause'] == 'ACTIVITY_TYPE_DOES_NOT_EXIST':
                # Register the missing activity type so that the next
                # scheduling attempt can succeed.
                activity_type = swf.models.ActivityType(
                    self.domain,
                    name=event['activity_type']['name'],
                    version=event['activity_type']['version'])
                logger.info('Creating activity type {} in domain {}'.format(
                    activity_type.name,
                    self.domain.name))
                try:
                    activity_type.save()
                except swf.exceptions.AlreadyExistsError:
                    logger.info(
                        'Activity type {} in domain {} already exists'.format(
                            activity_type.name,
                            self.domain.name))
                return None
            logger.info('failed to schedule {}: {}'.format(
                event['activity_type']['name'],
                event['cause'],
            ))
            return None
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            result = event['result']
            future._result = json.loads(result) if result else None
        elif state == 'canceled':
            future._state = futures.CANCELLED
        elif state == 'failed':
            future._state = futures.FINISHED
            future._exception = exceptions.TaskFailed(
                name=event['id'],
                reason=event['reason'],
                details=event.get('details'),
            )
        elif state == 'timed_out':
            future._state = futures.FINISHED
            future._exception = exceptions.TimeoutError(
                event['timeout_type'],
                event['timeout_value'])

        return future

    def _get_future_from_child_workflow_event(self, event):
        """Maps a child workflow event to a Future with the corresponding
        state.

        """
        future = futures.Future()
        state = event['state']

        if state == 'start_initiated':
            future._state = futures.PENDING
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            # Same handling as activities: an empty result maps to None
            # instead of making json.loads() fail.
            result = event['result']
            future._result = json.loads(result) if result else None

        return future

    def find_activity_event(self, task, history):
        """Return the activity entry of *history* for *task*, or ``None``."""
        activity = history._activities.get(task.id)
        return activity

    def find_child_workflow_event(self, task, history):
        """Return the child workflow entry of *history* for *task*, or
        ``None``."""
        return history._child_workflows.get(task.id)

    def find_event(self, task, history):
        """Return the entry of *history* that matches *task*, or ``None``.

        :raises TypeError: if *task* is neither an activity task nor a
            workflow task.

        """
        if isinstance(task, ActivityTask):
            return self.find_activity_event(task, history)
        elif isinstance(task, WorkflowTask):
            return self.find_child_workflow_event(task, history)
        # The exception used to be *returned*, which handed a truthy
        # TypeError instance to the caller as if it were an event.
        raise TypeError('invalid type {} for task {}'.format(
            type(task), task))

    def make_activity_task(self, func, *args, **kwargs):
        """Wrap *func* and its arguments in an ``ActivityTask``."""
        return ActivityTask(func, *args, **kwargs)

    def make_workflow_task(self, func, *args, **kwargs):
        """Wrap *func* and its arguments in a ``WorkflowTask``."""
        return WorkflowTask(func, *args, **kwargs)

    def resume_activity(self, task, event):
        """Return the future for an activity found in the history.

        ``None`` means the task has to be scheduled (again).

        :raises exceptions.TaskException: if the task failed, has no retry
            left and its activity raises on failure.

        """
        future = self._get_future_from_activity_event(event)
        if not future:  # Task in history does not count.
            return None

        if not future.finished:  # Still pending or running...
            return future

        if future.exception is None:  # Result available!
            return future

        if event.get('retry', 0) == task.activity.retry:  # No more retry!
            if task.activity.raises_on_failure:
                raise exceptions.TaskException(task, future.exception)
            return future  # with future.exception set.

        # Otherwise retry the task by scheduling it again.
        return None  # means the task is not in SWF.

    def resume_child_workflow(self, task, event):
        """Return the future for a child workflow found in the history."""
        return self._get_future_from_child_workflow_event(event)

    def schedule_task(self, task, task_list=None):
        """Add the decisions that schedule *task* to the pending decisions.

        :raises exceptions.ExecutionBlocked: when the number of pending
            decisions reaches ``constants.MAX_DECISIONS - 1``; a timer
            decision is appended first.

        """
        logger.debug('executor is scheduling task {} on task_list {}'.format(
            task.name,
            task_list,
        ))
        decisions = task.schedule(self.domain, task_list)
        # ``decisions`` contains a single decision.
        self._decisions.extend(decisions)
        self._open_activity_count += 1
        if len(self._decisions) == constants.MAX_DECISIONS - 1:
            # We add a timer to wake up the workflow immediately after
            # completing these decisions.
            timer = swf.models.decision.TimerDecision(
                'start',
                id='resume-after-{}'.format(task.id),
                start_to_fire_timeout='0')
            self._decisions.append(timer)
            raise exceptions.ExecutionBlocked()

    def resume(self, task, *args, **kwargs):
        """Resume the execution of a task.

        If the task was scheduled, returns a future that wraps its state,
        otherwise schedules it.

        :raises exceptions.ExecutionBlocked: when the count of open
            activities equals ``constants.MAX_OPEN_ACTIVITY_COUNT``.

        """
        task.id = self._make_task_id(task)
        event = self.find_event(task, self._history)

        future = None
        if event:
            if event['type'] == 'activity':
                future = self.resume_activity(task, event)
                if future and future._state in (futures.PENDING,
                                                futures.RUNNING):
                    self._open_activity_count += 1
            elif event['type'] == 'child_workflow':
                future = self.resume_child_workflow(task, event)

        if not future:
            self.schedule_task(task, task_list=self.task_list)
            future = futures.Future()  # return a pending future.

        if self._open_activity_count == constants.MAX_OPEN_ACTIVITY_COUNT:
            logger.warning('limit of {} open activities reached'.format(
                constants.MAX_OPEN_ACTIVITY_COUNT))
            raise exceptions.ExecutionBlocked

        return future

    def submit(self, func, *args, **kwargs):
        """Register a function and its arguments for asynchronous execution.

        ``*args`` and ``**kwargs`` must be serializable in JSON.

        Failed futures passed as arguments are collected: if there is any,
        a finished future carrying a ``MultipleExceptions`` is returned
        instead of submitting the task.

        :raises TypeError: if *func* is neither an ``Activity`` nor a
            ``Workflow`` subclass.

        """
        errors = []
        arguments = []
        keyword_arguments = {}
        result = None
        try:
            for arg in args:
                if isinstance(arg, futures.Future) and arg.failed:
                    exc = arg._exception
                    if isinstance(exc, exceptions.MultipleExceptions):
                        errors.extend(exc.exceptions)
                    else:
                        errors.append(exc)
                else:
                    arguments.append(executor.get_actual_value(arg))

            # ``items()`` (not ``iteritems()``) works on Python 2 and 3.
            for key, val in kwargs.items():
                if isinstance(val, futures.Future) and val.failed:
                    exc = val._exception
                    if isinstance(exc, exceptions.MultipleExceptions):
                        errors.extend(exc.exceptions)
                    else:
                        errors.append(exc)
                else:
                    keyword_arguments[key] = executor.get_actual_value(val)
        except exceptions.ExecutionBlocked:
            # An argument is not available yet: return a pending future.
            result = futures.Future()
        finally:
            if errors:
                result = futures.Future()
                result._state = futures.FINISHED
                result._exception = exceptions.MultipleExceptions(
                    'futures failed',
                    errors,
                )
        if result is not None:
            return result

        try:
            if isinstance(func, Activity):
                make_task = self.make_activity_task
            elif issubclass(func, Workflow):
                make_task = self.make_workflow_task
            else:
                raise TypeError
            task = make_task(func, *arguments, **keyword_arguments)
        except TypeError:
            raise TypeError('invalid type {} for {}'.format(
                type(func), func))

        return self.resume(task, *arguments, **keyword_arguments)

    def map(self, callable, iterable):
        """Submit *callable* with each of the items in *iterable*.

        All items in *iterable* must be serializable in JSON.

        """
        iterable = executor.get_actual_value(iterable)
        return super(Executor, self).map(callable, iterable)

    def starmap(self, callable, iterable):
        """Resolve *iterable* to its actual value, then delegate to the
        base class ``starmap``."""
        iterable = executor.get_actual_value(iterable)
        return super(Executor, self).starmap(callable, iterable)

    def replay(self, history):
        """Executes the workflow from the start until it blocks.

        :returns: ``(decisions, {})`` when the execution blocks or fails.
            No return statement for a workflow that runs to completion is
            visible in this method.

        """
        self.reset()

        self._history = History(history)
        self._history.parse()

        workflow_started_event = history[0]
        input = workflow_started_event.input
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        try:
            result = self.run_workflow(*args, **kwargs)
        except exceptions.ExecutionBlocked:
            logger.info('{} open activities ({} decisions)'.format(
                self._open_activity_count,
                len(self._decisions),
            ))
            return self._decisions, {}
        except exceptions.TaskException as err:
            reason = 'Workflow execution error in task {}: "{}"'.format(
                err.task.name,
                getattr(err.exception, 'reason', repr(err.exception)))
            logger.exception(reason)

            details = getattr(err.exception, 'details', None)
            self.on_failure(reason, details)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            return [decision], {}
        except Exception as err:
            reason = 'Cannot replay the workflow: {}({})'.format(
                err.__class__.__name__,
                err,
            )

            tb = traceback.format_exc()
            details = 'Traceback:\n{}'.format(tb)
            logger.exception(reason + '\n' + details)

            self.on_failure(reason)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            return [decision], {}
def standalone(
        context,
        workflow,
        domain,
        workflow_id,
        execution_timeout,
        tags,
        decision_tasks_timeout,
        input,
        input_file,
        nb_workers,
        nb_deciders,
        heartbeat,
        display_status,
        repair,
        force_activities,
):
    """
    Spawn a decider and an activity worker to execute a workflow from a
    single main process.

    The function starts one decider process and one worker process on a
    newly created task list, starts the workflow, then checks its status
    every two seconds until it is closed. Afterwards both child processes
    receive SIGTERM and are joined.

    :param context: not used in this function body.
    :param workflow: workflow reference resolved with ``get_workflow``.
    :param workflow_id: defaults to the workflow class name when falsy.
    :param input, input_file: sources for the workflow input; when both are
        falsy and ``repair`` is set, the input of the repaired execution's
        first event is reused.
    :param repair: workflow id of a previous execution to repair, optionally
        followed by a space and that execution's run id.
    :param force_activities: only accepted together with ``repair``.
    :raises ValueError: if ``force_activities`` is given without ``repair``.
    """
    disable_boto_connection_pooling()

    if force_activities and not repair:
        raise ValueError(
            "You should only use --force-activities with --repair.")

    workflow_class = get_workflow(workflow)
    if not workflow_id:
        workflow_id = workflow_class.name

    wf_input = {}
    if input or input_file:
        wf_input = get_or_load_input(input_file, input)

    if repair:
        repair_run_id = None
        # "<workflow_id> <run_id>" form: split off the run id.
        if " " in repair:
            repair, repair_run_id = repair.split(" ", 1)
        # get the previous execution history, it will serve as "default history"
        # for activities that succeeded in the previous execution
        logger.info('retrieving history of previous execution: domain={} '
                    'workflow_id={} run_id={}'.format(domain, repair, repair_run_id))
        workflow_execution = get_workflow_execution(domain, repair, run_id=repair_run_id)
        previous_history = History(workflow_execution.history())
        # Use the run id of the execution that was actually retrieved.
        repair_run_id = workflow_execution.run_id
        previous_history.parse()
        # get the previous execution input if none passed
        if not input and not input_file:
            wf_input = previous_history.events[0].input
        if not tags:
            tags = workflow_execution.tag_list
    else:
        previous_history = None
        repair_run_id = None

    # Fall back to the tag list declared by the workflow class, if any.
    if not tags:
        get_tag_list = getattr(workflow_class, 'get_tag_list', None)
        if get_tag_list:
            tags = get_tag_list(workflow_class, *wf_input.get('args', ()), **wf_input.get('kwargs', {}))
        else:
            tags = getattr(workflow_class, 'tag_list', None)
        if tags == Workflow.INHERIT_TAG_LIST:
            tags = None

    task_list = create_unique_task_list(workflow_id)
    logger.info('using task list {}'.format(task_list))

    # The decider runs in its own process and receives the repair settings.
    decider_proc = multiprocessing.Process(
        target=decider.command.start,
        args=(
            [workflow],
            domain,
            task_list,
        ),
        kwargs={
            'nb_processes': nb_deciders,
            'repair_with': previous_history,
            'force_activities': force_activities,
            'is_standalone': True,
            'repair_workflow_id': repair or None,
            'repair_run_id': repair_run_id,
        },
    )
    decider_proc.start()

    # The activity worker runs in a second process on the same task list.
    worker_proc = multiprocessing.Process(
        target=worker.command.start,
        args=(
            domain,
            task_list,
        ),
        kwargs={
            'nb_processes': nb_workers,
            'heartbeat': heartbeat,
        },
    )
    worker_proc.start()

    print('starting workflow {}'.format(workflow), file=sys.stderr)
    ex = start_workflow.callback(
        workflow,
        domain,
        workflow_id,
        task_list,
        execution_timeout,
        tags,
        decision_tasks_timeout,
        format.input(wf_input),
        None,
        local=False,
    )

    # Poll the execution every two seconds until it is closed.
    while True:
        time.sleep(2)
        ex = helpers.get_workflow_execution(
            domain,
            ex.workflow_id,
            ex.run_id,
        )
        if display_status:
            print('status: {}'.format(ex.status), file=sys.stderr)
        if ex.status == ex.STATUS_CLOSED:
            print('execution {} finished'.format(ex.workflow_id), file=sys.stderr)
            break

    # Stop both child processes once the workflow is done.
    os.kill(worker_proc.pid, signal.SIGTERM)
    worker_proc.join()
    os.kill(decider_proc.pid, signal.SIGTERM)
    decider_proc.join()
class Executor(executor.Executor):
    """
    Manage a workflow's execution with Amazon SWF. It replays the workflow's
    definition from the start until it blocks (i.e. raises
    :py:class:`exceptions.ExecutionBlocked`).

    SWF stores the history of all events that occurred in the workflow and
    passes it to the executor. Only one executor handles a workflow at a time.
    It means the history is consistent and there is no concurrent modifications
    on the execution of the workflow.

    :ivar domain: domain
    :type domain: swf.models.domain.Domain
    :ivar task_list: task list
    :type task_list: Optional[str]
    :ivar repair_with: previous history to use for repairing
    :type repair_with: Optional[simpleflow.history.History]
    :ivar force_activities: regex with activities to force
    :type _history: History
    """

    def __init__(self, domain, workflow_class, task_list=None, repair_with=None,
                 force_activities=None):
        """
        :param domain: domain the tasks are scheduled in
        :param workflow_class: workflow definition, passed to the base executor
        :param task_list: task list used when scheduling tasks
        :param repair_with: parsed history of a previous execution, if repairing
        :param force_activities: regex source; matching task ids are always
            re-executed, even in repair mode
        """
        super(Executor, self).__init__(workflow_class)
        self._history = None
        self._execution_context = {}
        self.domain = domain
        self.task_list = task_list
        self.repair_with = repair_with
        if force_activities:
            self.force_activities = re.compile(force_activities)
        else:
            self.force_activities = None
        self.reset()

    # noinspection PyAttributeOutsideInit
    def reset(self):
        """
        Clears the state of the execution.

        It is required to ensure the id of the tasks are assigned the same way
        on each replay.
        """
        self._open_activity_count = 0
        self._decisions = []
        self._append_timer = False  # Append an immediate timer decision
        self._tasks = TaskRegistry()
        self._idempotent_tasks_to_submit = set()
        self._execution = None
        self.current_priority = None
        self.create_workflow()

    def _make_task_id(self, a_task, *args, **kwargs):
        """
        Assign a new ID to *a_task*.

        :type a_task: ActivityTask | WorkflowTask
        :returns:
            String with at most 256 characters.
        :rtype: str
        """
        if isinstance(a_task, ActivityTask) and hasattr(
                a_task.activity.callable, 'get_task_id'):
            # The activity callable provides its own id suffix.
            suffix = a_task.activity.callable.get_task_id(
                self.workflow, *args, **kwargs)
        elif not a_task.idempotent:
            # If idempotency is False or unknown, let's generate a task id by
            # incrementing an id after the a_task name.
            # (default strategy, backwards compatible with previous versions)
            suffix = self._tasks.add(a_task)
        else:
            # If a_task is idempotent, we can do better and hash arguments.
            # It makes the workflow resistant to retries or variations on the
            # same task name (see #11).
            arguments = json_dumps({
                "args": args,
                "kwargs": kwargs
            }, sort_keys=True)
            suffix = hashlib.md5(arguments.encode('utf-8')).hexdigest()

        if isinstance(a_task, (WorkflowTask, )):
            # Some task types must have globally unique names.
            suffix = '{}--{}--{}'.format(self._workflow_id,
                                         hex_hash(self._run_id), suffix)

        task_id = '{name}-{suffix}'.format(name=a_task.name, suffix=suffix)
        if len(task_id) > 256:  # Better safe than sorry...
            # 223 chars + "-" + 32 hex digits == 256
            task_id = task_id[0:223] + "-" + hashlib.md5(
                task_id.encode('utf-8')).hexdigest()
        return task_id

    def _get_future_from_activity_event(self, event):
        """Maps an activity event to a Future with the corresponding state.

        :param event: activity event
        :type  event: dict[str, Any]

        :returns: a future, or None when scheduling failed
        :rtype: Optional[futures.Future]
        """
        future = futures.Future()  # state is PENDING.
        state = event['state']

        if state == 'scheduled':
            pass
        elif state == 'schedule_failed':
            if event['cause'] == 'ACTIVITY_TYPE_DOES_NOT_EXIST':
                # Register the missing activity type; returning None lets the
                # caller schedule the task again.
                activity_type = swf.models.ActivityType(
                    self.domain,
                    name=event['activity_type']['name'],
                    version=event['activity_type']['version'])
                logger.info('creating activity type {} in domain {}'.format(
                    activity_type.name, self.domain.name))
                try:
                    activity_type.save()
                except swf.exceptions.AlreadyExistsError:
                    logger.info(
                        'oops: Activity type {} in domain {} already exists, creation failed, continuing...'
                        .format(activity_type.name, self.domain.name))
                return None
            logger.info('failed to schedule {}: {}'.format(
                event['activity_type']['name'],
                event['cause'],
            ))
            return None
        elif state == 'started':
            future.set_running()
        elif state == 'completed':
            result = event['result']
            future.set_finished(json_loads_or_raw(result))
        elif state == 'canceled':
            future.set_cancelled()
        elif state == 'failed':
            exception = exceptions.TaskFailed(name=event['id'],
                                              reason=event['reason'],
                                              details=event.get('details'))
            future.set_exception(exception)
        elif state == 'timed_out':
            exception = exceptions.TimeoutError(event['timeout_type'],
                                                event['timeout_value'])
            future.set_exception(exception)

        return future

    def _get_future_from_child_workflow_event(self, event):
        """Maps a child workflow event to a Future with the corresponding
        state.

        :param event: child workflow event
        :type  event: dict[str, Any]

        :returns: a future, or None when the workflow type had to be created
        :rtype: Optional[futures.Future]
        """
        future = futures.Future()
        state = event['state']

        if state == 'start_initiated':
            pass  # future._state = futures.PENDING
        elif state == 'start_failed':
            if event['cause'] == 'WORKFLOW_TYPE_DOES_NOT_EXIST':
                workflow_type = swf.models.WorkflowType(
                    self.domain,
                    name=event['name'],
                    version=event['version'],
                )
                logger.info('Creating workflow type {} in domain {}'.format(
                    workflow_type.name,
                    self.domain.name,
                ))
                try:
                    workflow_type.save()
                except swf.exceptions.AlreadyExistsError:
                    # Could have be created by a concurrent workflow execution.
                    pass
                return None
            future.set_exception(
                exceptions.TaskFailed(
                    name=event['id'],
                    reason=event['cause'],
                    details=event.get('details'),
                ))
        elif state == 'started':
            future.set_running()
        elif state == 'completed':
            future.set_finished(json_loads_or_raw(event['result']))
        elif state == 'failed':
            future.set_exception(
                exceptions.TaskFailed(
                    name=event['id'],
                    reason=event['reason'],
                    details=event.get('details'),
                ))
        elif state == 'timed_out':
            future.set_exception(
                exceptions.TimeoutError(
                    event['timeout_type'],
                    None,
                ))
        elif state == 'canceled':
            future.set_exception(
                exceptions.TaskCanceled(event.get('details'), ))
        elif state == 'terminated':
            future.set_exception(exceptions.TaskTerminated())

        return future

    def _get_future_from_marker_event(self, a_task, event):
        """Maps a marker event to a Future with the corresponding
        state.

        :param a_task: currently unused
        :type a_task:
        :param event: marker event
        :type  event: dict[str, Any]
        :rtype: futures.Future
        """
        future = futures.Future()
        if not event:
            return future
        state = event['state']
        if state == 'recorded':
            future.set_finished(event['details'])
        elif state == 'failed':
            future.set_exception(
                exceptions.TaskFailed(
                    name=event['name'],
                    reason=event['cause'],
                ))

        return future

    def get_future_from_signal_event(self, a_task, event):
        """Maps a signal event to a Future with the corresponding
        state.

        :param a_task: currently unused
        :type a_task: Optional[SignalTask]
        :param event: signal event
        :type  event: dict[str, Any]
        :rtype: futures.Future
        """
        future = futures.Future()
        if not event:
            return future
        state = event['state']
        if state == 'signaled':
            future.set_finished(event['input'])

        return future

    def get_future_from_external_workflow_event(self, a_task, event):
        """Maps an external workflow event to a Future with the corresponding
        state.

        :param a_task: currently unused
        :type a_task:
        :param event: external workflow event
        :type  event: dict[str, Any]
        :rtype: futures.Future
        """
        future = futures.Future()
        if not event:
            return future
        state = event['state']
        if state == 'signal_execution_initiated':
            # Don't re-initiate signal sending
            future.set_running()
        elif state == 'execution_signaled':
            future.set_finished(event['input'])
        elif state == 'signal_execution_failed':
            future.set_exception(
                exceptions.TaskFailed(
                    name=event['name'],
                    reason=event['cause'],
                ))

        return future

    def get_future_from_signal(self, signal_name):
        """
        Build a future from the signal named *signal_name* in the current
        history; the future stays pending when no such signal was received.

        :param signal_name:
        :type signal_name: str
        :return:
        :rtype: futures.Future
        """
        event = self._history.signals.get(signal_name)
        return self.get_future_from_signal_event(None, event)

    def find_activity_event(self, a_task, history):
        """
        Get the event corresponding to a activity task, if any.

        :param a_task:
        :type a_task: ActivityTask
        :param history:
        :type history: simpleflow.history.History
        :return:
        :rtype: Optional[dict[str, Any]]
        """
        activity = history.activities.get(a_task.id)
        return activity

    def find_child_workflow_event(self, a_task, history):
        """
        Get the event corresponding to a child workflow, if any.

        :param a_task:
        :type a_task: WorkflowTask
        :param history:
        :type history: simpleflow.history.History
        :return:
        :rtype: Optional[dict]
        """
        return history.child_workflows.get(a_task.id)

    def find_signal_event(self, a_task, history):
        """
        Get the event corresponding to a signal, if any.

        :param a_task:
        :type a_task: SignalTask
        :param history:
        :type history: simpleflow.history.History
        :return:
        :rtype: Optional[dict]
        """
        # FIXME could look directly in signaled_workflows?
        event = history.signals.get(a_task.name)
        if not event:
            if a_task.workflow_id is None:
                # Broadcast, should be in signals
                return None
            signaled_workflows = history.signaled_workflows.get(
                a_task.name, [])
            for w in signaled_workflows:
                if w['workflow_id'] == a_task.workflow_id and (
                        a_task.run_id is None or w['run_id'] == a_task.run_id):
                    event = w
                    break
        return event

    def find_marker_event(self, a_task, history):
        """
        Get the event corresponding to a marker task, if any.

        Only markers in the ``recorded`` state whose details equal the task's
        JSON details are considered; the most recent one wins.

        :param a_task:
        :type a_task: MarkerTask
        :param history:
        :type history: simpleflow.history.History
        :return:
        :rtype: Optional[dict[str, Any]]
        """
        json_details = a_task.get_json_details()
        marker_list = history.markers.get(a_task.name)
        if not marker_list:
            return None
        matching = [
            m for m in marker_list
            if m['state'] == 'recorded' and m['details'] == json_details
        ]
        return matching[-1] if matching else None

    # Maps a task class to the (unbound) finder that looks its event up in a
    # history; consulted along the task type's MRO by ``find_event``.
    TASK_TYPE_TO_EVENT_FINDER = {
        ActivityTask: find_activity_event,
        WorkflowTask: find_child_workflow_event,
        SignalTask: find_signal_event,
        MarkerTask: find_marker_event,
    }

    def find_event(self, a_task, history):
        """
        Get the event corresponding to an activity or child workflow, if any

        :param a_task:
        :type a_task: ActivityTask | WorkflowTask | SignalTask
        :param history:
        :type history: simpleflow.history.History
        :return:
        :rtype: Optional[dict]
        :raise: TypeError if no finder is registered for the task's type
        """
        for typ in inspect.getmro(type(a_task)):
            finder = self.TASK_TYPE_TO_EVENT_FINDER.get(typ)
            if finder:
                return finder(self, a_task, history)
        raise TypeError('invalid type {} for task {}'.format(
            type(a_task), a_task))

    def resume_activity(self, a_task, event):
        """
        Resume an activity task.

        :param a_task:
        :type a_task: ActivityTask
        :param event:
        :type event: dict
        :return:
        :rtype: futures.Future | None
        :raise: exceptions.TaskException if the task failed, has no retry left
            and its activity raises on failure
        """
        future = self._get_future_from_activity_event(event)
        if not future:  # schedule failed, maybe OK later.
            return None

        if not future.finished:  # Still pending or running...
            return future

        if future.exception is None:  # Result available!
            return future

        # Compare number of retries in history with configured max retries
        # NB: we used to do a strict comparison (==), but that can lead to
        # infinite retries in case the code is redeployed with a decreased
        # retry limit and a workflow has a already crossed the new limit. So
        # ">=" is better there.
        if event.get('retry', 0) >= a_task.activity.retry:
            if a_task.activity.raises_on_failure:
                raise exceptions.TaskException(a_task, future.exception)
            return future  # with future.exception set.

        # Otherwise retry the task by scheduling it again.
        return None  # means the task is not in SWF.

    def resume_child_workflow(self, a_task, event):
        """
        Resume a child workflow.

        :param a_task:
        :type a_task: WorkflowTask
        :param event:
        :type event: dict
        :return:
        :rtype: Optional[simpleflow.futures.Future]
        :raise: the future's exception when the child workflow finished with one
        """
        future = self._get_future_from_child_workflow_event(event)

        if not future:
            # WORKFLOW_TYPE_DOES_NOT_EXIST, will be created
            return None

        if future.finished and future.exception:
            raise future.exception

        return future

    def schedule_task(self, a_task, task_list=None):
        """
        Let a task schedule itself.
        If too many decisions are in flight, add a timer decision and raise ExecutionBlocked.

        :param a_task:
        :type a_task: ActivityTask | WorkflowTask | SignalTask | MarkerTask
        :param task_list:
        :type task_list: Optional[str]
        :raise: exceptions.ExecutionBlocked if too many decisions waiting
        """
        if a_task.idempotent:
            # An idempotent task is submitted at most once per replay.
            task_identifier = (type(a_task), self.domain, a_task.id)
            if task_identifier in self._idempotent_tasks_to_submit:
                logger.debug('Not resubmitting task {}'.format(a_task.name))
                return
            self._idempotent_tasks_to_submit.add(task_identifier)

        # NB: ``decisions`` contains a single decision.
        decisions = a_task.schedule(self.domain, task_list,
                                    priority=self.current_priority)

        # Ready to schedule
        if isinstance(a_task, ActivityTask):
            self._open_activity_count += 1
        elif isinstance(a_task, MarkerTask):
            # markers don't generate decisions, so force a wake-up timer
            self._append_timer = True

        # Check if we won't violate the 1MB limit on API requests ; if so, do NOT
        # schedule the requested task and block execution instead, with a timer
        # to wake up the workflow immediately after completing these decisions.
        # See: http://docs.aws.amazon.com/amazonswf/latest/developerguide/swf-dg-limits.html
        request_size = len(json.dumps(self._decisions + decisions))
        # We keep a 5kB of error margin for headers, json structure, and the
        # timer decision, and 32kB for the context, even if we don't use it now.
        if request_size > constants.MAX_REQUEST_SIZE - 5000 - 32000:
            # TODO: at this point we may check that self._decisions is not empty
            # If it's the case, it means that a single decision was weighting
            # more than 900kB, so we have bigger problems.
            self._append_timer = True
            raise exceptions.ExecutionBlocked()

        self._decisions.extend(decisions)

        # Check if we won't exceed max decisions -1
        # TODO: if we had exactly MAX_DECISIONS - 1 to take, this will wake up
        # the workflow for no reason. Evaluate if we can do better.
        if len(self._decisions) == constants.MAX_DECISIONS - 1:
            # We add a timer to wake up the workflow immediately after
            # completing these decisions.
            self._append_timer = True
            raise exceptions.ExecutionBlocked()

    def _add_start_timer_decision(self, id):
        """Append a timer decision with a zero timeout, identified by *id*."""
        timer = swf.models.decision.TimerDecision('start', id=id,
                                                  start_to_fire_timeout='0')
        self._decisions.append(timer)

    # Maps an event's ``type`` to the (unbound) method turning that event into
    # a future; used by ``resume``.
    EVENT_TYPE_TO_FUTURE = {
        'activity': resume_activity,
        'child_workflow': resume_child_workflow,
        'signal': get_future_from_signal_event,
        'external_workflow': get_future_from_external_workflow_event,
        'marker': _get_future_from_marker_event,
    }

    def resume(self, a_task, *args, **kwargs):
        """Resume the execution of a task.
        Called by `submit`.

        If the task was scheduled, returns a future that wraps its state,
        otherwise schedules it.
        If in repair mode, we may fake the task to repair from the previous history.

        :param a_task:
        :type a_task: ActivityTask | WorkflowTask | SignalTask
        :param args:
        :type args: list
        :param kwargs:
        :type kwargs: dict
        :rtype: futures.Future
        :raise: exceptions.ExecutionBlocked if open activities limit reached
        """
        if not a_task.id:  # Can be already set (WorkflowTask)
            a_task.id = self._make_task_id(a_task, *args, **kwargs)
        event = self.find_event(a_task, self._history)
        logger.debug('executor: resume {}, event={}'.format(a_task, event))
        future = None

        # in repair mode, check if we absolutely want to re-execute this task
        force_execution = (self.force_activities and
                           self.force_activities.search(a_task.id))

        # try to fill in the blanks with the workflow we're trying to repair if any
        # TODO: maybe only do that for idempotent tasks?? (not enough information to decide?)
        if not event and self.repair_with and not force_execution:
            # try to find a former event matching this task
            former_event = self.find_event(a_task, self.repair_with)
            # ... but only keep the event if the task was successful
            if former_event and former_event['state'] == 'completed':
                logger.info('faking task completed successfully in previous '
                            'workflow: {}'.format(former_event['id']))
                json_hash = hashlib.md5(
                    json_dumps(former_event).encode('utf-8')).hexdigest()
                fake_task_list = "FAKE-" + json_hash

                # schedule task on a fake task list
                self.schedule_task(a_task, task_list=fake_task_list)
                future = futures.Future()

                # start a dedicated process to handle the fake activity
                run_fake_task_worker(self.domain.name, fake_task_list,
                                     former_event)

        # back to normal execution flow
        if event:
            ttf = self.EVENT_TYPE_TO_FUTURE.get(event['type'])
            if ttf:
                future = ttf(self, a_task, event)
            if event['type'] == 'activity':
                if future and future.state in (futures.PENDING,
                                               futures.RUNNING):
                    self._open_activity_count += 1

        if not future:
            self.schedule_task(a_task, task_list=self.task_list)
            future = futures.Future()  # return a pending future.

        if self._open_activity_count == constants.MAX_OPEN_ACTIVITY_COUNT:
            logger.warning('limit of {} open activities reached'.format(
                constants.MAX_OPEN_ACTIVITY_COUNT))
            raise exceptions.ExecutionBlocked

        return future

    def _compute_priority(self, priority_set_on_submit, a_task):
        """
        Computes the correct task priority, with the following precedence (first
        is better/preferred):
        - priority set with self.submit(..., __priority=<N>)
        - priority set on the activity task decorator if any
        - priority set on the workflow execution
        - None otherwise

        :param priority_set_on_submit:
        :type  priority_set_on_submit: str|int|PRIORITY_NOT_SET

        :param a_task:
        :type  a_task: ActivityTask|WorkflowTask

        :returns: the priority for this task
        :rtype: str|int|None
        """
        if priority_set_on_submit is not PRIORITY_NOT_SET:
            return priority_set_on_submit
        elif (isinstance(a_task, ActivityTask) and
              a_task.activity.task_priority is not PRIORITY_NOT_SET):
            return a_task.activity.task_priority
        elif self._workflow.task_priority is not PRIORITY_NOT_SET:
            return self._workflow.task_priority
        return None

    def submit(self, func, *args, **kwargs):
        """Register a function and its arguments for asynchronous execution.

        ``*args`` and ``**kwargs`` must be serializable in JSON.

        :type func: simpleflow.base.Submittable | Activity | Workflow
        :rtype: futures.Future
        :raise: TypeError if *func* is of an unsupported type
        """
        # NB: we don't set self.current_priority here directly, because we need
        # to extract it from the underlying Activity() if it's not passed to
        # self.submit() ; we DO need to pop the "__priority" kwarg though, so it
        # doesn't pollute the rest of the code.
        priority_set_on_submit = kwargs.pop("__priority", PRIORITY_NOT_SET)

        # casts simpleflow.task.*Task to their equivalent in simpleflow.swf.task
        if not isinstance(func, SwfTask):
            if isinstance(func, base_task.ActivityTask):
                func = ActivityTask.from_generic_task(func)
            elif isinstance(func, base_task.WorkflowTask):
                func = WorkflowTask.from_generic_task(func)
            elif isinstance(func, base_task.SignalTask):
                func = SignalTask.from_generic_task(func, self._workflow_id,
                                                    self._run_id, None, None)
            elif isinstance(func, base_task.MarkerTask):
                func = MarkerTask.from_generic_task(func)

        try:
            # do not use directly "Submittable" here because we want to catch if
            # we don't have an instance from a class known to work under simpleflow.swf
            if isinstance(
                    func,
                    (ActivityTask, WorkflowTask, SignalTask, MarkerTask)):
                # no need to wrap it, already wrapped in the correct format
                a_task = func
            elif isinstance(func, Activity):
                a_task = ActivityTask(func, *args, **kwargs)
            elif issubclass_(func, Workflow):
                a_task = WorkflowTask(self, func, *args, **kwargs)
            elif isinstance(func, WaitForSignal):
                future = self.get_future_from_signal(func.signal_name)
                logger.debug(
                    'submitted WaitForSignalTask({}): future={}'.format(
                        func.signal_name, future))
                return future
            elif isinstance(func, Submittable):
                raise TypeError(
                    'invalid type Submittable {} for {} (you probably wanted a simpleflow.swf.task.*Task)'
                    .format(type(func), func))
            else:
                raise TypeError('invalid type {} for {}'.format(
                    type(func), func))
        except exceptions.ExecutionBlocked:
            return futures.Future()

        # extract priority now that we have a *Task
        self.current_priority = self._compute_priority(priority_set_on_submit,
                                                       a_task)

        # finally resume task
        return self.resume(a_task, *a_task.args, **a_task.kwargs)

    # TODO: check if really used or remove it
    def map(self, callable, iterable):
        """Submit *callable* with each of the items in ``*iterables``.

        All items in ``*iterables`` must be serializable in JSON.
        """
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).map(callable, iterable)

    # TODO: check if really used or remove it
    def starmap(self, callable, iterable):
        """Resolve *iterable* to its actual value, then defer to the base
        executor's ``starmap``."""
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).starmap(callable, iterable)

    def replay(self, decision_response, decref_workflow=True):
        """Replay the workflow from the start until it blocks.
        Called by the DeciderWorker.

        :param decision_response: an object wrapping the PollForDecisionTask response
        :type  decision_response: swf.responses.Response
        :param decref_workflow : Decref workflow once replay is done (to save memory)
        :type decref_workflow : boolean

        :returns: a list of decision and a context dict (obsolete, empty)
        :rtype: ([swf.models.decision.base.Decision], dict)
        """
        self.reset()

        history = decision_response.history
        self._history = History(history)
        self._history.parse()
        self.build_execution_context(decision_response)
        self._execution = decision_response.execution

        workflow_started_event = history[0]
        input = workflow_started_event.input
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        self.before_replay()
        try:
            self.propagate_signals()
            result = self.run_workflow(*args, **kwargs)
        except exceptions.ExecutionBlocked:
            # The workflow cannot progress further: hand back the decisions
            # accumulated so far.
            logger.info('{} open activities ({} decisions)'.format(
                self._open_activity_count,
                len(self._decisions),
            ))
            self.after_replay()
            if decref_workflow:
                self.decref_workflow()
            if self._append_timer:
                self._add_start_timer_decision('_simpleflow_wake_up_timer')
            return self._decisions, {}
        except exceptions.TaskException as err:
            reason = 'Workflow execution error in task {}: "{}"'.format(
                err.task.name,
                getattr(err.exception, 'reason', repr(err.exception)))
            logger.exception(reason)

            details = getattr(err.exception, 'details', None)
            self.on_failure(reason, details)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            self.after_closed()
            if decref_workflow:
                self.decref_workflow()
            return [decision], {}
        except Exception as err:
            reason = 'Cannot replay the workflow: {}({})'.format(
                err.__class__.__name__,
                err,
            )

            tb = traceback.format_exc()
            details = 'Traceback:\n{}'.format(tb)
            logger.exception(reason + '\n' + details)

            self.on_failure(reason)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            self.after_closed()
            if decref_workflow:
                self.decref_workflow()
            return [decision], {}

        # The workflow ran to completion.
        self.after_replay()
        decision = swf.models.decision.WorkflowExecutionDecision()
        decision.complete(result=swf.format.result(json_dumps(result)))
        self.on_completed()
        self.after_closed()
        if decref_workflow:
            self.decref_workflow()
        return [decision], {}

    def decref_workflow(self):
        """
        Set the `_workflow` ivar to None in the hope of reducing memory consumption.
        """
        self._workflow = None

    def before_replay(self):
        """Forward the current history to the workflow's ``before_replay``."""
        return self._workflow.before_replay(self._history)

    def after_replay(self):
        """Forward the current history to the workflow's ``after_replay``."""
        return self._workflow.after_replay(self._history)

    def after_closed(self):
        """Forward the current history to the workflow's ``after_closed``."""
        return self._workflow.after_closed(self._history)

    def on_failure(self, reason, details=None):
        """Call the workflow's ``on_failure``; a workflow that does not
        implement it (NotImplementedError) is silently accepted."""
        try:
            self._workflow.on_failure(self._history, reason, details)
        except NotImplementedError:
            pass

    def on_completed(self):
        """Call the workflow's ``on_completed``; a workflow that does not
        implement it (NotImplementedError) is silently accepted."""
        try:
            self._workflow.on_completed(self._history)
        except NotImplementedError:
            pass

    def fail(self, reason, details=None):
        """
        Record a "fail workflow execution" decision and stop the replay.

        :param reason:
        :param details:
        :raise: exceptions.ExecutionBlocked, always
        """
        self.on_failure(reason, details)

        decision = swf.models.decision.WorkflowExecutionDecision()
        decision.fail(
            reason=swf.format.reason(
                'Workflow execution failed: {}'.format(reason)),
            details=swf.format.details(details),
        )

        self._decisions.append(decision)
        raise exceptions.ExecutionBlocked('workflow execution failed')

    def run(self, decision_response):
        """Alias of :meth:`replay` with its default options."""
        return self.replay(decision_response)

    def get_execution_context(self):
        """Return the dict built by :meth:`build_execution_context`."""
        return self._execution_context

    def build_execution_context(self, decision_response):
        """
        Extract data from the execution and history.

        :param decision_response:
        :type  decision_response: swf.responses.Response
        """
        execution = decision_response.execution
        if not execution:
            # For tests that don't provide an execution object.
            return

        history = decision_response.history
        workflow_started_event = history[0]
        self._execution_context = dict(
            name=execution.workflow_type.name,
            version=execution.workflow_type.version,
            workflow_id=execution.workflow_id,
            run_id=execution.run_id,
            # attribute is absent if no tagList
            tag_list=getattr(workflow_started_event, 'tag_list', None) or [],
            continued_execution_run_id=getattr(
                workflow_started_event, 'continued_execution_run_id', None),
            parent_workflow_id=getattr(
                workflow_started_event, 'parent_workflow_execution',
                {}).get('workflowId'),
            parent_run_id=getattr(
                workflow_started_event, 'parent_workflow_execution',
                {}).get('runId'),
        )

    @property
    def _workflow_id(self):
        """Workflow id from the execution context, None when unknown."""
        return self._execution_context.get('workflow_id')

    @property
    def _run_id(self):
        """Run id from the execution context, None when unknown."""
        return self._execution_context.get('run_id')

    def signal(self, name, workflow_id=None, run_id=None, propagate=True,
               *args, **kwargs):
        """
        Send a signal.

        Without *workflow_id*, the signal targets the current execution (its
        own workflow id and run id); otherwise the given *workflow_id* and
        *run_id* are used as is.

        :param name:
        :param workflow_id:
        :param run_id:
        :param propagate:
        :param args:
        :param kwargs:
        :return:
        """
        logger.debug(
            'signal: name={name}, workflow_id={workflow_id}, run_id={run_id}, propagate={propagate}'
            .format(
                name=name,
                workflow_id=workflow_id if workflow_id else self._workflow_id,
                run_id=run_id if workflow_id else self._run_id,
                propagate=propagate,
            ))

        extra_input = {'__propagate': False} if not propagate else None
        return SignalTask(
            name,
            workflow_id=workflow_id if workflow_id else self._workflow_id,
            run_id=run_id if workflow_id else self._run_id,
            extra_input=extra_input,
            *args,
            **kwargs)

    def wait_signal(self, name):
        """Return a ``WaitForSignal`` for the signal called *name*."""
        logger.debug('{} - wait_signal({})'.format(self._workflow_id, name))
        return WaitForSignal(name)

    def propagate_signals(self):
        """
        Send every signals we got to our parent and children.
        Don't send to workflows present in history.signaled_workflows.

        Signals whose input is not a dict are left alone: they lack the
        ``__propagate``/``__workflow_id`` envelope read below.
        """
        history = self._history
        if not history.signals:
            return

        known_workflows_ids = []
        # ``.get``: the context is empty when the response carried no
        # execution object (see build_execution_context).
        if self._execution_context.get('parent_workflow_id'):
            known_workflows_ids.append(
                (self._execution_context['parent_workflow_id'],
                 self._execution_context.get('parent_run_id')))
        known_workflows_ids.extend(
            (w['workflow_id'], w['run_id'])
            for w in history.child_workflows.values()
            if w['state'] == 'started')
        known_workflows_ids = frozenset(known_workflows_ids)

        for signal in history.signals.values():
            input = signal['input']
            if not isinstance(input, dict):
                # Foreign signal (plain string, None, ...): don't try to
                # process it, calling ``.get`` on it would abort the replay.
                continue
            propagate = input.get('__propagate', True)
            if not propagate:
                continue
            name = signal['name']
            orig_workflow_id = input.get('__workflow_id')
            orig_run_id = input.get('__run_id')
            input = {
                'args': input.get('args'),
                'kwargs': input.get('kwargs'),
                '__workflow_id': self._workflow_id,
                '__run_id': self._run_id,
            }
            sender = (signal['external_workflow_id'] or orig_workflow_id,
                      signal['external_run_id'] or orig_run_id)
            signaled_workflows_ids = set(
                (w['workflow_id'], w['run_id'])
                for w in history.signaled_workflows.get(name, []))
            signaled_workflows_ids.add((orig_workflow_id, orig_run_id))
            not_signaled_workflows_ids = list(
                known_workflows_ids - signaled_workflows_ids - {sender})
            for workflow_id, run_id in not_signaled_workflows_ids:
                try:
                    self._execution.signal(
                        signal_name=name,
                        input=input,
                        workflow_id=workflow_id,
                        run_id=run_id,
                    )
                except swf.models.workflow.WorkflowExecutionDoesNotExist:
                    logger.info('Workflow {} {} disappeared'.format(
                        workflow_id, run_id))

    def record_marker(self, name, details=None):
        """Return a ``MarkerTask`` for marker *name* carrying *details*."""
        return MarkerTask(name, details)

    def list_markers(self, all=False):
        """
        List the markers found in the current history.

        :param all: if true, return every marker event; otherwise only the
            latest event of each marker name, and only if it is ``recorded``
        :type all: bool
        :rtype: list[Marker]
        """
        if all:
            return [
                Marker(m['name'], json_loads_or_raw(m['details']))
                for ml in self._history.markers.values() for m in ml
            ]
        rc = []
        for ml in self._history.markers.values():
            m = ml[-1]
            if m['state'] == 'recorded':
                rc.append(Marker(m['name'], json_loads_or_raw(m['details'])))
        return rc
class Executor(executor.Executor):
    """
    Executes all tasks synchronously in a single local process.

    """

    def __init__(self, workflow_class):
        """
        :param workflow_class: workflow definition, passed to the base executor
        """
        super(Executor, self).__init__(workflow_class)
        self.update_workflow_class()
        self.nb_activities = 0  # also used to number activity ids
        self.signals_sent = set()  # names of the signals submitted so far

    def update_workflow_class(self):
        """
        Returns the workflow class with all the needed attributes for
        swf.models.history.builder.History()
        This allows to get a SWF-compatible history in local executions so that
        the metrology feature works correctly.
        """
        cls = self._workflow_class
        for attr in (
                "decision_tasks_timeout",
                "execution_timeout",
        ):
            if not hasattr(cls, attr):
                setattr(cls, attr, None)
        return cls

    def initialize_history(self, input):
        """Start a fresh builder history for the workflow class and *input*."""
        self._history = builder.History(self._workflow_class, input=input)

    def submit(self, func, *args, **kwargs):
        """
        Execute *func* immediately and return an already finished future.

        :param func: task, activity or workflow class to execute
        :type func: Submittable | Activity | type
        :rtype: futures.Future
        :raise: NotImplementedError if a signal is waited for before being sent
        :raise: TypeError if *func* is of an unsupported type
        :raise: exceptions.TaskFailed if an activity fails and raises on failure
        """
        logger.info('executing task {}(args={}, kwargs={})'.format(
            func, args, kwargs))

        future = futures.Future()

        context = self.get_execution_context()
        context["activity_id"] = str(self.nb_activities)
        self.nb_activities += 1

        # Ensure signals ordering
        if isinstance(func, SignalTask):
            self.signals_sent.add(func.name)
        elif isinstance(func, WaitForSignal):
            signal_name = func.signal_name
            if signal_name not in self.signals_sent:
                raise NotImplementedError(
                    'wait_signal({}) before signal was sent: unsupported by the local executor'
                    .format(signal_name))

        if isinstance(func, Submittable):
            task = func  # *args, **kwargs already resolved.
            task.context = context
            # None for tasks that do not wrap an activity (e.g. signals).
            func = getattr(task, 'activity', None)
        elif isinstance(func, Activity):
            task = ActivityTask(func, context=context, *args, **kwargs)
        elif isinstance(func, type) and issubclass(func, Workflow):
            # The isinstance() guard keeps non-class arguments away from
            # issubclass(), so they reach the explicit error below.
            task = WorkflowTask(self, func, *args, **kwargs)
        else:
            raise TypeError('invalid type {} for {}'.format(type(func), func))

        try:
            future._result = task.execute()
            state = 'completed'
        except Exception as err:
            future._exception = err
            logger.info('rescuing exception: {}'.format(err))
            if isinstance(func, Activity) and func.raises_on_failure:
                message = err.args[0] if err.args else ''
                raise exceptions.TaskFailed(func.name, message)
            state = 'failed'
        finally:
            future._state = futures.FINISHED

        # Only tasks backed by an activity (or a workflow class) are recorded:
        # there is nothing to describe in the history when ``func`` is None.
        if func is not None:
            self._history.add_activity_task(func,
                                            decision_id=None,
                                            last_state=state,
                                            activity_id=context["activity_id"],
                                            input={
                                                'args': args,
                                                'kwargs': kwargs
                                            },
                                            result=future._result)
        return future

    def run(self, input=None):
        """
        Run the workflow locally, from start to end.

        :param input: workflow input, with optional ``args`` and ``kwargs`` keys
        :type input: Optional[dict]
        :return: the value returned by the workflow
        """
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        self.create_workflow()

        self.initialize_history(input)

        self.before_replay()
        result = self.run_workflow(*args, **kwargs)

        # Hack: self._history must be available to the callback as a
        # simpleflow.history.History, not a swf.models.history.builder.History
        self._history = History(self._history)
        self._history.parse()
        self.after_replay()
        self.on_completed()
        self.after_closed()
        return result

    def after_closed(self):
        """Forward the current history to the workflow's ``after_closed``."""
        return self._workflow.after_closed(self._history)

    def get_execution_context(self):
        """Return a new dict of placeholder execution data for local runs."""
        return {
            "name": "local",
            "version": "1.0",
            "run_id": "local",
            "workflow_id": "local",
            "tag_list": []
        }

    def signal(self, name, *args, **kwargs):
        """Return a ``SignalTask`` for the signal called *name*."""
        return SignalTask(name, *args, **kwargs)

    def wait_signal(self, name):
        """Return a ``WaitForSignal`` for the signal called *name*."""
        return WaitForSignal(name)
class Executor(executor.Executor):
    """
    Executes all tasks synchronously in a single local process.

    Tasks are executed immediately inside :meth:`submit`; markers are kept in
    memory and a stack of workflow ids / run ids tracks nested child
    workflows.
    """

    def __init__(self, workflow_class):
        super(Executor, self).__init__(workflow_class)
        self.update_workflow_class()
        # Counter used to build the "activity_id" of each submitted task.
        self.nb_activities = 0
        # Names of the signals already sent, used to check signal ordering.
        self.signals_sent = set()
        # Marker name -> list of Marker objects, in recording order.
        self._markers = collections.OrderedDict()
        # Stacks describing the workflows currently being executed; the last
        # item is the innermost one.
        self.wf_run_id = []
        self.wf_id = []

    def update_workflow_class(self):
        """
        Returns the workflow class with all the needed attributes for
        swf.models.history.builder.History()
        This allows to get a SWF-compatible history in local executions so that
        the metrology feature works correctly.
        """
        cls = self._workflow_class
        for attr in ("decision_tasks_timeout", "execution_timeout"):
            # Only add the attribute when missing; never override a real value.
            if not hasattr(cls, attr):
                setattr(cls, attr, None)
        return cls

    def initialize_history(self, input):
        """Create the history builder that records this local run."""
        self._history = builder.History(
            self._workflow_class,
            input=input)

    def on_new_workflow(self, task):
        """Push a new run id and workflow id for the workflow *task*."""
        self.wf_run_id.append("{}".format(uuid.uuid4()))
        self.wf_id.append(
            task.id if task.id else "local_{}".format(task.workflow.name.lower()),
        )

    def on_completed_workflow(self):
        """Pop the ids pushed by :meth:`on_new_workflow`."""
        self.wf_run_id.pop()
        self.wf_id.pop()

    def submit(self, func, *args, **kwargs):
        """
        Execute *func* synchronously and return a finished future.

        :param func: a ``Submittable`` task, an ``Activity`` or a ``Workflow``
            subclass.
        :returns: a future whose state is ``futures.FINISHED``.
        :raises NotImplementedError: when waiting for a signal that was not
            sent before.
        :raises TypeError: when *func* is of an unsupported type.
        :raises exceptions.TaskFailed: when the task fails and its
            ``raises_on_failure`` attribute is set.
        """
        logger.info('executing task {}(args={}, kwargs={})'.format(
            func, args, kwargs))

        future = futures.Future()

        context = self.get_run_context()
        context["activity_id"] = str(self.nb_activities)
        self.nb_activities += 1

        # Ensure signals ordering
        if isinstance(func, SignalTask):
            self.signals_sent.add(func.name)
        elif isinstance(func, WaitForSignal):
            signal_name = func.signal_name
            if signal_name not in self.signals_sent:
                raise NotImplementedError(
                    'wait_signal({}) before signal was sent: unsupported by the local executor'.format(signal_name)
                )
        elif isinstance(func, MarkerTask):
            self._markers.setdefault(func.name, []).append(Marker(func.name, func.details))

        if isinstance(func, Submittable):
            task = func  # *args, **kwargs already resolved.
            task.context = context
            # Tasks without an ``activity`` attribute leave ``func`` as None.
            func = getattr(task, 'activity', None)
        elif isinstance(func, Activity):
            task = ActivityTask(func, context=context, *args, **kwargs)
        elif issubclass_(func, Workflow):
            # ``issubclass_`` (already used by the failure handler below) is
            # used instead of the builtin so that a non-class argument reaches
            # the explicit error of the ``else`` branch rather than the
            # builtin's own TypeError.
            # NOTE(review): assumes issubclass_ returns False for non-class
            # arguments -- confirm against its definition.
            task = WorkflowTask(self, func, *args, **kwargs)
        else:
            raise TypeError('invalid type {} for {}'.format(
                type(func), func))

        if isinstance(task, WorkflowTask):
            self.on_new_workflow(task)

        try:
            future._result = task.execute()
            if hasattr(task, 'post_execute'):
                task.post_execute()
            state = 'completed'
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            future._exception = exc_value
            logger.exception('rescuing exception: {}'.format(exc_value))
            if (isinstance(func, Activity) or issubclass_(func, Workflow)) and getattr(func, 'raises_on_failure', None):
                tb = traceback.format_tb(exc_traceback)
                message = format_exc(exc_value)
                details = json_dumps(
                    {
                        'error': exc_type.__name__,
                        'message': str(exc_value),
                        'traceback': tb,
                    },
                    default=repr
                )
                raise exceptions.TaskFailed(
                    func.name,
                    message,
                    details,
                )
            state = 'failed'
        finally:
            # Always unwind the workflow stack, even when TaskFailed is raised.
            if isinstance(task, WorkflowTask):
                self.on_completed_workflow()
            future._state = futures.FINISHED

        # Nothing to record when the task exposes no activity.
        if func:
            self._history.add_activity_task(
                func,
                decision_id=None,
                last_state=state,
                activity_id=context["activity_id"],
                input={'args': args, 'kwargs': kwargs},
                result=future.result)
        return future

    def run(self, input=None):
        """
        Run the workflow locally from start to end and return its result.

        :param input: optional dict with ``args`` and ``kwargs`` keys.
        """
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        self.create_workflow()
        self.initialize_history(input)

        self.before_replay()
        result = self.run_workflow(*args, **kwargs)

        # Hack: self._history must be available to the callback as a
        # simpleflow.history.History, not a swf.models.history.builder.History
        self._history = History(self._history)
        self._history.parse()
        self.after_replay()
        self.on_completed()
        self.after_closed()
        return result

    def after_closed(self):
        return self._workflow.after_closed(self._history)

    def get_run_context(self):
        """
        Return a fresh context dict for the innermost running workflow.

        ``run_id`` and ``workflow_id`` fall back to ``"local"`` when no child
        workflow is being executed.
        """
        return {
            "name": "local",
            "version": "1.0",
            "run_id": self.wf_run_id[-1] if self.wf_run_id else "local",
            "workflow_id": self.wf_id[-1] if self.wf_id else "local",
            "tag_list": [],
        }

    def signal(self, name, *args, **kwargs):
        return SignalTask(name, *args, **kwargs)

    def wait_signal(self, name):
        return WaitForSignal(name)

    def record_marker(self, name, details=None):
        return MarkerTask(name, details)

    def list_markers(self, all=False):
        """
        Return the recorded markers.

        :param all: when true, return every marker of every name; otherwise
            only the last marker recorded for each name.
        """
        if all:
            return [m for ml in self._markers.values() for m in ml]
        return [m[-1] for m in self._markers.values()]

    def get_event_details(self, event_type, event_name):
        return None  # To be implemented if needed
class Executor(executor.Executor):
    """
    Manage a workflow's execution with Amazon SWF. It replays the workflow's
    definition from the start until it blocks (i.e. raises
    :py:class:`exceptions.ExecutionBlocked`).

    SWF stores the history of all events that occurred in the workflow and
    passes it to the executor. Only one executor handles a workflow at a time.
    It means the history is consistent and there is no concurrent modifications
    on the execution of the workflow.

    """

    def __init__(self, domain, workflow, task_list=None, repair_with=None,
                 force_activities=None):
        """
        :param domain: SWF domain the workflow runs in.
        :param workflow: workflow class, forwarded to the parent constructor.
        :param task_list: default task list used to schedule tasks.
        :param repair_with: optional parsed history of a previous execution;
            completed tasks found there are faked instead of re-executed.
        :param force_activities: optional regex (string); tasks whose id
            matches it are never taken from ``repair_with``.
        """
        super(Executor, self).__init__(workflow)
        self.domain = domain
        self.task_list = task_list
        self.repair_with = repair_with
        if force_activities:
            self.force_activities = re.compile(force_activities)
        else:
            self.force_activities = None
        self.reset()

    def reset(self):
        """
        Clears the state of the execution.

        It is required to ensure the id of the tasks are assigned the same way
        on each replay.

        """
        self._open_activity_count = 0
        self._decisions = []
        self._tasks = TaskRegistry()

    def _make_task_id(self, a_task, *args, **kwargs):
        """
        Assign a new ID to *a_task*.

        :type a_task: ActivityTask | WorkflowTask
        :returns:
            String with at most 256 characters (the length is not checked
            here).

        """
        if not a_task.idempotent:
            # If idempotency is False or unknown, let's generate a task id by
            # incrementing and id after the a_task name.
            # (default strategy, backwards compatible with previous versions)
            suffix = self._tasks.add(a_task)
        else:
            # If a_task is idempotent, we can do better and hash arguments.
            # It makes the workflow resistant to retries or variations on the
            # same task name (see #11).
            arguments = json_dumps({"args": args, "kwargs": kwargs})
            suffix = hashlib.md5(arguments).hexdigest()

        task_id = '{name}-{idx}'.format(name=a_task.name, idx=suffix)
        return task_id

    def _get_future_from_activity_event(self, event):
        """Maps an activity event to a Future with the corresponding state.

        :param event: workflow event.
        :type  event: swf.event.Event.
        :returns: the future, or ``None`` when the activity could not be
            scheduled (the caller then schedules it again).

        """
        future = futures.Future()  # state is PENDING.
        state = event['state']

        if state == 'scheduled':
            future._state = futures.PENDING
        elif state == 'schedule_failed':
            if event['cause'] == 'ACTIVITY_TYPE_DOES_NOT_EXIST':
                # Register the missing activity type so that the next
                # scheduling attempt can succeed.
                activity_type = swf.models.ActivityType(
                    self.domain,
                    name=event['activity_type']['name'],
                    version=event['activity_type']['version'])
                logger.info('creating activity type {} in domain {}'.format(
                    activity_type.name,
                    self.domain.name))
                try:
                    activity_type.save()
                except swf.exceptions.AlreadyExistsError:
                    logger.info(
                        'oops: Activity type {} in domain {} already exists, creation failed, continuing...'
                        .format(activity_type.name, self.domain.name))
                return None
            logger.info('failed to schedule {}: {}'.format(
                event['activity_type']['name'],
                event['cause'],
            ))
            return None
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            result = event['result']
            future._result = json.loads(result) if result else None
        elif state == 'canceled':
            future._state = futures.CANCELLED
        elif state == 'failed':
            future._state = futures.FINISHED
            future._exception = exceptions.TaskFailed(
                name=event['id'],
                reason=event['reason'],
                details=event.get('details'))
        elif state == 'timed_out':
            future._state = futures.FINISHED
            future._exception = exceptions.TimeoutError(
                event['timeout_type'],
                event['timeout_value'])

        return future

    @staticmethod
    def _get_future_from_child_workflow_event(event):
        """Maps a child workflow event to a Future with the corresponding
        state.

        :param event: child workflow event (dict-like, read through its
            ``state`` and ``result`` keys).
        """
        future = futures.Future()
        state = event['state']

        if state == 'start_initiated':
            future._state = futures.PENDING
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            # Same guard as for activities: an empty result must not be fed
            # to json.loads(), which would raise instead of yielding None.
            result = event['result']
            future._result = json.loads(result) if result else None

        return future

    @staticmethod
    def find_activity_event(a_task, history):
        activity = history._activities.get(a_task.id)
        return activity

    @staticmethod
    def find_child_workflow_event(a_task, history):
        return history._child_workflows.get(a_task.id)

    def find_event(self, a_task, history):
        """
        Return the event of *history* matching ``a_task.id``, if any.

        :raises TypeError: when *a_task* is neither an ``ActivityTask`` nor a
            ``WorkflowTask``.
        """
        if isinstance(a_task, ActivityTask):
            return self.find_activity_event(a_task, history)
        elif isinstance(a_task, WorkflowTask):
            return self.find_child_workflow_event(a_task, history)
        else:
            raise TypeError('invalid type {} for task {}'.format(
                type(a_task), a_task))

    def resume_activity(self, a_task, event):
        """
        Return the future matching *event*, or ``None`` when the activity has
        to be scheduled (again).

        :raises exceptions.TaskException: when the activity failed, has no
            retry left and has ``raises_on_failure`` set.
        """
        future = self._get_future_from_activity_event(event)
        if not future:  # Task in history does not count.
            return None

        if not future.finished:  # Still pending or running...
            return future

        if future.exception is None:  # Result available!
            return future

        # Compare number of retries in history with configured max retries
        # NB: we used to do a strict comparison (==), but that can lead to
        # infinite retries in case the code is redeployed with a decreased
        # retry limit and a workflow has a already crossed the new limit. So
        # ">=" is better there.
        if event.get('retry', 0) >= a_task.activity.retry:
            if a_task.activity.raises_on_failure:
                raise exceptions.TaskException(a_task, future.exception)
            return future  # with future.exception set.

        # Otherwise retry the task by scheduling it again.
        return None  # means the task is not in SWF.

    def resume_child_workflow(self, a_task, event):
        return self._get_future_from_child_workflow_event(event)

    def schedule_task(self, a_task, task_list=None):
        """
        Append the decisions scheduling *a_task* on *task_list*.

        :raises exceptions.ExecutionBlocked: when the number of decisions
            reaches ``constants.MAX_DECISIONS - 1``.
        """
        logger.debug('executor is scheduling task {} on task_list {}'.format(
            a_task.name,
            task_list,
        ))
        decisions = a_task.schedule(self.domain, task_list)
        # ``decisions`` contains a single decision.
        self._decisions.extend(decisions)
        self._open_activity_count += 1
        if len(self._decisions) == constants.MAX_DECISIONS - 1:
            # We add a timer to wake up the workflow immediately after
            # completing these decisions.
            timer = swf.models.decision.TimerDecision(
                'start',
                id='resume-after-{}'.format(a_task.id),
                start_to_fire_timeout='0')
            self._decisions.append(timer)
            raise exceptions.ExecutionBlocked()

    def resume(self, a_task, *args, **kwargs):
        """Resume the execution of a task.

        If the task was scheduled, returns a future that wraps its state,
        otherwise schedules it.

        :raises exceptions.ExecutionBlocked: when the limit of open activities
            is reached.
        """
        a_task.id = self._make_task_id(a_task, *args, **kwargs)
        event = self.find_event(a_task, self._history)
        future = None

        # check if we absolutely want to execute this task in repair mode
        force_execution = self.force_activities and \
            self.force_activities.search(a_task.id)

        # try to fill in the blanks with the workflow we're trying to repair if any
        # TODO: maybe only do that for idempotent tasks??
        if not event and self.repair_with and not force_execution:
            # try to find a former event matching this task
            former_event = self.find_event(a_task, self.repair_with)
            # ... but only keep the event if the task was successful
            if former_event and former_event['state'] == 'completed':
                logger.info(
                    'faking task completed successfully in previous '
                    'workflow: {}'.format(former_event['id'])
                )
                json_hash = hashlib.md5(json_dumps(former_event)).hexdigest()
                fake_task_list = "FAKE-" + json_hash

                # schedule task on a fake task list
                self.schedule_task(a_task, task_list=fake_task_list)
                future = futures.Future()

                # start a dedicated process to handle the fake activity
                run_fake_task_worker(self.domain.name, fake_task_list, former_event)

        # back to normal execution flow
        if event:
            if event['type'] == 'activity':
                future = self.resume_activity(a_task, event)
                if future and future._state in (futures.PENDING, futures.RUNNING):
                    self._open_activity_count += 1
            elif event['type'] == 'child_workflow':
                future = self.resume_child_workflow(a_task, event)

        if not future:
            self.schedule_task(a_task, task_list=self.task_list)
            future = futures.Future()  # return a pending future.

        if self._open_activity_count == constants.MAX_OPEN_ACTIVITY_COUNT:
            logger.warning('limit of {} open activities reached'.format(
                constants.MAX_OPEN_ACTIVITY_COUNT))
            raise exceptions.ExecutionBlocked

        return future

    def submit(self, func, *args, **kwargs):
        """Register a function and its arguments for asynchronous execution.

        ``*args`` and ``**kwargs`` must be serializable in JSON.

        :raises TypeError: when *func* is neither an ``Activity`` nor a
            ``Workflow`` subclass.
        """
        try:
            if isinstance(func, Activity):
                a_task = ActivityTask(func, *args, **kwargs)
            elif issubclass_(func, Workflow):
                a_task = WorkflowTask(func, *args, **kwargs)
            else:
                raise TypeError('invalid type {} for {}'.format(
                    type(func), func))
        except exceptions.ExecutionBlocked:
            # Building the task blocked: hand back a pending future.
            return futures.Future()

        return self.resume(a_task, *a_task.args, **a_task.kwargs)

    # TODO: check if really used or remove it
    def map(self, callable, iterable):
        """Submit *callable* with each of the items in ``*iterables``.

        All items in ``*iterables`` must be serializable in JSON.

        """
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).map(callable, iterable)

    # TODO: check if really used or remove it
    def starmap(self, callable, iterable):
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).starmap(callable, iterable)

    def replay(self, decision_response):
        """Executes the workflow from the start until it blocks.

        :param decision_response: an object wrapping the PollForDecisionTask response
        :type  decision_response: swf.responses.Response

        :returns: a list of decision and a context dict
        :rtype: ([swf.models.decision.base.Decision], dict)
        """
        self.reset()

        history = decision_response.history
        self._history = History(history)
        self._history.parse()

        workflow_started_event = history[0]
        input = workflow_started_event.input
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        self.before_replay()
        try:
            result = self.run_workflow(*args, **kwargs)
        except exceptions.ExecutionBlocked:
            # Normal case: the workflow waits for tasks to make progress.
            logger.info('{} open activities ({} decisions)'.format(
                self._open_activity_count,
                len(self._decisions),
            ))
            self.after_replay()
            return self._decisions, {}
        except exceptions.TaskException as err:
            reason = 'Workflow execution error in task {}: "{}"'.format(
                err.task.name,
                getattr(err.exception, 'reason', repr(err.exception)))
            logger.exception(reason)

            details = getattr(err.exception, 'details', None)
            self.on_failure(reason, details)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            self.after_closed()
            return [decision], {}
        except Exception as err:
            reason = 'Cannot replay the workflow: {}({})'.format(
                err.__class__.__name__,
                err,
            )

            tb = traceback.format_exc()
            details = 'Traceback:\n{}'.format(tb)
            logger.exception(reason + '\n' + details)

            self.on_failure(reason)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=swf.format.reason(reason),
                details=swf.format.details(details),
            )
            self.after_closed()
            return [decision], {}

        self.after_replay()
        decision = swf.models.decision.WorkflowExecutionDecision()
        decision.complete(result=swf.format.result(json.dumps(result)))
        self.on_completed()
        self.after_closed()
        return [decision], {}

    def before_replay(self):
        return self._workflow.before_replay(self._history)

    def after_replay(self):
        return self._workflow.after_replay(self._history)

    def after_closed(self):
        return self._workflow.after_closed(self._history)

    def on_failure(self, reason, details=None):
        # The workflow hook is optional: NotImplementedError is ignored.
        try:
            self._workflow.on_failure(self._history, reason, details)
        except NotImplementedError:
            pass

    def on_completed(self):
        # The workflow hook is optional: NotImplementedError is ignored.
        try:
            self._workflow.on_completed(self._history)
        except NotImplementedError:
            pass

    def fail(self, reason, details=None):
        """
        Append a decision failing the workflow, then stop the replay.

        :raises exceptions.ExecutionBlocked: always.
        """
        self.on_failure(reason, details)

        decision = swf.models.decision.WorkflowExecutionDecision()
        decision.fail(
            reason=swf.format.reason(
                'Workflow execution failed: {}'.format(reason)),
            details=swf.format.details(details),
        )

        self._decisions.append(decision)
        raise exceptions.ExecutionBlocked('workflow execution failed')

    def run(self, decision_response):
        return self.replay(decision_response)
class Executor(executor.Executor):
    """
    Manage a workflow's execution with Amazon SWF. It replays the workflow's
    definition from the start until it blocks (i.e. raises
    :py:class:`exceptions.ExecutionBlocked`).

    SWF stores the history of all events that occurred in the workflow and
    passes it to the executor. Only one executor handles a workflow at a time.
    It means the history is consistent and there is no concurrent modifications
    on the execution of the workflow.

    """

    def __init__(self, domain, workflow, task_list=None):
        super(Executor, self).__init__(workflow)
        self._tasks = TaskRegistry()
        self.domain = domain
        self.task_list = task_list

    def reset(self):
        """
        Clears the state of the execution.

        It is required to ensure the id of the tasks are assigned the same way
        on each replay.

        """
        self._open_activity_count = 0
        self._decisions = []
        self._tasks = TaskRegistry()

    def _make_task_id(self, task, *args, **kwargs):
        """
        Assign a new ID to *task*.

        :returns:
            String with at most 256 characters (the length is not checked
            here).

        """
        if not task.idempotent:
            # If idempotency is False or unknown, let's generate a task id by
            # incrementing and id after the task name.
            # (default strategy, backwards compatible with previous versions)
            suffix = self._tasks.add(task)
        else:
            # If task is idempotent, we can do better and hash arguments.
            # It makes the workflow resistant to retries or variations on the
            # same task name (see #11).
            arguments = json.dumps({"args": args, "kwargs": kwargs})
            # hashlib needs bytes on Python 3. json.dumps() emits ASCII by
            # default, so encoding does not change the digest.
            suffix = hashlib.md5(arguments.encode('utf-8')).hexdigest()

        task_id = '{name}-{idx}'.format(name=task.name, idx=suffix)
        return task_id

    def _get_future_from_activity_event(self, event, task):
        """Maps an activity event to a Future with the corresponding state.

        :param event: workflow event.
        :type  event: swf.event.Event.
        :param task: task the event belongs to (not used by this method).
        :returns: the future, or ``None`` when the activity could not be
            scheduled (the caller then schedules it again).

        """
        future = futures.Future()  # state is PENDING.
        state = event['state']

        if state == 'scheduled':
            future._state = futures.PENDING
        elif state == 'schedule_failed':
            if event['cause'] == 'ACTIVITY_TYPE_DOES_NOT_EXIST':
                # Register the missing activity type so that the next
                # scheduling attempt can succeed.
                activity_type = swf.models.ActivityType(
                    self.domain,
                    name=event['activity_type']['name'],
                    version=event['activity_type']['version'])
                logger.info('creating activity type {} in domain {}'.format(
                    activity_type.name,
                    self.domain.name))
                try:
                    activity_type.save()
                except swf.exceptions.AlreadyExistsError:
                    logger.info(
                        'oops: Activity type {} in domain {} already exists, creation failed, continuing...'.format(
                            activity_type.name,
                            self.domain.name))
                return None
            logger.info('failed to schedule {}: {}'.format(
                event['activity_type']['name'],
                event['cause'],
            ))
            return None
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            result = event['result']
            future._result = json.loads(result) if result else None
        elif state == 'canceled':
            future._state = futures.CANCELLED
        elif state == 'failed':
            future._state = futures.FINISHED
            future._exception = exceptions.TaskFailed(
                name=event['id'],
                reason=event['reason'],
                details=event.get('details'))
        elif state == 'timed_out':
            future._state = futures.FINISHED
            future._exception = exceptions.TimeoutError(
                event['timeout_type'],
                event['timeout_value'])

        return future

    def _get_future_from_child_workflow_event(self, event):
        """Maps a child workflow event to a Future with the corresponding
        state.

        """
        future = futures.Future()
        state = event['state']

        if state == 'start_initiated':
            future._state = futures.PENDING
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            future._result = json.loads(event['result'])

        return future

    def find_activity_event(self, task, history):
        activity = history._activities.get(task.id)
        return activity

    def find_child_workflow_event(self, task, history):
        return history._child_workflows.get(task.id)

    def find_event(self, task, history):
        """
        Return the event of *history* matching ``task.id``, if any.

        :raises TypeError: when *task* is neither an ``ActivityTask`` nor a
            ``WorkflowTask``.
        """
        if isinstance(task, ActivityTask):
            return self.find_activity_event(task, history)
        elif isinstance(task, WorkflowTask):
            return self.find_child_workflow_event(task, history)
        else:
            # The exception used to be *returned*, which made callers treat
            # it as a (truthy) event; it has to be raised.
            raise TypeError('invalid type {} for task {}'.format(
                type(task), task))

    def resume_activity(self, task, event):
        """
        Return the future matching *event*, or ``None`` when the activity has
        to be scheduled (again).

        :raises exceptions.TaskException: when the activity failed, has no
            retry left and has ``raises_on_failure`` set.
        """
        future = self._get_future_from_activity_event(event, task)
        if not future:  # Task in history does not count.
            return None

        if not future.finished:  # Still pending or running...
            return future

        if future.exception is None:  # Result available!
            return future

        # ">=" rather than "==": a strict comparison retries forever when the
        # retry limit is decreased after the history already crossed it.
        if event.get('retry', 0) >= task.activity.retry:  # No more retry!
            if task.activity.raises_on_failure:
                raise exceptions.TaskException(task, future.exception)
            return future  # with future.exception set.

        # Otherwise retry the task by scheduling it again.
        return None  # means the task is not in SWF.

    def resume_child_workflow(self, task, event):
        return self._get_future_from_child_workflow_event(event)

    def schedule_task(self, task, task_list=None):
        """
        Append the decisions scheduling *task* on *task_list*.

        :raises exceptions.ExecutionBlocked: when the number of decisions
            reaches ``constants.MAX_DECISIONS - 1``.
        """
        logger.debug('executor is scheduling task {} on task_list {}'.format(
            task.name,
            task_list,
        ))
        decisions = task.schedule(self.domain, task_list)
        # ``decisions`` contains a single decision.
        self._decisions.extend(decisions)
        self._open_activity_count += 1
        if len(self._decisions) == constants.MAX_DECISIONS - 1:
            # We add a timer to wake up the workflow immediately after
            # completing these decisions.
            timer = swf.models.decision.TimerDecision(
                'start',
                id='resume-after-{}'.format(task.id),
                start_to_fire_timeout='0')
            self._decisions.append(timer)
            raise exceptions.ExecutionBlocked()

    def resume(self, task, *args, **kwargs):
        """Resume the execution of a task.

        If the task was scheduled, returns a future that wraps its state,
        otherwise schedules it.

        :raises exceptions.ExecutionBlocked: when the task was cancelled or
            when the limit of open activities is reached.
        """
        task.id = self._make_task_id(task, *args, **kwargs)
        event = self.find_event(task, self._history)

        future = None
        if event:
            if event['type'] == 'activity':
                future = self.resume_activity(task, event)
                if future and future._state in (futures.PENDING, futures.RUNNING):
                    self._open_activity_count += 1

                if future and future.cancelled:
                    # A cancelled activity cancels the whole workflow.
                    cancel_decision = swf.models.decision.WorkflowExecutionDecision()
                    cancel_decision.cancel()
                    self._decisions.append(cancel_decision)
                    raise exceptions.ExecutionBlocked()
            elif event['type'] == 'child_workflow':
                future = self.resume_child_workflow(task, event)

        if not future:
            self.schedule_task(task, task_list=self.task_list)
            future = futures.Future()  # return a pending future.

        if self._open_activity_count == constants.MAX_OPEN_ACTIVITY_COUNT:
            logger.warning('limit of {} open activities reached'.format(
                constants.MAX_OPEN_ACTIVITY_COUNT))
            raise exceptions.ExecutionBlocked

        return future

    def submit(self, func, *args, **kwargs):
        """Register a function and its arguments for asynchronous execution.

        ``*args`` and ``**kwargs`` must be serializable in JSON.

        :raises TypeError: when *func* is neither an ``Activity`` nor a
            ``Workflow`` subclass.
        """
        try:
            if isinstance(func, Activity):
                task = ActivityTask(func, *args, **kwargs)
            elif issubclass(func, Workflow):
                task = WorkflowTask(func, *args, **kwargs)
            else:
                # NB: isinstance() and issubclass() may raise a TypeError too
                # hence the try/except reraising a TypeError. Found reason in
                # commit 8faa8636.
                # TODO: see if we can avoid that, that hides TypeError's in
                # tasks creation, which is annoying, because the re-raised
                # exception can be misleading in that case.
                raise TypeError
        except exceptions.ExecutionBlocked:
            return futures.Future()
        except TypeError:
            raise TypeError('invalid type {} for {}'.format(
                type(func), func))

        return self.resume(task, *task.args, **task.kwargs)

    # TODO: check if really used or remove it
    def map(self, callable, iterable):
        """Submit *callable* with each of the items in ``*iterables``.

        All items in ``*iterables`` must be serializable in JSON.

        """
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).map(callable, iterable)

    # TODO: check if really used or remove it
    def starmap(self, callable, iterable):
        iterable = task.get_actual_value(iterable)
        return super(Executor, self).starmap(callable, iterable)

    def replay(self, history):
        """Executes the workflow from the start until it blocks.

        :param history: events of the workflow execution; the first one must
            expose the workflow ``input``.
        :returns: a list of decisions and a context dict, for every path that
            returns explicitly.
        """
        self.reset()

        self._history = History(history)
        self._history.parse()

        workflow_started_event = history[0]
        input = workflow_started_event.input
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        # check if there is a workflow cancellation request
        if self._history.is_cancel_requested:
            # list all the running activities
            cancellable_activities_id = self._history.list_cancellable_activities()

            if len(cancellable_activities_id) == 0:
                # nothing to cancel, completing the workflow as cancelled
                cancel_decision = swf.models.decision.WorkflowExecutionDecision()
                cancel_decision.cancel()

                logger.info('Sucessfully canceled the workflow.')
                return [cancel_decision], {}

            cancel_activities_decisions = []
            for activity_id in cancellable_activities_id:
                # send cancel request to each of them
                decision = swf.models.decision.ActivityTaskDecision(
                    'request_cancel',
                    activity_id=activity_id,
                )
                cancel_activities_decisions.append(decision)

            return cancel_activities_decisions, {}

        # handle workflow on start delay
        if self._workflow.delayed_start_timer > 0:
            if 'delayed_start_timer' not in self._history._timers:
                logger.info('Scheduling delayed start decision.')
                timer = swf.models.decision.TimerDecision(
                    'start',
                    id='delayed_start_timer',
                    start_to_fire_timeout=str(self._workflow.delayed_start_timer))
                self._decisions.append(timer)
                return self._decisions, {}
            elif self._history._timers['delayed_start_timer']['state'] != 'fired':
                # wait for the timer event, no-op
                logger.info('Timer has not fired yet.')
                return [], {}

        if self._history.is_workflow_started:
            # the workflow has just started
            self.on_start(args, kwargs)

        # workflow not cancelled
        try:
            # NOTE(review): nothing is returned when run_workflow() completes
            # without blocking, so this method then returns None -- the
            # completion decision appears to be missing here; confirm.
            result = self.run_workflow(*args, **kwargs)
        except exceptions.ExecutionBlocked:
            logger.info('{} open activities ({} decisions)'.format(
                self._open_activity_count,
                len(self._decisions),
            ))
            return self._decisions, {}
        except exceptions.TaskException as err:
            reason = 'Workflow execution error in task {}: "{}"'.format(
                err.task.name,
                getattr(err.exception, 'reason', repr(err.exception)))
            logger.info(reason)

            details = getattr(err.exception, 'details', None)
            self.on_failure(reason, details, args, kwargs)

            decision = swf.models.decision.WorkflowExecutionDecision()
            if self._workflow.is_daemon:
                # do not fail daemon workflow
                logger.info('Task failed. Re-running continue_as_new for the daemon workflow.')
                decision.continue_as_new(
                    input=input,
                    task_list={'name': self.task_list},
                    task_timeout=str(self._workflow.decision_tasks_timeout),
                    execution_timeout=str(self._workflow.execution_timeout),
                    workflow_type_version=str(self._workflow.version))
            else:
                decision.fail(
                    reason=swf.format.reason(reason),
                    details=swf.format.details(details),
                )
            return [decision], {}
        except Exception as err:
            reason = 'Cannot replay the workflow: {}({})'.format(
                err.__class__.__name__,
                err,
            )

            tb = traceback.format_exc()
            details = 'Traceback:\n{}'.format(tb)
            logger.exception(reason + '\n' + details)

            self.on_failure(reason, details, args, kwargs)

            decision = swf.models.decision.WorkflowExecutionDecision()
            if self._workflow.is_daemon:
                # do not fail daemon workflow
                logger.info('Unexpected workflow error. Re-running continue_as_new for the daemon workflow.')
                decision.continue_as_new(
                    input=input,
                    task_list={'name': self.task_list},
                    task_timeout=str(self._workflow.decision_tasks_timeout))
            else:
                decision.fail(
                    reason=swf.format.reason(reason),
                    details=swf.format.details(details),
                )
            return [decision], {}
class Executor(executor.Executor):
    """
    Executes all tasks synchronously in a single local process.

    Submitted tasks run right away inside :meth:`submit`. Recorded markers
    are stored in memory, and two stacks hold the workflow id and run id of
    the workflows being executed (child workflows are pushed on top).
    """

    def __init__(self, workflow_class):
        super(Executor, self).__init__(workflow_class)
        self.update_workflow_class()
        # Number of tasks submitted so far; also the next "activity_id".
        self.nb_activities = 0
        # Signal names sent so far, checked by wait_signal tasks.
        self.signals_sent = set()
        # Marker name -> list of Marker objects, oldest first.
        self._markers = collections.OrderedDict()
        # Innermost running workflow is the last item of each stack.
        self.wf_run_id = []
        self.wf_id = []

    def update_workflow_class(self):
        """
        Returns the workflow class with all the needed attributes for
        swf.models.history.builder.History()
        This allows to get a SWF-compatible history in local executions so that
        the metrology feature works correctly.
        """
        cls = self._workflow_class
        for attr in (
                "decision_tasks_timeout",
                "execution_timeout",
        ):
            # Missing attributes default to None; existing ones are kept.
            if not hasattr(cls, attr):
                setattr(cls, attr, None)
        return cls

    def initialize_history(self, input):
        """Create the history builder that records this local run."""
        self._history = builder.History(self._workflow_class, input=input)

    def on_new_workflow(self, task):
        """Push a new run id and workflow id for the workflow *task*."""
        self.wf_run_id.append("{}".format(uuid.uuid4()))
        self.wf_id.append(
            task.id if task.id else "local_{}".format(task.workflow.name.lower()),
        )

    def on_completed_workflow(self):
        """Pop the ids pushed by :meth:`on_new_workflow`."""
        self.wf_run_id.pop()
        self.wf_id.pop()

    def submit(self, func, *args, **kwargs):
        """
        Execute *func* synchronously and return a finished future.

        :param func: a ``Submittable`` task, an ``Activity`` or a ``Workflow``
            subclass.
        :returns: a future whose state is ``futures.FINISHED``.
        :raises NotImplementedError: when waiting for a signal that was not
            sent before.
        :raises TypeError: when *func* is of an unsupported type.
        :raises exceptions.TaskFailed: when the task fails and its
            ``raises_on_failure`` attribute is set.
        """
        logger.info('executing task {}(args={}, kwargs={})'.format(
            func, args, kwargs))

        future = futures.Future()

        context = self.get_run_context()
        context["activity_id"] = str(self.nb_activities)
        self.nb_activities += 1

        # Ensure signals ordering
        if isinstance(func, SignalTask):
            self.signals_sent.add(func.name)
        elif isinstance(func, WaitForSignal):
            signal_name = func.signal_name
            if signal_name not in self.signals_sent:
                raise NotImplementedError(
                    'wait_signal({}) before signal was sent: unsupported by the local executor'
                    .format(signal_name))
        elif isinstance(func, MarkerTask):
            self._markers.setdefault(func.name, []).append(Marker(func.name, func.details))

        if isinstance(func, Submittable):
            task = func  # *args, **kwargs already resolved.
            task.context = context
            # ``func`` becomes None for tasks that wrap no activity.
            func = getattr(task, 'activity', None)
        elif isinstance(func, Activity):
            task = ActivityTask(func, context=context, *args, **kwargs)
        elif issubclass_(func, Workflow):
            # Same helper as in the failure handler below: with the builtin
            # issubclass(), a non-class ``func`` raises the builtin's own
            # TypeError and never reaches the explicit message of the
            # ``else`` branch.
            # NOTE(review): assumes issubclass_ returns False for non-class
            # arguments -- confirm against its definition.
            task = WorkflowTask(self, func, *args, **kwargs)
        else:
            raise TypeError('invalid type {} for {}'.format(type(func), func))

        if isinstance(task, WorkflowTask):
            self.on_new_workflow(task)

        try:
            future._result = task.execute()
            if hasattr(task, 'post_execute'):
                task.post_execute()
            state = 'completed'
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            future._exception = exc_value
            logger.exception('rescuing exception: {}'.format(exc_value))
            if (isinstance(func, Activity) or issubclass_(func, Workflow)) and getattr(
                    func, 'raises_on_failure', None):
                tb = traceback.format_tb(exc_traceback)
                message = format_exc(exc_value)
                details = json_dumps(
                    {
                        'error': exc_type.__name__,
                        'message': str(exc_value),
                        'traceback': tb,
                    },
                    default=repr)
                raise exceptions.TaskFailed(
                    func.name,
                    message,
                    details,
                )
            state = 'failed'
        finally:
            # Runs on every path, including when TaskFailed is raised above.
            if isinstance(task, WorkflowTask):
                self.on_completed_workflow()
            future._state = futures.FINISHED

        # Tasks that wrap no activity are not recorded in the history.
        if func:
            self._history.add_activity_task(
                func,
                decision_id=None,
                last_state=state,
                activity_id=context["activity_id"],
                input={
                    'args': args,
                    'kwargs': kwargs
                },
                result=future.result)
        return future

    def run(self, input=None):
        """
        Run the workflow locally from start to end and return its result.

        :param input: optional dict with ``args`` and ``kwargs`` keys.
        """
        if input is None:
            input = {}
        args = input.get('args', ())
        kwargs = input.get('kwargs', {})

        self.create_workflow()
        self.initialize_history(input)

        self.before_replay()
        result = self.run_workflow(*args, **kwargs)

        # Hack: self._history must be available to the callback as a
        # simpleflow.history.History, not a swf.models.history.builder.History
        self._history = History(self._history)
        self._history.parse()
        self.after_replay()
        self.on_completed()
        self.after_closed()
        return result

    def after_closed(self):
        return self._workflow.after_closed(self._history)

    def get_run_context(self):
        """
        Return a fresh context dict for the innermost running workflow.

        ``run_id`` and ``workflow_id`` fall back to ``"local"`` when no child
        workflow is being executed.
        """
        return {
            "name": "local",
            "version": "1.0",
            "run_id": self.wf_run_id[-1] if self.wf_run_id else "local",
            "workflow_id": self.wf_id[-1] if self.wf_id else "local",
            "tag_list": []
        }

    def signal(self, name, *args, **kwargs):
        return SignalTask(name, *args, **kwargs)

    def wait_signal(self, name):
        return WaitForSignal(name)

    def record_marker(self, name, details=None):
        return MarkerTask(name, details)

    def list_markers(self, all=False):
        """
        Return the recorded markers.

        :param all: when true, return every marker of every name; otherwise
            only the last marker recorded for each name.
        """
        if all:
            return [m for ml in self._markers.values() for m in ml]
        return [m[-1] for m in self._markers.values()]

    def get_event_details(self, event_type, event_name):
        return None  # To be implemented if needed
class Executor(executor.Executor):
    """
    Manage a workflow's execution with Amazon SWF. It replays the workflow's
    definition from the start until it blocks (i.e. raises
    :py:class:`exceptions.ExecutionBlocked`).

    SWF stores the history of all events that occurred in the workflow and
    passes it to the executor. Only one executor handles a workflow at a time.
    It means the history is consistent and there are no concurrent
    modifications on the execution of the workflow.

    """

    def __init__(self, domain, workflow):
        """
        :param domain: SWF domain the tasks are scheduled in.
        :param workflow: workflow definition, passed on to the base executor.
        """
        super(Executor, self).__init__(workflow)
        self._tasks = TaskRegistry()
        self.domain = domain

    def reset(self):
        """
        Clears the state of the execution.

        It is required to ensure the id of the tasks are assigned the same way
        on each replay.

        """
        self._decisions = []
        self._tasks = TaskRegistry()

    def _make_task_id(self, task):
        """
        Assign a new ID to *task*.

        The task is added to the registry and the index it gets back is
        appended to the task's name.

        :returns: string of the form ``<task name>-<index>``.

        NOTE(review): the previous docstring promised "at most 256
        characters", but nothing here enforces that bound.

        """
        index = self._tasks.add(task)
        return '{name}-{idx}'.format(name=task.name, idx=index)

    def _get_future_from_activity_event(self, event):
        """Maps an activity event to a Future with the corresponding state.

        :param event: workflow event.
        :type  event: swf.event.Event.
        :returns: a future reflecting ``event['state']``, or ``None`` when
            scheduling failed because the activity type did not exist (the
            type is then registered so the task can be scheduled again).

        """
        future = futures.Future()
        state = event['state']

        if state == 'scheduled':
            future._state = futures.PENDING
        elif state == 'schedule_failed':
            # NOTE(review): other ``schedule_failed`` causes fall through and
            # return the future in its default state -- confirm this is the
            # intended behaviour.
            if event['cause'] == 'ACTIVITY_TYPE_DOES_NOT_EXIST':
                activity_type = swf.models.ActivityType(
                    self.domain,
                    name=event['activity_type']['name'],
                    version=event['activity_type']['version'])
                logger.info('Creating activity type {} in domain {}'.format(
                    activity_type.name,
                    self.domain.name))
                try:
                    activity_type.save()
                except swf.exceptions.AlreadyExistsError:
                    # Already registered: nothing more to do.
                    logger.info(
                        'Activity type {} in domain {} already exists'.format(
                            activity_type.name,
                            self.domain.name))
                # No future: the caller schedules the task again.
                return None
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            future._result = json.loads(event['result'])
        elif state == 'canceled':
            future._state = futures.CANCELLED
        elif state == 'failed':
            future._state = futures.FINISHED
            future._exception = exceptions.TaskFailed(
                reason=event['reason'],
                details=event['details'])
        elif state == 'timed_out':
            future._state = futures.FINISHED
            future._exception = exceptions.TimeoutError(
                event['timeout_type'],
                event['timeout_value'])

        return future

    def _get_future_from_child_workflow_event(self, event):
        """Maps a child workflow event to a Future with the corresponding
        state.

        :param event: child workflow event; only ``'state'`` and, once
            completed, ``'result'`` (a JSON string) are read.
        :returns: a future; states other than ``start_initiated``,
            ``started`` and ``completed`` leave it in its default state.

        """
        future = futures.Future()
        state = event['state']

        if state == 'start_initiated':
            future._state = futures.PENDING
        elif state == 'started':
            future._state = futures.RUNNING
        elif state == 'completed':
            future._state = futures.FINISHED
            future._result = json.loads(event['result'])

        return future

    def find_activity_event(self, task, history):
        """Return the activity event recorded for ``task.id``, or ``None``."""
        return history._activities.get(task.id)

    def find_child_workflow_event(self, task, history):
        """Return the child workflow event recorded for ``task.id``, or
        ``None``."""
        return history._child_workflows.get(task.id)

    def find_event(self, task, history):
        """
        Look up the event matching *task* in *history*.

        :returns: the event, or ``None`` when the task is not in the history.
        :raises TypeError: if *task* is neither an ``ActivityTask`` nor a
            ``WorkflowTask``.

        """
        if isinstance(task, ActivityTask):
            return self.find_activity_event(task, history)
        if isinstance(task, WorkflowTask):
            return self.find_child_workflow_event(task, history)
        # The exception used to be *returned*, which handed a truthy
        # exception object to the caller instead of signalling the error.
        raise TypeError('invalid type {} for task {}'.format(
            type(task), task))

    def make_activity_task(self, func, *args, **kwargs):
        """Wrap the activity *func* and its arguments in an ``ActivityTask``."""
        return ActivityTask(func, *args, **kwargs)

    def make_workflow_task(self, func, *args, **kwargs):
        """Wrap the workflow *func* and its arguments in a ``WorkflowTask``."""
        return WorkflowTask(func, *args, **kwargs)

    def resume_activity(self, task, event):
        """
        Build the future of an activity task from its event in the history.

        :returns: the future, or ``None`` when the task has to be scheduled
            (again).
        :raises exceptions.TaskException: when the task failed, has no retry
            left and its activity has ``raises_on_failure`` set.

        """
        future = self._get_future_from_activity_event(event)
        if not future:  # Task in history does not count.
            return None

        if not future.finished:  # Still pending or running...
            return future

        if future.exception is None:  # Result available!
            return future

        if event.get('retry', 0) == task.activity.retry:  # No more retry!
            if task.activity.raises_on_failure:
                raise exceptions.TaskException(task, future.exception)
            return future  # with future.exception set.

        # Otherwise retry the task by scheduling it again.
        return None  # means the task is not in SWF.

    def resume_child_workflow(self, task, event):
        """Build the future of a child workflow task from its event."""
        return self._get_future_from_child_workflow_event(event)

    def schedule_task(self, task):
        """
        Append the decisions that schedule *task* to the pending decisions.

        :raises exceptions.ExecutionBlocked: when the number of pending
            decisions reaches ``constants.MAX_DECISIONS - 1``; a timer
            decision is appended first.

        """
        decisions = task.schedule(self.domain)
        # ``decisions`` contains a single decision.
        self._decisions.extend(decisions)
        if len(self._decisions) == constants.MAX_DECISIONS - 1:
            # We add a timer to wake up the workflow immediately after
            # completing these decisions.
            timer = swf.models.decision.TimerDecision(
                'start',
                id='resume-after-{}'.format(task.id),
                start_to_fire_timeout='0')
            self._decisions.append(timer)
            raise exceptions.ExecutionBlocked()

    def resume(self, task, *args, **kwargs):
        """Resume the execution of a task.

        If the task was scheduled, returns a future that wraps its state,
        otherwise schedules it.

        """
        task.id = self._make_task_id(task)
        event = self.find_event(task, self._history)

        future = None
        if event:
            if event['type'] == 'activity':
                future = self.resume_activity(task, event)
            elif event['type'] == 'child_workflow':
                future = self.resume_child_workflow(task, event)

        if not future:
            self.schedule_task(task)
            future = futures.Future()  # return a pending future.

        return future

    def submit(self, func, *args, **kwargs):
        """Register a function and its arguments for asynchronous execution.

        ``*args`` and ``**kwargs`` must be serializable in JSON.

        :returns: a future. A fresh future is returned straight away when an
            argument's value is not available yet.
        :raises TypeError: if *func* is neither an ``Activity`` instance nor
            a ``Workflow`` subclass.

        """
        try:
            args = [executor.get_actual_value(arg) for arg in args]
            # ``items()`` instead of the Python 2-only ``iteritems()``.
            kwargs = {key: executor.get_actual_value(val)
                      for key, val in kwargs.items()}
        except exceptions.ExecutionBlocked:
            return futures.Future()

        try:
            if isinstance(func, Activity):
                task = self.make_activity_task(func, *args, **kwargs)
            elif issubclass(func, Workflow):
                task = self.make_workflow_task(func, *args, **kwargs)
            else:
                raise TypeError
        except TypeError:
            # Also reached when ``issubclass`` rejects a non-class *func*.
            raise TypeError('invalid type {} for {}'.format(
                type(func), func))

        return self.resume(task, *args, **kwargs)

    def map(self, callable, iterable):
        """Submit *callable* with each of the items in *iterable*.

        All items in *iterable* must be serializable in JSON.

        """
        iterable = executor.get_actual_value(iterable)
        return super(Executor, self).map(callable, iterable)

    def starmap(self, callable, iterable):
        """Resolve *iterable* to its actual value, then delegate to the base
        executor's ``starmap``."""
        iterable = executor.get_actual_value(iterable)
        return super(Executor, self).starmap(callable, iterable)

    def replay(self, history):
        """Executes the workflow from the start until it blocks.

        :param history: workflow history; its first event carries the
            workflow input.
        :returns: ``(decisions, {})``: the pending decisions when the
            execution blocks, or a single "fail" decision when a task or the
            replay itself raises.

        """
        self.reset()

        self._history = History(history)
        self._history.parse()

        workflow_started_event = history[0]
        workflow_input = workflow_started_event.input
        if workflow_input is None:
            workflow_input = {}
        args = workflow_input.get('args', ())
        kwargs = workflow_input.get('kwargs', {})

        try:
            result = self.run_workflow(*args, **kwargs)
        except exceptions.ExecutionBlocked:
            return self._decisions, {}
        except exceptions.TaskException as err:
            reason = 'Workflow execution error in task {}: "{}"'.format(
                err.task.name,
                getattr(err.exception, 'reason', repr(err.exception)))
            logger.exception(reason)

            details = getattr(err.exception, 'details', None)
            self.on_failure(reason, details)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(
                reason=reason,
                details=details)
            return [decision], {}
        except Exception as err:
            reason = 'Cannot replay the workflow {}({})'.format(
                err.__class__.__name__,
                err)
            logger.exception(reason)

            self.on_failure(reason)

            decision = swf.models.decision.WorkflowExecutionDecision()
            decision.fail(reason=reason)
            return [decision], {}

        # NOTE(review): nothing visible in this excerpt handles the success
        # path (``run_workflow`` returning normally); confirm that the code
        # consuming ``result`` still follows this point.