def test_two_recaptured_neq(self):
    """Rebuilt failures whose messages differ are unequal and do not match."""
    source = _captured_failure('Woot!')

    def rebuild(exception_str):
        # Re-create a failure from string pieces only (no live exception).
        return failure.Failure(exception_str=exception_str,
                               traceback_str=source.traceback_str,
                               exc_type_names=list(source))

    first = rebuild(source.exception_str)
    second = rebuild(source.exception_str.replace('Woot', 'w00t'))
    self.assertNotEqual(first, second)
    self.assertFalse(second.matches(first))
def analyze(old_state, new_state, event):
    # This reaction function is responsible for analyzing all nodes
    # that have finished executing and completing them and figuring
    # out what nodes are now ready to be ran (and then triggering those
    # nodes to be scheduled in the future); handles failures that
    # occur during this process safely...
    #
    # The old_state, new_state and event arguments are not read by this
    # function. It returns SCHEDULE when more atoms were queued onto
    # memory.next_up, WAIT when memory.not_done is still non-empty and
    # FINISH otherwise.
    next_up = set()
    # Drain every finished future that has been collected so far.
    while memory.done:
        fut = memory.done.pop()
        atom = fut.atom
        try:
            outcome, result = fut.result()
            retain = do_complete(atom, outcome, result)
            if isinstance(result, failure.Failure):
                if retain:
                    memory.failures.append(result)
                else:
                    # NOTE(harlowja): avoid making any
                    # intention request to storage unless we are
                    # sure we are in DEBUG enabled logging (otherwise
                    # we will call this all the time even when DEBUG
                    # is not enabled, which would be wasteful...)
                    if LOG.isEnabledFor(logging.DEBUG):
                        intention = self._storage.get_atom_intention(
                            atom.name)
                        LOG.debug(
                            "Discarding failure '%s' (in"
                            " response to outcome '%s') under"
                            " completion units request during"
                            " completion of atom '%s' (intention"
                            " is to %s)", result, outcome, atom, intention)
        except Exception:
            # Completion itself blew up; record the active exception so
            # that the final decision below will not schedule more work.
            memory.failures.append(failure.Failure())
            LOG.exception("Engine '%s' atom post-completion"
                          " failed", atom)
        else:
            # Only look for follow-up atoms when completion did not raise.
            try:
                more_work = set(iter_next_atoms(atom=atom))
            except Exception:
                memory.failures.append(failure.Failure())
                LOG.exception(
                    "Engine '%s' atom post-completion"
                    " next atom searching failed", atom)
            else:
                next_up.update(more_work)
    # Decide which event to emit next: any recorded failure (or an
    # engine that is no longer runnable) prevents further scheduling.
    if is_runnable() and next_up and not memory.failures:
        memory.next_up.update(next_up)
        return SCHEDULE
    elif memory.not_done:
        return WAIT
    else:
        return FINISH
def analyze(old_state, new_state, event):
    # Reaction that drains the finished futures, completes each one and
    # then works out which atoms have become ready to run; any failure
    # hit along the way is recorded (never raised) so that the state
    # machine can wind down cleanly. The three arguments are not used.
    ready = set()
    while memory.done:
        finished = memory.done.pop()
        # Complete first, so that the atom is known to be completed and
        # saved before any of its successors/predecessors are examined.
        complete_an_atom(finished)
        if memory.failures:
            # Something has already failed; do not look for more work.
            continue
        finished_atom = finished.atom
        try:
            discovered = set(iter_next_atoms(atom=finished_atom))
        except Exception:
            memory.failures.append(failure.Failure())
            LOG.exception("Engine '%s' atom post-completion"
                          " next atom searching failed", finished_atom)
        else:
            ready.update(discovered)
    if is_runnable() and ready and not memory.failures:
        memory.next_up.update(ready)
        return SCHEDULE
    if memory.not_done:
        return WAIT
    return FINISH
def test_no_type_names(self):
    """A failure rebuilt without type names iterates empty and formats bare."""
    captured = _captured_failure('Woot!')
    rebuilt = failure.Failure(
        exception_str=captured.exception_str,
        traceback_str=captured.traceback_str,
        exc_type_names=[],
    )
    self.assertEqual([], list(rebuilt))
    self.assertEqual("Failure: Woot!", rebuilt.pformat())
def test_task_detail_with_failure(self):
    """A failure attached to a task detail survives a save/load round trip."""
    book_uuid = uuidutils.generate_uuid()
    book = logbook.LogBook(name='lb-%s' % (book_uuid), uuid=book_uuid)
    flow_detail = logbook.FlowDetail('test', uuid=uuidutils.generate_uuid())
    book.add(flow_detail)
    task_detail = logbook.TaskDetail("detail-1",
                                     uuid=uuidutils.generate_uuid())

    # Attach a failure captured from a real, currently handled exception.
    try:
        raise RuntimeError('Woot!')
    except Exception:
        task_detail.failure = failure.Failure()

    flow_detail.add(task_detail)

    # Persist everything...
    with contextlib.closing(self._get_connection()) as conn:
        conn.save_logbook(book)
        conn.update_flow_details(flow_detail)
        conn.update_atom_details(task_detail)

    # ...then read it back over a fresh connection.
    with contextlib.closing(self._get_connection()) as conn:
        loaded_book = conn.get_logbook(book_uuid)
    loaded_flow = loaded_book.find(flow_detail.uuid)
    loaded_task = loaded_flow.find(task_detail.uuid)

    loaded_failure = loaded_task.failure
    self.assertEqual(loaded_failure.exception_str, 'Woot!')
    self.assertIs(loaded_failure.check(RuntimeError), RuntimeError)
    self.assertEqual(loaded_failure.traceback_str,
                     task_detail.failure.traceback_str)
    self.assertIsInstance(loaded_task, logbook.TaskDetail)
def complete_an_atom(fut):
    # Completes one atom: its result is handed to do_complete() and, when
    # that result is a failure, to do_complete_failure() which decides if
    # the failure should be retained. Any exception raised while doing
    # this is recorded in memory.failures and logged, never propagated.
    finished_atom = fut.atom
    try:
        outcome, result = fut.result()
        do_complete(finished_atom, outcome, result)
        if not isinstance(result, failure.Failure):
            return
        if do_complete_failure(finished_atom, outcome, result):
            memory.failures.append(result)
        elif LOG.isEnabledFor(logging.DEBUG):
            # NOTE(harlowja): the intention lookup is only done when DEBUG
            # logging is active, otherwise storage would be queried on
            # every discarded failure for no benefit.
            intention = get_atom_intention(finished_atom.name)
            LOG.debug("Discarding failure '%s' (in response"
                      " to outcome '%s') under completion"
                      " units request during completion of"
                      " atom '%s' (intention is to %s)",
                      result, outcome, finished_atom, intention)
    except Exception:
        memory.failures.append(failure.Failure())
        LOG.exception("Engine '%s' atom post-completion failed",
                      finished_atom)
def test_recaptured_not_eq(self):
    """A rebuilt failure is not equal to its source, yet still matches it."""
    source = _captured_failure('Woot!')
    rebuild_kwargs = {
        'exception_str': source.exception_str,
        'traceback_str': source.traceback_str,
        'exc_type_names': list(source),
    }
    rebuilt = failure.Failure(**rebuild_kwargs)
    # Both operators are exercised explicitly on purpose.
    self.assertFalse(rebuilt == source)
    self.assertTrue(rebuilt != source)
    self.assertTrue(rebuilt.matches(source))
def test_no_capture_exc_args(self):
    """Serializing with ``include_args=False`` leaves ``exc_args`` empty."""
    source = _captured_failure(Exception("I am not valid JSON"))
    rebuilt = failure.Failure(
        exception_str=source.exception_str,
        traceback_str=source.traceback_str,
        exc_type_names=list(source),
        exc_args=list(source.exception_args),
    )
    serialized = rebuilt.to_dict(include_args=False)
    self.assertNotEqual(rebuilt.exception_args, serialized['exc_args'])
    self.assertEqual(serialized['exc_args'], ())
def test_failure_copy_recaptured(self):
    """Copying a rebuilt failure gives a distinct, equal, matching object."""
    source = _captured_failure('Woot!')
    rebuilt = failure.Failure(
        exception_str=source.exception_str,
        traceback_str=source.traceback_str,
        exc_type_names=list(source),
    )
    duplicate = rebuilt.copy()
    self.assertIsNot(rebuilt, duplicate)
    self.assertEqual(rebuilt, duplicate)
    self.assertFalse(rebuilt != duplicate)
    self.assertTrue(rebuilt.matches(duplicate))
def wrap_all_failures():
    """Re-raise any exception from the wrapped code as a WrappedFailure.

    Handy when several different failures are expected and the caller
    would rather deal with one exception type: whatever ``Exception`` is
    raised at the yield point is captured as a failure object and raised
    again inside a single-element ``WrappedFailure``.
    """
    try:
        yield
    except Exception:
        captured = failure.Failure()
        raise exceptions.WrappedFailure([captured])
def test_flattening(self):
    """Wrapping a captured WrappedFailure flattens its inner failures."""
    inner_a = _captured_failure('Wrap me')
    inner_b = _captured_failure('Wrap me, too')
    sibling = _captured_failure('Woot!')
    try:
        raise exceptions.WrappedFailure([inner_a, inner_b])
    except Exception:
        nested = failure.Failure()

    wrapper = exceptions.WrappedFailure([nested, sibling])
    self.assertEqual([inner_a, inner_b, sibling], list(wrapper))
def test_causes_supress_context(self):
    """Re-raising with ``from None`` leaves the captured failure cause-less."""
    messages = ["Still still not working",
                "Still not working",
                "Not working"]
    captured = None
    try:
        try:
            self._raise_many(messages)
        except RuntimeError as err:
            # Explicitly drop the cause/context chain.
            six.raise_from(err, None)
    except RuntimeError:
        captured = failure.Failure()
    self.assertIsNotNone(captured)
    self.assertEqual([], list(captured.causes))
def _execute_task(task, arguments, progress_callback=None):
    """Run a task's pre_execute/execute/post_execute sequence.

    :param task: the task object to run
    :param arguments: keyword arguments passed to ``task.execute``
    :param progress_callback: callback handed to the notifier
                              registration that wraps the run
    :returns: ``(EXECUTED, result)`` where result is either what
              ``execute`` returned or a failure object capturing the
              exception raised by ``pre_execute``/``execute``
    """
    progress_watch = notifier.register_deregister(
        task.notifier, _UPDATE_PROGRESS, callback=progress_callback)
    with progress_watch:
        try:
            task.pre_execute()
            outcome = task.execute(**arguments)
        except Exception:
            # NOTE(imelnikov): the active exception is turned into a
            # Failure object and returned instead of being raised.
            outcome = failure.Failure()
        finally:
            # Runs on success and on failure alike.
            task.post_execute()
    return EXECUTED, outcome
def test_connect(failures):
    # Sanity check that a connection can be opened (and closed) right now.
    #
    # NOTE(harlowja): a single successful connect says nothing about
    # whether later connects will work, so treat this as a smoke test
    # and not as any kind of connection guarantee.
    #
    # Returns False (after appending a failure object to ``failures``)
    # only for an operational error recognised as a connection error;
    # every other handled outcome returns True.
    try:
        with contextlib.closing(self._engine.connect()):
            pass
    except sa_exc.OperationalError as error:
        message = six.text_type(error.args[0])
        if _is_db_connection_error(message):
            failures.append(failure.Failure())
            return False
    return True
def _revert_task(task, arguments, result, failures, progress_callback=None):
    """Run a task's pre_revert/revert/post_revert sequence.

    :param task: the task object to revert
    :param arguments: base keyword arguments for ``task.revert``; copied,
                      so the caller's mapping is left untouched
    :param result: value passed to revert under ``REVERT_RESULT``
    :param failures: value passed to revert under ``REVERT_FLOW_FAILURES``
    :param progress_callback: callback handed to the notifier
                              registration that wraps the run
    :returns: ``(REVERTED, result)`` where result is either what
              ``revert`` returned or a failure object capturing the
              exception raised by ``pre_revert``/``revert``
    """
    revert_kwargs = arguments.copy()
    revert_kwargs[task_atom.REVERT_RESULT] = result
    revert_kwargs[task_atom.REVERT_FLOW_FAILURES] = failures
    progress_watch = notifier.register_deregister(
        task.notifier, _UPDATE_PROGRESS, callback=progress_callback)
    with progress_watch:
        try:
            task.pre_revert()
            outcome = task.revert(**revert_kwargs)
        except Exception:
            # NOTE(imelnikov): the active exception is turned into a
            # Failure object and returned instead of being raised.
            outcome = failure.Failure()
        finally:
            # Runs on success and on failure alike.
            task.post_revert()
    return REVERTED, outcome
def capture_failure():
    """Snapshot the exception being handled and yield it as a failure.

    The current exception information is read immediately and a failure
    object built from it is yielded to the caller; a ``RuntimeError`` is
    raised when no exception is currently being handled.

    Taking the snapshot up front matters because the exception context
    can be cleared before it would otherwise be saved (leaving ``None``
    to be stored after the handler has run). This can happen when
    eventlet switches greenthreads, or when code running inside an
    exception handler raises and catches another exception. Saving the
    state first, yielding the failure and only then running other code
    avoids both problems.

    For example::

        >>> from taskflow.utils import misc
        >>>
        >>> def cleanup():
        ...     pass
        ...
        >>>
        >>> def save_failure(f):
        ...     print("Saving %s" % f)
        ...
        >>>
        >>> try:
        ...     raise IOError("Broken")
        ... except Exception:
        ...     with misc.capture_failure() as fail:
        ...         print("Activating cleanup")
        ...         cleanup()
        ...         save_failure(fail)
        ...
        Activating cleanup
        Saving Failure: IOError: Broken
    """
    active = sys.exc_info()
    if not any(active):
        raise RuntimeError("No active exception is being handled")
    yield failure.Failure(exc_info=active)
def complete_an_atom(fut):
    # This completes a single atom saving its result in
    # storage and preparing whatever predecessors or successors will
    # now be ready to execute (or revert or retry...); it also
    # handles failures that occur during this process safely...
    #
    # Returns WAS_CANCELLED when the future was cancelled,
    # FAILED_COMPLETING when completion raised (the failure is recorded
    # in memory.failures) and SUCCESSFULLY_COMPLETED otherwise.
    atom = fut.atom
    try:
        outcome, result = fut.result()
        do_complete(atom, outcome, result)
        if isinstance(result, failure.Failure):
            retain = do_complete_failure(atom, outcome, result)
            if retain:
                memory.failures.append(result)
            else:
                # NOTE(harlowja): avoid making any intention request
                # to storage unless we are sure we are in DEBUG
                # enabled logging (otherwise we will call this all
                # the time even when DEBUG is not enabled, which
                # would be wasteful...)
                if LOG.isEnabledFor(logging.DEBUG):
                    intention = get_atom_intention(atom.name)
                    LOG.debug(
                        "Discarding failure '%s' (in response"
                        " to outcome '%s') under completion"
                        " units request during completion of"
                        " atom '%s' (intention is to %s)",
                        result, outcome, atom, intention)
                # Discards are counted whether or not DEBUG is enabled.
                if gather_statistics:
                    statistics['discarded_failures'] += 1
        # Reached only when nothing above raised.
        if gather_statistics:
            statistics['completed'] += 1
    except futures.CancelledError:
        # Well it got cancelled, skip doing anything
        # and move on; at a further time it will be resumed
        # and something should be done with it to get it
        # going again.
        return WAS_CANCELLED
    except Exception:
        memory.failures.append(failure.Failure())
        LOG.exception("Engine '%s' atom post-completion"
                      " failed", atom)
        return FAILED_COMPLETING
    else:
        return SUCCESSFULLY_COMPLETED
def schedule(self, nodes):
    """Schedule each given node for *future* completion.

    One future is requested (via ``self._schedule_node``) per node, in
    iteration order.

    :param nodes: iterable of nodes to schedule
    :returns: a ``(futures, failures)`` tuple; ``futures`` is the set of
              futures created so far and ``failures`` is a list that is
              empty on success or holds a single failure object for the
              node that could not be scheduled
    """
    scheduled = set()
    for node in nodes:
        try:
            scheduled.add(self._schedule_node(node))
        except Exception:
            # Stop right away: one node failing to schedule means the
            # run should exit early, so no further nodes are attempted.
            return scheduled, [failure.Failure()]
    return scheduled, []
def test_causes(self):
    """Chained exceptions show up as progressively shorter cause lists."""
    outermost = None
    try:
        self._raise_many(["Still still not working",
                          "Still not working",
                          "Not working"])
    except RuntimeError:
        outermost = failure.Failure()

    # Outermost failure: two causes below it.
    self.assertIsNotNone(outermost)
    self.assertEqual(2, len(outermost.causes))
    self.assertEqual("Still not working",
                     outermost.causes[0].exception_str)
    self.assertEqual("Not working", outermost.causes[1].exception_str)

    # One level down: a single remaining cause.
    middle = outermost.causes[0]
    self.assertEqual(1, len(middle.causes))
    self.assertEqual("Not working", middle.causes[0].exception_str)

    # Root of the chain: nothing left.
    innermost = middle.causes[0]
    self.assertEqual(0, len(innermost.causes))
def analyze(old_state, new_state, event):
    # This reaction function is responsible for analyzing all nodes
    # that have finished executing/reverting and figuring
    # out what nodes are now ready to be ran (and then triggering those
    # nodes to be scheduled in the future); handles failures that
    # occur during this process safely...
    #
    # The old_state, new_state and event arguments are not read by this
    # function. It returns SCHEDULE when more atoms were queued onto
    # memory.next_up, WAIT when memory.not_done is still non-empty and
    # FINISH otherwise.
    next_up = set()
    # The storage write lock is held while the finished futures are
    # drained.
    with self._storage.lock.write_lock():
        while memory.done:
            fut = memory.done.pop()
            # Force it to be completed so that we can ensure that
            # before we iterate over any successors or predecessors
            # that we know it has been completed and saved and so on...
            completion_status = complete_an_atom(fut)
            # Cancelled atoms and any recorded failure stop the search
            # for follow-up work.
            if (not memory.failures and
                    completion_status != WAS_CANCELLED):
                atom = fut.atom
                try:
                    more_work = set(iter_next_atoms(atom=atom))
                except Exception:
                    memory.failures.append(failure.Failure())
                    LOG.exception(
                        "Engine '%s' atom post-completion"
                        " next atom searching failed", atom)
                else:
                    next_up.update(more_work)
    current_flow_state = self._storage.get_flow_state()
    if (current_flow_state == st.RUNNING and
            next_up and not memory.failures):
        memory.next_up.update(next_up)
        return SCHEDULE
    elif memory.not_done:
        # While suspending, outstanding futures are asked to cancel
        # before waiting on them.
        if current_flow_state == st.SUSPENDING:
            memory.cancel_futures()
        return WAIT
    else:
        return FINISH
def setUp(self):
    """Prepare ``self.fail_obj``: a failure rebuilt from string parts only."""
    super(ReCreatedFailureTestCase, self).setUp()
    source = _captured_failure('Woot!')
    rebuild_kwargs = {
        'exception_str': source.exception_str,
        'traceback_str': source.traceback_str,
        'exc_type_names': list(source),
    }
    self.fail_obj = failure.Failure(**rebuild_kwargs)
def _captured_failure(msg):
    """Raise ``RuntimeError(msg)`` and return the failure captured from it."""
    try:
        raise RuntimeError(msg)
    except Exception:
        captured = failure.Failure()
    return captured
def run_iter(self, timeout=None):
    """Runs the engine using iteration (or die trying).

    :param timeout: timeout to wait for any atoms to complete (this
                    timeout will be used during the waiting period that
                    occurs after the waiting state is yielded when
                    unfinished atoms are being waited on).

    Instead of running to completion in a blocking manner, this will
    return a generator which will yield back the various states that the
    engine is going through (and can be used to run multiple engines at
    once using a generator per engine). The iterator returned also
    responds to the ``send()`` method from :pep:`0342` and will attempt to
    suspend itself if a truthy value is sent in (the suspend may be
    delayed until all active atoms have finished).

    NOTE(harlowja): using the ``run_iter`` method will **not** retain the
    engine lock while executing so the user should ensure that there is
    only one entity using a returned engine iterator (one per engine) at a
    given time.
    """
    self.compile()
    self.prepare()
    self.validate()
    # Keep track of the last X state changes, which if a failure happens
    # are quite useful to log (and the performance of tracking this
    # should be negligible).
    last_transitions = collections.deque(
        maxlen=max(1, self.MAX_MACHINE_STATES_RETAINED))
    with _start_stop(self._task_executor, self._retry_executor):
        self._change_state(states.RUNNING)
        # A stop watch is only created when statistics are being gathered;
        # ``w`` staying None tells the finally block to skip timing.
        if self._gather_statistics:
            self._statistics.clear()
            w = timeutils.StopWatch()
            w.start()
        else:
            w = None
        try:
            # Set once the consumer closed the generator or threw an
            # exception into it; after that no more states are yielded
            # but the machine is still driven until it stops.
            closed = False
            machine, memory = self._runtime.builder.build(
                self._statistics, timeout=timeout,
                gather_statistics=self._gather_statistics)
            r = runners.FiniteRunner(machine)
            for transition in r.run_iter(builder.START):
                last_transitions.append(transition)
                _prior_state, new_state = transition
                # NOTE(harlowja): skip over meta-states
                if new_state in builder.META_STATES:
                    continue
                if new_state == states.FAILURE:
                    failure.Failure.reraise_if_any(memory.failures)
                if closed:
                    continue
                try:
                    # A value passed via send() shows up here.
                    try_suspend = yield new_state
                except GeneratorExit:
                    # The generator was closed, attempt to suspend and
                    # continue looping until we have cleanly closed up
                    # shop...
                    closed = True
                    self.suspend()
                except Exception:
                    # Capture the failure, and ensure that the
                    # machine will notice that something externally
                    # has sent an exception in and that it should
                    # finish up and reraise.
                    memory.failures.append(failure.Failure())
                    closed = True
                else:
                    if try_suspend:
                        self.suspend()
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.exception(
                    "Engine execution has failed, something"
                    " bad must of happened (last"
                    " %s machine transitions were %s)",
                    last_transitions.maxlen, list(last_transitions))
                self._change_state(states.FAILURE)
        else:
            # The machine stopped without raising: adopt the last state
            # it reached (unless ignorable) and re-raise any stored
            # failures when that state calls for it.
            if last_transitions:
                _prior_state, new_state = last_transitions[-1]
                if new_state not in self.IGNORABLE_STATES:
                    self._change_state(new_state)
                    if new_state not in self.NO_RERAISING_STATES:
                        failures = self.storage.get_failures()
                        more_failures = self.storage.get_revert_failures()
                        fails = itertools.chain(
                            six.itervalues(failures),
                            six.itervalues(more_failures))
                        failure.Failure.reraise_if_any(fails)
        finally:
            if w is not None:
                w.stop()
                self._statistics['active_for'] = w.elapsed()
def _execute_retry(retry, arguments):
    """Call ``retry.execute`` and report how it went.

    :param retry: the retry object to execute
    :param arguments: keyword arguments passed to ``retry.execute``
    :returns: ``(ex.EXECUTED, result)`` where result is the value
              returned by ``execute`` or a failure object capturing
              the exception it raised
    """
    try:
        outcome = retry.execute(**arguments)
    except Exception:
        outcome = failure.Failure()
    return ex.EXECUTED, outcome
def _revert_retry(retry, arguments):
    """Call ``retry.revert`` and report how it went.

    :param retry: the retry object to revert
    :param arguments: keyword arguments passed to ``retry.revert``
    :returns: ``(ex.REVERTED, result)`` where result is the value
              returned by ``revert`` or a failure object capturing
              the exception it raised
    """
    try:
        outcome = retry.revert(**arguments)
    except Exception:
        outcome = failure.Failure()
    return ex.REVERTED, outcome