def test_task_on_failure(self):
    shp_export_task = ShpExportTask()
    celery_uid = str(uuid.uuid4())
    # assume task is running
    running_task = ExportTask.objects.create(
        run=self.run, celery_uid=celery_uid,
        status='RUNNING', name=shp_export_task.name)
    exc = None
    exc_info = None
    try:
        raise ValueError('some unexpected error')
    except ValueError as e:
        exc = e
        exc_info = sys.exc_info()
    einfo = ExceptionInfo(exc_info=exc_info)
    shp_export_task.on_failure(exc, task_id=celery_uid, einfo=einfo,
                               args={}, kwargs={'run_uid': str(self.run.uid)})
    task = ExportTask.objects.get(celery_uid=celery_uid)
    self.assertIsNotNone(task)
    exception = task.exceptions.all()[0]
    exc_info = cPickle.loads(str(exception.exception)).exc_info
    error_type, msg, tb = exc_info[0], exc_info[1], exc_info[2]
    self.assertEquals(error_type, ValueError)
    self.assertEquals('some unexpected error', str(msg))

def test_send_email(self):
    from celery.loaders import current_loader
    loader = current_loader()
    old_mail_admins = loader.mail_admins
    old_enable_mails = mytask.send_error_emails
    mail_sent = [False]

    def mock_mail_admins(*args, **kwargs):
        mail_sent[0] = True

    loader.mail_admins = mock_mail_admins
    mytask.send_error_emails = True
    try:
        tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
        try:
            raise KeyError("moofoobar")
        except:
            einfo = ExceptionInfo(sys.exc_info())

        tw.on_failure(einfo)
        self.assertTrue(mail_sent[0])

        mail_sent[0] = False
        mytask.send_error_emails = False
        tw.on_failure(einfo)
        self.assertFalse(mail_sent[0])
    finally:
        loader.mail_admins = old_mail_admins
        mytask.send_error_emails = old_enable_mails

def test_send_email(self):
    from celery import conf
    from celery.worker import job
    old_mail_admins = job.mail_admins
    old_enable_mails = conf.CELERY_SEND_TASK_ERROR_EMAILS
    mail_sent = [False]

    def mock_mail_admins(*args, **kwargs):
        mail_sent[0] = True

    job.mail_admins = mock_mail_admins
    conf.CELERY_SEND_TASK_ERROR_EMAILS = True
    try:
        tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
        try:
            raise KeyError("foo")
        except KeyError:
            einfo = ExceptionInfo(sys.exc_info())

        tw.on_failure(einfo)
        self.assertTrue(mail_sent[0])

        mail_sent[0] = False
        conf.CELERY_SEND_TASK_ERROR_EMAILS = False
        tw.on_failure(einfo)
        self.assertFalse(mail_sent[0])
    finally:
        job.mail_admins = old_mail_admins
        conf.CELERY_SEND_TASK_ERROR_EMAILS = old_enable_mails

def test_mark_as_failure(self):
    tb1 = self.create_backend()
    tb2 = self.create_backend()
    tid3 = gen_unique_id()
    try:
        raise KeyError("foo")
    except KeyError, exception:
        einfo = ExceptionInfo(sys.exc_info())
        tb1.mark_as_failure(tid3, exception, traceback=einfo.traceback)
    self.assertEqual(tb2.get_status(tid3), states.FAILURE)
    self.assertIsInstance(tb2.get_result(tid3), KeyError)
    self.assertEqual(tb2.get_traceback(tid3), einfo.traceback)

def test_on_retry(self): tw = TaskRequest(mytask.name, uuid(), [1], {"f": "x"}) tw.eventer = MockEventDispatcher() try: raise RetryTaskError("foo", KeyError("moofoobar")) except: einfo = ExceptionInfo(sys.exc_info()) tw.on_failure(einfo) self.assertIn("task-retried", tw.eventer.sent)
def handle_failure(self, exc, type_, tb, strtb):
    """Handle exception."""
    einfo = ExceptionInfo((type_, exc, tb))
    self.task.on_failure(exc, self.task_id,
                         self.args, self.kwargs, einfo=einfo)
    return einfo

def test_on_success_when_failure(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {'f': 'x'})
    tw.time_start = 1
    tw.on_failure = Mock()
    try:
        raise KeyError('foo')
    except Exception:
        tw.on_success(ExceptionInfo())
        self.assertTrue(tw.on_failure.called)

def test_mark_as_failure(self):
    cb = CacheBackend()
    einfo = None
    tid3 = gen_unique_id()
    try:
        raise KeyError('foo')
    except KeyError, exception:
        einfo = ExceptionInfo(sys.exc_info())
        cb.mark_as_failure(tid3, exception, traceback=einfo.traceback)
    self.assertEqual(cb.get_status(tid3), states.FAILURE)
    self.assertIsInstance(cb.get_result(tid3), KeyError)

def test_on_success_BaseException(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {'f': 'x'})
    tw.time_start = 1
    with self.assertRaises(SystemExit):
        try:
            raise SystemExit()
        except SystemExit:
            tw.on_success(ExceptionInfo())
        else:
            assert False

def handle_after_return(self, status, retval, type_, tb, strtb):
    einfo = None
    if status in states.EXCEPTION_STATES:
        # exc_info triples are ordered (type, value, traceback)
        einfo = ExceptionInfo((type_, retval, tb))
    self.task.after_return(status, retval, self.task_id,
                           self.args, self.kwargs, einfo=einfo)

def test_log_error_when_Ignore(self):
    einfo = None
    try:
        raise Ignore()
    except Ignore:
        einfo = ExceptionInfo(internal=True)
    self.assertIsNotNone(einfo)
    req = self.get_request(self.add.s(2, 2))
    req._log_error(einfo)
    req.on_ack.assert_called_with(req_logger, req.connection_errors)

def test_log_error_propagates_MemoryError(self):
    einfo = None
    try:
        raise MemoryError()
    except MemoryError:
        einfo = ExceptionInfo(internal=True)
    self.assertIsNotNone(einfo)
    req = self.get_request(self.add.s(2, 2))
    with self.assertRaises(MemoryError):
        req._log_error(einfo)

def handle_failure(self, exc, type_, tb, strtb):
    """Handle exception."""
    einfo = ExceptionInfo((type_, exc, tb))
    self.task.on_failure(exc, self.task_id,
                         self.args, self.kwargs, einfo=einfo)
    signals.task_failure.send(sender=self.task, task_id=self.task_id,
                              exception=exc, args=self.args,
                              kwargs=self.kwargs, traceback=tb,
                              einfo=einfo)
    return einfo

def handle_retry(self, exc, type_, tb, strtb):
    """Handle retry exception."""
    # Create a simpler version of the RetryTaskError that stringifies
    # the original exception instead of including the exception instance.
    # This is for reporting the retry in logs, email etc, while
    # guaranteeing pickleability.
    message, orig_exc = exc.args
    expanded_msg = "%s: %s" % (message, str(orig_exc))
    einfo = ExceptionInfo((type_, type_(expanded_msg, None), tb))
    self.task.on_retry(exc, self.task_id, self.args, self.kwargs, einfo)
    return einfo

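The comment block above states the motivation: a RetryTaskError may wrap an arbitrary original exception, and pickling that instance can fail, so the handler rebuilds the error around a plain string. A small illustration of the failure mode it guards against; every name here is made up for the example:

import pickle

# Illustrative only: an exception whose args include an open file handle,
# which the pickle module refuses to serialize.
class UnpicklableError(Exception):
    def __init__(self, msg, stream):
        super(UnpicklableError, self).__init__(msg, stream)

err = UnpicklableError('boom', open('/dev/null'))
try:
    pickle.dumps(err)   # TypeError: cannot pickle the file object in args
except TypeError:
    pass

# The handler's workaround: collapse the original exception to a string,
# which is always picklable.
safe_msg = '%s: %s' % ('Retry in 1s', str(err))
pickle.dumps(ValueError(safe_msg))  # fine
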
def execute_safe(self, *args, **kwargs):
    """Same as :meth:`execute`, but catches errors."""
    try:
        return self.execute(*args, **kwargs)
    except Exception, exc:
        _type, _value, _tb = sys.exc_info()
        _value = self.task.backend.prepare_exception(exc)
        exc_info = ExceptionInfo((_type, _value, _tb))
        warnings.warn("Exception outside body: %s: %s\n%s" % tuple(
            map(str, (exc.__class__, exc, exc_info.traceback))))
        return exc_info

def report_internal_error(task, exc):
    _type, _value, _tb = sys.exc_info()
    try:
        _value = task.backend.prepare_exception(exc)
        exc_info = ExceptionInfo((_type, _value, _tb), internal=True)
        warn(RuntimeWarning(
            'Exception raised outside body: {0!r}:\n{1}'.format(
                exc, exc_info.traceback)))
        return exc_info
    finally:
        del _tb

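The `finally: del _tb` above (and the `del tb` in the retry handler further down) is deliberate: a traceback object references the frame that is handling the exception, so binding it to a local in that frame creates a reference cycle that delays collection. A minimal sketch of the same hygiene, using only the standard library:

import sys

def capture_exc_info():
    """Return the (type, value, traceback) triple of the active exception.

    The traceback references this frame, and the ``tb`` local in this
    frame references the traceback back, so delete the local before
    returning (as the ``sys.exc_info`` docs recommend).
    """
    type_, value, tb = sys.exc_info()
    try:
        return type_, value, tb
    finally:
        del tb  # break the frame <-> traceback reference cycle
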
def test_mark_as_failure(self):
    tb1 = self.create_backend()
    tb2 = self.create_backend()
    tid3 = uuid()
    try:
        raise KeyError("foo")
    except KeyError, exception:
        einfo = ExceptionInfo()
        tb1.mark_as_failure(tid3, exception, traceback=einfo.traceback)
    self.assertEqual(tb2.get_status(tid3), states.FAILURE)
    self.assertIsInstance(tb2.get_result(tid3), KeyError)
    self.assertEqual(tb2.get_traceback(tid3), einfo.traceback)

def test_on_failure_acks_late(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {"f": "x"})
    tw.time_start = 1
    mytask.acks_late = True
    try:
        try:
            raise KeyError("foo")
        except KeyError:
            exc_info = ExceptionInfo(sys.exc_info())
        tw.on_failure(exc_info)
        self.assertTrue(tw.acknowledged)
    finally:
        mytask.acks_late = False

def test_exception_info(self):
    try:
        raise LookupError("The quick brown fox jumps...")
    except Exception:
        einfo = ExceptionInfo()
        self.assertEqual(str(einfo), einfo.traceback)
        self.assertIsInstance(einfo.exception, LookupError)
        self.assertTupleEqual(einfo.exception.args,
                              ("The quick brown fox jumps...", ))
        self.assertTrue(einfo.traceback)

        r = repr(einfo)
        self.assertTrue(r)

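The snippets so far all rely on the same construction pattern: ExceptionInfo is built inside an `except` block, either from an explicit `sys.exc_info()` triple or, in later versions, with no argument, in which case it captures the current exception itself. A minimal sketch of that pattern; the `billiard.einfo` import path is an assumption (older Celery releases exposed the class from `celery.datastructures`):

import sys

# Import path is an assumption; adjust to the Celery/billiard version in use.
from billiard.einfo import ExceptionInfo

try:
    raise LookupError('The quick brown fox jumps...')
except LookupError:
    einfo = ExceptionInfo()                  # captures sys.exc_info() itself
    # einfo = ExceptionInfo(sys.exc_info())  # older, equivalent spelling

assert isinstance(einfo.exception, LookupError)
assert str(einfo) == einfo.traceback  # str() yields the formatted traceback
assert einfo.internal is False        # internal=True marks worker-internal
                                      # errors, as in the tests above
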
def OpenSSH(backend, log, server, cmds, run_async=False):
    """
    Executes cmds on a remote server over SSH, reusing the connection
    for maximum performance.
    """
    script = '\n'.join(cmds)
    script = script.replace('\r', '')
    log.state = log.STARTED
    log.script = '\n'.join((log.script, script))
    log.save(update_fields=('script', 'state', 'updated_at'))
    if not cmds:
        return
    try:
        ssh = sshrun(server.get_address(), script,
                     executable=backend.script_executable,
                     persist=True, run_async=run_async, silent=True)
        logger.debug('%s running on %s' % (backend, server))
        if run_async:
            for state in ssh:
                log.stdout += state.stdout.decode('utf8')
                log.stderr += state.stderr.decode('utf8')
                log.save(update_fields=('stdout', 'stderr', 'updated_at'))
            exit_code = state.exit_code
        else:
            log.stdout += ssh.stdout.decode('utf8')
            log.stderr += ssh.stderr.decode('utf8')
            exit_code = ssh.exit_code
        if not log.exit_code:
            log.exit_code = exit_code
        if exit_code == 255 and log.stderr.startswith('ssh: connect to host'):
            log.state = log.TIMEOUT
        else:
            log.state = log.SUCCESS if exit_code == 0 else log.FAILURE
        logger.debug('%s execution state on %s is %s'
                     % (backend, server, log.state))
        log.save()
    except:
        log.state = log.ERROR
        log.traceback = ExceptionInfo(sys.exc_info()).traceback
        logger.error('Exception while executing %s on %s' % (backend, server))
        logger.debug(log.traceback)
        log.save()
    finally:
        if log.state == log.STARTED:
            log.state = log.ABORTED
            log.save(update_fields=('state', 'updated_at'))

def handle_retry(self, task, store_errors=True):
    """Handle retry exception."""
    # Create a simpler version of the RetryTaskError that stringifies
    # the original exception instead of including the exception instance.
    # This is for reporting the retry in logs, email etc, while
    # guaranteeing pickleability.
    req = task.request
    exc, type_, tb = self.retval, self.exc_type, self.tb
    message, orig_exc = self.retval.args
    if store_errors:
        task.backend.mark_as_retry(req.id, orig_exc, self.strtb)
    expanded_msg = "%s: %s" % (message, str(orig_exc))
    einfo = ExceptionInfo((type_, type_(expanded_msg, None), tb))
    task.on_retry(exc, req.id, req.args, req.kwargs, einfo)
    return einfo

def handle_retry(self, task, store_errors=True):
    """Handle retry exception."""
    # The exception raised is the RetryTaskError semi-predicate,
    # and its `exc` attribute is the original exception raised (if any).
    req = task.request
    type_, _, tb = sys.exc_info()
    try:
        pred = self.retval
        einfo = ExceptionInfo((type_, pred, tb))
        if store_errors:
            task.backend.mark_as_retry(req.id, pred.exc, einfo.traceback)
        task.on_retry(pred.exc, req.id, req.args, req.kwargs, einfo)
        return einfo
    finally:
        del tb

def run_instance(instance_id):
    from .models import Instance
    instance = Instance.objects.get(pk=instance_id)
    if not instance.execution.is_active:
        return 'no active'
    instance.state = Instance.STARTED
    instance.task_id = run_instance.request.id
    instance.last_try = now()
    # Make sure the feedback fields are clean (re-executing an instance)
    instance.exit_code = None
    instance.stderr = ''
    instance.stdout = ''
    instance.traceback = ''
    instance.save()
    try:
        # SSH connection
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        addr = str(instance.node.mgmt_net.addr)
        try:
            ssh.connect(addr, username='******',
                        key_filename=MAINTENANCE_KEY_PATH)
        except socket.error:
            instance.state = Instance.TIMEOUT
            instance.save()
            return 'socket error'
        channel = ssh.get_transport().open_session()
        channel.exec_command(instance.script.replace('\r', ''))
        while True:
            # Non-blocking reads are the secret ingredient in the async sauce
            select.select([channel], [], [])
            if channel.recv_ready():
                instance.stdout += channel.recv(1024)
            if channel.recv_stderr_ready():
                instance.stderr += channel.recv_stderr(1024)
            instance.save()
            if channel.exit_status_ready():
                break
        instance.exit_code = exit_code = channel.recv_exit_status()
        instance.state = Instance.SUCCESS if exit_code == 0 else Instance.FAILURE
        channel.close()
        ssh.close()
        instance.save()
    except:
        instance.state = Instance.ERROR
        instance.traceback = ExceptionInfo(sys.exc_info()).traceback
        instance.save()

def _join_exited_workers(self, shutdown=False):
    """Cleanup after any worker processes which have exited due to
    reaching their specified lifetime.  Returns True if any workers were
    cleaned up.
    """
    now = None
    # The worker may have published a result before being terminated,
    # but we have no way to accurately tell if it did.  So we wait for
    # _lost_worker_timeout seconds before we mark the job with
    # WorkerLostError.
    for job in [job for job in self._cache.values()
                if not job.ready() and job._worker_lost]:
        now = now or time.time()
        if now - job._worker_lost > job._lost_worker_timeout:
            exc_info = None
            try:
                raise WorkerLostError("Worker exited prematurely.")
            except WorkerLostError:
                exc_info = ExceptionInfo(sys.exc_info())
            job._set(None, (False, exc_info))

    if shutdown and not len(self._pool):
        raise WorkersJoined()

    cleaned = []
    for i in reversed(range(len(self._pool))):
        worker = self._pool[i]
        if worker.exitcode is not None:
            # worker exited
            debug('Supervisor: cleaning up worker %d' % i)
            worker.join()
            debug('Supervisor: worker %d joined' % i)
            cleaned.append(worker.pid)
            del self._pool[i]
            del self._poolctrl[worker.pid]
    if cleaned:
        for job in self._cache.values():
            for worker_pid in job.worker_pids():
                if worker_pid in cleaned and not job.ready():
                    job._worker_lost = time.time()
                    continue
        if self._putlock is not None:
            for worker in cleaned:
                self._putlock.release()
        return True
    return False

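The raise-and-catch inside `_join_exited_workers` looks odd but is the point: an exception instance only acquires a traceback once it is actually raised, so the pool raises `WorkerLostError` and immediately catches it purely to get a populated `sys.exc_info()` to wrap. A standalone sketch of the trick; the helper name is illustrative, not the pool's API:

import sys


def build_exc_info(exc):
    # Raising the instance attaches a traceback to the active exception
    # state; without the raise, sys.exc_info() would have nothing to report.
    try:
        raise exc
    except type(exc):
        return sys.exc_info()


type_, value, tb = build_exc_info(RuntimeError('worker exited prematurely'))
assert type_ is RuntimeError and tb is not None
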
def test_on_failure_Terminated(self):
    einfo = None
    try:
        raise Terminated('9')
    except Terminated:
        einfo = ExceptionInfo()
    self.assertIsNotNone(einfo)
    req = self.get_request(self.add.s(2, 2))
    req.on_failure(einfo)
    req.eventer.send.assert_called_with(
        'task-revoked',
        uuid=req.id, terminated=True, signum='9', expired=False,
    )

def test_on_worker_error(self):
    scratch = [None]

    def errback(einfo):
        scratch[0] = einfo

    pool = TaskPool(10)
    exc_info = None
    try:
        raise KeyError("foo")
    except KeyError:
        exc_info = ExceptionInfo(sys.exc_info())
    pool.on_worker_error(errback, exc_info)
    self.assertTrue(scratch[0])
    self.assertIs(scratch[0], exc_info)

def test_on_retry(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {'f': 'x'})
    tw.eventer = MockEventDispatcher()
    try:
        raise RetryTaskError('foo', KeyError('moofoobar'))
    except:
        einfo = ExceptionInfo()
        tw.on_failure(einfo)
        self.assertIn('task-retried', tw.eventer.sent)
        prev, module._does_info = module._does_info, False
        try:
            tw.on_failure(einfo)
        finally:
            module._does_info = prev
        einfo.internal = True
        tw.on_failure(einfo)

def test_on_failure_WorkerLostError(self):
    tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
    try:
        raise WorkerLostError("do re mi")
    except WorkerLostError:
        exc_info = ExceptionInfo(sys.exc_info())
    tw.on_failure(exc_info)
    self.assertEqual(mytask.backend.get_status(tw.task_id),
                     states.FAILURE)

    mytask.ignore_result = True
    try:
        tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
        tw.on_failure(exc_info)
        self.assertEqual(mytask.backend.get_status(tw.task_id),
                         states.PENDING)
    finally:
        mytask.ignore_result = False

def trace_task(uuid, args, kwargs, request=None):
    R = I = None
    kwargs = kwdict(kwargs)
    try:
        push_task(task)
        task_request = Context(request or {}, args=args,
                               called_directly=False, kwargs=kwargs)
        push_request(task_request)
        try:
            # -*- PRE -*-
            if prerun_receivers:
                send_prerun(sender=task, task_id=uuid, task=task,
                            args=args, kwargs=kwargs)
            loader_task_init(uuid, task)
            if track_started:
                store_result(uuid, {'pid': pid, 'hostname': hostname},
                             STARTED)

            # -*- TRACE -*-
            try:
                R = retval = fun(*args, **kwargs)
                state = SUCCESS
            except Ignore, exc:
                I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
                state, retval = I.state, I.retval
            except RetryTaskError, exc:
                I = Info(RETRY, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
            except Exception, exc:
                if propagate:
                    raise
                I = Info(FAILURE, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
                [subtask(errback).apply_async((uuid, ))
                    for errback in task_request.errbacks or []]

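In the trace above, `Ignore` and `RetryTaskError` act as control flow rather than errors: `Ignore` marks the task IGNORED and wraps an internal `ExceptionInfo`, while a retry is routed through `handle_error_state`. From the task author's side the pattern looks like this sketch (`app` and the task body are illustrative, not from the source):

from celery import Celery
from celery.exceptions import Ignore

app = Celery('demo')

@app.task(bind=True)
def maybe_process(self, item):
    if item is None:
        # Tell the worker to drop the task without recording a result;
        # the trace function catches Ignore and marks the task IGNORED.
        raise Ignore()
    return item * 2
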
def _test_on_failure(self, exception, logger):
    app = app_or_default()
    tid = uuid()
    tw = TaskRequest(mytask.name, tid, [4], {'f': 'x'})
    try:
        raise exception
    except Exception:
        exc_info = ExceptionInfo()
        app.conf.CELERY_SEND_TASK_ERROR_EMAILS = True
        try:
            tw.on_failure(exc_info)
            self.assertTrue(logger.log.called)
            context = logger.log.call_args[0][2]
            self.assertEqual(mytask.name, context['name'])
            self.assertIn(tid, context['id'])
        finally:
            app.conf.CELERY_SEND_TASK_ERROR_EMAILS = False