def handle_stranded_tasks(self, engine):
    """Deal with jobs resident in an engine that died.

    For every message still listed in ``self.pending[engine]``, build a fake
    ``apply_reply`` whose content is a wrapped ``EngineError`` and hand it to
    ``self.dispatch_result``.  Afterwards the engine's entries are removed
    from ``self.completed`` and ``self.failed``.

    Parameters
    ----------
    engine
        Key of the dead engine in ``self.pending``, ``self.completed`` and
        ``self.failed``; it is also used as the first routing identity of
        every fake reply.
    """
    lost = self.pending[engine]
    # Iterate over a snapshot of the keys: the membership check below implies
    # that entries may disappear from the pending table while we dispatch
    # (presumably done by dispatch_result -- confirm against its definition).
    for msg_id in list(lost):
        if msg_id not in self.pending[engine]:
            # prevent double-handling of messages
            continue

        raw_msg = lost[msg_id].raw_msg
        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        parent = self.session.unpack(msg[1].bytes)
        idents = [engine, idents[0]]

        # build fake error reply
        # The exception is raised and caught on purpose so that
        # wrap_exception() runs while it is being handled (presumably it
        # reads the active exception -- confirm in the error module).
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except error.EngineError:
            content = error.wrap_exception()

        # build fake header
        header = dict(
            status='error',
            engine=engine,
            date=datetime.now(),
        )
        msg = self.session.msg('apply_reply', content, parent=parent,
                               subheader=header)
        raw_reply = list(map(zmq.Message,
                             self.session.serialize(msg, ident=idents)))
        # and dispatch it
        self.dispatch_result(raw_reply)

    # finally scrub completed/failed lists
    self.completed.pop(engine)
    self.failed.pop(engine)
def _handle_stranded_msgs(self, eid, uuid):
    """Handle messages known to be on an engine when the engine unregisters.

    It is possible that this will fire prematurely - that is, an engine will
    go down after completing a result, and the client will be notified
    of the unregistration and later receive the successful result.

    For each message id listed in ``self._outstanding_dict[uuid]`` that has
    no entry in ``self.results`` yet, a fake reply message carrying a wrapped
    ``EngineError`` is built and passed to ``self._handle_apply_reply``.

    Parameters
    ----------
    eid
        Engine id; only used in the text of the generated error.
    uuid
        Key of the engine in ``self._outstanding_dict``; also stored as
        ``header['engine']`` of each fake reply.
    """
    outstanding = self._outstanding_dict[uuid]

    # Iterate over a copy so the loop is unaffected if the collection is
    # modified while replies are handled (presumably by _handle_apply_reply
    # -- confirm against its definition).
    for msg_id in list(outstanding):
        if msg_id in self.results:
            # a result for this msg_id is already recorded; nothing to fake
            continue

        # The exception is raised and caught on purpose so that
        # wrap_exception() runs while it is being handled (presumably it
        # reads the active exception -- confirm in the error module).
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except error.EngineError:
            content = error.wrap_exception()

        # build a fake message:
        parent = {'msg_id': msg_id}
        header = {
            'engine': uuid,
            'date': datetime.now().strftime(util.ISO8601),
        }
        msg = dict(parent_header=parent, header=header, content=content)
        self._handle_apply_reply(msg)
def handle_stranded_tasks(self, engine):
    """Deal with jobs resident in an engine that died.

    The engine's entry is popped from ``self.pending``.  Every job found
    there is recorded in ``self.all_failed`` and ``self.all_done``, an
    ``apply_reply`` carrying a wrapped ``EngineError`` is sent on
    ``self.client_stream``, the sent message is forwarded on
    ``self.mon_stream`` under the ``'outtask'`` identity, and
    ``self.update_graph`` is called for the message id.

    Parameters
    ----------
    engine
        Key of the dead engine in ``self.pending``; also inserted as the
        second routing identity of each reply.
    """
    lost = self.pending.pop(engine)

    # ``items()`` instead of the Python-2-only ``iteritems()``: it works on
    # both major versions, and ``lost`` is no longer reachable through
    # ``self.pending`` so iterating it directly is safe.
    # Only ``raw_msg`` is used; the remaining names document the layout of
    # the stored job tuple.
    for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.items():
        self.all_failed.add(msg_id)
        self.all_done.add(msg_id)

        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        msg = self.session.unpack_message(msg, copy=False, content=False)
        parent = msg['header']
        idents = [idents[0], engine] + idents[1:]

        # The exception is raised and caught on purpose so that
        # wrap_exception() runs while it is being handled (presumably it
        # reads the active exception -- confirm in the error module).
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except error.EngineError:
            content = error.wrap_exception()

        msg = self.session.send(self.client_stream, 'apply_reply', content,
                                parent=parent, ident=idents)
        self.session.send(self.mon_stream, msg, ident=['outtask'] + idents)
        self.update_graph(msg_id)
def _handle_stranded_msgs(self, eid, uuid):
    """Handle messages known to be on an engine when the engine unregisters.

    It is possible that this will fire prematurely - that is, an engine will
    go down after completing a result, and the client will be notified
    that the result failed and later receive the actual result.

    Every message id in ``self.queues[eid]`` is removed from
    ``self.pending``, added to ``self.all_completed``, and its DB record is
    updated with a wrapped ``EngineError`` as the result.  A failing DB
    update is logged and does not stop the remaining messages from being
    processed.

    Parameters
    ----------
    eid
        Key of the engine in ``self.queues``; also used in the error text.
    uuid
        Stored as ``engine`` in the fake result header and as
        ``engine_uuid`` in the record.
    """
    outstanding = self.queues[eid]

    for msg_id in outstanding:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)

        # The exception is raised and caught on purpose so that
        # wrap_exception() runs while it is being handled (presumably it
        # reads the active exception -- confirm in the error module).
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except error.EngineError:
            content = error.wrap_exception()

        # build a fake header; the same timestamp object is reused below as
        # the record's completion time
        header = {}
        header['engine'] = uuid
        header['date'] = datetime.now()

        rec = dict(result_content=content, result_header=header,
                   result_buffers=[])
        rec['completed'] = header['date']
        rec['engine_uuid'] = uuid

        try:
            self.db.update_record(msg_id, rec)
        except Exception:
            # best effort: report the failure and carry on with the next msg
            self.log.error("DB Error handling stranded msg %r", msg_id,
                           exc_info=True)