def dispatch_query(self, msg):
    """Route registration requests and queries from clients.

    The identity prefix is split off ``msg`` with
    ``self.session.feed_identities``, the remainder is unpacked, and the
    callable registered in ``self.query_handlers`` under the message type is
    invoked as ``handler(idents, msg)``.

    A message that cannot be unpacked, or whose type has no registered
    handler, is logged and answered with a ``hub_error`` reply addressed to
    the first identity.  A message without any identity is only logged.
    """
    idents, msg = self.session.feed_identities(msg)
    if not idents:
        # Without an identity there is no address to send an error reply to.
        self.log.error("Bad Query Message: %s", msg)
        return
    client_id = idents[0]
    try:
        msg = self.session.unpack_message(msg, content=True)
    except Exception:
        content = error.wrap_exception()
        self.log.error("Bad Query Message: %s", msg, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                          content=content)
        return

    # switch on message type:
    msg_type = msg['msg_type']
    self.log.info("client::client %s requested %s" % (client_id, msg_type))
    handler = self.query_handlers.get(msg_type, None)
    if handler is None:
        # Raise explicitly instead of using ``assert``: assertions are
        # removed under ``python -O``, which would let a None handler be
        # called.  The exception type is kept so the wrapped content is
        # the same as before.
        try:
            raise AssertionError("Bad Message Type: %s" % msg_type)
        except AssertionError:
            content = error.wrap_exception()
            self.log.error("Bad Message Type: %s" % msg_type, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                          content=content)
        return

    handler(idents, msg)
def dispatch_query(self, msg):
    """Route registration requests and queries from clients.

    Splits the identities from ``msg``, unpacks the rest, looks up the
    handler for its ``msg_type`` in ``self.query_handlers`` and calls it as
    ``handler(idents, msg)``.

    Unpack failures and unknown message types are logged and reported to the
    client (first identity) with a ``hub_error`` message.  If there are no
    identities at all the message is logged and dropped.
    """
    idents, msg = self.session.feed_identities(msg)
    if not idents:
        # nobody to reply to
        self.log.error("Bad Query Message: %s", msg)
        return
    client_id = idents[0]
    try:
        msg = self.session.unpack_message(msg, content=True)
    except Exception:
        content = error.wrap_exception()
        self.log.error("Bad Query Message: %s", msg, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                          content=content)
        return

    # switch on message type:
    msg_type = msg['msg_type']
    self.log.info("client::client %s requested %s" % (client_id, msg_type))
    handler = self.query_handlers.get(msg_type, None)
    if handler is None:
        # An ``assert`` here would vanish under ``python -O`` and the None
        # handler would then be called; raise the same exception type
        # explicitly so the check always runs.
        try:
            raise AssertionError("Bad Message Type: %s" % msg_type)
        except AssertionError:
            content = error.wrap_exception()
            self.log.error("Bad Message Type: %s" % msg_type, exc_info=True)
        self.session.send(self.query, "hub_error", ident=client_id,
                          content=content)
        return

    handler(idents, msg)
def get_results(self, client_id, msg):
    """Get the result of 1 or more messages.

    Reads ``msg_ids`` (de-duplicated and sorted) and the optional
    ``status_only`` flag from ``msg['content']`` and sends a ``result_reply``
    to ``client_id``.  The reply lists each id under ``pending`` or
    ``completed``; unless ``status_only`` is set, the extracted record of each
    completed message is stored under its msg_id and its buffers are attached
    to the reply.

    An unknown msg_id, or a failing DB lookup, turns the reply content into
    the wrapped exception.
    """
    request = msg['content']
    msg_ids = sorted(set(request['msg_ids']))
    statusonly = request.get('status_only', False)
    pending = []
    completed = []
    content = dict(status='ok')
    content['pending'] = pending
    content['completed'] = completed
    buffers = []
    records = {}
    if not statusonly:
        try:
            matches = self.db.find_records(dict(msg_id={'$in': msg_ids}))
            # turn match list into dict, for faster lookup
            records = dict((rec['msg_id'], rec) for rec in matches)
        except Exception:
            content = error.wrap_exception()
            self.session.send(self.query, "result_reply", content=content,
                              parent=msg, ident=client_id)
            return

    for msg_id in msg_ids:
        if msg_id in self.pending:
            pending.append(msg_id)
        elif msg_id in self.all_completed:
            completed.append(msg_id)
            if not statusonly:
                c, bufs = self._extract_record(records[msg_id])
                content[msg_id] = c
                buffers.extend(bufs)
        elif msg_id in records:
            # Look at *this* message's record.  The previous code tested a
            # leftover loop variable, i.e. whichever record happened to be
            # returned last by the DB.
            rec = records[msg_id]
            if rec['completed']:
                completed.append(msg_id)
                c, bufs = self._extract_record(rec)
                content[msg_id] = c
                buffers.extend(bufs)
            else:
                pending.append(msg_id)
        else:
            try:
                raise KeyError('No such message: ' + msg_id)
            except Exception:
                content = error.wrap_exception()
            break
    self.session.send(self.query, "result_reply", content=content,
                      parent=msg, ident=client_id, buffers=buffers)
def purge_results(self, client_id, msg):
    """Purge results from memory.

    This method is more valuable before we move to a DB based message
    storage mechanism.

    ``msg['content']`` may carry ``msg_ids`` (a list of ids, or the string
    ``'all'``) and ``engine_ids``.  Completed records for the given messages
    and engines are dropped from ``self.db``; a ``purge_reply`` with either
    ``status='ok'`` or a wrapped exception is always sent to ``client_id``.
    """
    content = msg['content']
    msg_ids = content.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne': None}))
        except Exception:
            reply = error.wrap_exception()
    else:
        for msg_id in msg_ids:
            if msg_id in self.all_completed:
                # Guarded like the other DB calls here, so a DB failure is
                # reported to the client instead of leaving it without a
                # reply.
                try:
                    self.db.drop_record(msg_id)
                except Exception:
                    reply = error.wrap_exception()
                    break
            else:
                if msg_id in self.pending:
                    try:
                        raise IndexError("msg pending: %r" % msg_id)
                    except Exception:
                        reply = error.wrap_exception()
                else:
                    try:
                        raise IndexError("No such msg: %r" % msg_id)
                    except Exception:
                        reply = error.wrap_exception()
                break
        eids = content.get('engine_ids', [])
        for eid in eids:
            if eid not in self.engines:
                try:
                    raise IndexError("No such engine: %i" % eid)
                except Exception:
                    reply = error.wrap_exception()
                break
            # forget the in-memory list of completed ids for this engine
            self.completed.pop(eid)
            uid = self.engines[eid].queue
            try:
                self.db.drop_matching_records(
                    dict(engine_uuid=uid, completed={'$ne': None}))
            except Exception:
                reply = error.wrap_exception()
                break
    self.session.send(self.query, 'purge_reply', content=reply,
                      ident=client_id)
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Fail a task that can no longer run.

    An instance of ``why`` (``ImpossibleDependency`` by default) is raised and
    wrapped; the wrapped error is sent to the client as an ``apply_reply``
    and forwarded to the monitor stream.
    """
    if msg_id not in self.queue_map:
        self.log.error("task %r already failed!", msg_id)
        return

    failed_job = self.queue_map.pop(msg_id)
    # lazy-delete from the queue
    failed_job.removed = True

    # unlink this task from everything it was waiting on
    for dep_id in failed_job.dependents:
        if dep_id in self.graph:
            self.graph[dep_id].remove(msg_id)

    try:
        raise why()
    except:
        failure = error.wrap_exception()
    self.log.debug("task %r failing as unreachable with: %s",
                   msg_id, failure['ename'])

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    reply = self.session.send(
        self.client_stream, 'apply_reply', failure,
        parent=failed_job.header, ident=failed_job.idents,
    )
    self.session.send(self.mon_stream, reply,
                      ident=[b'outtask'] + failed_job.idents)

    self.update_graph(msg_id, success=False)
def queue_status(self, client_id, msg):
    """Report the queue status of one or more targets.

    With ``verbose`` set in the request the msg_ids themselves are returned,
    otherwise only the length of each list.

    Keys of each per-target entry:
        queue     (pending MUX jobs)
        tasks     (pending Task jobs)
        completed (finished jobs from both queues)
    """
    request = msg['content']
    requested = request['targets']
    try:
        targets = self._validate_targets(requested)
    except:
        failure = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                          content=failure, ident=client_id)
        return

    verbose = request.get('verbose', False)
    reply = {'status': 'ok'}
    for target in targets:
        mux_jobs = self.queues[target]
        finished = self.completed[target]
        task_jobs = self.tasks[target]
        if verbose:
            entry = {'queue': mux_jobs, 'completed': finished,
                     'tasks': task_jobs}
        else:
            entry = {'queue': len(mux_jobs), 'completed': len(finished),
                     'tasks': len(task_jobs)}
        reply[str(target)] = entry

    if verbose:
        reply['unassigned'] = list(self.unassigned)
    else:
        reply['unassigned'] = len(self.unassigned)
    self.session.send(self.query, "queue_reply", content=reply,
                      ident=client_id)
def _handle_stranded_msgs(self, eid, uuid):
    """Fail the messages still outstanding on an engine that unregistered.

    This may fire prematurely: an engine can go down right after completing
    a result, in which case the client is told about the unregistration
    first and receives the successful result later.
    """
    for msg_id in list(self._outstanding_dict[uuid]):
        if msg_id in self.results:
            # a result for this message is already recorded
            continue

        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except:
            failure = error.wrap_exception()

        # build a fake message and feed it through the normal reply path
        fake_reply = {
            'parent_header': {'msg_id': msg_id},
            'header': {
                'engine': uuid,
                'date': datetime.now().strftime(util.ISO8601),
            },
            'content': failure,
        }
        self._handle_apply_reply(fake_reply)
def _wrap_exception(self, method=None):
    """Return ``wrap_exception`` content tagged with this engine's identity.

    The engine uuid (``self.ident``), engine id (``self.int_id``) and the
    given ``method`` are passed along as the engine info.
    """
    # import here, because _wrap_exception is only used in parallel,
    # and parallel has higher min pyzmq version
    from IPython.parallel.error import wrap_exception

    return wrap_exception({
        'engine_uuid': self.ident,
        'engine_id': self.int_id,
        'method': method,
    })
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Fail a task that can no longer run.

    An instance of ``why`` (``ImpossibleDependency`` by default) is raised and
    wrapped; the wrapped error is sent to the client as an ``apply_reply``
    and forwarded to the monitor stream.
    """
    if msg_id not in self.depending:
        self.log.error("msg %r already failed!", msg_id)
        return

    raw_msg, _targets, after, follow, _timeout = self.depending.pop(msg_id)
    # unlink this task from everything it was waiting on
    for dep_id in follow.union(after):
        if dep_id in self.graph:
            self.graph[dep_id].remove(msg_id)

    # FIXME: unpacking a message I've already unpacked, but didn't save:
    idents, frames = self.session.feed_identities(raw_msg, copy=False)
    parent_header = self.session.unpack(frames[1].bytes)

    try:
        raise why()
    except:
        failure = error.wrap_exception()

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    reply = self.session.send(self.client_stream, 'apply_reply', failure,
                              parent=parent_header, ident=idents)
    self.session.send(self.mon_stream, reply, ident=[b'outtask'] + idents)

    self.update_graph(msg_id, success=False)
def db_query(self, client_id, msg):
    """Perform a raw query on the task record database.

    ``msg['content']`` may carry ``query`` (passed to
    ``self.db.find_records``) and ``keys`` (the record fields to return, or
    None).  The reply is a ``db_reply`` whose content holds the records plus
    ``buffer_lens`` / ``result_buffer_lens``: one length per record so the
    receiver can split the flat ``buffers`` list back up.  A length list is
    None when that kind of buffer was not requested.

    On a DB error the reply content is the wrapped exception.
    """
    request = msg["content"]
    query = request.get("query", {})
    keys = request.get("keys", None)
    buffers = []
    empty = list()
    try:
        records = self.db.find_records(query, keys)
    except Exception:
        content = error.wrap_exception()
    else:
        # extract buffers from reply content:
        if keys is not None:
            buffer_lens = [] if "buffers" in keys else None
            result_buffer_lens = [] if "result_buffers" in keys else None
        else:
            # No key filter was given, so the buffers popped below can be
            # present; their lengths must be reported, otherwise they are
            # removed from the records and cannot be matched up again.
            buffer_lens = []
            result_buffer_lens = []

        for rec in records:
            # buffers may be None, so double check
            b = rec.pop("buffers", empty) or empty
            if buffer_lens is not None:
                buffer_lens.append(len(b))
            buffers.extend(b)
            rb = rec.pop("result_buffers", empty) or empty
            if result_buffer_lens is not None:
                result_buffer_lens.append(len(rb))
            buffers.extend(rb)
        content = dict(status="ok", records=records,
                       buffer_lens=buffer_lens,
                       result_buffer_lens=result_buffer_lens)
    self.session.send(self.query, "db_reply", content=content, parent=msg,
                      ident=client_id, buffers=buffers)
def handle_stranded_tasks(self, engine):
    """Deal with jobs resident in an engine that died.

    For every message still listed in ``self.pending[engine]`` a fake
    ``apply_reply`` carrying a wrapped ``EngineError`` is built, serialized
    and passed to ``self.dispatch_result`` as if the engine had sent it.
    Afterwards the engine's entries in ``self.completed`` and ``self.failed``
    are removed.
    """
    lost = self.pending[engine]
    # Iterate over a snapshot of the ids: the membership check below shows
    # that entries can disappear from this dict while the loop runs, and
    # changing a dict during direct iteration raises RuntimeError.
    for msg_id in list(lost):
        if msg_id not in self.pending[engine]:
            # prevent double-handling of messages
            continue

        raw_msg = lost[msg_id][0]
        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        parent = self.session.unpack(msg[1].bytes)
        idents = [engine, idents[0]]

        # build fake error reply
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except Exception:
            content = error.wrap_exception()
        # build fake header
        header = dict(
            status='error',
            engine=engine,
            date=datetime.now(),
        )
        msg = self.session.msg('apply_reply', content, parent=parent,
                               subheader=header)
        # A real list (not a lazy ``map`` object) so the reply can be
        # indexed and traversed more than once.
        raw_reply = [zmq.Message(part)
                     for part in self.session.serialize(msg, ident=idents)]
        # and dispatch it
        self.dispatch_result(raw_reply)

    # finally scrub completed/failed lists
    self.completed.pop(engine)
    self.failed.pop(engine)
def queue_status(self, client_id, msg):
    """Return the Queue status of one or more targets.

    if verbose: return the msg_ids
    else: return len of each type.

    keys:
        queue (pending MUX jobs)
        tasks (pending Task jobs)
        completed (finished jobs from both queues)

    The reply is a ``queue_reply`` whose content maps ``str(target)`` to the
    per-target entry, plus an ``unassigned`` entry.  If the targets cannot be
    validated a ``hub_error`` with the wrapped exception is sent instead.
    """
    content = msg['content']
    targets = content['targets']
    try:
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                          content=content, ident=client_id)
        return
    verbose = content.get('verbose', False)
    content = dict(status='ok')
    for t in targets:
        queue = self.queues[t]
        completed = self.completed[t]
        tasks = self.tasks[t]
        if not verbose:
            queue = len(queue)
            completed = len(completed)
            tasks = len(tasks)
        # str(), not bytes(): on Python 3 ``bytes(3)`` is three NUL bytes
        # rather than the text "3" (on Python 2 the two are the same type).
        content[str(t)] = {'queue': queue, 'completed': completed,
                           'tasks': tasks}
    content['unassigned'] = (list(self.unassigned) if verbose
                             else len(self.unassigned))
    self.session.send(self.query, "queue_reply", content=content,
                      ident=client_id)
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Fail a task that can no longer run.

    An instance of ``why`` (``ImpossibleDependency`` by default) is raised and
    wrapped; the wrapped error is sent to the client as an ``apply_reply``
    and forwarded to the monitor stream.
    """
    if msg_id not in self.depending:
        self.log.error("msg %r already failed!", msg_id)
        return

    failed_job = self.depending.pop(msg_id)
    # unlink this task from everything it was waiting on
    for dep_id in failed_job.dependents:
        if dep_id in self.graph:
            self.graph[dep_id].remove(msg_id)

    try:
        raise why()
    except:
        failure = error.wrap_exception()

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    reply = self.session.send(
        self.client_stream, 'apply_reply', failure,
        parent=failed_job.header, ident=failed_job.idents,
    )
    self.session.send(self.mon_stream, reply,
                      ident=[b'outtask'] + failed_job.idents)

    self.update_graph(msg_id, success=False)
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Report a task as unreachable.

    Raises and wraps ``why`` (by default an ``ImpossibleDependency``), sends
    the wrapped error to the client as an ``apply_reply`` and relays that
    reply to the monitor stream.
    """
    if msg_id not in self.queue_map:
        self.log.error("task %r already failed!", msg_id)
        return

    dead_job = self.queue_map.pop(msg_id)
    # lazy-delete from the queue
    dead_job.removed = True

    # drop this task from the waiter sets of its dependencies
    for parent_id in dead_job.dependents:
        if parent_id in self.graph:
            self.graph[parent_id].remove(msg_id)

    try:
        raise why()
    except:
        wrapped = error.wrap_exception()
    self.log.debug("task %r failing as unreachable with: %s",
                   msg_id, wrapped['ename'])

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    client_reply = self.session.send(
        self.client_stream, 'apply_reply', wrapped,
        parent=dead_job.header, ident=dead_job.idents,
    )
    self.session.send(self.mon_stream, client_reply,
                      ident=[b'outtask'] + dead_job.idents)

    self.update_graph(msg_id, success=False)
def _handle_stranded_msgs(self, eid, uuid):
    """Fail every message left on an engine at the time it unregisters.

    Note that this can fire too early: the engine may have finished a task
    just before going down, so the client can be notified of the
    unregistration and still receive the successful result afterwards.
    """
    stranded = list(self._outstanding_dict[uuid])
    for msg_id in stranded:
        if msg_id in self.results:
            # a result for this message is already recorded
            continue

        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except:
            wrapped = error.wrap_exception()

        # build a fake message:
        parent_header = {'msg_id': msg_id}
        reply_header = {
            'engine': uuid,
            'date': datetime.now().strftime(util.ISO8601),
        }
        self._handle_apply_reply({
            'parent_header': parent_header,
            'header': reply_header,
            'content': wrapped,
        })
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """a task has become unreachable, send a reply with an
    ImpossibleDependency error.

    ``why`` is raised and wrapped to build the reply content.  The reply is
    sent to the client as an ``apply_reply`` and relayed to the monitor
    stream, and the dependency graph is updated with ``success=False``.
    Nothing happens (beyond an error log) if ``msg_id`` is not waiting in
    ``self.depending``.
    """
    if msg_id not in self.depending:
        # lazy %-args: formatting with ``%`` breaks if msg_id is a tuple
        self.log.error("msg %r already failed!", msg_id)
        return
    raw_msg, targets, after, follow, timeout = self.depending.pop(msg_id)
    for mid in follow.union(after):
        if mid in self.graph:
            self.graph[mid].remove(msg_id)

    # FIXME: unpacking a message I've already unpacked, but didn't save:
    idents, msg = self.session.feed_identities(raw_msg, copy=False)
    msg = self.session.unpack_message(msg, copy=False, content=False)
    header = msg['header']

    try:
        raise why()
    except Exception:
        content = error.wrap_exception()

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    msg = self.session.send(self.client_stream, 'apply_reply', content,
                            parent=header, ident=idents)
    # Bytes prefix, as in the sibling schedulers: identical to 'outtask' on
    # Python 2, and avoids mixing text with bytes identities on Python 3.
    self.session.send(self.mon_stream, msg, ident=[b'outtask'] + idents)

    self.update_graph(msg_id, success=False)
def queue_status(self, client_id, msg):
    """Send the queue status of the requested targets to the client.

    When the request sets ``verbose`` the msg_ids are returned, otherwise
    only the number of entries of each kind.

    Keys of each per-target entry:
        queue     (pending MUX jobs)
        tasks     (pending Task jobs)
        completed (finished jobs from both queues)
    """
    query = msg["content"]
    wanted = query["targets"]
    try:
        targets = self._validate_targets(wanted)
    except:
        wrapped = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                          content=wrapped, ident=client_id)
        return

    verbose = query.get("verbose", False)
    status = {"status": "ok"}
    for target in targets:
        in_queue = self.queues[target]
        done = self.completed[target]
        in_tasks = self.tasks[target]
        if not verbose:
            in_queue, done, in_tasks = len(in_queue), len(done), len(in_tasks)
        status[str(target)] = {
            "queue": in_queue,
            "completed": done,
            "tasks": in_tasks,
        }

    unassigned = self.unassigned
    status["unassigned"] = list(unassigned) if verbose else len(unassigned)
    self.session.send(self.query, "queue_reply", content=status,
                      ident=client_id)
def handle_stranded_tasks(self, engine):
    """Deal with jobs resident in an engine that died.

    The engine's entry is removed from ``self.pending``.  Every task it held
    is marked failed and done, a wrapped ``EngineError`` is sent to the
    client as an ``apply_reply`` and relayed to the monitor stream, and the
    dependency graph is updated for the task.
    """
    lost = self.pending.pop(engine)

    # ``items()`` instead of ``iteritems()``: the latter does not exist on
    # Python 3; ``lost`` was already detached from ``self.pending`` above.
    for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.items():
        self.all_failed.add(msg_id)
        self.all_done.add(msg_id)
        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        msg = self.session.unpack_message(msg, copy=False, content=False)
        parent = msg['header']
        # insert the dead engine after the first identity
        idents = [idents[0], engine] + idents[1:]
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except Exception:
            content = error.wrap_exception()
        msg = self.session.send(self.client_stream, 'apply_reply', content,
                                parent=parent, ident=idents)
        # Bytes prefix: identical to 'outtask' on Python 2, and avoids
        # mixing text with bytes identities on Python 3.
        self.session.send(self.mon_stream, msg, ident=[b'outtask'] + idents)
        self.update_graph(msg_id)
def handle_stranded_tasks(self, engine):
    """Fail the jobs that were resident in an engine that died.

    A fake error ``apply_reply`` is built for each pending message and
    passed to ``self.dispatch_result``; the engine's completed/failed lists
    are removed at the end.
    """
    stranded = self.pending[engine]
    for msg_id in list(stranded.keys()):
        if msg_id not in self.pending[engine]:
            # prevent double-handling of messages
            continue

        job = stranded[msg_id]
        idents, frames = self.session.feed_identities(job.raw_msg, copy=False)
        parent_header = self.session.unpack(frames[1].bytes)
        reply_idents = [engine, idents[0]]

        # build fake error reply
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except:
            failure = error.wrap_exception()

        # build fake header
        fake_header = {
            'status': 'error',
            'engine': engine,
            'date': datetime.now(),
        }
        reply = self.session.msg('apply_reply', failure,
                                 parent=parent_header, subheader=fake_header)
        serialized = self.session.serialize(reply, ident=reply_idents)
        raw_reply = [zmq.Message(part) for part in serialized]

        # and dispatch it
        self.dispatch_result(raw_reply)

    # finally scrub completed/failed lists
    self.completed.pop(engine)
    self.failed.pop(engine)
def _handle_stranded_msgs(self, eid, uuid):
    """Record a failure for every message queued on an unregistering engine.

    This can fire prematurely: if the engine goes down right after
    completing a result, the client is first told that the result failed
    and later receives the actual result.
    """
    for msg_id in self.queues[eid]:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)

        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except:
            failure = error.wrap_exception()

        # build a fake header:
        died_at = datetime.now()
        fake_header = {'engine': uuid, 'date': died_at}
        update = {
            'result_content': failure,
            'result_header': fake_header,
            'result_buffers': [],
            'completed': died_at,
            'engine_uuid': uuid,
        }
        try:
            self.db.update_record(msg_id, update)
        except Exception:
            self.log.error("DB Error handling stranded msg %r" % msg_id,
                           exc_info=True)
def _handle_stranded_msgs(self, eid, uuid):
    """Record a failure for every message queued on an unregistering engine.

    This can fire prematurely: if the engine goes down right after
    completing a result, the client is first told that the result failed
    and later receives the actual result.
    """
    for msg_id in self.queues[eid]:
        self.pending.remove(msg_id)
        self.all_completed.add(msg_id)

        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (eid, msg_id))
        except:
            failure = error.wrap_exception()

        # build a fake header:
        stamp = datetime.now().strftime(ISO8601)
        fake_header = {'engine': uuid, 'date': stamp}
        update = {
            'result_content': failure,
            'result_header': fake_header,
            'result_buffers': [],
            'completed': stamp,
            'engine_uuid': uuid,
        }
        self.db.update_record(msg_id, update)
def purge_results(self, client_id, msg):
    """Purge results from memory.

    This method is more valuable before we move to a DB based message
    storage mechanism.

    The request content may name ``msg_ids`` (a list, or ``'all'``) and
    ``engine_ids``.  Matching completed records are dropped from
    ``self.db``.  A ``purge_reply`` is always sent to ``client_id``; its
    content is ``status='ok'`` or the wrapped exception of the first
    failure.
    """
    content = msg['content']
    msg_ids = content.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne': None}))
        except Exception:
            reply = error.wrap_exception()
    else:
        for msg_id in msg_ids:
            if msg_id in self.all_completed:
                # Wrapped like every other DB call in this method, so the
                # client gets an error reply rather than no reply at all.
                try:
                    self.db.drop_record(msg_id)
                except Exception:
                    reply = error.wrap_exception()
                    break
            else:
                if msg_id in self.pending:
                    try:
                        raise IndexError("msg pending: %r" % msg_id)
                    except Exception:
                        reply = error.wrap_exception()
                else:
                    try:
                        raise IndexError("No such msg: %r" % msg_id)
                    except Exception:
                        reply = error.wrap_exception()
                break
        eids = content.get('engine_ids', [])
        for eid in eids:
            if eid not in self.engines:
                try:
                    raise IndexError("No such engine: %i" % eid)
                except Exception:
                    reply = error.wrap_exception()
                break
            # forget the in-memory list of completed ids for this engine
            self.completed.pop(eid)
            uid = self.engines[eid].queue
            try:
                self.db.drop_matching_records(
                    dict(engine_uuid=uid, completed={'$ne': None}))
            except Exception:
                reply = error.wrap_exception()
                break
    self.session.send(self.query, 'purge_reply', content=reply,
                      ident=client_id)
def get_history(self, client_id, msg):
    """Reply with the list of all msg_ids found in the DB records."""
    try:
        history = self.db.get_history()
    except Exception:
        reply = error.wrap_exception()
    else:
        reply = {"status": "ok", "history": history}

    self.session.send(self.query, "history_reply", content=reply,
                      parent=msg, ident=client_id)
def purge_results(self, client_id, msg):
    """Purge results from memory.

    This method is more valuable before we move to a DB based message
    storage mechanism.

    The request content may name ``msg_ids`` (a list, or ``'all'``) and
    ``engine_ids``.  Purging by id is refused while any of the ids is still
    pending.  Engine records are only purged when the message purge
    succeeded.  A ``purge_reply`` with ``status='ok'`` or a wrapped
    exception is always sent to ``client_id``.
    """
    content = msg['content']
    self.log.info("Dropping records with %s", content)
    msg_ids = content.get('msg_ids', [])
    reply = dict(status='ok')
    if msg_ids == 'all':
        try:
            self.db.drop_matching_records(dict(completed={'$ne': None}))
        except Exception:
            reply = error.wrap_exception()
    else:
        # A real list, not ``filter(...)``: on Python 3 a filter object is
        # always truthy and cannot be indexed, which made every purge by
        # id fail.
        pending = [m for m in msg_ids if m in self.pending]
        if pending:
            try:
                raise IndexError("msg pending: %r" % pending[0])
            except Exception:
                reply = error.wrap_exception()
        else:
            try:
                self.db.drop_matching_records(dict(msg_id={'$in': msg_ids}))
            except Exception:
                reply = error.wrap_exception()

        if reply['status'] == 'ok':
            eids = content.get('engine_ids', [])
            for eid in eids:
                if eid not in self.engines:
                    try:
                        raise IndexError("No such engine: %i" % eid)
                    except Exception:
                        reply = error.wrap_exception()
                    break
                uid = self.engines[eid].queue
                try:
                    self.db.drop_matching_records(
                        dict(engine_uuid=uid, completed={'$ne': None}))
                except Exception:
                    reply = error.wrap_exception()
                    break

    self.session.send(self.query, 'purge_reply', content=reply,
                      ident=client_id)
def get_history(self, client_id, msg):
    """Send the client every msg_id present in our DB records."""
    try:
        all_ids = self.db.get_history()
    except Exception:
        payload = error.wrap_exception()
    else:
        payload = {'status': 'ok', 'history': all_ids}

    self.session.send(self.query, "history_reply", content=payload,
                      parent=msg, ident=client_id)
def check_load(self, client_id, msg):
    """Reply with the load of each requested target.

    The load of a target is the number of entries in its queue plus the
    number of its tasks.  The ``load_reply`` content maps ``str(target)`` to
    that number.  If the targets are missing from the request or fail
    validation, a ``hub_error`` with the wrapped exception is sent instead.
    """
    content = msg["content"]
    try:
        targets = content["targets"]
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                          content=content, ident=client_id)
        return

    content = dict(status="ok")
    for t in targets:
        # str(), not bytes(): on Python 3 ``bytes(3)`` is three NUL bytes
        # rather than the text "3" (on Python 2 the two are the same type).
        content[str(t)] = len(self.queues[t]) + len(self.tasks[t])
    self.session.send(self.query, "load_reply", content=content,
                      ident=client_id)
def check_load(self, client_id, msg):
    """Report how many jobs each requested target currently holds.

    For every validated target the ``load_reply`` content maps
    ``str(target)`` to ``len(queue) + len(tasks)``.  A missing ``targets``
    entry or a validation failure results in a ``hub_error`` carrying the
    wrapped exception.
    """
    content = msg['content']
    try:
        targets = content['targets']
        targets = self._validate_targets(targets)
    except Exception:
        content = error.wrap_exception()
        self.session.send(self.query, "hub_error",
                          content=content, ident=client_id)
        return

    content = dict(status='ok')
    for t in targets:
        # Use str() for the key: ``bytes(t)`` only equals the decimal text
        # on Python 2; on Python 3 it builds ``t`` zero bytes.
        content[str(t)] = len(self.queues[t]) + len(self.tasks[t])
    self.session.send(self.query, "load_reply", content=content,
                      ident=client_id)
def get_results(self, client_id, msg):
    """Reply with the status, and optionally the results, of some messages.

    Each requested msg_id is listed as pending or completed.  Unless
    ``status_only`` is set, the stored result of every completed message is
    included under its msg_id and its result buffers are attached.  An
    unknown msg_id replaces the reply content with the wrapped KeyError.
    """
    request = msg['content']
    msg_ids = sorted(set(request['msg_ids']))
    statusonly = request.get('status_only', False)

    pending = []
    completed = []
    reply = {'status': 'ok', 'pending': pending, 'completed': completed}
    buffers = []
    if not statusonly:
        reply['results'] = {}
        records = self.db.find_records({'msg_id': {'$in': msg_ids}})

    io_keys = 'pyin pyout pyerr stdout stderr'.split()
    for msg_id in msg_ids:
        if msg_id in self.pending:
            pending.append(msg_id)
            continue

        if msg_id not in self.all_completed:
            try:
                raise KeyError('No such message: ' + msg_id)
            except:
                reply = error.wrap_exception()
            break

        completed.append(msg_id)
        if statusonly:
            continue

        rec = records[msg_id]
        io_dict = {}
        for key in io_keys:
            io_dict[key] = rec[key]
        reply[msg_id] = {
            'result_content': rec['result_content'],
            'header': rec['header'],
            'result_header': rec['result_header'],
            'io': io_dict,
        }
        if rec['result_buffers']:
            buffers.extend(map(str, rec['result_buffers']))

    self.session.send(self.query, "result_reply", content=reply,
                      parent=msg, ident=client_id, buffers=buffers)
def db_query(self, client_id, msg):
    """Run a raw query against the task record database and reply.

    The ``db_reply`` content carries the matching records together with
    per-record buffer counts; the buffers themselves are removed from the
    records and sent as message buffers.  A DB error is returned as the
    wrapped exception.
    """
    request = msg['content']
    raw_query = request.get('query', {})
    keys = request.get('keys', None)
    query = util.extract_dates(raw_query)
    buffers = []
    no_buffers = list()

    try:
        records = self.db.find_records(query, keys)
    except Exception:
        reply = error.wrap_exception()
    else:
        # extract buffers from reply content:
        if keys is None:
            buffer_lens = []
            result_buffer_lens = []
        else:
            buffer_lens = [] if 'buffers' in keys else None
            result_buffer_lens = [] if 'result_buffers' in keys else None

        for rec in records:
            # buffers may be None, so double check
            if buffer_lens is not None:
                request_bufs = rec.pop('buffers', no_buffers) or no_buffers
                buffer_lens.append(len(request_bufs))
                buffers.extend(request_bufs)
            if result_buffer_lens is not None:
                result_bufs = (rec.pop('result_buffers', no_buffers)
                               or no_buffers)
                result_buffer_lens.append(len(result_bufs))
                buffers.extend(result_bufs)

        reply = dict(status='ok', records=records,
                     buffer_lens=buffer_lens,
                     result_buffer_lens=result_buffer_lens)

    self.session.send(self.query, "db_reply", content=reply, parent=msg,
                      ident=client_id, buffers=buffers)
def handle_stranded_tasks(self, engine):
    """Deal with jobs resident in an engine that died.

    Removes the engine from ``self.pending``; each task it held is added to
    the failed/done sets, answered with a wrapped ``EngineError`` as an
    ``apply_reply`` (also relayed to the monitor stream), and reported to
    ``self.update_graph``.
    """
    lost = self.pending.pop(engine)

    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only.
    for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.items():
        self.all_failed.add(msg_id)
        self.all_done.add(msg_id)
        idents, msg = self.session.feed_identities(raw_msg, copy=False)
        msg = self.session.unpack_message(msg, copy=False, content=False)
        parent = msg['header']
        # insert the dead engine after the first identity
        idents = [idents[0], engine] + idents[1:]
        try:
            raise error.EngineError(
                "Engine %r died while running task %r" % (engine, msg_id))
        except Exception:
            content = error.wrap_exception()
        msg = self.session.send(self.client_stream, 'apply_reply', content,
                                parent=parent, ident=idents)
        # bytes prefix: same value as 'outtask' on Python 2, and keeps the
        # identity list free of text strings on Python 3
        self.session.send(self.mon_stream, msg, ident=[b'outtask'] + idents)
        self.update_graph(msg_id)
def get_results(self, client_id, msg):
    """Look up one or more messages and reply with their status/results.

    Requested ids are sorted into ``pending`` and ``completed``.  When
    ``status_only`` is not set, each completed message's stored result is
    added under its msg_id and its result buffers are appended to the reply
    buffers.  Hitting an unknown id stops the scan and the reply content
    becomes the wrapped KeyError.
    """
    query = msg['content']
    msg_ids = sorted(set(query['msg_ids']))
    statusonly = query.get('status_only', False)

    still_pending = []
    finished = []
    result = dict(status='ok')
    result['pending'] = still_pending
    result['completed'] = finished
    buffers = []
    if not statusonly:
        result['results'] = {}
        records = self.db.find_records(dict(msg_id={'$in': msg_ids}))

    for msg_id in msg_ids:
        if msg_id in self.pending:
            still_pending.append(msg_id)
        elif msg_id in self.all_completed:
            finished.append(msg_id)
            if statusonly:
                continue
            rec = records[msg_id]
            io_dict = {}
            for stream_name in ('pyin', 'pyout', 'pyerr', 'stdout', 'stderr'):
                io_dict[stream_name] = rec[stream_name]
            entry = {}
            entry['result_content'] = rec['result_content']
            entry['header'] = rec['header']
            entry['result_header'] = rec['result_header']
            entry['io'] = io_dict
            result[msg_id] = entry
            if rec['result_buffers']:
                buffers.extend(map(str, rec['result_buffers']))
        else:
            try:
                raise KeyError('No such message: ' + msg_id)
            except:
                result = error.wrap_exception()
            break

    self.session.send(self.query, "result_reply", content=result,
                      parent=msg, ident=client_id, buffers=buffers)
def map_sync(self, f, *args):
    """Apply ``f`` to the zipped ``args`` and return the list of results.

    Each argument group is passed through ``dumps``/``loads`` first to
    simulate a network roundtrip.  A call that raises is retried until it
    succeeds or ``self.tries`` attempts have been made; only the exception
    of the final failed attempt is recorded.

    Raises:
        CompositeError: if any group still failed after its last attempt.
    """
    res = []
    exceptions = []
    for group in zip(*args):
        # simulate network roundtrip
        group = loads(dumps(group))
        for attempt in range(1, self.tries + 1):
            try:
                value = f(*group)
            except Exception:
                if attempt == self.tries:
                    # out of retries: keep the failure for the composite
                    exceptions.append(unwrap_exception(wrap_exception()))
            else:
                # Stop on the first success, so a successful call is
                # neither repeated nor recorded more than once.
                res.append(value)
                break

    if exceptions:
        raise CompositeError("Mock Composite error", exceptions)
    return res
def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
    """Report a task as unreachable.

    Raises and wraps ``why`` (by default an ``ImpossibleDependency``), sends
    the wrapped error to the client as an ``apply_reply`` and relays that
    reply to the monitor stream.
    """
    if msg_id not in self.depending:
        self.log.error("msg %r already failed!", msg_id)
        return

    dead_job = self.depending.pop(msg_id)
    # drop this task from the waiter sets of its dependencies
    for parent_id in dead_job.dependents:
        if parent_id in self.graph:
            self.graph[parent_id].remove(msg_id)

    try:
        raise why()
    except:
        wrapped = error.wrap_exception()

    self.all_done.add(msg_id)
    self.all_failed.add(msg_id)

    client_reply = self.session.send(
        self.client_stream, 'apply_reply', wrapped,
        parent=dead_job.header, ident=dead_job.idents,
    )
    self.session.send(self.mon_stream, client_reply,
                      ident=[b'outtask'] + dead_job.idents)

    self.update_graph(msg_id, success=False)
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    The records for ``msg['content']['msg_ids']`` are fetched from the DB and
    each task is sent again on ``self.resubmit`` under a fresh msg_id (the
    old header is reused with the new id and date).  The ``resubmit_reply``
    maps original ids to the new ids under ``resubmitted``; afterwards the
    new id is stored on each original record.

    If the lookup fails, or the number of records found does not match the
    number of ids requested, the reply is the wrapped exception and nothing
    is resubmitted.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply,
                          ident=client_id)

    content = msg['content']
    msg_ids = content['msg_ids']
    try:
        records = self.db.find_records({'msg_id': {'$in': msg_ids}},
                                       keys=['header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit',
                       exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    found_ids = [rec['msg_id'] for rec in records]
    if len(records) > len(msg_ids):
        try:
            raise RuntimeError("DB appears to be in an inconsistent state."
                               "More matching records were found than should exist")
        except Exception:
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [m for m in msg_ids if m not in found_ids]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            return finish(error.wrap_exception())
    # Tasks that are still pending are deliberately not rejected: the
    # resubmission runs under a new msg_id.

    # mapping of original IDs to resubmitted IDs
    resubmitted = {}

    # send the messages
    for rec in records:
        header = rec['header']
        msg = self.session.msg(header['msg_type'], parent=header)
        msg_id = msg['msg_id']
        msg['content'] = rec['content']

        # use the old header, but update msg_id and timestamp
        fresh = msg['header']
        header['msg_id'] = fresh['msg_id']
        header['date'] = fresh['date']
        msg['header'] = header

        self.session.send(self.resubmit, msg, buffers=rec['buffers'])

        resubmitted[rec['msg_id']] = msg_id
        self.pending.add(msg_id)
        msg['buffers'] = rec['buffers']
        try:
            self.db.add_record(msg_id, init_record(msg))
        except Exception:
            self.log.error("db::DB Error updating record: %s", msg_id,
                           exc_info=True)

    finish(dict(status='ok', resubmitted=resubmitted))

    # store the new IDs in the Task DB
    # (``items()`` rather than the Python 2-only ``iteritems()``)
    for msg_id, resubmit_id in resubmitted.items():
        try:
            self.db.update_record(msg_id, {'resubmitted': resubmit_id})
        except Exception:
            self.log.error("db::DB Error updating record: %s", msg_id,
                           exc_info=True)
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    The requested tasks are looked up in the task DB, their existing
    records are reset, and each task is sent again under its original
    msg_id.

    Parameters
    ----------
    client_id
        Identity used as ``ident`` for the ``resubmit_reply``; its first
        element is stored as ``client_uuid`` on the reset records.
    msg : dict
        Request message; ``msg['content']['msg_ids']`` lists the tasks to
        resubmit.

    The reply content is ``dict(status='ok')`` on success. The result of
    ``error.wrap_exception()`` is sent instead when the lookup fails, a
    requested msg_id has no record, more records match than were requested,
    a requested task is still pending, or resetting the records fails.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

    msg_ids = msg['content']['msg_ids']
    reply = dict(status='ok')
    try:
        records = self.db.find_records({'msg_id': {'$in': msg_ids}},
                                       keys=['header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit', exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    found_ids = [rec['msg_id'] for rec in records]
    # a real list (not filter()) so it can be tested and indexed on Python 3
    invalid_ids = [m for m in found_ids if m in self.pending]
    if len(records) > len(msg_ids):
        # raise-and-catch so wrap_exception() is called from inside a handler
        try:
            raise RuntimeError("DB appears to be in an inconsistent state."
                "More matching records were found than should exist")
        except RuntimeError:
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [m for m in msg_ids if m not in found_ids]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            return finish(error.wrap_exception())
    elif invalid_ids:
        msg_id = invalid_ids[0]
        try:
            raise ValueError("Task %r appears to be inflight" % msg_id)
        except ValueError:
            return finish(error.wrap_exception())

    # clear the existing records
    now = datetime.now()
    cleared = empty_record()
    # explicit loop: map() used for side effects never runs on Python 3
    for key in ('msg_id', 'header', 'content', 'buffers', 'submitted'):
        cleared.pop(key)
    cleared['resubmitted'] = now
    cleared['queue'] = 'task'
    cleared['client_uuid'] = client_id[0]
    try:
        for msg_id in msg_ids:
            self.all_completed.discard(msg_id)
            self.db.update_record(msg_id, cleared)
    except Exception:
        self.log.error('db::db error upating record', exc_info=True)
        reply = error.wrap_exception()
    else:
        # send the messages
        for rec in records:
            header = rec['header']
            # include resubmitted in header to prevent digest collision
            header['resubmitted'] = now
            task_msg = self.session.msg(header['msg_type'])
            task_msg['content'] = rec['content']
            task_msg['header'] = header
            task_msg['header']['msg_id'] = rec['msg_id']
            self.session.send(self.resubmit, task_msg, buffers=rec['buffers'])

    # Send `reply`, not a hard-coded 'ok': a failed record update must reach
    # the client as an error.
    finish(reply)
def _wrap_exception(self, method=None):
    """Return the result of ``wrap_exception`` tagged with this engine's identity.

    The engine's ``ident`` and ``int_id`` and the given ``method`` are
    handed to ``wrap_exception`` as a single info dict.
    """
    engine_info = {
        'engine_uuid': self.ident,
        'engine_id': self.int_id,
        'method': method,
    }
    return wrap_exception(engine_info)
def register_engine(self, reg, msg):
    """Register a new engine.

    Checks that the requested queue and heartbeat identities are not
    already in use, sends a ``registration_reply`` to ``reg``, and on
    success stores the pending registration. If the heart is already
    beating the registration is finished immediately; otherwise a delayed
    callback is started to purge it if it stalls.

    Parameters
    ----------
    reg
        Identity the reply is addressed to (passed as ``ident``); its first
        element is stored with the pending registration.
    msg : dict
        Registration request; ``msg['content']`` must contain ``'queue'``
        and may contain ``'heartbeat'``.

    Returns
    -------
    The engine ID read from ``self._next_id`` (also when the registration
    is rejected), or None when the request names no queue, in which case
    no reply is sent.
    """
    content = msg['content']
    try:
        queue = content['queue']
    except KeyError:
        self.log.error("registration::queue not specified", exc_info=True)
        return
    heart = content.get('heartbeat', None)
    eid = self._next_id

    self.log.debug("registration::register_engine(%i, %r, %r, %r)"%(eid, queue, reg, heart))

    def in_use(kind, ident):
        # Log the conflict and build the error reply. The KeyError is raised
        # and caught so that error.wrap_exception() is called from inside an
        # exception handler.
        try:
            raise KeyError("%s %r in use"%(kind, ident))
        except KeyError:
            self.log.error("%s %r in use"%(kind, ident), exc_info=True)
            return error.wrap_exception()

    content = dict(id=eid, status='ok')
    content.update(self.engine_info)
    # check if requesting available IDs:
    if queue in self.by_ident:
        content = in_use("queue_id", queue)
    elif heart in self.hearts:  # need to check unique hearts?
        content = in_use("heart_id", heart)
    else:
        # also compare against registrations that have not completed yet
        # (.items() rather than .iteritems() so this runs on Python 2 and 3)
        for h, pack in self.incoming_registrations.items():
            if heart == h:
                content = in_use("heart_id", heart)
                break
            elif queue == pack[1]:
                content = in_use("queue_id", queue)
                break

    self.session.send(self.query, "registration_reply",
                      content=content,
                      ident=reg)

    if content['status'] == 'ok':
        if heart in self.heartmonitor.hearts:
            # already beating
            self.incoming_registrations[heart] = (eid, queue, reg[0], None)
            self.finish_registration(heart)
        else:
            purge = lambda: self._purge_stalled_registration(heart)
            dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
            dc.start()
            self.incoming_registrations[heart] = (eid, queue, reg[0], dc)
    else:
        self.log.error("registration::registration %i failed: %s"%(eid, content['evalue']))
    return eid
def register_engine(self, reg, msg):
    """Register a new engine.

    Checks that the requested queue and heartbeat identities are not
    already in use, sends a ``registration_reply`` to ``reg``, and on
    success stores the pending registration. If the heart is already
    beating the registration is finished immediately; otherwise a delayed
    callback is started to purge it if it stalls.

    Parameters
    ----------
    reg
        Identity the reply is addressed to (passed as ``ident``); its first
        element is stored with the pending registration.
    msg : dict
        Registration request; ``msg['content']`` must contain ``'queue'``
        and may contain ``'heartbeat'``.

    Returns
    -------
    The engine ID read from ``self._next_id`` (also when the registration
    is rejected), or None when the request names no queue, in which case
    no reply is sent.
    """
    content = msg['content']
    try:
        queue = content['queue']
    except KeyError:
        self.log.error("registration::queue not specified", exc_info=True)
        return
    heart = content.get('heartbeat', None)
    eid = self._next_id

    self.log.debug("registration::register_engine(%i, %r, %r, %r)" % (eid, queue, reg, heart))

    def in_use(kind, ident):
        # Log the conflict and build the error reply. The KeyError is raised
        # and caught so that error.wrap_exception() is called from inside an
        # exception handler.
        try:
            raise KeyError("%s %r in use" % (kind, ident))
        except KeyError:
            self.log.error("%s %r in use" % (kind, ident), exc_info=True)
            return error.wrap_exception()

    content = dict(id=eid, status='ok')
    content.update(self.engine_info)
    # check if requesting available IDs:
    if queue in self.by_ident:
        content = in_use("queue_id", queue)
    elif heart in self.hearts:  # need to check unique hearts?
        content = in_use("heart_id", heart)
    else:
        # also compare against registrations that have not completed yet
        # (.items() rather than .iteritems() so this runs on Python 2 and 3)
        for h, pack in self.incoming_registrations.items():
            if heart == h:
                content = in_use("heart_id", heart)
                break
            elif queue == pack[1]:
                content = in_use("queue_id", queue)
                break

    self.session.send(self.query, "registration_reply",
                      content=content,
                      ident=reg)

    if content['status'] == 'ok':
        if heart in self.heartmonitor.hearts:
            # already beating
            self.incoming_registrations[heart] = (eid, queue, reg[0], None)
            self.finish_registration(heart)
        else:
            purge = lambda: self._purge_stalled_registration(heart)
            dc = ioloop.DelayedCallback(purge, self.registration_timeout, self.loop)
            dc.start()
            self.incoming_registrations[heart] = (eid, queue, reg[0], dc)
    else:
        self.log.error("registration::registration %i failed: %s" % (eid, content['evalue']))
    return eid
def _wrap_exception(self, method=None):
    """Return the result of ``wrap_exception`` tagged with this engine's identity.

    The engine's ``ident`` and ``int_id`` and the given ``method`` are
    handed to ``wrap_exception`` as a single info dict.
    """
    engine_info = {
        'engine_uuid': self.ident,
        'engine_id': self.int_id,
        'method': method,
    }
    return wrap_exception(engine_info)
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    Every requested task is looked up in the task DB and sent again as a
    new message with its own msg_id. The new message is recorded in the DB,
    and the original record is annotated with the ID of its resubmission.

    Parameters
    ----------
    client_id
        Identity used as ``ident`` for the ``resubmit_reply``.
    msg : dict
        Request message; ``msg["content"]["msg_ids"]`` lists the tasks to
        resubmit.

    The reply content is ``dict(status="ok", resubmitted={old_id: new_id})``
    on success. If the lookup fails, a requested msg_id has no record, or
    more records match than were requested, the reply content is the result
    of ``error.wrap_exception()`` instead and nothing is resubmitted.
    """

    def finish(reply):
        self.session.send(self.query, "resubmit_reply", content=reply, ident=client_id)

    msg_ids = msg["content"]["msg_ids"]
    try:
        records = self.db.find_records(
            {"msg_id": {"$in": msg_ids}}, keys=["header", "content", "buffers"]
        )
    except Exception:
        self.log.error("db::db error finding tasks to resubmit", exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    # (set gives O(1) membership tests when computing the missing IDs)
    found_ids = set(rec["msg_id"] for rec in records)
    if len(records) > len(msg_ids):
        # raise-and-catch so wrap_exception() is called from inside a handler
        try:
            raise RuntimeError(
                "DB appears to be in an inconsistent state."
                "More matching records were found than should exist"
            )
        except RuntimeError:
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [m for m in msg_ids if m not in found_ids]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            return finish(error.wrap_exception())
    # Tasks that are still pending are deliberately allowed: the
    # resubmission goes out under a new ID, so there is no need to raise.

    # mapping of original IDs to resubmitted IDs
    resubmitted = {}

    # send the messages
    for rec in records:
        header = rec["header"]
        new_msg = self.session.msg(header["msg_type"], parent=header)
        new_id = new_msg["msg_id"]
        new_msg["content"] = rec["content"]

        # use the old header, but update msg_id and timestamp
        fresh = new_msg["header"]
        header["msg_id"] = fresh["msg_id"]
        header["date"] = fresh["date"]
        new_msg["header"] = header

        self.session.send(self.resubmit, new_msg, buffers=rec["buffers"])

        resubmitted[rec["msg_id"]] = new_id
        self.pending.add(new_id)
        new_msg["buffers"] = rec["buffers"]
        try:
            self.db.add_record(new_id, init_record(new_msg))
        except Exception:
            # best effort: the task has already been sent, so only log
            self.log.error("db::DB Error adding record: %s", new_id, exc_info=True)

    finish(dict(status="ok", resubmitted=resubmitted))

    # store the new IDs in the Task DB
    # (.items() rather than .iteritems() so this runs on Python 2 and 3)
    for old_id, resubmit_id in resubmitted.items():
        try:
            self.db.update_record(old_id, {"resubmitted": resubmit_id})
        except Exception:
            self.log.error("db::DB Error updating record: %s", old_id, exc_info=True)
def resubmit_task(self, client_id, msg):
    """Resubmit one or more tasks.

    Every requested task is looked up in the task DB and sent again as a
    new message with its own msg_id. The new message is recorded in the DB,
    and the original record is annotated with the ID of its resubmission.

    Parameters
    ----------
    client_id
        Identity used as ``ident`` for the ``resubmit_reply``.
    msg : dict
        Request message; ``msg['content']['msg_ids']`` lists the tasks to
        resubmit.

    The reply content is ``dict(status='ok', resubmitted={old_id: new_id})``
    on success. If the lookup fails, a requested msg_id has no record, or
    more records match than were requested, the reply content is the result
    of ``error.wrap_exception()`` instead and nothing is resubmitted.
    """
    def finish(reply):
        self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)

    msg_ids = msg['content']['msg_ids']
    try:
        records = self.db.find_records({'msg_id': {'$in': msg_ids}},
                                       keys=['header', 'content', 'buffers'])
    except Exception:
        self.log.error('db::db error finding tasks to resubmit', exc_info=True)
        return finish(error.wrap_exception())

    # validate msg_ids
    # (set gives O(1) membership tests when computing the missing IDs)
    found_ids = set(rec['msg_id'] for rec in records)
    if len(records) > len(msg_ids):
        # raise-and-catch so wrap_exception() is called from inside a handler
        try:
            raise RuntimeError("DB appears to be in an inconsistent state."
                "More matching records were found than should exist")
        except RuntimeError:
            return finish(error.wrap_exception())
    elif len(records) < len(msg_ids):
        missing = [m for m in msg_ids if m not in found_ids]
        try:
            raise KeyError("No such msg(s): %r" % missing)
        except KeyError:
            return finish(error.wrap_exception())
    # Tasks that are still pending are deliberately allowed: the
    # resubmission goes out under a new ID, so there is no need to raise.

    # mapping of original IDs to resubmitted IDs
    resubmitted = {}

    # send the messages
    for rec in records:
        header = rec['header']
        new_msg = self.session.msg(header['msg_type'])
        new_id = new_msg['msg_id']
        new_msg['content'] = rec['content']
        # keep the old header, overwritten by every field of the fresh one
        header.update(new_msg['header'])
        new_msg['header'] = header

        self.session.send(self.resubmit, new_msg, buffers=rec['buffers'])

        resubmitted[rec['msg_id']] = new_id
        self.pending.add(new_id)
        # Record the buffers that were actually sent (previously []), so the
        # new task's DB record is not stored without its buffers.
        new_msg['buffers'] = rec['buffers']
        try:
            self.db.add_record(new_id, init_record(new_msg))
        except Exception:
            # best effort: the task has already been sent, so only log
            self.log.error("db::DB Error adding record: %s", new_id, exc_info=True)

    finish(dict(status='ok', resubmitted=resubmitted))

    # store the new IDs in the Task DB
    # (.items() rather than .iteritems() so this runs on Python 2 and 3)
    for old_id, resubmit_id in resubmitted.items():
        try:
            self.db.update_record(old_id, {'resubmitted': resubmit_id})
        except Exception:
            self.log.error("db::DB Error updating record: %s", old_id, exc_info=True)