def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s.sync_center()
    done = s.start(0)
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'tasks': {'x': (inc, 1),
                                'y': (inc, 'x'),
                                'z': (inc, 'y')},
                      'dependencies': {'y': {'x'}, 'z': {'y'}},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'),
                        'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])

    while True:
        msg = yield report.get()
        if msg['op'] == 'task-erred' and msg['key'] == 'e':
            break

    p = MultiProgressWidget(['e'], scheduler=s, complete=True)

    assert set(concat(p.all_keys.values())) == {'x-1', 'x-2', 'x-3',
                                                'y-1', 'y-2', 'e'}
    assert all(b.value == 1.0 for b in p.bars.values())
    assert p.texts['x'].value == '3 / 3'
    assert p.texts['y'].value == '2 / 2'

    sched.put_nowait({'op': 'close'})
    yield done
def test_robust_to_bad_plugin(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)

    class Bad(SchedulerPlugin):
        def task_finished(self, scheduler, key, worker, nbytes):
            raise Exception()

    bad = Bad()
    s.add_plugin(bad)

    sched.put_nowait({'op': 'update-graph',
                      'tasks': valmap(dumps_task, {'x': (inc, 1),
                                                   'y': (inc, 'x'),
                                                   'z': (inc, 'y')}),
                      'dependencies': {'y': ['x'], 'z': ['y']},
                      'keys': ['z']})

    while True:  # normal execution
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1'),
                        'e': (throws, 'y-2'),
                        'other': (inc, 123)},
                   keys=['e'])

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break

    p = MultiProgressWidget(['x-1', 'x-2', 'x-3'], scheduler=s)
    assert set(concat(p.all_keys.values())).issuperset({'x-1', 'x-2', 'x-3'})
    assert 'x' in p.bars

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (div, 1, 0)}, keys=['x'])

    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()

    while True:
        msg = yield report.get()
        if msg.get('key') == 'x':
            break

    assert progress.status == 'error'
    assert not progress._timer.is_alive()

    progress = TextProgressBar(['x'], scheduler=s)
    progress.start()
    assert progress.status == 'error'
    assert not progress._timer or not progress._timer.is_alive()

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'])

    progress = TextProgressBar(['z'], scheduler=s)
    progress.start()

    assert progress.all_keys == {'x', 'y', 'z'}
    assert progress.keys == {'x', 'y', 'z'}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert progress.keys == set()
    check_bar_completed(capsys)
    assert progress not in s.plugins

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3

    sched.put_nowait({'op': 'close'})
    yield done
class Stream(object):
    def __init__(self, raw_headers, header_table):
        self._queue = Queue()
        self._header_table = header_table
        self._current_headers = self._header_table.merge(raw_headers)

    @gen.coroutine
    def get(self, timeout=0):
        if timeout == 0:
            res, headers = yield self._queue.get()
        else:
            deadline = datetime.timedelta(seconds=timeout)
            res, headers = yield self._queue.get(deadline)
        self._current_headers = headers
        if isinstance(res, Exception):
            raise res
        else:
            raise gen.Return(res)

    def push(self, item, raw_headers):
        headers = self._header_table.merge(raw_headers)
        self._queue.put_nowait((item, headers))

    def done(self, raw_headers):
        headers = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((ChokeEvent(), headers))

    def error(self, errnumber, reason, raw_headers):
        headers = self._header_table.merge(raw_headers)
        return self._queue.put_nowait((RequestError(errnumber, reason), headers))

    @property
    def headers(self):
        return self._current_headers
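The Stream class above is a thin wrapper over the producer/consumer pattern tornado.queues.Queue provides: push() enqueues items, done() enqueues a terminal ChokeEvent, and get() either returns an item or re-raises whatever exception was queued. Below is a minimal, self-contained sketch of that pattern; the Done class and the function names are illustrative only, not part of the library above.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue


class Done(Exception):
    """Illustrative stand-in for ChokeEvent."""


@gen.coroutine
def consume(queue):
    items = []
    while True:
        item = yield queue.get()
        if isinstance(item, Exception):  # done()/error() both enqueue exceptions
            if isinstance(item, Done):
                break
            raise item
        items.append(item)
    raise gen.Return(items)


@gen.coroutine
def main():
    q = Queue()
    for i in range(3):
        q.put_nowait(i)      # push()
    q.put_nowait(Done())     # done()
    result = yield consume(q)
    assert result == [0, 1, 2]


IOLoop.current().run_sync(main)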
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Bad(SchedulerPlugin):
        def task_finished(self, scheduler, key, worker, nbytes):
            raise Exception()

    bad = Bad()
    s.add_plugin(bad)

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                      'keys': ['z']})

    while True:  # normal execution
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    sched.put_nowait({'op': 'close'})
    yield done
def test_multi_queues(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    # Test update graph
    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    slen, rlen = len(s.scheduler_queues), len(s.report_queues)

    sched2, report2 = Queue(), Queue()
    s.handle_queues(sched2, report2)
    assert slen + 1 == len(s.scheduler_queues)
    assert rlen + 1 == len(s.report_queues)

    sched2.put_nowait({'op': 'update-graph',
                       'dsk': {'a': (inc, 10)},
                       'keys': ['a']})

    for q in [report, report2]:
        while True:
            msg = yield q.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'a':
                break
def test_diagnostic(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'tasks': {'x': dumps_task((inc, 1)),
                                'y': dumps_task((inc, 'x')),
                                'z': dumps_task((inc, 'y'))},
                      'dependencies': {'y': ['x'], 'z': ['y']},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3
class ConnectionPool(object):
    def __init__(self, servers, maxsize=15, minsize=1, loop=None, debug=0):
        loop = loop if loop is not None else tornado.ioloop.IOLoop.instance()
        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="'%(levelname)s %(asctime)s"
                       " %(module)s:%(lineno)d %(process)d %(thread)d %(message)s'")
        self._loop = loop
        self._servers = servers
        self._minsize = minsize
        self._debug = debug
        self._in_use = set()
        self._pool = Queue(maxsize)

    @gen.coroutine
    def clear(self):
        """Clear pool connections."""
        while not self._pool.empty():
            conn = yield self._pool.get()
            conn.close_socket()

    def size(self):
        return len(self._in_use) + self._pool.qsize()

    @gen.coroutine
    def acquire(self):
        """Acquire a connection from the pool, or spawn a new one if the pool
        maxsize permits.

        :return: ``Connection`` (reader, writer)
        """
        while self.size() < self._minsize:
            _conn = yield self._create_new_conn()
            yield self._pool.put(_conn)

        conn = None
        while not conn:
            if not self._pool.empty():
                conn = yield self._pool.get()

            if conn is None:
                conn = yield self._create_new_conn()

        self._in_use.add(conn)
        raise gen.Return(conn)

    @gen.coroutine
    def _create_new_conn(self):
        conn = yield Connection.get_conn(self._servers, self._debug)
        raise gen.Return(conn)

    def release(self, conn):
        self._in_use.remove(conn)
        try:
            self._pool.put_nowait(conn)
        except (QueueEmpty, QueueFull):
            conn.close_socket()
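For reference, here is a minimal, self-contained sketch of the bounded-pool idiom the ConnectionPool above relies on: a Queue(maxsize) holds idle connections, acquiring prefers an idle one, and releasing returns the connection to the pool or closes it when the pool is already full. FakeConn is a stand-in, not the real Connection class used above.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue, QueueFull


class FakeConn(object):
    """Stand-in for a real connection object."""

    def close_socket(self):
        pass


@gen.coroutine
def main():
    pool = Queue(maxsize=2)   # idle connections, bounded like ConnectionPool._pool
    in_use = set()

    # acquire: reuse an idle connection if one is queued, otherwise create one
    if pool.empty():
        conn = FakeConn()
    else:
        conn = yield pool.get()
    in_use.add(conn)

    # release: return the connection to the pool, or close it when the pool is full
    in_use.remove(conn)
    try:
        pool.put_nowait(conn)
    except QueueFull:
        conn.close_socket()


IOLoop.current().run_sync(main)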
def test_multi_queues(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    # Test update graph
    sched.put_nowait({'op': 'update-graph',
                      'tasks': valmap(dumps_task, {'x': (inc, 1),
                                                   'y': (inc, 'x'),
                                                   'z': (inc, 'y')}),
                      'dependencies': {'x': [], 'y': ['x'], 'z': ['y']},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    slen, rlen = len(s.scheduler_queues), len(s.report_queues)

    sched2, report2 = Queue(), Queue()
    s.handle_queues(sched2, report2)
    assert slen + 1 == len(s.scheduler_queues)
    assert rlen + 1 == len(s.report_queues)

    sched2.put_nowait({'op': 'update-graph',
                       'tasks': {'a': dumps_task((inc, 10))},
                       'dependencies': {'a': []},
                       'keys': ['a']})

    for q in [report, report2]:
        while True:
            msg = yield q.get()
            if msg['op'] == 'key-in-memory' and msg['key'] == 'a':
                break
class MockWebsocketHandler(LanguageServerWebSocketHandler):
    _messages_wrote = None  # type: Queue

    def __init__(self):
        pass

    def initialize(self, manager):
        super().initialize(manager)
        self._messages_wrote = Queue()

    def write_message(self, message: Text) -> None:
        self.log.warning("write_message %s", message)
        self._messages_wrote.put_nowait(message)
async def get_peers(self) -> Set[str]:
    c = self.db.cursor()
    c.execute("SELECT url FROM peers")
    peers: Set[str] = set(p[0] for p in c.fetchall())

    if (datetime.utcnow() - timedelta(hours=1) > self.last_peers_update
            or self.peer_cache_invalidated) and not self.fetching_peers:
        self.fetching_peers = True
        self.last_peers_update = datetime.utcnow()

        # Peer queue
        peers_to_check = Queue()
        for p in peers:
            peers_to_check.put_nowait(p)

        peers_to_check_set: Set[str] = peers.copy()
        results: List[bool] = []
        attempted_contact: Set[str] = {CHORD_URL}

        # noinspection PyAsyncCall,PyTypeChecker
        workers = tornado.gen.multi([
            self.peer_worker(peers, peers_to_check, peers_to_check_set,
                             attempted_contact, results)
            for _ in range(WORKERS)
        ])

        # Wait for all peers to be processed
        await peers_to_check.join()

        self.peer_cache_invalidated = self.peer_cache_invalidated or (True in results)

        # Store any new peers in the possibly augmented set
        for peer in peers:
            insert_or_ignore_peer(c, peer)

        # Commit any new peers to the database
        self.db.commit()

        self.fetching_peers = False

        # Trigger exit for all workers
        for _ in range(WORKERS):
            peers_to_check.put_nowait(None)

        # Wait for workers to exit
        await workers

    return peers
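The coroutine above follows the standard tornado worker-pool shape: enqueue the work, spawn a fixed number of workers with tornado.gen.multi, await Queue.join() for completion, then push one None sentinel per worker and await the workers. A small, self-contained sketch of that shape follows, with placeholder arithmetic standing in for the peer-contact work.

import tornado.gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

WORKERS = 3


async def worker(queue, results):
    while True:
        item = await queue.get()
        try:
            if item is None:          # sentinel: exit the worker
                return
            results.append(item * 2)  # placeholder for real work
        finally:
            queue.task_done()


async def main():
    queue, results = Queue(), []
    for i in range(10):
        queue.put_nowait(i)

    workers = tornado.gen.multi([worker(queue, results) for _ in range(WORKERS)])

    await queue.join()                # every queued item has been processed
    for _ in range(WORKERS):
        queue.put_nowait(None)        # one sentinel per worker triggers exit
    await workers

    assert sorted(results) == [i * 2 for i in range(10)]


IOLoop.current().run_sync(main)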
class MockWebsocketHandler(LanguageServerWebSocketHandler):
    _messages_wrote = None  # type: Queue
    _ping_sent = None  # type: bool

    def __init__(self):
        self.request = HTTPServerRequest()
        self.application = Application()

    def initialize(self, manager):
        super().initialize(manager)
        self._messages_wrote = Queue()
        self._ping_sent = False

    def write_message(self, message: Text) -> None:
        self.log.warning("write_message %s", message)
        self._messages_wrote.put_nowait(message)

    def send_ping(self):
        self._ping_sent = True
def test_robust_to_bad_plugin(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)

    class Bad(SchedulerPlugin):
        def task_finished(self, scheduler, key, worker, nbytes):
            raise Exception()

    bad = Bad()
    s.add_plugin(bad)

    sched.put_nowait({'op': 'update-graph',
                      'dsk': {'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                      'keys': ['z']})

    while True:  # normal execution
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break
def test_diagnostic(s, a, b):
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    class Counter(SchedulerPlugin):
        def start(self, scheduler):
            scheduler.add_plugin(self)
            self.count = 0

        def task_finished(self, scheduler, key, worker, nbytes, **kwargs):
            self.count += 1

    counter = Counter()
    counter.start(s)
    assert counter in s.plugins
    assert counter.count == 0

    sched.put_nowait({'op': 'update-graph',
                      'tasks': {'x': dumps_task((inc, 1)),
                                'y': dumps_task((inc, 'x')),
                                'z': dumps_task((inc, 'y'))},
                      'dependencies': {'y': ['x'], 'z': ['y']},
                      'keys': ['z']})

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert counter.count == 3

    s.remove_plugin(counter)
    assert counter not in s.plugins
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x-1': (inc, 1),
                        'x-2': (inc, 'x-1'),
                        'x-3': (inc, 'x-2'),
                        'y-1': (dec, 'x-3'),
                        'y-2': (dec, 'y-1')},
                   keys=['y-2'])

    p = MultiProgress(['y-2'], scheduler=s, func=lambda s: s.split('-')[0])
    assert p.keys == {'x': {'x-1', 'x-2', 'x-3'}, 'y': {'y-1', 'y-2'}}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'x-3':
            break

    assert p.keys == {'x': set(), 'y': {'y-1', 'y-2'}}

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2':
            break

    assert p.keys == {'x': set(), 'y': set()}
    assert p.status == 'finished'

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'])

    bars = [Progress(keys=['z'], scheduler=s) for i in range(10)]

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    assert all(b.status == 'finished' for b in bars)

    sched.put_nowait({'op': 'close'})
    yield done
def f(c, a, b):
    s = Scheduler((c.ip, c.port), loop=loop)
    yield s._sync_center()
    done = s.start()
    sched, report = Queue(), Queue()
    s.handle_queues(sched, report)
    msg = yield report.get()
    assert msg['op'] == 'stream-start'

    s.update_graph(dsk={'x': (inc, 1), 'y': (inc, 'x'), 'z': (inc, 'y')},
                   keys=['z'])

    progress = ProgressWidget(['z'], scheduler=s)

    while True:
        msg = yield report.get()
        if msg['op'] == 'key-in-memory' and msg['key'] == 'z':
            break

    progress._update()
    assert progress.bar.value == 1.0
    assert 's' in progress.bar.description

    sched.put_nowait({'op': 'close'})
    yield done
class Rx(PrettyPrintable):
    def __init__(self, rx_tree, io_loop=None, servicename=None):
        # If it's not the main thread
        # and a current IOLoop doesn't exist here,
        # IOLoop.instance becomes self._io_loop
        self._io_loop = io_loop or IOLoop.current()
        self._queue = Queue()
        self._done = False
        self.servicename = servicename
        self.rx_tree = rx_tree
        self.default_protocol = detect_protocol_type(rx_tree)

    @coroutine
    def get(self, timeout=0, protocol=None):
        if self._done and self._queue.empty():
            raise ChokeEvent()

        # to pull various service errors
        if timeout <= 0 or timeout is None:
            item = yield self._queue.get()
        else:
            deadline = datetime.timedelta(seconds=timeout)
            item = yield self._queue.get(deadline)

        if isinstance(item, Exception):
            raise item

        if protocol is None:
            protocol = self.default_protocol

        name, payload = item
        res = protocol(name, payload)
        if isinstance(res, ProtocolError):
            raise ServiceError(self.servicename, res.reason, res.code,
                               res.category)
        else:
            raise Return(res)

    def done(self):
        self._done = True

    def push(self, msg_type, payload):
        dispatch = self.rx_tree.get(msg_type)
        log.debug("dispatch %s %.300s", dispatch, payload)
        if dispatch is None:
            raise InvalidMessageType(self.servicename,
                                     CocaineErrno.INVALIDMESSAGETYPE,
                                     "unexpected message type %s" % msg_type)
        name, rx = dispatch
        log.debug("name `%s` rx `%s`", name, rx)
        self._queue.put_nowait((name, payload))
        if rx == {}:  # the last transition
            self.done()
        elif rx is not None:  # not a recursive transition
            self.rx_tree = rx

    def error(self, err):
        self._queue.put_nowait(err)

    def closed(self):
        return self._done

    def _format(self):
        return "name: %s, queue: %s, done: %s" % (
            self.servicename, self._queue, self._done)
class SubscribeListener(SubscribeCallback):
    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                raise gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s"
                                % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:  # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except:  # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
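_wait_for above races the awaited future against a read from error_queue using tornado.gen.WaitIterator: if the real future resolves first its value is returned, and if the error future resolves first the queued exception is raised. Below is a stripped-down, self-contained sketch of the same race; the names are illustrative, not the PubNub API.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue


@gen.coroutine
def wait_for(fut, error_queue):
    # Race the real future against the error queue, as _wait_for does above.
    error = error_queue.get()
    wi = gen.WaitIterator(fut, error)
    while not wi.done():
        result = yield wi.next()
        if wi.current_future is error:
            raise result              # surface the queued exception
        raise gen.Return(result)      # otherwise return the real value


@gen.coroutine
def main():
    messages = Queue()
    messages.put_nowait('hello')
    msg = yield wait_for(messages.get(), Queue())   # no error queued: message wins
    assert msg == 'hello'

    errors = Queue()
    errors.put_nowait(ValueError('boom'))
    try:
        yield wait_for(Queue().get(), errors)       # message never arrives: error wins
    except ValueError:
        pass


IOLoop.current().run_sync(main)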
class Scheduler(object): def __init__(self, center, delete_batch_time=1): self.scheduler_queue = Queue() self.report_queue = Queue() self.delete_queue = Queue() self.status = None self.center = coerce_to_rpc(center) self.dask = dict() self.dependencies = dict() self.dependents = dict() self.generation = 0 self.has_what = defaultdict(set) self.held_data = set() self.in_play = set() self.keyorder = dict() self.nbytes = dict() self.ncores = dict() self.processing = dict() self.restrictions = dict() self.stacks = dict() self.waiting = dict() self.waiting_data = dict() self.who_has = defaultdict(set) self.exceptions = dict() self.tracebacks = dict() self.exceptions_blame = dict() self.delete_batch_time = delete_batch_time @gen.coroutine def _sync_center(self): self.ncores, self.has_what, self.who_has = yield [ self.center.ncores(), self.center.has_what(), self.center.who_has()] def start(self): collections = [self.dask, self.dependencies, self.dependents, self.waiting, self.waiting_data, self.in_play, self.keyorder, self.nbytes, self.processing, self.restrictions] for collection in collections: collection.clear() self.processing = {addr: set() for addr in self.ncores} self.stacks = {addr: list() for addr in self.ncores} self.worker_queues = {addr: Queue() for addr in self.ncores} self.coroutines = ([ self.scheduler(), delete(self.scheduler_queue, self.delete_queue, self.center.ip, self.center.port, self.delete_batch_time)] + [worker(self.scheduler_queue, self.worker_queues[w], w, n) for w, n in self.ncores.items()]) for cor in self.coroutines: if cor.done(): raise cor.exception() return All(self.coroutines) @gen.coroutine def _close(self): self.scheduler_queue.put_nowait({'op': 'close'}) yield All(self.coroutines) @gen.coroutine def cleanup(self): """ Clean up queues and coroutines, prepare to stop """ logger.debug("Cleaning up coroutines") n = 0 self.delete_queue.put_nowait({'op': 'close'}); n += 1 for w, nc in self.ncores.items(): for i in range(nc): self.worker_queues[w].put_nowait({'op': 'close'}); n += 1 for i in range(n): yield self.scheduler_queue.get() def mark_ready_to_run(self, key): """ Send task to an appropriate worker, trigger worker """ logger.debug("Mark %s ready to run", key) if key in self.waiting: assert not self.waiting[key] del self.waiting[key] new_worker = decide_worker(self.dependencies, self.stacks, self.who_has, self.restrictions, self.nbytes, key) self.stacks[new_worker].append(key) self.ensure_occupied(new_worker) def mark_key_in_memory(self, key, workers=None): logger.debug("Mark %s in memory", key) if workers is None: workers = self.who_has[key] for worker in workers: self.who_has[key].add(worker) self.has_what[worker].add(key) with ignoring(KeyError): self.processing[worker].remove(key) for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get, reverse=True): if dep in self.waiting: s = self.waiting[dep] with ignoring(KeyError): s.remove(key) if not s: # new task ready to run self.mark_ready_to_run(dep) for dep in self.dependencies.get(key, []): if dep in self.waiting_data: s = self.waiting_data[dep] with ignoring(KeyError): s.remove(key) if not s and dep: self.release_key(dep) self.report_queue.put_nowait({'op': 'key-in-memory', 'key': key, 'workers': workers}) def ensure_occupied(self, worker): """ Spin up tasks on worker while it has tasks and free cores """ logger.debug('Ensure worker is occupied: %s', worker) while (self.stacks[worker] and self.ncores[worker] > len(self.processing[worker])): key = self.stacks[worker].pop() 
self.processing[worker].add(key) logger.debug("Send job to worker: %s, %s, %s", worker, key, self.dask[key]) self.worker_queues[worker].put_nowait( {'op': 'compute-task', 'key': key, 'task': self.dask[key], 'needed': self.dependencies[key]}) def seed_ready_tasks(self, keys=None): """ Distribute leaves among workers Takes an iterable of keys to consider for execution """ if keys is None: keys = self.dask new_stacks = assign_many_tasks( self.dependencies, self.waiting, self.keyorder, self.who_has, self.stacks, self.restrictions, self.nbytes, [k for k in keys if k in self.waiting and not self.waiting[k]]) logger.debug("Seed ready tasks: %s", new_stacks) for worker, stack in new_stacks.items(): if stack: self.ensure_occupied(worker) def release_key(self, key): """ Release key from distributed memory if its ready """ logger.debug("Release key %s", key) if key not in self.held_data and not self.waiting_data.get(key): self.delete_queue.put_nowait({'op': 'delete-task', 'key': key}) for w in self.who_has[key]: self.has_what[w].remove(key) del self.who_has[key] if key in self.waiting_data: del self.waiting_data[key] if key in self.in_play: self.in_play.remove(key) def update_data(self, extra_who_has, extra_nbytes): logger.debug("Update data %s", extra_who_has) for key, workers in extra_who_has.items(): self.mark_key_in_memory(key, workers) self.nbytes.update(extra_nbytes) self.held_data.update(extra_who_has) self.in_play.update(extra_who_has) def mark_failed(self, key, failing_key=None): """ When a task fails mark it and all dependent task as failed """ logger.debug("Mark key as failed %s", key) if key in self.exceptions_blame: return self.exceptions_blame[key] = failing_key self.report_queue.put_nowait({'op': 'task-erred', 'key': key, 'exception': self.exceptions[failing_key], 'traceback': self.tracebacks[failing_key]}) if key in self.waiting: del self.waiting[key] if key in self.waiting_data: del self.waiting_data[key] self.in_play.remove(key) for dep in self.dependents[key]: self.mark_failed(dep, failing_key) def log_state(self, msg=''): logger.debug("Runtime State: %s", msg) logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n' 'in_play: %s\n\n', self.waiting, self.stacks, self.processing, self.in_play) def mark_worker_missing(self, worker): logger.debug("Mark worker as missing %s", worker) if worker not in self.processing: return keys = self.has_what.pop(worker) for i in range(self.ncores[worker]): # send close message, in case not dead self.worker_queues[worker].put_nowait({'op': 'close', 'report': False}) del self.worker_queues[worker] del self.ncores[worker] del self.stacks[worker] del self.processing[worker] if not self.stacks: logger.critical("Lost all workers") missing_keys = set() for key in keys: self.who_has[key].remove(worker) if not self.who_has[key]: missing_keys.add(key) gone_data = {k for k, v in self.who_has.items() if not v} self.in_play.difference_update(missing_keys) for k in gone_data: del self.who_has[k] def heal_state(self): """ Recover from catastrophic change """ logger.debug("Heal state") self.log_state("Before Heal") state = heal(self.dependencies, self.dependents, set(self.who_has), self.stacks, self.processing, self.waiting, self.waiting_data) released = state['released'] self.in_play.clear(); self.in_play.update(state['in_play']) add_keys = {k for k, v in self.waiting.items() if not v} for key in self.held_data & released: self.report_queue.put_nowait({'op': 'lost-key', 'key': key}) if self.stacks: for key in add_keys: self.mark_ready_to_run(key) for key 
in set(self.who_has) & released - self.held_data: self.delete_queue.put_nowait({'op': 'delete-task', 'key': key}) self.in_play.update(self.who_has) self.log_state("After Heal") def my_heal_missing_data(self, missing): logger.debug("Heal from missing data") return heal_missing_data(self.dask, self.dependencies, self.dependents, self.held_data, self.who_has, self.in_play, self.waiting, self.waiting_data, missing) @gen.coroutine def scheduler(self): """ The scheduler coroutine for dask scheduling This coroutine manages interactions with all worker cores and with the delete coroutine through queues. Parameters ---------- scheduler_queue: tornado.queues.Queue Get information from outside report_queue: tornado.queues.Queue Report information to outside worker_queues: dict {worker: tornado.queues.Queue} One queue per worker node. Each queue is listened to by several worker_core coroutines. delete_queue: tornado.queues.Queue One queue listened to by ``delete`` which connects to the center to delete unnecessary intermediate data who_has: dict {key: set} Mapping key to {set of worker-identities} has_what: dict {worker: set} Mapping worker-identity to {set of keys} ncores: dict {worker: int} Mapping worker-identity to number-of-cores """ assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies) self.heal_state() self.status = 'running' self.report_queue.put_nowait({'op': 'start'}) while True: msg = yield self.scheduler_queue.get() logger.debug("scheduler receives message %s", msg) if msg['op'] == 'close': break elif msg['op'] == 'update-graph': update_state(self.dask, self.dependencies, self.dependents, self.held_data, self.who_has, self.in_play, self.waiting, self.waiting_data, msg['dsk'], msg['keys']) cover_aliases(self.dask, msg['dsk']) self.restrictions.update(msg.get('restrictions', {})) new_keyorder = order(msg['dsk']) # TODO: define order wrt old graph for key in new_keyorder: if key not in self.keyorder: # TODO: add test for this self.keyorder[key] = (self.generation, new_keyorder[key]) # prefer old if len(msg['dsk']) > 1: self.generation += 1 # older graph generations take precedence for key in msg['dsk']: for dep in self.dependencies[key]: if dep in self.exceptions_blame: self.mark_failed(key, self.exceptions_blame[dep]) self.seed_ready_tasks(msg['dsk']) for key in msg['keys']: if self.who_has[key]: self.mark_key_in_memory(key) elif msg['op'] == 'update-data': self.update_data(msg['who-has'], msg['nbytes']) elif msg['op'] == 'task-finished': key, worker = msg['key'], msg['workers'][0] logger.debug("Mark task as finished %s, %s", key, worker) if key in self.processing[worker]: self.nbytes[key] = msg['nbytes'] self.mark_key_in_memory(key, [worker]) self.ensure_occupied(worker) else: logger.debug("Key not found in processing, %s, %s, %s", key, worker, self.processing[worker]) elif msg['op'] == 'task-erred': key, worker = msg['key'], msg['worker'] if key in self.processing[worker]: self.processing[worker].remove(key) self.exceptions[key] = msg['exception'] self.tracebacks[key] = msg['traceback'] self.mark_failed(key, key) self.ensure_occupied(worker) elif msg['op'] in ('missing-data', 'task-missing-data'): missing = set(msg['missing']) logger.debug("Recovering missing data: %s", missing) for k in missing: with ignoring(KeyError): workers = self.who_has.pop(k) for worker in workers: self.has_what[worker].remove(k) self.my_heal_missing_data(missing) if msg['op'] == 'task-missing-data': key = msg['key'] with ignoring(KeyError): self.processing[msg['worker']].remove(key) 
self.waiting[key] = missing logger.info('task missing data, %s, %s', key, self.waiting) with ignoring(KeyError): self.processing[msg['worker']].remove(msg['key']) self.ensure_occupied(msg['worker']) self.seed_ready_tasks() elif msg['op'] == 'worker-failed': worker = msg['worker'] self.mark_worker_missing(worker) if msg.get('heal', True): self.heal_state() elif msg['op'] == 'release-held-data': if msg['key'] in self.held_data: logger.debug("Release key: %s", msg['key']) self.held_data.remove(msg['key']) self.release_key(msg['key']) else: logger.warn("Bad message: %s", msg) logger.debug('Finished scheduling') yield self.cleanup() self.status = 'done'
class SQSDrain(object):
    """Implementation of IDrain that writes to an AWS SQS queue."""

    def __init__(self, logger, loop, sqs_client, metric_prefix='emitter'):
        self.emitter = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self._send_queue = Queue()
        self._should_flush_queue = Event()
        self._flush_handle = None
        self.loop.spawn_callback(self._onSend)

    @gen.coroutine
    def _flush_send_batch(self, batch_size):
        send_batch = [
            self._send_queue.get_nowait()
            for pos in range(min(batch_size, self.emitter.max_messages))
        ]
        try:
            response = yield self.emitter.send_message_batch(*send_batch)
        except SQSError as err:
            self.logger.exception('Error encountered flushing data to SQS: %s',
                                  err)
            self.output_error.set()
            for msg in send_batch:
                self._send_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.output_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to send: %s', req.Id)
                    self._send_queue.put_nowait(req)

    @gen.coroutine
    def _onSend(self):
        respawn = True
        while respawn:
            qsize = self._send_queue.qsize()
            # This will keep flushing until clear,
            # including items that show up in between flushes
            while qsize > 0:
                try:
                    yield self._flush_send_batch(qsize)
                except Exception as err:
                    self.logger.exception(err)
                    self.output_error.set()
                qsize = self._send_queue.qsize()
            # We've cleared the backlog, remove any possible future flush
            if self._flush_handle:
                self.loop.remove_timeout(self._flush_handle)
                self._flush_handle = None
            self._should_flush_queue.clear()
            yield self._should_flush_queue.wait()

    @gen.coroutine
    def close(self, timeout=None):
        self.state = CLOSING
        yield self._send_queue.join(timeout)

    def emit_nowait(self, msg):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
            raise QueueFull()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        self.logger.debug("Drain emitting")
        self._send_queue.put_nowait(msg)

    @gen.coroutine
    def emit(self, msg, timeout=None):
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        yield self._send_queue.put(msg, timeout)
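_onSend above is the usual "wake and drain" loop: producers set an Event when the queue needs flushing, and a long-lived coroutine drains the queue in batches, clears the event, and goes back to waiting. A compact, self-contained sketch of that loop follows; the None sentinel exists only so the example terminates.

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.locks import Event
from tornado.queues import Queue


@gen.coroutine
def drain(queue, flush, sink):
    # Long-lived coroutine: sleep until signalled, then drain the queue in batches.
    while True:
        yield flush.wait()
        while queue.qsize() > 0:
            batch = [queue.get_nowait() for _ in range(queue.qsize())]
            sink.extend(batch)
        flush.clear()
        if sink and sink[-1] is None:   # sentinel, only so this sketch terminates
            return


@gen.coroutine
def main():
    queue, flush, sink = Queue(), Event(), []
    drainer = drain(queue, flush, sink)
    for item in (1, 2, 3, None):
        queue.put_nowait(item)
    flush.set()                         # wake the drain coroutine
    yield drainer
    assert sink == [1, 2, 3, None]


IOLoop.current().run_sync(main)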
class DebugpyMessageQueue:

    HEADER = 'Content-Length: '
    HEADER_LENGTH = 16
    SEPARATOR = '\r\n\r\n'
    SEPARATOR_LENGTH = 4

    def __init__(self, event_callback, log):
        self.tcp_buffer = ''
        self._reset_tcp_pos()
        self.event_callback = event_callback
        self.message_queue = Queue()
        self.log = log

    def _reset_tcp_pos(self):
        self.header_pos = -1
        self.separator_pos = -1
        self.message_size = 0
        self.message_pos = -1

    def _put_message(self, raw_msg):
        self.log.debug('QUEUE - _put_message:')
        msg = jsonapi.loads(raw_msg)
        if msg['type'] == 'event':
            self.log.debug('QUEUE - received event:')
            self.log.debug(msg)
            self.event_callback(msg)
        else:
            self.log.debug('QUEUE - put message:')
            self.log.debug(msg)
            self.message_queue.put_nowait(msg)

    def put_tcp_frame(self, frame):
        self.tcp_buffer += frame

        self.log.debug('QUEUE - received frame')
        while True:
            # Finds header
            if self.header_pos == -1:
                self.header_pos = self.tcp_buffer.find(DebugpyMessageQueue.HEADER)
            if self.header_pos == -1:
                return

            self.log.debug('QUEUE - found header at pos %i', self.header_pos)

            # Finds separator
            if self.separator_pos == -1:
                hint = self.header_pos + DebugpyMessageQueue.HEADER_LENGTH
                self.separator_pos = self.tcp_buffer.find(
                    DebugpyMessageQueue.SEPARATOR, hint)
            if self.separator_pos == -1:
                return

            self.log.debug('QUEUE - found separator at pos %i', self.separator_pos)

            if self.message_pos == -1:
                size_pos = self.header_pos + DebugpyMessageQueue.HEADER_LENGTH
                self.message_pos = (self.separator_pos
                                    + DebugpyMessageQueue.SEPARATOR_LENGTH)
                self.message_size = int(
                    self.tcp_buffer[size_pos:self.separator_pos])

            self.log.debug('QUEUE - found message at pos %i', self.message_pos)
            self.log.debug('QUEUE - message size is %i', self.message_size)

            if len(self.tcp_buffer) - self.message_pos < self.message_size:
                return

            self._put_message(
                self.tcp_buffer[self.message_pos:self.message_pos + self.message_size])
            if len(self.tcp_buffer) - self.message_pos == self.message_size:
                self.log.debug('QUEUE - resetting tcp_buffer')
                self.tcp_buffer = ''
                self._reset_tcp_pos()
                return
            else:
                self.tcp_buffer = self.tcp_buffer[self.message_pos + self.message_size:]
                self.log.debug('QUEUE - slicing tcp_buffer: %s', self.tcp_buffer)
                self._reset_tcp_pos()

    async def get_message(self):
        return await self.message_queue.get()
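A hedged usage sketch for DebugpyMessageQueue: it assumes the imports the class itself needs (tornado.queues.Queue and zmq.utils.jsonapi) are already in scope, and builds 'Content-Length: <n>\r\n\r\n<json>' frames by hand with a hypothetical build_frame helper. Events go to the callback; everything else is queued for get_message().

import json
import logging

from tornado.ioloop import IOLoop


def build_frame(msg):
    body = json.dumps(msg)
    return 'Content-Length: %d\r\n\r\n%s' % (len(body), body)


async def demo():
    events = []
    q = DebugpyMessageQueue(events.append, logging.getLogger('debugpy'))
    q.put_tcp_frame(build_frame({'type': 'event', 'event': 'stopped'}))
    q.put_tcp_frame(build_frame({'type': 'response', 'command': 'threads'}))
    reply = await q.get_message()
    assert events and events[0]['event'] == 'stopped'
    assert reply['command'] == 'threads'


IOLoop.current().run_sync(demo)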
class Kernel(SingletonConfigurable): # --------------------------------------------------------------------------- # Kernel interface # --------------------------------------------------------------------------- # attribute to override with a GUI eventloop = Any(None) @observe("eventloop") def _update_eventloop(self, change): """schedule call to eventloop from IOLoop""" loop = ioloop.IOLoop.current() if change.new is not None: loop.add_callback(self.enter_eventloop) session = Instance(Session, allow_none=True) profile_dir = Instance("IPython.core.profiledir.ProfileDir", allow_none=True) shell_stream = Instance(ZMQStream, allow_none=True) shell_streams = List( help="""Deprecated shell_streams alias. Use shell_stream .. versionchanged:: 6.0 shell_streams is deprecated. Use shell_stream. """ ) @default("shell_streams") def _shell_streams_default(self): warnings.warn( "Kernel.shell_streams is deprecated in ipykernel 6.0. Use Kernel.shell_stream", DeprecationWarning, stacklevel=2, ) if self.shell_stream is not None: return [self.shell_stream] else: return [] @observe("shell_streams") def _shell_streams_changed(self, change): warnings.warn( "Kernel.shell_streams is deprecated in ipykernel 6.0. Use Kernel.shell_stream", DeprecationWarning, stacklevel=2, ) if len(change.new) > 1: warnings.warn( "Kernel only supports one shell stream. Additional streams will be ignored.", RuntimeWarning, stacklevel=2, ) if change.new: self.shell_stream = change.new[0] control_stream = Instance(ZMQStream, allow_none=True) debug_shell_socket = Any() control_thread = Any() iopub_socket = Any() iopub_thread = Any() stdin_socket = Any() log = Instance(logging.Logger, allow_none=True) # identities: int_id = Integer(-1) ident = Unicode() @default("ident") def _default_ident(self): return str(uuid.uuid4()) # This should be overridden by wrapper kernels that implement any real # language. language_info = {} # any links that should go in the help menu help_links = List() # Experimental option to break in non-user code. # The ipykernel source is in the call stack, so the user # has to manipulate the step-over and step-into in a wize way. debug_just_my_code = Bool( True, help="""Set to False if you want to debug python standard and dependent libraries. """, ).tag(config=True) # track associations with current request # Private interface _darwin_app_nap = Bool( True, help="""Whether to use appnope for compatibility with OS X App Nap. Only affects OS X >= 10.9. """, ).tag(config=True) # track associations with current request _allow_stdin = Bool(False) _parents = Dict({"shell": {}, "control": {}}) _parent_ident = Dict({"shell": b"", "control": b""}) @property def _parent_header(self): warnings.warn( "Kernel._parent_header is deprecated in ipykernel 6. Use .get_parent()", DeprecationWarning, stacklevel=2, ) return self.get_parent(channel="shell") # Time to sleep after flushing the stdout/err buffers in each execute # cycle. While this introduces a hard limit on the minimal latency of the # execute cycle, it helps prevent output synchronization problems for # clients. # Units are in seconds. The minimum zmq latency on local host is probably # ~150 microseconds, set this to 500us for now. We may need to increase it # a little if it's not enough after more interactive testing. _execute_sleep = Float(0.0005).tag(config=True) # Frequency of the kernel's event loop. # Units are in seconds, kernel subclasses for GUI toolkits may need to # adapt to milliseconds. 
_poll_interval = Float(0.01).tag(config=True) stop_on_error_timeout = Float( 0.0, config=True, help="""time (in seconds) to wait for messages to arrive when aborting queued requests after an error. Requests that arrive within this window after an error will be cancelled. Increase in the event of unusually slow network causing significant delays, which can manifest as e.g. "Run all" in a notebook aborting some, but not all, messages after an error. """, ) # If the shutdown was requested over the network, we leave here the # necessary reply message so it can be sent by our registered atexit # handler. This ensures that the reply is only sent to clients truly at # the end of our shutdown process (which happens after the underlying # IPython shell's own shutdown). _shutdown_message = None # This is a dict of port number that the kernel is listening on. It is set # by record_ports and used by connect_request. _recorded_ports = Dict() # set of aborted msg_ids aborted = Set() # Track execution count here. For IPython, we override this to use the # execution count we store in the shell. execution_count = 0 msg_types = [ "execute_request", "complete_request", "inspect_request", "history_request", "comm_info_request", "kernel_info_request", "connect_request", "shutdown_request", "is_complete_request", "interrupt_request", # deprecated: "apply_request", ] # add deprecated ipyparallel control messages control_msg_types = msg_types + [ "clear_request", "abort_request", "debug_request", "usage_request", ] def __init__(self, **kwargs): super().__init__(**kwargs) # Build dict of handlers for message types self.shell_handlers = {} for msg_type in self.msg_types: self.shell_handlers[msg_type] = getattr(self, msg_type) self.control_handlers = {} for msg_type in self.control_msg_types: self.control_handlers[msg_type] = getattr(self, msg_type) self.control_queue = Queue() def dispatch_control(self, msg): self.control_queue.put_nowait(msg) async def poll_control_queue(self): while True: msg = await self.control_queue.get() # handle tracers from _flush_control_queue if isinstance(msg, (concurrent.futures.Future, asyncio.Future)): msg.set_result(None) continue await self.process_control(msg) async def _flush_control_queue(self): """Flush the control queue, wait for processing of any pending messages""" if self.control_thread: control_loop = self.control_thread.io_loop # concurrent.futures.Futures are threadsafe # and can be used to await across threads tracer_future = concurrent.futures.Future() awaitable_future = asyncio.wrap_future(tracer_future) else: control_loop = self.io_loop tracer_future = awaitable_future = asyncio.Future() def _flush(): # control_stream.flush puts messages on the queue self.control_stream.flush() # put Future on the queue after all of those, # so we can wait for all queued messages to be processed self.control_queue.put(tracer_future) control_loop.add_callback(_flush) return awaitable_future async def process_control(self, msg): """dispatch control requests""" idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except Exception: self.log.error("Invalid Control Message", exc_info=True) return self.log.debug("Control received: %s", msg) # Set the parent message for side effects. 
self.set_parent(idents, msg, channel="control") self._publish_status("busy", "control") header = msg["header"] msg_type = header["msg_type"] handler = self.control_handlers.get(msg_type, None) if handler is None: self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r", msg_type) else: try: result = handler(self.control_stream, idents, msg) if inspect.isawaitable(result): await result except Exception: self.log.error("Exception in control handler:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status("idle", "control") # flush to ensure reply is sent self.control_stream.flush(zmq.POLLOUT) def should_handle(self, stream, msg, idents): """Check whether a shell-channel message should be handled Allows subclasses to prevent handling of certain messages (e.g. aborted requests). """ msg_id = msg["header"]["msg_id"] if msg_id in self.aborted: # is it safe to assume a msg_id will not be resubmitted? self.aborted.remove(msg_id) self._send_abort_reply(stream, msg, idents) return False return True async def dispatch_shell(self, msg): """dispatch shell requests""" # flush control queue before handling shell requests await self._flush_control_queue() idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except Exception: self.log.error("Invalid Message", exc_info=True) return # Set the parent message for side effects. self.set_parent(idents, msg, channel="shell") self._publish_status("busy", "shell") msg_type = msg["header"]["msg_type"] # Only abort execute requests if self._aborting and msg_type == "execute_request": self._send_abort_reply(self.shell_stream, msg, idents) self._publish_status("idle", "shell") # flush to ensure reply is sent before # handling the next request self.shell_stream.flush(zmq.POLLOUT) return # Print some info about this message and leave a '--->' marker, so it's # easier to trace visually the message chain when debugging. Each # handler prints its message at the end. self.log.debug("\n*** MESSAGE TYPE:%s***", msg_type) self.log.debug(" Content: %s\n --->\n ", msg["content"]) if not self.should_handle(self.shell_stream, msg, idents): return handler = self.shell_handlers.get(msg_type, None) if handler is None: self.log.warning("Unknown message type: %r", msg_type) else: self.log.debug("%s: %s", msg_type, msg) try: self.pre_handler_hook() except Exception: self.log.debug("Unable to signal in pre_handler_hook:", exc_info=True) try: result = handler(self.shell_stream, idents, msg) if inspect.isawaitable(result): await result except Exception: self.log.error("Exception in message handler:", exc_info=True) except KeyboardInterrupt: # Ctrl-c shouldn't crash the kernel here. 
self.log.error("KeyboardInterrupt caught in kernel.") finally: try: self.post_handler_hook() except Exception: self.log.debug("Unable to signal in post_handler_hook:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status("idle", "shell") # flush to ensure reply is sent before # handling the next request self.shell_stream.flush(zmq.POLLOUT) def pre_handler_hook(self): """Hook to execute before calling message handler""" # ensure default_int_handler during handler call self.saved_sigint_handler = signal(SIGINT, default_int_handler) def post_handler_hook(self): """Hook to execute after calling message handler""" signal(SIGINT, self.saved_sigint_handler) def enter_eventloop(self): """enter eventloop""" self.log.info("Entering eventloop %s", self.eventloop) # record handle, so we can check when this changes eventloop = self.eventloop if eventloop is None: self.log.info("Exiting as there is no eventloop") return def advance_eventloop(): # check if eventloop changed: if self.eventloop is not eventloop: self.log.info("exiting eventloop %s", eventloop) return if self.msg_queue.qsize(): self.log.debug("Delaying eventloop due to waiting messages") # still messages to process, make the eventloop wait schedule_next() return self.log.debug("Advancing eventloop %s", eventloop) try: eventloop(self) except KeyboardInterrupt: # Ctrl-C shouldn't crash the kernel self.log.error("KeyboardInterrupt caught in kernel") pass if self.eventloop is eventloop: # schedule advance again schedule_next() def schedule_next(): """Schedule the next advance of the eventloop""" # flush the eventloop every so often, # giving us a chance to handle messages in the meantime self.log.debug("Scheduling eventloop advance") self.io_loop.call_later(0.001, advance_eventloop) # begin polling the eventloop schedule_next() async def do_one_iteration(self): """Process a single shell message Any pending control messages will be flushed as well .. versionchanged:: 5 This is now a coroutine """ # flush messages off of shell stream into the message queue self.shell_stream.flush() # process at most one shell message per iteration await self.process_one(wait=False) async def process_one(self, wait=True): """Process one request Returns None if no message was handled. 
""" if wait: t, dispatch, args = await self.msg_queue.get() else: try: t, dispatch, args = self.msg_queue.get_nowait() except (asyncio.QueueEmpty, QueueEmpty): return None await dispatch(*args) async def dispatch_queue(self): """Coroutine to preserve order of message handling Ensures that only one message is processing at a time, even when the handler is async """ while True: try: await self.process_one() except Exception: self.log.exception("Error in message handler") _message_counter = Any( help="""Monotonic counter of messages """, ) @default("_message_counter") def _message_counter_default(self): return itertools.count() def schedule_dispatch(self, dispatch, *args): """schedule a message for dispatch""" idx = next(self._message_counter) self.msg_queue.put_nowait( ( idx, dispatch, args, ) ) # ensure the eventloop wakes up self.io_loop.add_callback(lambda: None) def start(self): """register dispatchers for streams""" self.io_loop = ioloop.IOLoop.current() self.msg_queue = Queue() self.io_loop.add_callback(self.dispatch_queue) self.control_stream.on_recv(self.dispatch_control, copy=False) if self.control_thread: control_loop = self.control_thread.io_loop else: control_loop = self.io_loop asyncio.run_coroutine_threadsafe(self.poll_control_queue(), control_loop.asyncio_loop) self.shell_stream.on_recv( partial( self.schedule_dispatch, self.dispatch_shell, ), copy=False, ) # publish idle status self._publish_status("starting", "shell") def record_ports(self, ports): """Record the ports that this kernel is using. The creator of the Kernel instance must call this methods if they want the :meth:`connect_request` method to return the port numbers. """ self._recorded_ports = ports # --------------------------------------------------------------------------- # Kernel request handlers # --------------------------------------------------------------------------- def _publish_execute_input(self, code, parent, execution_count): """Publish the code request on the iopub stream.""" self.session.send( self.iopub_socket, "execute_input", {"code": code, "execution_count": execution_count}, parent=parent, ident=self._topic("execute_input"), ) def _publish_status(self, status, channel, parent=None): """send status (busy/idle) on IOPub""" self.session.send( self.iopub_socket, "status", {"execution_state": status}, parent=parent or self.get_parent(channel), ident=self._topic("status"), ) def _publish_debug_event(self, event): self.session.send( self.iopub_socket, "debug_event", event, parent=self.get_parent("control"), ident=self._topic("debug_event"), ) def set_parent(self, ident, parent, channel="shell"): """Set the current parent request Side effects (IOPub messages) and replies are associated with the request that caused them via the parent_header. The parent identity is used to route input_request messages on the stdin channel. """ self._parent_ident[channel] = ident self._parents[channel] = parent def get_parent(self, channel="shell"): """Get the parent request associated with a channel. .. versionadded:: 6 Parameters ---------- channel : str the name of the channel ('shell' or 'control') Returns ------- message : dict the parent message for the most recent request on the channel. """ return self._parents.get(channel, {}) def send_response( self, stream, msg_or_type, content=None, ident=None, buffers=None, track=False, header=None, metadata=None, channel="shell", ): """Send a response to the message we're currently processing. 
This accepts all the parameters of :meth:`jupyter_client.session.Session.send` except ``parent``. This relies on :meth:`set_parent` having been called for the current message. """ return self.session.send( stream, msg_or_type, content, self.get_parent(channel), ident, buffers, track, header, metadata, ) def init_metadata(self, parent): """Initialize metadata. Run at the beginning of execution requests. """ # FIXME: `started` is part of ipyparallel # Remove for ipykernel 5.0 return { "started": now(), } def finish_metadata(self, parent, metadata, reply_content): """Finish populating metadata. Run after completing an execution request. """ return metadata async def execute_request(self, stream, ident, parent): """handle an execute_request""" try: content = parent["content"] code = content["code"] silent = content["silent"] store_history = content.get("store_history", not silent) user_expressions = content.get("user_expressions", {}) allow_stdin = content.get("allow_stdin", False) except Exception: self.log.error("Got bad msg: ") self.log.error("%s", parent) return stop_on_error = content.get("stop_on_error", True) metadata = self.init_metadata(parent) # Re-broadcast our input for the benefit of listening clients, and # start computing output if not silent: self.execution_count += 1 self._publish_execute_input(code, parent, self.execution_count) cell_id = (parent.get("metadata") or {}).get("cellId") if _accepts_cell_id(self.do_execute): reply_content = self.do_execute( code, silent, store_history, user_expressions, allow_stdin, cell_id=cell_id, ) else: reply_content = self.do_execute( code, silent, store_history, user_expressions, allow_stdin, ) if inspect.isawaitable(reply_content): reply_content = await reply_content # Flush output before sending the reply. sys.stdout.flush() sys.stderr.flush() # FIXME: on rare occasions, the flush doesn't seem to make it to the # clients... This seems to mitigate the problem, but we definitely need # to better understand what's going on. if self._execute_sleep: time.sleep(self._execute_sleep) # Send the reply. reply_content = json_clean(reply_content) metadata = self.finish_metadata(parent, metadata, reply_content) reply_msg = self.session.send( stream, "execute_reply", reply_content, parent, metadata=metadata, ident=ident, ) self.log.debug("%s", reply_msg) if not silent and reply_msg["content"]["status"] == "error" and stop_on_error: self._abort_queues() def do_execute( self, code, silent, store_history=True, user_expressions=None, allow_stdin=False, *, cell_id=None, ): """Execute user code. 
Must be overridden by subclasses.""" raise NotImplementedError async def complete_request(self, stream, ident, parent): content = parent["content"] code = content["code"] cursor_pos = content["cursor_pos"] matches = self.do_complete(code, cursor_pos) if inspect.isawaitable(matches): matches = await matches matches = json_clean(matches) self.session.send(stream, "complete_reply", matches, parent, ident) def do_complete(self, code, cursor_pos): """Override in subclasses to find completions.""" return { "matches": [], "cursor_end": cursor_pos, "cursor_start": cursor_pos, "metadata": {}, "status": "ok", } async def inspect_request(self, stream, ident, parent): content = parent["content"] reply_content = self.do_inspect( content["code"], content["cursor_pos"], content.get("detail_level", 0), set(content.get("omit_sections", [])), ) if inspect.isawaitable(reply_content): reply_content = await reply_content # Before we send this object over, we scrub it for JSON usage reply_content = json_clean(reply_content) msg = self.session.send(stream, "inspect_reply", reply_content, parent, ident) self.log.debug("%s", msg) def do_inspect(self, code, cursor_pos, detail_level=0, omit_sections=()): """Override in subclasses to allow introspection.""" return {"status": "ok", "data": {}, "metadata": {}, "found": False} async def history_request(self, stream, ident, parent): content = parent["content"] reply_content = self.do_history(**content) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) msg = self.session.send(stream, "history_reply", reply_content, parent, ident) self.log.debug("%s", msg) def do_history( self, hist_access_type, output, raw, session=None, start=None, stop=None, n=None, pattern=None, unique=False, ): """Override in subclasses to access history.""" return {"status": "ok", "history": []} async def connect_request(self, stream, ident, parent): if self._recorded_ports is not None: content = self._recorded_ports.copy() else: content = {} content["status"] = "ok" msg = self.session.send(stream, "connect_reply", content, parent, ident) self.log.debug("%s", msg) @property def kernel_info(self): return { "protocol_version": kernel_protocol_version, "implementation": self.implementation, "implementation_version": self.implementation_version, "language_info": self.language_info, "banner": self.banner, "help_links": self.help_links, } async def kernel_info_request(self, stream, ident, parent): content = {"status": "ok"} content.update(self.kernel_info) msg = self.session.send(stream, "kernel_info_reply", content, parent, ident) self.log.debug("%s", msg) async def comm_info_request(self, stream, ident, parent): content = parent["content"] target_name = content.get("target_name", None) # Should this be moved to ipkernel? 
if hasattr(self, "comm_manager"): comms = { k: dict(target_name=v.target_name) for (k, v) in self.comm_manager.comms.items() if v.target_name == target_name or target_name is None } else: comms = {} reply_content = dict(comms=comms, status="ok") msg = self.session.send(stream, "comm_info_reply", reply_content, parent, ident) self.log.debug("%s", msg) def _send_interupt_children(self): if os.name == "nt": self.log.error("Interrupt message not supported on Windows") else: pid = os.getpid() pgid = os.getpgid(pid) # Prefer process-group over process # but only if the kernel is the leader of the process group if pgid and pgid == pid and hasattr(os, "killpg"): try: os.killpg(pgid, SIGINT) return except OSError: pass try: os.kill(pid, SIGINT) except OSError: pass async def interrupt_request(self, stream, ident, parent): self._send_interupt_children() content = parent["content"] self.session.send(stream, "interrupt_reply", content, parent, ident=ident) return async def shutdown_request(self, stream, ident, parent): content = self.do_shutdown(parent["content"]["restart"]) if inspect.isawaitable(content): content = await content self.session.send(stream, "shutdown_reply", content, parent, ident=ident) # same content, but different msg_id for broadcasting on IOPub self._shutdown_message = self.session.msg("shutdown_reply", content, parent) await self._at_shutdown() self.log.debug("Stopping control ioloop") control_io_loop = self.control_stream.io_loop control_io_loop.add_callback(control_io_loop.stop) self.log.debug("Stopping shell ioloop") shell_io_loop = self.shell_stream.io_loop shell_io_loop.add_callback(shell_io_loop.stop) def do_shutdown(self, restart): """Override in subclasses to do things when the frontend shuts down the kernel. """ return {"status": "ok", "restart": restart} async def is_complete_request(self, stream, ident, parent): content = parent["content"] code = content["code"] reply_content = self.do_is_complete(code) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) reply_msg = self.session.send(stream, "is_complete_reply", reply_content, parent, ident) self.log.debug("%s", reply_msg) def do_is_complete(self, code): """Override in subclasses to find completions.""" return {"status": "unknown"} async def debug_request(self, stream, ident, parent): content = parent["content"] reply_content = self.do_debug_request(content) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) reply_msg = self.session.send(stream, "debug_reply", reply_content, parent, ident) self.log.debug("%s", reply_msg) # Taken from https://github.com/jupyter-server/jupyter-resource-usage/blob/e6ec53fa69fdb6de8e878974bcff006310658408/jupyter_resource_usage/metrics.py#L16 def get_process_metric_value(self, process, name, attribute=None): try: # psutil.Process methods will either return... metric_value = getattr(process, name)() if attribute is not None: # ... a named tuple return getattr(metric_value, attribute) else: # ... 
or a number return metric_value # Avoid littering logs with stack traces # complaining about dead processes except BaseException: return None async def usage_request(self, stream, ident, parent): reply_content = {"hostname": socket.gethostname()} current_process = psutil.Process() all_processes = [current_process] + current_process.children(recursive=True) process_metric_value = self.get_process_metric_value reply_content["kernel_cpu"] = sum( [process_metric_value(process, "cpu_percent", None) for process in all_processes] ) reply_content["kernel_memory"] = sum( [process_metric_value(process, "memory_info", "rss") for process in all_processes] ) cpu_percent = psutil.cpu_percent() # https://psutil.readthedocs.io/en/latest/index.html?highlight=cpu#psutil.cpu_percent # The first time cpu_percent is called it will return a meaningless 0.0 value which you are supposed to ignore. if cpu_percent is not None and cpu_percent != 0.0: reply_content["host_cpu_percent"] = cpu_percent reply_content["host_virtual_memory"] = dict(psutil.virtual_memory()._asdict()) reply_msg = self.session.send(stream, "usage_reply", reply_content, parent, ident) self.log.debug("%s", reply_msg) async def do_debug_request(self, msg): raise NotImplementedError # --------------------------------------------------------------------------- # Engine methods (DEPRECATED) # --------------------------------------------------------------------------- async def apply_request(self, stream, ident, parent): self.log.warning("apply_request is deprecated in kernel_base, moving to ipyparallel.") try: content = parent["content"] bufs = parent["buffers"] msg_id = parent["header"]["msg_id"] except Exception: self.log.error("Got bad msg: %s", parent, exc_info=True) return md = self.init_metadata(parent) reply_content, result_buf = self.do_apply(content, bufs, msg_id, md) # flush i/o sys.stdout.flush() sys.stderr.flush() md = self.finish_metadata(parent, md, reply_content) self.session.send( stream, "apply_reply", reply_content, parent=parent, ident=ident, buffers=result_buf, metadata=md, ) def do_apply(self, content, bufs, msg_id, reply_metadata): """DEPRECATED""" raise NotImplementedError # --------------------------------------------------------------------------- # Control messages (DEPRECATED) # --------------------------------------------------------------------------- async def abort_request(self, stream, ident, parent): """abort a specific msg by id""" self.log.warning( "abort_request is deprecated in kernel_base. It is only part of IPython parallel" ) msg_ids = parent["content"].get("msg_ids", None) if isinstance(msg_ids, str): msg_ids = [msg_ids] if not msg_ids: self._abort_queues() for mid in msg_ids: self.aborted.add(str(mid)) content = dict(status="ok") reply_msg = self.session.send( stream, "abort_reply", content=content, parent=parent, ident=ident ) self.log.debug("%s", reply_msg) async def clear_request(self, stream, idents, parent): """Clear our namespace.""" self.log.warning( "clear_request is deprecated in kernel_base. 
It is only part of IPython parallel" ) content = self.do_clear() self.session.send(stream, "clear_reply", ident=idents, parent=parent, content=content) def do_clear(self): """DEPRECATED since 4.0.3""" raise NotImplementedError # --------------------------------------------------------------------------- # Protected interface # --------------------------------------------------------------------------- def _topic(self, topic): """prefixed topic for IOPub messages""" base = "kernel.%s" % self.ident return ("%s.%s" % (base, topic)).encode() _aborting = Bool(False) def _abort_queues(self): # while this flag is true, # execute requests will be aborted self._aborting = True self.log.info("Aborting queue") # flush streams, so all currently waiting messages # are added to the queue self.shell_stream.flush() # Callback to signal that we are done aborting # dispatch functions _must_ be async async def stop_aborting(): self.log.info("Finishing abort") self._aborting = False # put the stop-aborting event on the message queue # so that all messages already waiting in the queue are aborted # before we reset the flag schedule_stop_aborting = partial(self.schedule_dispatch, stop_aborting) # if we have a delay, give messages this long to arrive on the queue # before we stop aborting requests asyncio.get_event_loop().call_later(self.stop_on_error_timeout, schedule_stop_aborting) def _send_abort_reply(self, stream, msg, idents): """Send a reply to an aborted request""" self.log.info(f"Aborting {msg['header']['msg_id']}: {msg['header']['msg_type']}") reply_type = msg["header"]["msg_type"].rsplit("_", 1)[0] + "_reply" status = {"status": "aborted"} md = self.init_metadata(msg) md = self.finish_metadata(msg, md, status) md.update(status) self.session.send( stream, reply_type, metadata=md, content=status, parent=msg, ident=idents, ) def _no_raw_input(self): """Raise StdinNotImplementedError if active frontend doesn't support stdin.""" raise StdinNotImplementedError( "raw_input was called, but this frontend does not support stdin." ) def getpass(self, prompt="", stream=None): """Forward getpass to frontends Raises ------ StdinNotImplementedError if active frontend doesn't support stdin. """ if not self._allow_stdin: raise StdinNotImplementedError( "getpass was called, but this frontend does not support input requests." ) if stream is not None: import warnings warnings.warn( "The `stream` parameter of `getpass.getpass` will have no effect when using ipykernel", UserWarning, stacklevel=2, ) return self._input_request( prompt, self._parent_ident["shell"], self.get_parent("shell"), password=True, ) def raw_input(self, prompt=""): """Forward raw_input to frontends Raises ------ StdinNotImplementedError if active frontend doesn't support stdin. """ if not self._allow_stdin: raise StdinNotImplementedError( "raw_input was called, but this frontend does not support input requests." ) return self._input_request( str(prompt), self._parent_ident["shell"], self.get_parent("shell"), password=False, ) def _input_request(self, prompt, ident, parent, password=False): # Flush output before making the request. sys.stderr.flush() sys.stdout.flush() # flush the stdin socket, to purge stale replies while True: try: self.stdin_socket.recv_multipart(zmq.NOBLOCK) except zmq.ZMQError as e: if e.errno == zmq.EAGAIN: break else: raise # Send the input request. content = json_clean(dict(prompt=prompt, password=password)) self.session.send(self.stdin_socket, "input_request", content, parent, ident=ident) # Await a response. 
while True: try: # Use polling with select() so KeyboardInterrupts can get # through; doing a blocking recv() means stdin reads are # uninterruptible on Windows. We need a timeout because # zmq.select() is also uninterruptible, but at least this # way reads get noticed immediately and KeyboardInterrupts # get noticed fairly quickly by human response time standards. rlist, _, xlist = zmq.select([self.stdin_socket], [], [self.stdin_socket], 0.01) if rlist or xlist: ident, reply = self.session.recv(self.stdin_socket) if (ident, reply) != (None, None): break except KeyboardInterrupt: # re-raise KeyboardInterrupt, to truncate traceback raise KeyboardInterrupt("Interrupted by user") from None except Exception: self.log.warning("Invalid Message:", exc_info=True) try: value = reply["content"]["value"] except Exception: self.log.error("Bad input_reply: %s", parent) value = "" if value == "\x04": # EOF raise EOFError return value def _signal_children(self, signum): """ Send a signal to all our children Like `killpg`, but does not include the current process (or possible parents). """ for p in self._process_children(): self.log.debug(f"Sending {Signals(signum)!r} to subprocess {p}") try: if signum == SIGTERM: p.terminate() elif signum == SIGKILL: p.kill() else: p.send_signal(signum) except psutil.NoSuchProcess: pass def _process_children(self): """Retrieve child processes in the kernel's process group Avoids: - including parents and self with killpg - including all children that may have forked-off a new group """ kernel_process = psutil.Process() all_children = kernel_process.children(recursive=True) if os.name == "nt": return all_children kernel_pgid = os.getpgrp() process_group_children = [] for child in all_children: try: child_pgid = os.getpgid(child.pid) except OSError: pass else: if child_pgid == kernel_pgid: process_group_children.append(child) return process_group_children async def _progressively_terminate_all_children(self): sleeps = (0.01, 0.03, 0.1, 0.3, 1, 3, 10) if not self._process_children(): self.log.debug("Kernel has no children.") return for signum in (SIGTERM, SIGKILL): for delay in sleeps: children = self._process_children() if not children: self.log.debug("No more children, continuing shutdown routine.") return # signals only children, not current process self._signal_children(signum) self.log.debug( f"Will sleep {delay}s before checking for children and retrying. {children}" ) await asyncio.sleep(delay) async def _at_shutdown(self): """Actions taken at shutdown by the kernel, called by python's atexit.""" try: await self._progressively_terminate_all_children() except Exception as e: self.log.exception("Exception during subprocesses termination %s", e) finally: if self._shutdown_message is not None: self.session.send( self.iopub_socket, self._shutdown_message, ident=self._topic("shutdown"), ) self.log.debug("%s", self._shutdown_message) self.control_stream.flush(zmq.POLLOUT)
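The do_* hooks above (do_complete, do_inspect, do_history, do_is_complete, do_shutdown) are the intended extension points: the async *_request coroutines stay in the base class and a concrete kernel only overrides the synchronous bodies. Below is a minimal, hypothetical sketch of such a subclass, assuming the base class shown above is importable as `Kernel`; the `EchoKernel` name, its word list, and its completion rule are illustrative assumptions, not part of the original source.

class EchoKernel(Kernel):
    implementation = "echo"
    implementation_version = "0.1"
    language_info = {"name": "echo", "mimetype": "text/plain", "file_extension": ".txt"}
    banner = "Echo kernel - sketch only"

    def do_complete(self, code, cursor_pos):
        # Complete against a fixed word list, honouring the reply contract
        # documented in complete_request above.
        text = code[:cursor_pos]
        prefix = text.split()[-1] if text.split() else ""
        matches = [w for w in ("echo", "exit", "help") if w.startswith(prefix)]
        return {"matches": matches,
                "cursor_start": cursor_pos - len(prefix),
                "cursor_end": cursor_pos,
                "metadata": {},
                "status": "ok"}

    def do_is_complete(self, code):
        # Treat a trailing backslash as a continuation marker.
        status = "incomplete" if code.rstrip().endswith("\\") else "complete"
        return {"status": status}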
class Worker(Server): """ Worker Node Workers perform two functions: 1. **Serve data** from a local dictionary 2. **Perform computation** on that data and on data from peers Additionally workers keep a Center informed of their data and use that Center to gather data from other workers when necessary to perform a computation. You can start a worker with the ``dworker`` command line application:: $ dworker scheduler-ip:port **State** * **data:** ``{key: object}``: Dictionary mapping keys to actual values * **active:** ``{key}``: Set of keys currently under computation * **ncores:** ``int``: Number of cores used by this worker process * **executor:** ``concurrent.futures.ThreadPoolExecutor``: Executor used to perform computation * **local_dir:** ``path``: Path on local machine to store temporary files * **center:** ``rpc``: Location of center or scheduler. See ``.ip/.port`` attributes. * **name:** ``string``: Alias * **services:** ``{str: Server}``: Auxiliary web servers running on this worker * **service_ports:** ``{str: port}``: Examples -------- Create centers and workers in Python: >>> from distributed import Center, Worker >>> c = Center('192.168.0.100', 8787) # doctest: +SKIP >>> w = Worker(c.ip, c.port) # doctest: +SKIP >>> yield w._start(port=8788) # doctest: +SKIP Or use the command line:: $ dcenter Start center at 127.0.0.1:8787 $ dworker 127.0.0.1:8787 Start worker at: 127.0.0.1:8788 Registered with center at: 127.0.0.1:8787 See Also -------- distributed.center.Center: """ def __init__(self, center_ip, center_port, ip=None, ncores=None, loop=None, local_dir=None, services=None, service_ports=None, name=None, **kwargs): self.ip = ip or get_ip() self._port = 0 self.ncores = ncores or _ncores self.data = dict() self.loop = loop or IOLoop.current() self.status = None self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-') self.executor = ThreadPoolExecutor(self.ncores) self.thread_tokens = Queue() # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572 for i in range(self.ncores): self.thread_tokens.put_nowait(i) self.center = rpc(ip=center_ip, port=center_port) self.active = set() self.name = name if not os.path.exists(self.local_dir): os.mkdir(self.local_dir) if self.local_dir not in sys.path: sys.path.insert(0, self.local_dir) self.services = {} self.service_ports = service_ports or {} for k, v in (services or {}).items(): if isinstance(k, tuple): k, port = k else: port = 0 self.services[k] = v(self) self.services[k].listen(port) self.service_ports[k] = self.services[k].port handlers = {'compute': self.compute, 'gather': self.gather, 'compute-stream': self.compute_stream, 'run': self.run, 'get_data': self.get_data, 'update_data': self.update_data, 'delete_data': self.delete_data, 'terminate': self.terminate, 'ping': pingpong, 'health': self.health, 'upload_file': self.upload_file} super(Worker, self).__init__(handlers, **kwargs) @gen.coroutine def _start(self, port=0): self.listen(port) self.name = self.name or self.address for k, v in self.services.items(): v.listen(0) self.service_ports[k] = v.port logger.info(' Start worker at: %20s:%d', self.ip, self.port) for k, v in self.service_ports.items(): logger.info(' %16s at: %20s:%d' % (k, self.ip, v)) logger.info('Waiting to connect to: %20s:%d', self.center.ip, self.center.port) while True: try: resp = yield self.center.register( ncores=self.ncores, address=(self.ip, self.port), keys=list(self.data), services=self.service_ports, name=self.name) break except (OSError, StreamClosedError): 
logger.debug("Unable to register with scheduler. Waiting") yield gen.sleep(0.5) if resp != 'OK': raise ValueError(resp) logger.info(' Registered to: %20s:%d', self.center.ip, self.center.port) self.status = 'running' def start(self, port=0): self.loop.add_callback(self._start, port) def identity(self, stream): return {'type': type(self).__name__, 'id': self.id, 'center': (self.center.ip, self.center.port)} @gen.coroutine def _close(self, report=True, timeout=10): if report: yield gen.with_timeout(timedelta(seconds=timeout), self.center.unregister(address=(self.ip, self.port)), io_loop=self.loop) self.center.close_streams() self.stop() self.executor.shutdown() if os.path.exists(self.local_dir): shutil.rmtree(self.local_dir) for k, v in self.services.items(): v.stop() self.status = 'closed' self.stop() @gen.coroutine def terminate(self, stream, report=True): yield self._close(report=report) raise Return('OK') @property def address(self): return '%s:%d' % (self.ip, self.port) @property def address_tuple(self): return (self.ip, self.port) @gen.coroutine def gather(self, stream=None, who_has=None): who_has = {k: [coerce_to_address(addr) for addr in v] for k, v in who_has.items() if k not in self.data} try: result = yield gather_from_workers(who_has) except KeyError as e: logger.warn("Could not find data", e) raise Return({'status': 'missing-data', 'keys': e.args}) else: self.data.update(result) raise Return({'status': 'OK'}) @gen.coroutine def _ready_task(self, function=None, key=None, args=(), kwargs={}, task=None, who_has=None): diagnostics = {} if who_has: local_data = {k: self.data[k] for k in who_has if k in self.data} who_has = {k: set(map(coerce_to_address, v)) for k, v in who_has.items() if k not in self.data} try: logger.info("gather %d keys from peers: %s", len(who_has), str(who_has)) diagnostics['transfer-start'] = time() other = yield gather_from_workers(who_has) diagnostics['transfer-stop'] = time() data = merge(local_data, other) except KeyError as e: logger.warn("Could not find data for %s", key) raise Return({'status': 'missing-data', 'keys': e.args, 'key': key}) else: data = {} transfer_time = 0 try: start = default_timer() if task is not None: task = loads(task) if function is not None: function = loads(function) if args: args = loads(args) if kwargs: kwargs = loads(kwargs) diagnostics['deserialization'] = default_timer() - start except Exception as e: logger.warn("Could not deserialize task", exc_info=True) raise Return(assoc(error_message(e), 'key', key)) if task is not None: assert not function and not args and not kwargs function = execute_task args = (task,) # Fill args with data args2 = pack_data(args, data) kwargs2 = pack_data(kwargs, data) raise Return({'status': 'OK', 'function': function, 'args': args2, 'kwargs': kwargs2, 'diagnostics': diagnostics, 'key': key}) @gen.coroutine def executor_submit(self, key, function, *args, **kwargs): """ Safely run function in thread pool executor We've run into issues running concurrent.future futures within tornado. Apparently it's advantageous to use timeouts and periodic callbacks to ensure things run smoothly. This can get tricky, so we pull it off into a separate method. 
""" token = yield self.thread_tokens.get() job_counter[0] += 1 i = job_counter[0] # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key) future = self.executor.submit(function, *args, **kwargs) pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s", key, future._state), 1000); pc.start() try: if sys.version_info < (3, 2): yield future else: while not future.done() and future._state != 'FINISHED': try: yield gen.with_timeout(timedelta(seconds=1), future, io_loop=self.loop) break except gen.TimeoutError: logger.info("work queue size: %d", self.executor._work_queue.qsize()) logger.info("future state: %s", future._state) logger.info("Pending job %d: %s", i, future) finally: pc.stop() self.thread_tokens.put(token) result = future.result() logger.info("Finish job %d, %s", i, key) raise gen.Return(result) @gen.coroutine def compute_stream(self, stream): with log_errors(): logger.debug("Open compute stream") bstream = BatchedSend(interval=10, loop=self.loop) bstream.start(stream) @gen.coroutine def process(msg): try: result = yield self.compute(report=False, **msg) bstream.send(result) except Exception as e: logger.exception(e) bstream.send(assoc(error_message(e), 'key', msg.get('key'))) with log_errors(): while True: try: msgs = yield read(stream) except StreamClosedError: break if not isinstance(msgs, list): msgs = [msgs] for msg in msgs: op = msg.pop('op', None) if op == 'close': break elif op == 'compute-task': self.loop.add_callback(process, msg) else: logger.warning("Unknown operation %s, %s", op, msg) yield bstream.close() logger.info("Close compute stream") @gen.coroutine def compute(self, stream=None, function=None, key=None, args=(), kwargs={}, task=None, who_has=None, report=True): """ Execute function """ self.active.add(key) # Ready function for computation msg = yield self._ready_task(function=function, key=key, args=args, kwargs=kwargs, task=task, who_has=who_has) if msg['status'] != 'OK': try: self.active.remove(key) except KeyError: pass raise Return(msg) else: function = msg['function'] args = msg['args'] kwargs = msg['kwargs'] # Log and compute in separate thread result = yield self.executor_submit(key, apply_function, function, args, kwargs) result['key'] = key result.update(msg['diagnostics']) if result['status'] == 'OK': self.data[key] = result.pop('result') if report: response = yield self.center.add_keys(address=(self.ip, self.port), keys=[key]) if not response == 'OK': logger.warn('Could not report results to center: %s', str(response)) else: logger.warn(" Compute Failed\n" "Function: %s\n" "args: %s\n" "kwargs: %s\n", str(funcname(function))[:1000], str(args)[:1000], str(kwargs)[:1000], exc_info=True) logger.debug("Send compute response to scheduler: %s, %s", key, msg) try: self.active.remove(key) except KeyError: pass raise Return(result) @gen.coroutine def run(self, stream, function=None, args=(), kwargs={}): function = loads(function) if args: args = loads(args) if kwargs: kwargs = loads(kwargs) try: result = function(*args, **kwargs) except Exception as e: logger.warn(" Run Failed\n" "Function: %s\n" "args: %s\n" "kwargs: %s\n", str(funcname(function))[:1000], str(args)[:1000], str(kwargs)[:1000], exc_info=True) response = error_message(e) else: response = { 'status': 'OK', 'result': dumps(result), } raise Return(response) @gen.coroutine def update_data(self, stream, data=None, report=True): data = valmap(loads, data) self.data.update(data) if report: response = yield self.center.add_keys(address=(self.ip, self.port), keys=list(data)) assert 
response == 'OK' info = {'nbytes': {k: sizeof(v) for k, v in data.items()}, 'status': 'OK'} raise Return(info) @gen.coroutine def delete_data(self, stream, keys=None, report=True): for key in keys: if key in self.data: del self.data[key] logger.info("Deleted %d keys", len(keys)) if report: logger.debug("Reporting loss of keys to center") yield self.center.remove_keys(address=self.address, keys=list(keys)) raise Return('OK') def get_data(self, stream, keys=None): return {k: dumps(self.data[k]) for k in keys if k in self.data} def upload_file(self, stream, filename=None, data=None, load=True): out_filename = os.path.join(self.local_dir, filename) if isinstance(data, unicode): data = data.encode() with open(out_filename, 'wb') as f: f.write(data) f.flush() if load: try: name, ext = os.path.splitext(filename) if ext in ('.py', '.pyc'): logger.info("Reload module %s from .py file", name) name = name.split('-')[0] reload(import_module(name)) if ext == '.egg': sys.path.append(out_filename) pkgs = pkg_resources.find_distributions(out_filename) for pkg in pkgs: logger.info("Load module %s from egg", pkg.project_name) reload(import_module(pkg.project_name)) if not pkgs: logger.warning("Found no packages in egg file") except Exception as e: logger.exception(e) return {'status': 'error', 'exception': dumps(e)} return {'status': 'OK', 'nbytes': len(data)} def health(self, stream=None): """ Information about worker """ d = {'active': len(self.active), 'stored': len(self.data), 'time': time()} try: import psutil mem = psutil.virtual_memory() d.update({'cpu': psutil.cpu_percent(), 'memory': mem.total, 'memory-percent': mem.percent}) try: net_io = psutil.net_io_counters() d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv except AttributeError: pass self._last_net_io = net_io try: disk_io = psutil.disk_io_counters() d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes except (AttributeError, RuntimeError): disk_io = None self._last_disk_io = disk_io except ImportError: pass return d
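Worker.executor_submit above combines a tornado Queue of per-core tokens with a ThreadPoolExecutor so that no more than ncores tasks run at once, however many coroutines submit work concurrently. The standalone sketch below isolates just that token pattern; the `TokenPool` name is an illustrative assumption and the error handling is simplified relative to the method above.

from concurrent.futures import ThreadPoolExecutor

from tornado import gen
from tornado.queues import Queue


class TokenPool(object):
    """Run at most `ncores` functions in the executor at any one time."""
    def __init__(self, ncores):
        self.executor = ThreadPoolExecutor(ncores)
        self.tokens = Queue()
        for i in range(ncores):          # one token per core
            self.tokens.put_nowait(i)

    @gen.coroutine
    def submit(self, function, *args, **kwargs):
        token = yield self.tokens.get()  # waits while all cores are busy
        try:
            result = yield self.executor.submit(function, *args, **kwargs)
        finally:
            self.tokens.put_nowait(token)  # give the core back
        raise gen.Return(result)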
class Scheduler(object): def __init__(self, center, delete_batch_time=1, loop=None, resource_interval=1, resource_log_size=1000): self.scheduler_queues = [Queue()] self.report_queues = [] self.delete_queue = Queue() self.status = None self.coroutines = [] self.center = coerce_to_rpc(center) self.dask = dict() self.dependencies = dict() self.dependents = dict() self.generation = 0 self.has_what = defaultdict(set) self.held_data = set() self.in_play = set() self.keyorder = dict() self.nbytes = dict() self.ncores = dict() self.nannies = dict() self.processing = dict() self.restrictions = dict() self.stacks = dict() self.waiting = dict() self.waiting_data = dict() self.who_has = defaultdict(set) self.exceptions = dict() self.tracebacks = dict() self.exceptions_blame = dict() self.resource_logs = dict() self.loop = loop or IOLoop.current() self.delete_batch_time = delete_batch_time self.resource_interval = resource_interval self.resource_log_size = resource_log_size self.plugins = [] self.handlers = {'update-graph': self.update_graph, 'update-data': self.update_data, 'missing-data': self.mark_missing_data, 'task-missing-data': self.mark_missing_data, 'worker-failed': self.mark_worker_missing, 'release-held-data': self.release_held_data, 'restart': self._restart} def put(self, msg): return self.scheduler_queues[0].put_nowait(msg) @property def report_queue(self): return self.report_queues[0] @gen.coroutine def _sync_center(self): self.ncores, self.has_what, self.who_has, self.nannies = yield [ self.center.ncores(), self.center.has_what(), self.center.who_has(), self.center.nannies()] self._nanny_coroutines = [] for (ip, wport), nport in self.nannies.items(): if not nport: continue if (ip, nport) not in self.resource_logs: self.resource_logs[(ip, nport)] = deque(maxlen=self.resource_log_size) self._nanny_coroutines.append(self._nanny_listen(ip, nport)) def start(self, start_queues=True): collections = [self.dask, self.dependencies, self.dependents, self.waiting, self.waiting_data, self.in_play, self.keyorder, self.nbytes, self.processing, self.restrictions] for collection in collections: collection.clear() self.processing = {addr: set() for addr in self.ncores} self.stacks = {addr: list() for addr in self.ncores} self.worker_queues = {addr: Queue() for addr in self.ncores} with ignoring(AttributeError): self._delete_coroutine.cancel() with ignoring(AttributeError): for c in self._worker_coroutines: c.cancel() self._delete_coroutine = self.delete() self._worker_coroutines = [self.worker(w) for w in self.ncores] self.heal_state() if start_queues: self.handle_queues(self.scheduler_queues[0], None) for cor in self.coroutines: if cor.done(): raise cor.exception() return self._finished() @gen.coroutine def _finished(self): while any(not c.done() for c in self.coroutines): yield All(self.coroutines) @gen.coroutine def _close(self): yield self.cleanup() yield self._finished() yield self.center.close(close=True) self.center.close_streams() @gen.coroutine def cleanup(self): """ Clean up queues and coroutines, prepare to stop """ if self.status == 'closing': raise gen.Return() self.status = 'closing' logger.debug("Cleaning up coroutines") n = 0 self.delete_queue.put_nowait({'op': 'close'}); n += 1 for w, nc in self.ncores.items(): for i in range(nc): self.worker_queues[w].put_nowait({'op': 'close'}); n += 1 for s in self.scheduler_queues[1:]: s.put_nowait({'op': 'close-stream'}) for i in range(n): msg = yield self.scheduler_queues[0].get() for q in self.report_queues: q.put_nowait({'op': 'close'}) def 
mark_ready_to_run(self, key): """ Send task to an appropriate worker, trigger worker """ logger.debug("Mark %s ready to run", key) if key in self.waiting: assert not self.waiting[key] del self.waiting[key] new_worker = decide_worker(self.dependencies, self.stacks, self.who_has, self.restrictions, self.nbytes, key) self.stacks[new_worker].append(key) self.ensure_occupied(new_worker) def mark_key_in_memory(self, key, workers=None): """ Mark that key now lives in particular workers """ logger.debug("Mark %s in memory", key) if workers is None: workers = self.who_has[key] for worker in workers: self.who_has[key].add(worker) self.has_what[worker].add(key) with ignoring(KeyError): self.processing[worker].remove(key) for dep in sorted(self.dependents.get(key, []), key=self.keyorder.get, reverse=True): if dep in self.waiting: s = self.waiting[dep] with ignoring(KeyError): s.remove(key) if not s: # new task ready to run self.mark_ready_to_run(dep) for dep in self.dependencies.get(key, []): if dep in self.waiting_data: s = self.waiting_data[dep] with ignoring(KeyError): s.remove(key) if not s and dep: self.release_key(dep) self.report({'op': 'key-in-memory', 'key': key, 'workers': workers}) def ensure_occupied(self, worker): """ Spin up tasks on worker while it has tasks and free cores """ logger.debug('Ensure worker is occupied: %s', worker) while (self.stacks[worker] and self.ncores[worker] > len(self.processing[worker])): key = self.stacks[worker].pop() self.processing[worker].add(key) logger.debug("Send job to worker: %s, %s, %s", worker, key, self.dask[key]) self.worker_queues[worker].put_nowait( {'op': 'compute-task', 'key': key, 'task': self.dask[key], 'needed': self.dependencies[key]}) def seed_ready_tasks(self, keys=None): """ Distribute leaves among workers Takes an iterable of keys to consider for execution """ if keys is None: keys = self.dask new_stacks = assign_many_tasks( self.dependencies, self.waiting, self.keyorder, self.who_has, self.stacks, self.restrictions, self.nbytes, [k for k in keys if k in self.waiting and not self.waiting[k]]) logger.debug("Seed ready tasks: %s", new_stacks) for worker, stack in new_stacks.items(): if stack: self.ensure_occupied(worker) def release_key(self, key): """ Release key from distributed memory if its ready """ logger.debug("Release key %s", key) if key not in self.held_data and not self.waiting_data.get(key): self.delete_queue.put_nowait({'op': 'delete-task', 'key': key}) for w in self.who_has[key]: self.has_what[w].remove(key) del self.who_has[key] if key in self.waiting_data: del self.waiting_data[key] if key in self.in_play: self.in_play.remove(key) def update_data(self, who_has=None, nbytes=None): logger.debug("Update data %s", who_has) for key, workers in who_has.items(): self.mark_key_in_memory(key, workers) self.nbytes.update(nbytes) self.held_data.update(who_has) self.in_play.update(who_has) def mark_task_erred(self, key, worker, exception, traceback): """ Mark that a task has erred on a particular worker """ if key in self.processing[worker]: self.processing[worker].remove(key) self.exceptions[key] = exception self.tracebacks[key] = traceback self.mark_failed(key, key) self.ensure_occupied(worker) for plugin in self.plugins[:]: try: plugin.task_erred(self, key, worker, exception) except Exception as e: logger.exception(e) def mark_failed(self, key, failing_key=None): """ When a task fails mark it and all dependent task as failed """ logger.debug("Mark key as failed %s", key) if key in self.exceptions_blame: return 
self.exceptions_blame[key] = failing_key self.report({'op': 'task-erred', 'key': key, 'exception': self.exceptions[failing_key], 'traceback': self.tracebacks[failing_key]}) if key in self.waiting: del self.waiting[key] if key in self.waiting_data: del self.waiting_data[key] self.in_play.remove(key) for dep in self.dependents[key]: self.mark_failed(dep, failing_key) def mark_task_finished(self, key, worker, nbytes): """ Mark that a task has finished execution on a particular worker """ logger.debug("Mark task as finished %s, %s", key, worker) if key in self.processing[worker]: self.nbytes[key] = nbytes self.mark_key_in_memory(key, [worker]) self.ensure_occupied(worker) for plugin in self.plugins[:]: try: plugin.task_finished(self, key, worker, nbytes) except Exception as e: logger.exception(e) else: logger.debug("Key not found in processing, %s, %s, %s", key, worker, self.processing[worker]) def mark_missing_data(self, missing=None, key=None, worker=None): missing = set(missing) logger.debug("Recovering missing data: %s", missing) for k in missing: with ignoring(KeyError): workers = self.who_has.pop(k) for worker in workers: self.has_what[worker].remove(k) self.my_heal_missing_data(missing) if key and worker: with ignoring(KeyError): self.processing[worker].remove(key) self.waiting[key] = missing logger.info('task missing data, %s, %s', key, self.waiting) self.ensure_occupied(worker) self.seed_ready_tasks() def log_state(self, msg=''): logger.debug("Runtime State: %s", msg) logger.debug('\n\nwaiting: %s\n\nstacks: %s\n\nprocessing: %s\n\n' 'in_play: %s\n\n', self.waiting, self.stacks, self.processing, self.in_play) def mark_worker_missing(self, worker=None, heal=True): """ Mark that a worker no longer seems responsive """ logger.debug("Mark worker as missing %s", worker) if worker not in self.processing: return keys = self.has_what.pop(worker) for i in range(self.ncores[worker]): # send close message, in case not dead self.worker_queues[worker].put_nowait({'op': 'close', 'report': False}) del self.worker_queues[worker] del self.ncores[worker] del self.stacks[worker] del self.processing[worker] if not self.stacks: logger.critical("Lost all workers") missing_keys = set() for key in keys: self.who_has[key].remove(worker) if not self.who_has[key]: missing_keys.add(key) gone_data = {k for k, v in self.who_has.items() if not v} self.in_play.difference_update(missing_keys) for k in gone_data: del self.who_has[k] if heal: self.heal_state() def update_graph(self, dsk=None, keys=None, restrictions={}): update_state(self.dask, self.dependencies, self.dependents, self.held_data, self.who_has, self.in_play, self.waiting, self.waiting_data, dsk, keys) cover_aliases(self.dask, dsk) self.restrictions.update(restrictions) new_keyorder = order(dsk) # TODO: define order wrt old graph for key in new_keyorder: if key not in self.keyorder: # TODO: add test for this self.keyorder[key] = (self.generation, new_keyorder[key]) # prefer old if len(dsk) > 1: self.generation += 1 # older graph generations take precedence for key in dsk: for dep in self.dependencies[key]: if dep in self.exceptions_blame: self.mark_failed(key, self.exceptions_blame[dep]) self.seed_ready_tasks(dsk) for key in keys: if self.who_has[key]: self.mark_key_in_memory(key) for plugin in self.plugins[:]: try: plugin.update_graph(self, dsk, keys, restrictions) except Exception as e: logger.exception(e) def release_held_data(self, key=None): if key in self.held_data: logger.debug("Release key: %s", key) self.held_data.remove(key) 
self.release_key(key) def heal_state(self): """ Recover from catastrophic change """ logger.debug("Heal state") self.log_state("Before Heal") state = heal(self.dependencies, self.dependents, set(self.who_has), self.stacks, self.processing, self.waiting, self.waiting_data) released = state['released'] self.in_play.clear(); self.in_play.update(state['in_play']) add_keys = {k for k, v in self.waiting.items() if not v} for key in self.held_data & released: self.report({'op': 'lost-key', 'key': key}) if self.stacks: for key in add_keys: self.mark_ready_to_run(key) for key in set(self.who_has) & released - self.held_data: self.delete_queue.put_nowait({'op': 'delete-task', 'key': key}) self.in_play.update(self.who_has) self.log_state("After Heal") def my_heal_missing_data(self, missing): logger.debug("Heal from missing data") return heal_missing_data(self.dask, self.dependencies, self.dependents, self.held_data, self.who_has, self.in_play, self.waiting, self.waiting_data, missing) def report(self, msg): for q in self.report_queues: q.put_nowait(msg) def add_plugin(self, plugin): self.plugins.append(plugin) def handle_queues(self, scheduler_queue, report_queue): self.scheduler_queues.append(scheduler_queue) if report_queue: self.report_queues.append(report_queue) future = self.handle_scheduler(scheduler_queue, report_queue) self.coroutines.append(future) return future @gen.coroutine def handle_scheduler(self, queue, report): """ The scheduler coroutine for dask scheduling This coroutine manages interactions with all worker cores and with the delete coroutine through queues. Parameters ---------- scheduler_queue: tornado.queues.Queue Get information from outside report_queue: tornado.queues.Queue Report information to outside worker_queues: dict {worker: tornado.queues.Queue} One queue per worker node. Each queue is listened to by several worker_core coroutines. delete_queue: tornado.queues.Queue One queue listened to by ``delete`` which connects to the center to delete unnecessary intermediate data who_has: dict {key: set} Mapping key to {set of worker-identities} has_what: dict {worker: set} Mapping worker-identity to {set of keys} ncores: dict {worker: int} Mapping worker-identity to number-of-cores """ assert (not self.dask) == (not self.dependencies), (self.dask, self.dependencies) if not self.status == 'running': self.status = 'running' self.report({'op': 'start'}) if report: report.put_nowait({'op': 'stream-start'}) while True: msg = yield queue.get() logger.debug("scheduler receives message %s", msg) op = msg.pop('op') if op == 'close-stream': break elif op == 'close': self._close() elif op in self.handlers: result = self.handlers[op](**msg) if isinstance(result, gen.Future): yield result else: logger.warn("Bad message: op=%s, %s", op, msg) if op == 'close': break logger.debug('Finished scheduling coroutine') @gen.coroutine def worker(self, ident): """ Manage a single distributed worker node This coroutine manages one remote worker. It spins up several ``worker_core`` coroutines, one for each core. It reports a closed connection to scheduler if one occurs. 
""" try: yield All([self.worker_core(ident, i) for i in range(self.ncores[ident])]) except (IOError, OSError): logger.info("Worker failed from closed stream: %s", ident) self.put({'op': 'worker-failed', 'worker': ident}) @gen.coroutine def worker_core(self, ident, i): """ Manage one core on one distributed worker node This coroutine listens on worker_queue for the following operations **Incoming Messages**: - compute-task: call worker.compute(...) on remote node, report when done - close: close connection to worker node, report `worker-finished` to scheduler **Outgoing Messages**: - task-finished: sent to scheduler once a task completes - task-erred: sent to scheduler when a task errs - worker-finished: sent to scheduler in response to a close command """ worker = rpc(ip=ident[0], port=ident[1]) logger.debug("Start worker core %s, %d", ident, i) while True: msg = yield self.worker_queues[ident].get() if msg['op'] == 'close': logger.debug("Worker core receives close message %s, %s", ident, msg) break if msg['op'] == 'compute-task': key = msg['key'] needed = msg['needed'] task = msg['task'] if not istask(task): response, content = yield worker.update_data(data={key: task}) assert response == b'OK', response nbytes = content['nbytes'][key] else: response, content = yield worker.compute(function=execute_task, args=(task,), needed=needed, key=key, kwargs={}) if response == b'OK': nbytes = content['nbytes'] logger.debug("Compute response from worker %s, %s, %s, %s", ident, key, response, content) if response == b'error': error, traceback = content self.mark_task_erred(key, ident, error, traceback) elif response == b'missing-data': self.mark_missing_data(content.args, key=key, worker=ident) else: self.mark_task_finished(key, ident, nbytes) yield worker.close(close=True) worker.close_streams() if msg.get('report', True): self.put({'op': 'worker-finished', 'worker': ident}) logger.debug("Close worker core, %s, %d", ident, i) @gen.coroutine def delete(self): """ Delete extraneous intermediates from distributed memory This coroutine manages a connection to the center in order to send keys that should be removed from distributed memory. We batch several keys that come in over the ``delete_queue`` into a list. Roughly once a second we send this list of keys over to the center which then handles deleting these keys from workers' memory. worker \ /-> worker node worker -> scheduler -> delete -> center --> worker node worker / \-> worker node **Incoming Messages** - delete-task: holds a key to be deleted - close: close this coroutine """ batch = list() last = time() while True: msg = yield self.delete_queue.get() if msg['op'] == 'close': break # TODO: trigger coroutine to go off in a second if no activity batch.append(msg['key']) if batch and time() - last > self.delete_batch_time: # One second batching logger.debug("Ask center to delete %d keys", len(batch)) last = time() yield self.center.delete_data(keys=batch) batch = list() if batch: yield self.center.delete_data(keys=batch) self.put({'op': 'delete-finished'}) logger.debug('Delete finished') @gen.coroutine def _nanny_listen(self, ip, port): stream = yield connect(ip=ip, port=port) yield write(stream, {'op': 'monitor_resources', 'interval': self.resource_interval}) while not stream.closed(): msg = yield read(stream) self.resource_logs[(ip, port)].append(msg) @gen.coroutine def _scatter(self, stream, data=None, workers=None): if not self.ncores: raise ValueError("No workers yet found. 
" "Try syncing with center.\n" " e.sync_center()") ncores = workers if workers is not None else self.ncores remotes, who_has, nbytes = yield scatter_to_workers( self.center, ncores, data) self.update_data(who_has=who_has, nbytes=nbytes) raise gen.Return(remotes) @gen.coroutine def _restart(self): logger.debug("Send shutdown signal to workers") for q in self.scheduler_queues + self.report_queues: clear_queue(q) for addr in self.nannies: self.mark_worker_missing(worker=addr, heal=False) logger.debug("Send kill signal to nannies") nannies = [rpc(ip=ip, port=n_port) for (ip, w_port), n_port in self.nannies.items()] yield All([nanny.kill() for nanny in nannies]) while self.ncores: yield gen.sleep(0.01) # All quiet yield All([nanny.instantiate(close=True) for nanny in nannies]) yield self._sync_center() self.start() self.report({'op': 'restart'}) for plugin in self.plugins[:]: try: plugin.restart(self) except Exception as e: logger.exception(e)
class SQSSource(object): """Implementation of ISource that receives messages from a SQS queue. """ max_delete_delay = 5 def __init__(self, logger, loop, gate, sqs_client, metric_prefix='source'): self.gate = gate self.collector = sqs_client self.logger = logger self.loop = loop self.metric_prefix = metric_prefix self.end_of_input = Event() self.input_error = Event() self.state = RUNNING self._delete_queue = Queue() self._should_flush_queue = Event() self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__, self.__class__.__name__) self.loop.spawn_callback(self.onInput) self.loop.spawn_callback(self._onDelete) @gen.coroutine def close(self, timeout=None): self.state = CLOSING self.logger.warning('Closing source') yield self._delete_queue.join(timeout) @gen.coroutine def _flush_delete_batch(self, batch_size): delete_batch = [ self._delete_queue.get_nowait() for pos in range(min(batch_size, self.collector.max_messages)) ] try: response = yield self.collector.delete_message_batch(*delete_batch) except SQSError as err: lmsg = 'Error encountered deleting processed messages in SQS: %s' self.logger.exception(lmsg, err) self.input_error.set() for msg in delete_batch: self._delete_queue.put_nowait(msg) else: if response.Failed: self.input_error.set() for req in response.Failed: self.logger.error('Message failed to delete: %s', req.Id) self._delete_queue.put_nowait(req) @gen.coroutine def _onDelete(self): respawn = True while respawn: try: qsize = self._delete_queue.qsize() # This will keep flushing until clear, # including items that show up in between flushes while qsize > 0: yield self._flush_delete_batch(qsize) qsize = self._delete_queue.qsize() self._should_flush_queue.clear() yield self._should_flush_queue.wait() except Exception as err: self.logger.exception(err) self.input_error.set() respawn = False @gen.coroutine def onInput(self): respawn = True retry_timeout = INITIAL_TIMEOUT # We use an algorithm similar to TCP window scaling, # so that we request fewer messages when we encounter # back pressure from our gate/drain and request more # when we flushed a complete batch window_size = self.collector.max_messages while respawn: try: response = yield self.collector.receive_message_batch( max_messages=window_size, ) if response.Messages: # We need to have low latency to delete messages # we've processed retry_timeout = INITIAL_TIMEOUT else: retry_timeout = min(retry_timeout * 2, MAX_TIMEOUT) yield gen.sleep(retry_timeout.total_seconds()) sent_full_batch = True for position, msg in enumerate(response.Messages): try: self.gate.put_nowait(msg) except QueueFull: self.logger.debug('Gate queue full; yielding') sent_full_batch = False # TODO: is it worth trying to batch and schedule # a flush at this point instead of many # single deletes? yield self.gate.put(msg) self._should_flush_queue.set() self._delete_queue.put_nowait(msg) statsd.increment('%s.queued' % self.metric_prefix, tags=[self.sender_tag]) # If we were able to flush the entire batch without waiting, # increase our window size to max_messages if sent_full_batch and \ window_size < self.collector.max_messages: window_size += 1 # Otherwise ask for less next time elif not sent_full_batch and window_size > 1: window_size -= 1 except Exception as err: self.logger.exception(err) self.input_error.set() respawn = False
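SQSSource.onInput adjusts how many messages it requests per batch in the additive, TCP-window-like fashion described in its comments: grow by one after a batch that reached the gate without blocking, shrink by one after back pressure, clamped between 1 and max_messages. A standalone, purely illustrative sketch of that adjustment rule:

def adjust_window(window_size, sent_full_batch, max_messages):
    if sent_full_batch and window_size < max_messages:
        return window_size + 1          # reward an uncontended flush
    if not sent_full_batch and window_size > 1:
        return window_size - 1          # back off under gate pressure
    return window_size

assert adjust_window(5, True, max_messages=10) == 6
assert adjust_window(5, False, max_messages=10) == 4
assert adjust_window(1, False, max_messages=10) == 1   # never below one message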
class Debugger: # Requests that requires that the debugger has started started_debug_msg_types = [ "dumpCell", "setBreakpoints", "source", "stackTrace", "variables", "attach", "configurationDone", ] # Requests that can be handled even if the debugger is not running static_debug_msg_types = [ "debugInfo", "inspectVariables", "richInspectVariables", "modules" ] def __init__(self, log, debugpy_stream, event_callback, shell_socket, session, just_my_code=True): self.log = log self.debugpy_client = DebugpyClient(log, debugpy_stream, self._handle_event) self.shell_socket = shell_socket self.session = session self.is_started = False self.event_callback = event_callback self.just_my_code = just_my_code self.stopped_queue = Queue() self.started_debug_handlers = {} for msg_type in Debugger.started_debug_msg_types: self.started_debug_handlers[msg_type] = getattr(self, msg_type) self.static_debug_handlers = {} for msg_type in Debugger.static_debug_msg_types: self.static_debug_handlers[msg_type] = getattr(self, msg_type) self.breakpoint_list = {} self.stopped_threads = set() self.debugpy_initialized = False self._removed_cleanup = {} self.debugpy_host = "127.0.0.1" self.debugpy_port = 0 self.endpoint = None self.variable_explorer = VariableExplorer() def _handle_event(self, msg): if msg["event"] == "stopped": if msg["body"]["allThreadsStopped"]: self.stopped_queue.put_nowait(msg) # Do not forward the event now, will be done in the handle_stopped_event return else: self.stopped_threads.add(msg["body"]["threadId"]) self.event_callback(msg) elif msg["event"] == "continued": if msg["body"]["allThreadsContinued"]: self.stopped_threads = set() else: self.stopped_threads.remove(msg["body"]["threadId"]) self.event_callback(msg) else: self.event_callback(msg) async def _forward_message(self, msg): return await self.debugpy_client.send_dap_request(msg) def _build_variables_response(self, request, variables): var_list = [ var for var in variables if self.accept_variable(var["name"]) ] reply = { "seq": request["seq"], "type": "response", "request_seq": request["seq"], "success": True, "command": request["command"], "body": { "variables": var_list }, } return reply def _accept_stopped_thread(self, thread_name): # TODO: identify Thread-2, Thread-3 and Thread-4. 
These are NOT # Control, IOPub or Heartbeat threads forbid_list = [ "IPythonHistorySavingThread", "Thread-2", "Thread-3", "Thread-4" ] return thread_name not in forbid_list async def handle_stopped_event(self): # Wait for a stopped event message in the stopped queue # This message is used for triggering the 'threads' request event = await self.stopped_queue.get() req = { "seq": event["seq"] + 1, "type": "request", "command": "threads" } rep = await self._forward_message(req) for t in rep["body"]["threads"]: if self._accept_stopped_thread(t["name"]): self.stopped_threads.add(t["id"]) self.event_callback(event) @property def tcp_client(self): return self.debugpy_client def start(self): if not self.debugpy_initialized: tmp_dir = get_tmp_directory() if not os.path.exists(tmp_dir): os.makedirs(tmp_dir) host, port = self.debugpy_client.get_host_port() code = "import debugpy;" code += 'debugpy.listen(("' + host + '",' + port + "))" content = {"code": code, "silent": True} self.session.send( self.shell_socket, "execute_request", content, None, (self.shell_socket.getsockopt(ROUTING_ID)), ) ident, msg = self.session.recv(self.shell_socket, mode=0) self.debugpy_initialized = msg["content"]["status"] == "ok" # Don't remove leading empty lines when debugging so the breakpoints are correctly positioned cleanup_transforms = get_ipython( ).input_transformer_manager.cleanup_transforms if leading_empty_lines in cleanup_transforms: index = cleanup_transforms.index(leading_empty_lines) self._removed_cleanup[index] = cleanup_transforms.pop(index) self.debugpy_client.connect_tcp_socket() return self.debugpy_initialized def stop(self): self.debugpy_client.disconnect_tcp_socket() # Restore remove cleanup transformers cleanup_transforms = get_ipython( ).input_transformer_manager.cleanup_transforms for index in sorted(self._removed_cleanup): func = self._removed_cleanup.pop(index) cleanup_transforms.insert(index, func) async def dumpCell(self, message): code = message["arguments"]["code"] file_name = get_file_name(code) with open(file_name, "w", encoding="utf-8") as f: f.write(code) reply = { "type": "response", "request_seq": message["seq"], "success": True, "command": message["command"], "body": { "sourcePath": file_name }, } return reply async def setBreakpoints(self, message): source = message["arguments"]["source"]["path"] self.breakpoint_list[source] = message["arguments"]["breakpoints"] return await self._forward_message(message) async def source(self, message): reply = { "type": "response", "request_seq": message["seq"], "command": message["command"] } source_path = message["arguments"]["source"]["path"] if os.path.isfile(source_path): with open(source_path, encoding="utf-8") as f: reply["success"] = True reply["body"] = {"content": f.read()} else: reply["success"] = False reply["message"] = "source unavailable" reply["body"] = {} return reply async def stackTrace(self, message): reply = await self._forward_message(message) # The stackFrames array can have the following content: # { frames from the notebook} # ... # { 'id': xxx, 'name': '<module>', ... } <= this is the first frame of the code from the notebook # { frames from ipykernel } # ... # {'id': yyy, 'name': '<module>', ... } <= this is the first frame of ipykernel code # or only the frames from the notebook. # We want to remove all the frames from ipykernel when they are present. 
try: sf_list = reply["body"]["stackFrames"] module_idx = len(sf_list) - next( i for i, v in enumerate(reversed(sf_list), 1) if v["name"] == "<module>" and i != 1) reply["body"]["stackFrames"] = reply["body"][ "stackFrames"][:module_idx + 1] except StopIteration: pass return reply def accept_variable(self, variable_name): forbid_list = [ "__name__", "__doc__", "__package__", "__loader__", "__spec__", "__annotations__", "__builtins__", "__builtin__", "__display__", "get_ipython", "debugpy", "exit", "quit", "In", "Out", "_oh", "_dh", "_", "__", "___", ] cond = variable_name not in forbid_list cond = cond and not bool(re.search(r"^_\d", variable_name)) cond = cond and variable_name[0:2] != "_i" return cond async def variables(self, message): reply = {} if not self.stopped_threads: variables = self.variable_explorer.get_children_variables( message["arguments"]["variablesReference"]) return self._build_variables_response(message, variables) else: reply = await self._forward_message(message) # TODO : check start and count arguments work as expected in debugpy reply["body"]["variables"] = [ var for var in reply["body"]["variables"] if self.accept_variable(var["name"]) ] return reply async def attach(self, message): host, port = self.debugpy_client.get_host_port() message["arguments"]["connect"] = {"host": host, "port": port} message["arguments"]["logToFile"] = True # Experimental option to break in non-user code. # The ipykernel source is in the call stack, so the user # has to manipulate the step-over and step-into in a wise way. # Set debugOptions for breakpoints in python standard library source. if not self.just_my_code: message["arguments"]["debugOptions"] = ["DebugStdLib"] return await self._forward_message(message) async def configurationDone(self, message): reply = { "seq": message["seq"], "type": "response", "request_seq": message["seq"], "success": True, "command": message["command"], } return reply async def debugInfo(self, message): breakpoint_list = [] for key, value in self.breakpoint_list.items(): breakpoint_list.append({"source": key, "breakpoints": value}) reply = { "type": "response", "request_seq": message["seq"], "success": True, "command": message["command"], "body": { "isStarted": self.is_started, "hashMethod": "Murmur2", "hashSeed": get_tmp_hash_seed(), "tmpFilePrefix": get_tmp_directory() + os.sep, "tmpFileSuffix": ".py", "breakpoints": breakpoint_list, "stoppedThreads": list(self.stopped_threads), "richRendering": True, "exceptionPaths": ["Python Exceptions"], }, } return reply async def inspectVariables(self, message): self.variable_explorer.untrack_all() # looks like the implementation of untrack_all in ptvsd # destroys objects we need to track. 
We have no choice but # to reinstantiate the object self.variable_explorer = VariableExplorer() self.variable_explorer.track() variables = self.variable_explorer.get_children_variables() return self._build_variables_response(message, variables) async def richInspectVariables(self, message): reply = { "type": "response", "sequence_seq": message["seq"], "success": False, "command": message["command"], } var_name = message["arguments"]["variableName"] valid_name = str.isidentifier(var_name) if not valid_name: reply["body"] = {"data": {}, "metadata": {}} if var_name == "special variables" or var_name == "function variables": reply["success"] = True return reply repr_data = {} repr_metadata = {} if not self.stopped_threads: # The code did not hit a breakpoint, we use the interpreter # to get the rich representation of the variable result = get_ipython().user_expressions({var_name: var_name})[var_name] if result.get("status", "error") == "ok": repr_data = result.get("data", {}) repr_metadata = result.get("metadata", {}) else: # The code has stopped on a breakpoint, we use the evaluate # request to get the rich representation of the variable code = f"get_ipython().display_formatter.format({var_name})" frame_id = message["arguments"]["frameId"] seq = message["seq"] reply = await self._forward_message({ "type": "request", "command": "evaluate", "seq": seq + 1, "arguments": { "expression": code, "frameId": frame_id }, }) if reply["success"]: repr_data, repr_metadata = eval(reply["body"]["result"], {}, {}) body = { "data": repr_data, "metadata": {k: v for k, v in repr_metadata.items() if k in repr_data}, } reply["body"] = body reply["success"] = True return reply async def modules(self, message): modules = list(sys.modules.values()) startModule = message.get("startModule", 0) moduleCount = message.get("moduleCount", len(modules)) mods = [] for i in range(startModule, moduleCount): module = modules[i] filename = getattr(getattr(module, "__spec__", None), "origin", None) if filename and filename.endswith(".py"): mods.append({ "id": i, "name": module.__name__, "path": filename }) reply = {"body": {"modules": mods, "totalModules": len(modules)}} return reply async def process_request(self, message): reply = {} if message["command"] == "initialize": if self.is_started: self.log.info("The debugger has already started") else: self.is_started = self.start() if self.is_started: self.log.info("The debugger has started") else: reply = { "command": "initialize", "request_seq": message["seq"], "seq": 3, "success": False, "type": "response", } handler = self.static_debug_handlers.get(message["command"], None) if handler is not None: reply = await handler(message) elif self.is_started: handler = self.started_debug_handlers.get(message["command"], None) if handler is not None: reply = await handler(message) else: reply = await self._forward_message(message) if message["command"] == "disconnect": self.stop() self.breakpoint_list = {} self.stopped_threads = set() self.is_started = False self.log.info("The debugger has stopped") return reply
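process_request above routes plain Debug Adapter Protocol envelopes: "initialize" starts the debugger, commands in static_debug_msg_types are answered even when it is not running, commands in started_debug_msg_types go to their dedicated handlers, and anything else is forwarded to debugpy once started. The request shapes below are a hedged illustration; the seq numbers and cell code are made up, while the field names follow the handlers above.

initialize_request = {"type": "request", "seq": 1, "command": "initialize", "arguments": {}}

dump_cell_request = {
    "type": "request",
    "seq": 2,
    "command": "dumpCell",                      # in started_debug_msg_types
    "arguments": {"code": "a = 1\nprint(a)"},
}
# dumpCell replies with the temporary file the cell was written to, e.g.
# {"type": "response", "request_seq": 2, "success": True,
#  "command": "dumpCell", "body": {"sourcePath": "<result of get_file_name(code)>"}}

debug_info_request = {"type": "request", "seq": 3, "command": "debugInfo", "arguments": {}}
# debugInfo is in static_debug_msg_types, so it is answered even before
# "initialize"; once the debugger is running, any command without a dedicated
# handler is forwarded verbatim to debugpy.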
class Debugger: # Requests that requires that the debugger has started started_debug_msg_types = [ 'dumpCell', 'setBreakpoints', 'source', 'stackTrace', 'variables', 'attach', 'configurationDone' ] # Requests that can be handled even if the debugger is not running static_debug_msg_types = [ 'debugInfo', 'inspectVariables', 'richInspectVariables', 'modules' ] def __init__(self, log, debugpy_stream, event_callback, shell_socket, session): self.log = log self.debugpy_client = DebugpyClient(log, debugpy_stream, self._handle_event) self.shell_socket = shell_socket self.session = session self.is_started = False self.event_callback = event_callback self.stopped_queue = Queue() self.started_debug_handlers = {} for msg_type in Debugger.started_debug_msg_types: self.started_debug_handlers[msg_type] = getattr(self, msg_type) self.static_debug_handlers = {} for msg_type in Debugger.static_debug_msg_types: self.static_debug_handlers[msg_type] = getattr(self, msg_type) self.breakpoint_list = {} self.stopped_threads = set() self.debugpy_initialized = False self._removed_cleanup = {} self.debugpy_host = '127.0.0.1' self.debugpy_port = 0 self.endpoint = None self.variable_explorer = VariableExplorer() def _handle_event(self, msg): if msg['event'] == 'stopped': if msg['body']['allThreadsStopped']: self.stopped_queue.put_nowait(msg) # Do not forward the event now, will be done in the handle_stopped_event return else: self.stopped_threads.add(msg['body']['threadId']) self.event_callback(msg) elif msg['event'] == 'continued': if msg['body']['allThreadsContinued']: self.stopped_threads = set() else: self.stopped_threads.remove(msg['body']['threadId']) self.event_callback(msg) else: self.event_callback(msg) async def _forward_message(self, msg): return await self.debugpy_client.send_dap_request(msg) def _build_variables_response(self, request, variables): var_list = [ var for var in variables if self.accept_variable(var['name']) ] reply = { 'seq': request['seq'], 'type': 'response', 'request_seq': request['seq'], 'success': True, 'command': request['command'], 'body': { 'variables': var_list } } return reply def _accept_stopped_thread(self, thread_name): # TODO: identify Thread-2, Thread-3 and Thread-4. 
These are NOT # Control, IOPub or Heartbeat threads forbid_list = [ 'IPythonHistorySavingThread', 'Thread-2', 'Thread-3', 'Thread-4' ] return thread_name not in forbid_list async def handle_stopped_event(self): # Wait for a stopped event message in the stopped queue # This message is used for triggering the 'threads' request event = await self.stopped_queue.get() req = { 'seq': event['seq'] + 1, 'type': 'request', 'command': 'threads' } rep = await self._forward_message(req) for t in rep['body']['threads']: if self._accept_stopped_thread(t['name']): self.stopped_threads.add(t['id']) self.event_callback(event) @property def tcp_client(self): return self.debugpy_client def start(self): if not self.debugpy_initialized: tmp_dir = get_tmp_directory() if not os.path.exists(tmp_dir): os.makedirs(tmp_dir) host, port = self.debugpy_client.get_host_port() code = 'import debugpy;' code += 'debugpy.listen(("' + host + '",' + port + '))' content = {'code': code, 'silent': True} self.session.send(self.shell_socket, 'execute_request', content, None, (self.shell_socket.getsockopt(ROUTING_ID))) ident, msg = self.session.recv(self.shell_socket, mode=0) self.debugpy_initialized = msg['content']['status'] == 'ok' # Don't remove leading empty lines when debugging so the breakpoints are correctly positioned cleanup_transforms = get_ipython( ).input_transformer_manager.cleanup_transforms if leading_empty_lines in cleanup_transforms: index = cleanup_transforms.index(leading_empty_lines) self._removed_cleanup[index] = cleanup_transforms.pop(index) self.debugpy_client.connect_tcp_socket() return self.debugpy_initialized def stop(self): self.debugpy_client.disconnect_tcp_socket() # Restore remove cleanup transformers cleanup_transforms = get_ipython( ).input_transformer_manager.cleanup_transforms for index in sorted(self._removed_cleanup): func = self._removed_cleanup.pop(index) cleanup_transforms.insert(index, func) async def dumpCell(self, message): code = message['arguments']['code'] file_name = get_file_name(code) with open(file_name, 'w', encoding='utf-8') as f: f.write(code) reply = { 'type': 'response', 'request_seq': message['seq'], 'success': True, 'command': message['command'], 'body': { 'sourcePath': file_name } } return reply async def setBreakpoints(self, message): source = message["arguments"]["source"]["path"] self.breakpoint_list[source] = message["arguments"]["breakpoints"] return await self._forward_message(message) async def source(self, message): reply = { 'type': 'response', 'request_seq': message['seq'], 'command': message['command'] } source_path = message["arguments"]["source"]["path"] if os.path.isfile(source_path): with open(source_path, encoding='utf-8') as f: reply['success'] = True reply['body'] = {'content': f.read()} else: reply['success'] = False reply['message'] = 'source unavailable' reply['body'] = {} return reply async def stackTrace(self, message): reply = await self._forward_message(message) # The stackFrames array can have the following content: # { frames from the notebook} # ... # { 'id': xxx, 'name': '<module>', ... } <= this is the first frame of the code from the notebook # { frames from ipykernel } # ... # {'id': yyy, 'name': '<module>', ... } <= this is the first frame of ipykernel code # or only the frames from the notebook. # We want to remove all the frames from ipykernel when they are present. 
try: sf_list = reply["body"]["stackFrames"] module_idx = len(sf_list) - next( i for i, v in enumerate(reversed(sf_list), 1) if v["name"] == "<module>" and i != 1) reply["body"]["stackFrames"] = reply["body"][ "stackFrames"][:module_idx + 1] except StopIteration: pass return reply def accept_variable(self, variable_name): forbid_list = [ '__name__', '__doc__', '__package__', '__loader__', '__spec__', '__annotations__', '__builtins__', '__builtin__', '__display__', 'get_ipython', 'debugpy', 'exit', 'quit', 'In', 'Out', '_oh', '_dh', '_', '__', '___' ] cond = variable_name not in forbid_list cond = cond and not bool(re.search(r'^_\d', variable_name)) cond = cond and variable_name[0:2] != '_i' return cond async def variables(self, message): reply = {} if not self.stopped_threads: variables = self.variable_explorer.get_children_variables( message['arguments']['variablesReference']) return self._build_variables_response(message, variables) else: reply = await self._forward_message(message) # TODO : check start and count arguments work as expected in debugpy reply['body']['variables'] = \ [var for var in reply['body']['variables'] if self.accept_variable(var['name'])] return reply async def attach(self, message): host, port = self.debugpy_client.get_host_port() message['arguments']['connect'] = {'host': host, 'port': port} message['arguments']['logToFile'] = True # Reverts that option for now since it leads to spurious break of the code # in ipykernel source and resuming the execution leads to several errors # in the kernel. # Set debugOptions for breakpoints in python standard library source. # message['arguments']['debugOptions'] = [ 'DebugStdLib' ] return await self._forward_message(message) async def configurationDone(self, message): reply = { 'seq': message['seq'], 'type': 'response', 'request_seq': message['seq'], 'success': True, 'command': message['command'] } return reply async def debugInfo(self, message): breakpoint_list = [] for key, value in self.breakpoint_list.items(): breakpoint_list.append({'source': key, 'breakpoints': value}) reply = { 'type': 'response', 'request_seq': message['seq'], 'success': True, 'command': message['command'], 'body': { 'isStarted': self.is_started, 'hashMethod': 'Murmur2', 'hashSeed': get_tmp_hash_seed(), 'tmpFilePrefix': get_tmp_directory() + os.sep, 'tmpFileSuffix': '.py', 'breakpoints': breakpoint_list, 'stoppedThreads': list(self.stopped_threads), 'richRendering': True, 'exceptionPaths': ['Python Exceptions'] } } return reply async def inspectVariables(self, message): self.variable_explorer.untrack_all() # looks like the implementation of untrack_all in ptvsd # destroys objects we nee din track. 
We have no choice but # reinstantiate the object self.variable_explorer = VariableExplorer() self.variable_explorer.track() variables = self.variable_explorer.get_children_variables() return self._build_variables_response(message, variables) async def richInspectVariables(self, message): reply = { "type": "response", "sequence_seq": message["seq"], "success": False, "command": message["command"], } var_name = message["arguments"]["variableName"] valid_name = str.isidentifier(var_name) if not valid_name: reply["body"] = {"data": {}, "metadata": {}} if var_name == "special variables" or var_name == "function variables": reply["success"] = True return reply repr_data = {} repr_metadata = {} if not self.stopped_threads: # The code did not hit a breakpoint, we use the intepreter # to get the rich representation of the variable result = get_ipython().user_expressions({var_name: var_name})[var_name] if result.get("status", "error") == "ok": repr_data = result.get("data", {}) repr_metadata = result.get("metadata", {}) else: # The code has stopped on a breakpoint, we use the setExpression # request to get the rich representation of the variable code = f"get_ipython().display_formatter.format({var_name})" frame_id = message["arguments"]["frameId"] seq = message["seq"] reply = await self._forward_message({ "type": "request", "command": "evaluate", "seq": seq + 1, "arguments": { "expression": code, "frameId": frame_id }, }) if reply["success"]: repr_data, repr_metadata = eval(reply["body"]["result"], {}, {}) body = { "data": repr_data, "metadata": {k: v for k, v in repr_metadata.items() if k in repr_data}, } reply["body"] = body reply["success"] = True return reply async def modules(self, message): modules = list(sys.modules.values()) startModule = message.get('startModule', 0) moduleCount = message.get('moduleCount', len(modules)) mods = [] for i in range(startModule, moduleCount): module = modules[i] filename = getattr(getattr(module, '__spec__', None), 'origin', None) if filename and filename.endswith('.py'): mods.append({ 'id': i, 'name': module.__name__, 'path': filename }) reply = {'body': {'modules': mods, 'totalModules': len(modules)}} return reply async def process_request(self, message): reply = {} if message['command'] == 'initialize': if self.is_started: self.log.info('The debugger has already started') else: self.is_started = self.start() if self.is_started: self.log.info('The debugger has started') else: reply = { 'command': 'initialize', 'request_seq': message['seq'], 'seq': 3, 'success': False, 'type': 'response' } handler = self.static_debug_handlers.get(message['command'], None) if handler is not None: reply = await handler(message) elif self.is_started: handler = self.started_debug_handlers.get(message['command'], None) if handler is not None: reply = await handler(message) else: reply = await self._forward_message(message) if message['command'] == 'disconnect': self.stop() self.breakpoint_list = {} self.stopped_threads = set() self.is_started = False self.log.info('The debugger has stopped') return reply
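A minimal, self-contained sketch of the stopped-event hand-off used by the Debugger above, assuming only Tornado and asyncio: the event callback buffers 'stopped' events on a queue instead of forwarding them, and a separate coroutine drains the queue, performs a follow-up lookup, and only then notifies the client. The names fake_threads_request and forward are illustrative stand-ins, not part of the real class.

import asyncio
from tornado.queues import Queue

stopped_queue = Queue()

def handle_event(msg, forward):
    # Mirrors Debugger._handle_event: buffer 'stopped' events, forward the rest.
    if msg["event"] == "stopped" and msg["body"]["allThreadsStopped"]:
        stopped_queue.put_nowait(msg)
        return
    forward(msg)

async def fake_threads_request():
    # Stand-in for the 'threads' DAP request issued by handle_stopped_event.
    await asyncio.sleep(0)
    return {"body": {"threads": [{"id": 1, "name": "MainThread"}]}}

async def handle_stopped_event(stopped_threads, forward):
    event = await stopped_queue.get()
    rep = await fake_threads_request()
    for t in rep["body"]["threads"]:
        stopped_threads.add(t["id"])
    forward(event)

async def main():
    stopped_threads, forwarded = set(), []
    handle_event({"event": "stopped", "body": {"allThreadsStopped": True}},
                 forwarded.append)
    await handle_stopped_event(stopped_threads, forwarded.append)
    print(forwarded, stopped_threads)

asyncio.run(main())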
class TornadoTransmission(): def __init__(self, max_concurrent_batches=10, block_on_send=False, block_on_response=False, max_batch_size=100, send_frequency=timedelta(seconds=0.25), user_agent_addition=''): if not has_tornado: raise ImportError( 'TornadoTransmission requires tornado, but it was not found.' ) self.block_on_send = block_on_send self.block_on_response = block_on_response self.max_batch_size = max_batch_size self.send_frequency = send_frequency user_agent = "libhoney-py/" + VERSION if user_agent_addition: user_agent += " " + user_agent_addition self.http_client = AsyncHTTPClient( force_instance=True, defaults=dict(user_agent=user_agent)) # libhoney adds events to the pending queue for us to send self.pending = Queue(maxsize=1000) # we hand back responses from the API on the responses queue self.responses = Queue(maxsize=2000) self.batch_data = {} self.sd = statsd.StatsClient(prefix="libhoney") self.batch_sem = Semaphore(max_concurrent_batches) def start(self): ioloop.IOLoop.current().spawn_callback(self._sender) def send(self, ev): '''send accepts an event and queues it to be sent''' self.sd.gauge("queue_length", self.pending.qsize()) try: if self.block_on_send: self.pending.put(ev) else: self.pending.put_nowait(ev) self.sd.incr("messages_queued") except QueueFull: response = { "status_code": 0, "duration": 0, "metadata": ev.metadata, "body": "", "error": "event dropped; queue overflow", } if self.block_on_response: self.responses.put(response) else: try: self.responses.put_nowait(response) except QueueFull: # if the response queue is full when trying to add an event # queue is full response, just skip it. pass self.sd.incr("queue_overflow") # We're using the older decorator/yield model for compatibility with # Python versions before 3.5. # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await @gen.coroutine def _sender(self): '''_sender is the control loop that pulls events off the `self.pending` queue and submits batches for actual sending. ''' events = [] last_flush = time.time() while True: try: ev = yield self.pending.get(timeout=self.send_frequency) if ev is None: # signals shutdown yield self._flush(events) return events.append(ev) if (len(events) > self.max_batch_size or time.time() - last_flush > self.send_frequency.total_seconds()): yield self._flush(events) events = [] except TimeoutError: yield self._flush(events) events = [] last_flush = time.time() @gen.coroutine def _flush(self, events): if not events: return for dest, group in group_events_by_destination(events).items(): yield self._send_batch(dest, group) @gen.coroutine def _send_batch(self, destination, events): ''' Makes a single batch API request with the given list of events. 
The `destination` argument contains the write key, API host and dataset name used to build the request.''' start = time.time() status_code = 0 try: # enforce max_concurrent_batches yield self.batch_sem.acquire() url = urljoin(urljoin(destination.api_host, "/1/batch/"), destination.dataset) payload = [] for ev in events: event_time = ev.created_at.isoformat() if ev.created_at.tzinfo is None: event_time += "Z" payload.append({ "time": event_time, "samplerate": ev.sample_rate, "data": ev.fields() }) req = HTTPRequest( url, method='POST', headers={ "X-Honeycomb-Team": destination.writekey, "Content-Type": "application/json", }, body=json.dumps(payload, default=json_default_handler), ) self.http_client.fetch(req, self._response_callback) # store the events that were sent so we can process responses later # it is important that we delete these eventually, or we'll run into memory issues self.batch_data[req] = {"start": start, "events": events} except Exception as e: # Catch all exceptions and hand them to the responses queue. self._enqueue_errors(status_code, e, start, events) finally: self.batch_sem.release() def _enqueue_errors(self, status_code, error, start, events): for ev in events: self.sd.incr("send_errors") self._enqueue_response(status_code, "", error, start, ev.metadata) def _response_callback(self, resp): # resp.request should be the same HTTPRequest object built by _send_batch # and mapped to values in batch_data events = self.batch_data[resp.request]["events"] start = self.batch_data[resp.request]["start"] try: status_code = resp.code resp.rethrow() statuses = [d["status"] for d in json.loads(resp.body)] for ev, status in zip(events, statuses): self._enqueue_response(status, "", None, start, ev.metadata) self.sd.incr("messages_sent") except Exception as e: self._enqueue_errors(status_code, e, start, events) self.sd.incr("send_errors") finally: # clean up the data for this batch del self.batch_data[resp.request] def _enqueue_response(self, status_code, body, error, start, metadata): resp = { "status_code": status_code, "body": body, "error": error, "duration": (time.time() - start) * 1000, "metadata": metadata } if self.block_on_response: self.responses.put(resp) else: try: self.responses.put_nowait(resp) except QueueFull: pass def close(self): '''call close to send all in-flight requests and shut down the senders nicely. Times out after max 20 seconds per sending thread plus 10 seconds for the response queue''' try: self.pending.put(None, 10) except QueueFull: pass # signal to the responses queue that nothing more is coming. try: self.responses.put(None, 10) except QueueFull: pass def get_response_queue(self): ''' return the responses queue on to which will be sent the response objects from each event send''' return self.responses
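A stripped-down, runnable sketch of the _sender batching loop above, assuming only Tornado: events are pulled off the pending queue with a relative timeout, and the current batch is flushed when it reaches a size limit, when the flush interval elapses, or when a None sentinel signals shutdown. The flush coroutine here just prints, standing in for _send_batch.

from datetime import timedelta
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

pending = Queue(maxsize=1000)
MAX_BATCH = 3
SEND_FREQUENCY = timedelta(seconds=0.25)

@gen.coroutine
def flush(events):
    if events:
        print("flushing batch of", len(events))

@gen.coroutine
def sender():
    events = []
    last_flush = IOLoop.current().time()
    while True:
        try:
            # Queue.get accepts a timedelta as a relative timeout.
            ev = yield pending.get(timeout=SEND_FREQUENCY)
            if ev is None:  # shutdown sentinel
                yield flush(events)
                return
            events.append(ev)
            if (len(events) >= MAX_BATCH or
                    IOLoop.current().time() - last_flush > SEND_FREQUENCY.total_seconds()):
                yield flush(events)
                events, last_flush = [], IOLoop.current().time()
        except gen.TimeoutError:
            yield flush(events)
            events, last_flush = [], IOLoop.current().time()

@gen.coroutine
def main():
    for i in range(7):
        yield pending.put(i)
    yield pending.put(None)
    yield sender()

IOLoop.current().run_sync(main)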
class Rx(PrettyPrintable): def __init__(self, rx_tree, session_id, header_table=None, io_loop=None, service_name=None, raw_headers=None, trace_id=None): if header_table is None: header_table = CocaineHeaders() # If it's not the main thread # and a current IOloop doesn't exist here, # IOLoop.instance becomes self._io_loop self._io_loop = io_loop or IOLoop.current() self._queue = Queue() self._done = False self.session_id = session_id self.service_name = service_name self.rx_tree = rx_tree self.default_protocol = detect_protocol_type(rx_tree) self._headers = header_table self._current_headers = self._headers.merge(raw_headers) self.log = get_trace_adapter(log, trace_id) @coroutine def get(self, timeout=0, protocol=None): if self._done and self._queue.empty(): raise ChokeEvent() # to pull various service errors if timeout <= 0: item = yield self._queue.get() else: deadline = datetime.timedelta(seconds=timeout) item = yield self._queue.get(deadline) if isinstance(item, Exception): raise item if protocol is None: protocol = self.default_protocol name, payload, raw_headers = item self._current_headers = self._headers.merge(raw_headers) res = protocol(name, payload) if isinstance(res, ProtocolError): raise ServiceError(self.service_name, res.reason, res.code, res.category) else: raise Return(res) def done(self): self._done = True def push(self, msg_type, payload, raw_headers): dispatch = self.rx_tree.get(msg_type) self.log.debug("dispatch %s %.300s", dispatch, payload) if dispatch is None: raise InvalidMessageType(self.service_name, CocaineErrno.INVALIDMESSAGETYPE, "unexpected message type %s" % msg_type) name, rx = dispatch self.log.info( "got message from `%s`: channel id: %s, type: %s", self.service_name, self.session_id, name ) self._queue.put_nowait((name, payload, raw_headers)) if rx == {}: # the last transition self.done() elif rx is not None: # not a recursive transition self.rx_tree = rx def error(self, err): self._queue.put_nowait(err) def closed(self): return self._done def _format(self): return "name: %s, queue: %s, done: %s" % (self.service_name, self._queue, self._done) @property def headers(self): return self._current_headers
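The Rx channel above multiplexes normal messages and errors through the same queue: error() enqueues an exception object and get() re-raises it on the consumer side, so protocol failures surface exactly where the caller awaits a result. A minimal sketch of that pattern, assuming nothing beyond Tornado:

from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

queue = Queue()

@gen.coroutine
def get():
    # Exceptions travel through the same queue as payloads and are re-raised here.
    item = yield queue.get()
    if isinstance(item, Exception):
        raise item
    raise gen.Return(item)

@gen.coroutine
def main():
    queue.put_nowait(("write", b"payload"))
    queue.put_nowait(ConnectionError("service went away"))
    name, payload = yield get()
    print("got", name, payload)
    try:
        yield get()
    except ConnectionError as exc:
        print("propagated error:", exc)

IOLoop.current().run_sync(main)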
class Worker(Server): """ Worker Node Workers perform two functions: 1. **Serve data** from a local dictionary 2. **Perform computation** on that data and on data from peers Additionally workers keep a Center informed of their data and use that Center to gather data from other workers when necessary to perform a computation. You can start a worker with the ``dworker`` command line application:: $ dworker scheduler-ip:port **State** * **data:** ``{key: object}``: Dictionary mapping keys to actual values * **active:** ``{key}``: Set of keys currently under computation * **ncores:** ``int``: Number of cores used by this worker process * **executor:** ``concurrent.futures.ThreadPoolExecutor``: Executor used to perform computation * **local_dir:** ``path``: Path on local machine to store temporary files * **center:** ``rpc``: Location of center or scheduler. See ``.ip/.port`` attributes. * **name:** ``string``: Alias * **services:** ``{str: Server}``: Auxiliary web servers running on this worker * **service_ports:** ``{str: port}``: Examples -------- Create centers and workers in Python: >>> from distributed import Center, Worker >>> c = Center('192.168.0.100', 8787) # doctest: +SKIP >>> w = Worker(c.ip, c.port) # doctest: +SKIP >>> yield w._start(port=8788) # doctest: +SKIP Or use the command line:: $ dcenter Start center at 127.0.0.1:8787 $ dworker 127.0.0.1:8787 Start worker at: 127.0.0.1:8788 Registered with center at: 127.0.0.1:8787 See Also -------- distributed.center.Center: """ def __init__(self, center_ip, center_port, ip=None, ncores=None, loop=None, local_dir=None, services=None, service_ports=None, name=None, **kwargs): self.ip = ip or get_ip() self._port = 0 self.ncores = ncores or _ncores self.data = dict() self.loop = loop or IOLoop.current() self.status = None self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-') self.executor = ThreadPoolExecutor(self.ncores) self.thread_tokens = Queue( ) # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572 for i in range(self.ncores): self.thread_tokens.put_nowait(i) self.center = rpc(ip=center_ip, port=center_port) self.active = set() self.name = name if not os.path.exists(self.local_dir): os.mkdir(self.local_dir) if self.local_dir not in sys.path: sys.path.insert(0, self.local_dir) self.services = {} self.service_ports = service_ports or {} for k, v in (services or {}).items(): if isinstance(k, tuple): k, port = k else: port = 0 self.services[k] = v(self) self.services[k].listen(port) self.service_ports[k] = self.services[k].port handlers = { 'compute': self.compute, 'gather': self.gather, 'compute-stream': self.compute_stream, 'run': self.run, 'get_data': self.get_data, 'update_data': self.update_data, 'delete_data': self.delete_data, 'terminate': self.terminate, 'ping': pingpong, 'health': self.health, 'upload_file': self.upload_file } super(Worker, self).__init__(handlers, **kwargs) @gen.coroutine def _start(self, port=0): self.listen(port) self.name = self.name or self.address for k, v in self.services.items(): v.listen(0) self.service_ports[k] = v.port logger.info(' Start worker at: %20s:%d', self.ip, self.port) for k, v in self.service_ports.items(): logger.info(' %16s at: %20s:%d' % (k, self.ip, v)) logger.info('Waiting to connect to: %20s:%d', self.center.ip, self.center.port) while True: try: resp = yield self.center.register(ncores=self.ncores, address=(self.ip, self.port), keys=list(self.data), services=self.service_ports, name=self.name) break except (OSError, StreamClosedError): 
logger.debug("Unable to register with scheduler. Waiting") yield gen.sleep(0.5) if resp != 'OK': raise ValueError(resp) logger.info(' Registered to: %20s:%d', self.center.ip, self.center.port) self.status = 'running' def start(self, port=0): self.loop.add_callback(self._start, port) def identity(self, stream): return { 'type': type(self).__name__, 'id': self.id, 'center': (self.center.ip, self.center.port) } @gen.coroutine def _close(self, report=True, timeout=10): if report: yield gen.with_timeout(timedelta(seconds=timeout), self.center.unregister(address=(self.ip, self.port)), io_loop=self.loop) self.center.close_streams() self.stop() self.executor.shutdown() if os.path.exists(self.local_dir): shutil.rmtree(self.local_dir) for k, v in self.services.items(): v.stop() self.status = 'closed' self.stop() @gen.coroutine def terminate(self, stream, report=True): yield self._close(report=report) raise Return('OK') @property def address(self): return '%s:%d' % (self.ip, self.port) @property def address_tuple(self): return (self.ip, self.port) @gen.coroutine def gather(self, stream=None, who_has=None): who_has = { k: [coerce_to_address(addr) for addr in v] for k, v in who_has.items() if k not in self.data } try: result = yield gather_from_workers(who_has) except KeyError as e: logger.warn("Could not find data", e) raise Return({'status': 'missing-data', 'keys': e.args}) else: self.data.update(result) raise Return({'status': 'OK'}) @gen.coroutine def _ready_task(self, function=None, key=None, args=(), kwargs={}, task=None, who_has=None): who_has = who_has or {} diagnostics = {} data = {k: self.data[k] for k in who_has if k in self.data} who_has = { k: set(map(coerce_to_address, v)) for k, v in who_has.items() if k not in self.data } if who_has: try: logger.info("gather %d keys from peers: %s", len(who_has), str(who_has)) diagnostics['transfer-start'] = time() other = yield gather_from_workers(who_has) diagnostics['transfer-stop'] = time() self.data.update(other) yield self.center.add_keys(address=self.address, keys=list(other)) data.update(other) except KeyError as e: logger.warn("Could not find data for %s", key) raise Return({ 'status': 'missing-data', 'keys': e.args, 'key': key }) else: transfer_time = 0 try: start = default_timer() if task is not None: task = loads(task) if function is not None: function = loads(function) if args: args = loads(args) if kwargs: kwargs = loads(kwargs) diagnostics['deserialization'] = default_timer() - start except Exception as e: logger.warn("Could not deserialize task", exc_info=True) raise Return(assoc(error_message(e), 'key', key)) if task is not None: assert not function and not args and not kwargs function = execute_task args = (task, ) # Fill args with data args2 = pack_data(args, data) kwargs2 = pack_data(kwargs, data) raise Return({ 'status': 'OK', 'function': function, 'args': args2, 'kwargs': kwargs2, 'diagnostics': diagnostics, 'key': key }) @gen.coroutine def executor_submit(self, key, function, *args, **kwargs): """ Safely run function in thread pool executor We've run into issues running concurrent.future futures within tornado. Apparently it's advantageous to use timeouts and periodic callbacks to ensure things run smoothly. This can get tricky, so we pull it off into an separate method. 
""" token = yield self.thread_tokens.get() job_counter[0] += 1 i = job_counter[0] # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key) future = self.executor.submit(function, *args, **kwargs) pc = PeriodicCallback( lambda: logger.debug("future state: %s - %s", key, future._state), 1000) pc.start() try: if sys.version_info < (3, 2): yield future else: while not future.done() and future._state != 'FINISHED': try: yield gen.with_timeout(timedelta(seconds=1), future, io_loop=self.loop) break except gen.TimeoutError: logger.info("work queue size: %d", self.executor._work_queue.qsize()) logger.info("future state: %s", future._state) logger.info("Pending job %d: %s", i, future) finally: pc.stop() self.thread_tokens.put(token) result = future.result() logger.info("Finish job %d, %s", i, key) raise gen.Return(result) @gen.coroutine def compute_stream(self, stream): with log_errors(): logger.debug("Open compute stream") bstream = BatchedSend(interval=10, loop=self.loop) bstream.start(stream) @gen.coroutine def process(msg): try: result = yield self.compute(report=False, **msg) bstream.send(result) except Exception as e: logger.exception(e) bstream.send(assoc(error_message(e), 'key', msg.get('key'))) with log_errors(): while True: try: msgs = yield read(stream) except StreamClosedError: break if not isinstance(msgs, list): msgs = [msgs] for msg in msgs: op = msg.pop('op', None) if op == 'close': break elif op == 'compute-task': self.loop.add_callback(process, msg) else: logger.warning("Unknown operation %s, %s", op, msg) yield bstream.close() logger.info("Close compute stream") @gen.coroutine def compute(self, stream=None, function=None, key=None, args=(), kwargs={}, task=None, who_has=None, report=True): """ Execute function """ self.active.add(key) # Ready function for computation msg = yield self._ready_task(function=function, key=key, args=args, kwargs=kwargs, task=task, who_has=who_has) if msg['status'] != 'OK': try: self.active.remove(key) except KeyError: pass raise Return(msg) else: function = msg['function'] args = msg['args'] kwargs = msg['kwargs'] # Log and compute in separate thread result = yield self.executor_submit(key, apply_function, function, args, kwargs) result['key'] = key result.update(msg['diagnostics']) if result['status'] == 'OK': self.data[key] = result.pop('result') if report: response = yield self.center.add_keys(address=(self.ip, self.port), keys=[key]) if not response == 'OK': logger.warn('Could not report results to center: %s', str(response)) else: logger.warn( " Compute Failed\n" "Function: %s\n" "args: %s\n" "kwargs: %s\n", str(funcname(function))[:1000], str(args)[:1000], str(kwargs)[:1000], exc_info=True) logger.debug("Send compute response to scheduler: %s, %s", key, msg) try: self.active.remove(key) except KeyError: pass raise Return(result) @gen.coroutine def run(self, stream, function=None, args=(), kwargs={}): function = loads(function) if args: args = loads(args) if kwargs: kwargs = loads(kwargs) try: result = function(*args, **kwargs) except Exception as e: logger.warn( " Run Failed\n" "Function: %s\n" "args: %s\n" "kwargs: %s\n", str(funcname(function))[:1000], str(args)[:1000], str(kwargs)[:1000], exc_info=True) response = error_message(e) else: response = { 'status': 'OK', 'result': dumps(result), } raise Return(response) @gen.coroutine def update_data(self, stream, data=None, report=True): data = valmap(loads, data) self.data.update(data) if report: response = yield self.center.add_keys(address=(self.ip, self.port), keys=list(data)) 
assert response == 'OK' info = { 'nbytes': {k: sizeof(v) for k, v in data.items()}, 'status': 'OK' } raise Return(info) @gen.coroutine def delete_data(self, stream, keys=None, report=True): for key in keys: if key in self.data: del self.data[key] logger.info("Deleted %d keys", len(keys)) if report: logger.debug("Reporting loss of keys to center") yield self.center.remove_keys(address=self.address, keys=list(keys)) raise Return('OK') def get_data(self, stream, keys=None): return {k: dumps(self.data[k]) for k in keys if k in self.data} def upload_file(self, stream, filename=None, data=None, load=True): out_filename = os.path.join(self.local_dir, filename) if isinstance(data, unicode): data = data.encode() with open(out_filename, 'wb') as f: f.write(data) f.flush() if load: try: name, ext = os.path.splitext(filename) if ext in ('.py', '.pyc'): logger.info("Reload module %s from .py file", name) name = name.split('-')[0] reload(import_module(name)) if ext == '.egg': sys.path.append(out_filename) pkgs = pkg_resources.find_distributions(out_filename) for pkg in pkgs: logger.info("Load module %s from egg", pkg.project_name) reload(import_module(pkg.project_name)) if not pkgs: logger.warning("Found no packages in egg file") except Exception as e: logger.exception(e) return {'status': 'error', 'exception': dumps(e)} return {'status': 'OK', 'nbytes': len(data)} def health(self, stream=None): """ Information about worker """ d = { 'active': len(self.active), 'stored': len(self.data), 'time': time() } try: import psutil mem = psutil.virtual_memory() d.update({ 'cpu': psutil.cpu_percent(), 'memory': mem.total, 'memory-percent': mem.percent }) try: net_io = psutil.net_io_counters() d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv except AttributeError: pass self._last_net_io = net_io try: disk_io = psutil.disk_io_counters() d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes except AttributeError: pass self._last_disk_io = disk_io except ImportError: pass return d
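The worker above uses its thread_tokens queue purely as a token pool: one token per core is pre-loaded, executor_submit takes a token before handing work to the ThreadPoolExecutor and returns it afterwards, which caps in-flight jobs at ncores. A self-contained sketch of that idea; slow_task and the sizes are illustrative, not taken from the original.

import time
from concurrent.futures import ThreadPoolExecutor
from tornado import gen
from tornado.ioloop import IOLoop
from tornado.queues import Queue

NCORES = 2
executor = ThreadPoolExecutor(NCORES)
thread_tokens = Queue()
for i in range(NCORES):
    thread_tokens.put_nowait(i)

def slow_task(x):
    time.sleep(0.1)
    return x * 2

@gen.coroutine
def executor_submit(func, *args):
    token = yield thread_tokens.get()  # waits here if all cores are busy
    try:
        result = yield IOLoop.current().run_in_executor(executor, func, *args)
    finally:
        thread_tokens.put_nowait(token)  # hand the core back
    raise gen.Return(result)

@gen.coroutine
def main():
    results = yield [executor_submit(slow_task, i) for i in range(5)]
    print(results)

IOLoop.current().run_sync(main)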
class Scheduler(): """ (public) Scheduler class. """ __slots__ = [ 'unit', 'current_time', 'callback_process', 'data_in_queue', 'data_in_heap', 'tasks_queue', 'lock' ] def __init__(self, unit_in_seconds, callback_process): """ Initialize a scheduler. :param unit_in_seconds: number of seconds to wait for each step. :param callback_process: callback to call on every task. Signature: task_callback(task.data) -> bool If callback return True, task is considered done and is removed from scheduler. Otherwise, task is rescheduled for another delay. """ assert isinstance(unit_in_seconds, int) and unit_in_seconds > 0 assert callable(callback_process) self.unit = unit_in_seconds self.current_time = 0 self.callback_process = callback_process self.data_in_heap = PriorityDict() # data => Deadline self.data_in_queue = { } # type: dict{object, _Task} # data => associated Task in queue self.tasks_queue = Queue() # Lock to modify this object safely inside one Tornado thread: # http://www.tornadoweb.org/en/stable/locks.html self.lock = Lock() def _enqueue(self, task): """ Put a task in queue of tasks to process now. """ self.data_in_queue[task.data] = task self.tasks_queue.put_nowait(task) @gen.coroutine def has_data(self, data): """ Return True if given data is associated to any task. """ with (yield self.lock.acquire()): return data in self.data_in_heap or data in self.data_in_queue @gen.coroutine def get_info(self, data): """ Return info about scheduling for given data, or None if data is not found. """ with (yield self.lock.acquire()): deadline = None # type: _Deadline if data in self.data_in_heap: deadline = self.data_in_heap[data] if data in self.data_in_queue: deadline = self.data_in_queue[data].deadline if deadline: return SchedulerEvent(time_unit=self.unit, time_added=deadline.start_time, delay=deadline.delay, current_time=self.current_time) return None @gen.coroutine def add_data(self, data, nb_units_to_wait): """ Add data with a non-null deadline. For null deadlines, use no_wait(). :param data: data to add :param nb_units_to_wait: time to wait (in number of units) """ if not isinstance(nb_units_to_wait, int) or nb_units_to_wait <= 0: raise exceptions.NaturalIntegerNotNullException() with (yield self.lock.acquire()): if data in self.data_in_heap or data in self.data_in_queue: raise exceptions.AlreadyScheduledException() # Add task to scheduler. self.data_in_heap[data] = _Deadline(self.current_time, nb_units_to_wait) @gen.coroutine def no_wait(self, data, nb_units_to_wait, processing_validator): """ Add a data to be processed the sooner. :param data: data to add :param nb_units_to_wait: time to wait (in number of units) for data tasks after first task is executed. If null (0), data is processed once (first time) and then dropped. :param processing_validator: validator used to check if data can still be processed for the first time. See documentation of class _ImmediateTask for more details. """ if not isinstance(nb_units_to_wait, int) or nb_units_to_wait < 0: raise exceptions.NaturalIntegerException() with (yield self.lock.acquire()): if data in self.data_in_heap: # Move data from heap to queue with new delay. del self.data_in_heap[data] self._enqueue( _ImmediateTask(data, nb_units_to_wait, processing_validator)) elif data in self.data_in_queue: # Change delay for future scheduling. self.data_in_queue[data].update_delay(nb_units_to_wait) else: # Add data to queue. 
self._enqueue( _ImmediateTask(data, nb_units_to_wait, processing_validator)) @gen.coroutine def remove_data(self, data): """ Remove a data (and all associated tasks) from scheduler. """ with (yield self.lock.acquire()): if data in self.data_in_heap: del self.data_in_heap[data] elif data in self.data_in_queue: # Remove task from data_in_queue and invalidate it in queue. self.data_in_queue.pop(data).valid = False @gen.coroutine def _step(self): """ Compute a step (check and enqueue tasks to run now) in scheduler. """ with (yield self.lock.acquire()): self.current_time += 1 while self.data_in_heap: deadline, data = self.data_in_heap.smallest() if deadline.deadline > self.current_time: break del self.data_in_heap[data] self._enqueue(_Task(data, deadline)) @gen.coroutine def schedule(self): """ Main scheduler method (callback to register in ioloop). Wait for unit seconds and run tasks after each wait time. """ while True: yield gen.sleep(self.unit) yield self._step() @gen.coroutine def process_tasks(self): """ Main task processing method (callback to register in ioloop). Consume and process tasks in queue and reschedule processed tasks when relevant. A task is processed if associated data was not removed from scheduler. A task is rescheduled if the processing callback returns False (True meaning `task definitively done`) AND if task deadline is not null. """ while True: task = yield self.tasks_queue.get() # type: _Task try: if task.valid and (not isinstance(task, _ImmediateTask) or task.can_still_process()): if gen.is_coroutine_function(self.callback_process): remove_data = yield self.callback_process(task.data) else: remove_data = self.callback_process(task.data) remove_data = remove_data or not task.deadline.delay with (yield self.lock.acquire()): del self.data_in_queue[task.data] if not remove_data: self.data_in_heap[task.data] = _Deadline( self.current_time, task.deadline.delay) finally: self.tasks_queue.task_done()
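The two coroutines above are meant to run side by side on the IOLoop: schedule() ticks the clock and moves due items onto tasks_queue, while process_tasks() drains that queue and decides whether to reschedule. A much simplified, runnable stand-in for that producer/consumer pairing (a plain dict of due ticks replaces the PriorityDict, and the tick length is shortened):

from tornado import gen, locks
from tornado.ioloop import IOLoop
from tornado.queues import Queue

tasks_queue = Queue()
lock = locks.Lock()
deadlines = {"a": 1, "b": 2}  # data -> tick at which it becomes due
current_time = 0

@gen.coroutine
def schedule():
    # Tick once per unit and enqueue whatever has become due.
    global current_time
    while deadlines:
        yield gen.sleep(0.1)
        with (yield lock.acquire()):
            current_time += 1
            for data, due in list(deadlines.items()):
                if due <= current_time:
                    del deadlines[data]
                    tasks_queue.put_nowait(data)
    tasks_queue.put_nowait(None)  # nothing left to schedule

@gen.coroutine
def process_tasks():
    while True:
        data = yield tasks_queue.get()
        try:
            if data is None:
                return
            print("processing", data, "at tick", current_time)
        finally:
            tasks_queue.task_done()

IOLoop.current().run_sync(lambda: gen.multi([schedule(), process_tasks()]))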
async def peer_worker(self, peers: Set[str], peers_to_check: Queue, peers_to_check_set: Set[str], attempted_contact: Set[str], results: List[bool]): client = AsyncHTTPClient() async for peer in peers_to_check: if peer is None: # Exit signal return if (peer in self.last_errored and datetime.now().timestamp() - self.last_errored[peer] < LAST_ERRORED_CACHE_TIME): # Avoid repetitively hitting dead nodes print( f"[{SERVICE_NAME} {datetime.now()}] Skipping dead peer {peer}", flush=True) peers_to_check_set.remove(peer) peers_to_check.task_done() continue if peer in attempted_contact: peers_to_check_set.remove(peer) peers_to_check.task_done() continue if peer in self.contacting: print( f"[{SERVICE_NAME} {datetime.now()}] Avoiding race on peer {peer}", flush=True) # TODO: Do we call task_done() here? continue self.contacting.add(peer) print(f"[{SERVICE_NAME} {datetime.now()}] Contacting peer {peer}", flush=True) peer_peers: List[str] = [] try: # TODO: Combine requests? # Notify peer of current node's existence, OIDC realm, and peer list await peer_fetch( client=client, peer=peer, path_fragment= "api/federation/peers", # TODO: This should probably be parametrized request_body=json.dumps({ "peers": list(peers), "self": CHORD_URL, "oidc_discovery_uri": OIDC_DISCOVERY_URI, })) # Fetch the peer's peer list r = await peer_fetch(client=client, peer=peer, path_fragment="api/federation/peers", method="GET") # If a non-200 response is encountered, an error is raised self.connected_to_peer_network = True peer_peers = r["peers"] except IndexError: print( f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Invalid 200 response returned by {peer}.", flush=True, file=sys.stderr) except (HTTPError, ValueError) as e: # HTTPError: Standard 400s/500s # ValueError: ex. Unsupported url scheme: api/federation/peers now = datetime.now() print( f"[{SERVICE_NAME} {now}] [ERROR] Peer contact error for {peer} ({str(e)})", flush=True, file=sys.stderr) self.last_errored[peer] = now.timestamp() # Incorporate the peer's peer list into the current set of peers peers = peers.union(peer_peers) # Search for new peers, and if they exist add them to the queue containing peers to verify new_peer = False for p in peer_peers: if p not in peers_to_check_set and p not in self.contacting and p not in attempted_contact: new_peer = True peers_to_check.put_nowait(p) peers_to_check_set.add(p) results.append(new_peer) attempted_contact.add(peer) self.contacting.remove(peer) peers_to_check_set.remove(peer) peers_to_check.task_done()
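The coroutine above is one of several workers draining a shared peers_to_check queue: a None entry is the exit signal, every branch has to call task_done(), and newly discovered peers are pushed back onto the same queue so the crawl keeps going until join() settles. A compact, runnable sketch of that shape, with fetch_peers_of and the KNOWN topology standing in for the real peer_fetch calls:

import asyncio
from tornado.queues import Queue

# Hypothetical static topology standing in for real peer responses.
KNOWN = {"a": ["b", "c"], "b": ["c"], "c": []}

async def fetch_peers_of(peer):
    await asyncio.sleep(0)  # pretend this is an HTTP round trip
    return KNOWN.get(peer, [])

async def peer_worker(queue, seen, attempted):
    while True:
        peer = await queue.get()
        try:
            if peer is None:  # exit signal
                return
            if peer in attempted:
                seen.discard(peer)
                continue
            attempted.add(peer)
            for p in await fetch_peers_of(peer):
                if p not in seen:  # enqueue newly discovered peers
                    seen.add(p)
                    queue.put_nowait(p)
            seen.discard(peer)
        finally:
            queue.task_done()

async def main(workers=2):
    queue, seen, attempted = Queue(), {"a"}, set()
    queue.put_nowait("a")
    tasks = [asyncio.ensure_future(peer_worker(queue, seen, attempted))
             for _ in range(workers)]
    await queue.join()  # wait until every queued peer has been handled
    for _ in range(workers):
        queue.put_nowait(None)  # then release the workers
    await asyncio.gather(*tasks)
    print("contacted:", sorted(attempted))

asyncio.run(main())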
class Application(object): def __init__(self, routes, node, pipe): """ Application instantiates and registers handlers for each message type, and routes messages to the pre-instantiated instances of each message handler :param routes: list of tuples in the form of (<message type str>, <MessageHandler class>) :param node: Node instance of the local node :param pipe: Instance of multiprocessing.Pipe for communicating with the parent process """ # We don't really have to worry about synchronization # so long as we're careful about explicit context switching self.nodes = {node.node_id: node} self.local_node = node self.handlers = {} self.tcpclient = TCPClient() self.gossip_inbox = Queue() self.gossip_outbox = Queue() self.sequence_number = 0 if routes: self.add_handlers(routes) self.pipe = pipe self.ioloop = IOLoop.current() self.add_node_event = Event() def next_sequence_number(self): self.sequence_number += 1 return self.sequence_number @coroutine def ping_random_node(self): node = yield self.get_random_node() LOGGER.debug('{} pinging random node: {}'.format( self.local_node.node_id, node.node_id)) try: yield self.ping(node) except TimeoutError: self.mark_suspect(node) @coroutine def add_node(self, node): if node.node_id not in self.nodes: LOGGER.debug('Adding node {} to {}'.format(node, self.nodes)) self.add_node_event.set() self.nodes[node.node_id] = node LOGGER.debug('Added node {} to {}'.format(node, self.nodes)) @coroutine def remove_node(self, node): if node.node_id in self.nodes: del self.nodes[node.node_id] other_nodes = yield self.get_other_nodes if not other_nodes: self.add_node_event.clear() def add_handlers(self, handlers): for message_type, handler_cls in handlers: assert message_type in MESSAGE_TYPES, ( 'Message type {!r} not found in MESSAGE TYPES {}'.format( message_type, MESSAGE_TYPES.keys())) self.handlers[message_type] = handler_cls(self) def route_stream_message(self, stream, message_type, message): LOGGER.debug('{!r} received {} message from {!r}'.format( self, message_type, stream)) message_cls = MESSAGE_TYPES[message_type] message_obj = message_cls(**message) handler = self.handlers[message_type] LOGGER.debug('Routing {} to {}'.format(message_type, handler)) handler(stream, message_obj) @coroutine def send_message(self, stream, message): LOGGER.debug('Sending message {!r} to {}'.format( message.MESSAGE_TYPE, stream)) try: yield stream.write(message.to_msgpack) except StreamClosedError: LOGGER.warn('Unable to send {} to {} - stream closed'.format( message.MESSAGE_TYPE, stream)) @coroutine def _get_next_message(self, stream): # get the next message from the stream unpacker = msgpack.Unpacker() try: wire_bytes = yield with_timeout( datetime.timedelta(seconds=PING_TIMEOUT), stream.read_bytes(4096, partial=True)) except StreamClosedError: LOGGER.warn( 'Unable to get next message from {} - stream closed'.format( stream)) else: unpacker.feed(wire_bytes) LOGGER.debug('Deserializing object from stream {}'.format(stream)) message = unpacker.next() message.pop('type') raise Return(message) @coroutine def ping(self, node): """ Ping a node :param node: Instance of Node to ping :returns: Boolean, True if successful/False if fail """ host = node.addr port = node.port LOGGER.debug('pinging {}:{}'.format(host, port)) ping = Ping(seqno=self.next_sequence_number(), node=node, sender=self.local_node) # Connect to the node try: stream = yield self.tcpclient.connect(host, port) except StreamClosedError: LOGGER.error( 'Unable to connect from {} to {} (pinging host)'.format( 
self.local_node.node_id, node.node_id)) raise Return(False) try: # Send the ping LOGGER.debug('Sending {!r} to {!r}'.format(ping.MESSAGE_TYPE, node)) yield self.send_message(stream, ping) # Wait for an ACK message in response LOGGER.debug('Getting next message from {}:{}'.format(host, port)) message = yield self._get_next_message(stream) if message is None: raise Return(False) ack = Ack(**message) LOGGER.debug('Received {!r} from {!r} (response to {!r})'.format( ack.MESSAGE_TYPE, node.node_id, ping.MESSAGE_TYPE)) # Check that the ACK sequence number matches the PING sequence number if ack.seqno == ping.seqno: LOGGER.debug( 'Sequence number matches. Node {} looks good to !'.format( node.node_id, self.local_node.node_id)) # Process the gossip messages tacked onto the ACK message's payload for message in ack.payload: try: self.gossip_inbox.put_nowait(message) except QueueFull: LOGGER.error( 'Unable to add {} message from {} to gossip inbox'. format(message.MESSAGE_TYPE, node.node_id)) # mark the node as ALIVE in self.nodes self.mark_alive(node) # Send gossip that this node is alive self.queue_gossip_send(Alive(node=node, sender=self.local_node)) raise Return(True) else: raise Return(False) finally: stream.close() @coroutine def ack(self, stream, seqno): payload = [] for _ in xrange(ACK_PAYLOAD_SIZE): try: gossip = self.gossip_outbox.get_nowait() payload.append(gossip) except QueueEmpty: break ack = Ack(seqno=seqno, payload=payload) LOGGER.debug('Trying to send ack: {}'.format(ack)) try: yield stream.write(ack.to_msgpack) except StreamClosedError: LOGGER.error( 'Unable to connect from {} to stream (acking PING)'.format( self.local_node.node_id)) LOGGER.debug('Sent ack to {}'.format(stream)) @coroutine def _change_node_state(self, node, state): """ Because Tornado has explicit context switching, we don't need to worry much about synchronization here """ LOGGER.debug('{} knows about {}: {}'.format(self.local_node.node_id, node.node_id, state)) self.add_node(node) self.nodes[node.node_id].state = state @coroutine def mark_alive(self, node): if node.node_id != self.local_node.node_id: LOGGER.debug('Marking {} ALIVE'.format(node.node_id)) self._change_node_state(node, State.ALIVE) @coroutine def mark_dead(self, node): self._change_node_state(node, State.DEAD) @coroutine def mark_suspect(self, node): self._change_node_state(node, State.SUSPECT) @coroutine def ingest_gossip_inbox(self): while True: LOGGER.debug('checking inbox') message = yield self.gossip_inbox.get() LOGGER.debug('Received message {} from gossip inbox'.format( message.MESSAGE_TYPE)) if message.MESSAGE_TYPE == Alive.MESSAGE_TYPE: self.mark_alive(message.sender) self.mark_alive(message.node) self.queue_gossip_send(message) elif message.MESSAGE_TYPE == Suspect.MESSAGE_TYPE: self.mark_alive(message.sender) self.mark_suspect(message.node) self.queue_gossip_send(message) elif message.MESSAGE_TYPE == Dead.MESSAGE_TYPE: self.mark_alive(message.sender) self.mark_dead(message.node) self.queue_gossip_send(message) @coroutine def queue_gossip_send(self, message): """ If the message is gossipable, add it to the outbox """ try: next_incarnation = message.next_incarnation next_incarnation.sender = self.local_node except message.MaxIncarnationsReached: LOGGER.debug( 'Max incarnations reached for {}! 
No gossip 4 u'.format( message.MESSAGE_TYPE)) else: LOGGER.debug('Enqueuing {} gossips for {}'.format( GOSSIP_PEERS, message)) for _ in xrange(GOSSIP_PEERS): yield self.gossip_outbox.put(next_incarnation) @coroutine def send_buffered_gossip(self): while True: random_node = yield self.get_random_node() message = yield self.gossip_outbox.get() LOGGER.debug('{} connecting to {} for gossip'.format( self.local_node, random_node)) try: stream = yield self.tcpclient.connect(random_node.addr, random_node.port) except StreamClosedError: LOGGER.error( 'Unable to connect from {} to {} (sending gossip)'.format( self.local_node.node_id, random_node.node_id)) LOGGER.warning('Putting the gossip back on our queue') try: self.gossip_outbox.put_nowait(message) except QueueFull: LOGGER.error( 'Unable to put gossip back onto the queue. Giving up!') else: try: LOGGER.debug('{} gossipping with {}'.format( self.local_node.node_id, random_node.node_id)) yield self.send_message(stream, message) finally: stream.close() @coroutine def get_other_nodes(self, exclude=None): if exclude is None: exclude = (self.local_node, ) exclude_node_ids = [n.node_id for n in exclude] raise Return([n for n in self.nodes if n not in exclude_node_ids]) @coroutine def get_random_node(self, exclude=None): LOGGER.debug('Waiting for more nodes') yield self.add_node_event.wait() LOGGER.debug('Getting non-self random node') other_nodes = yield self.get_other_nodes(exclude=exclude) LOGGER.debug('{} got something! choices: {}'.format( self.local_node.node_id, other_nodes)) assert other_nodes node_id = random.choice(other_nodes) raise Return(self.nodes[node_id])
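Two small Queue idioms recur in the gossip code above: building an ACK payload by draining up to ACK_PAYLOAD_SIZE buffered messages non-blockingly with get_nowait()/QueueEmpty, and best-effort enqueueing with put_nowait() while tolerating QueueFull. A minimal illustration with made-up sizes and string messages:

from tornado.queues import Queue, QueueEmpty, QueueFull

ACK_PAYLOAD_SIZE = 3
gossip_outbox = Queue(maxsize=5)

def queue_gossip(message):
    # Best-effort enqueue: drop the gossip if the outbox is full.
    try:
        gossip_outbox.put_nowait(message)
    except QueueFull:
        print("outbox full, dropping", message)

def build_ack_payload():
    # Piggy-back up to ACK_PAYLOAD_SIZE buffered gossips on an ACK.
    payload = []
    for _ in range(ACK_PAYLOAD_SIZE):
        try:
            payload.append(gossip_outbox.get_nowait())
        except QueueEmpty:
            break
    return payload

for i in range(7):
    queue_gossip("gossip-%d" % i)
print(build_ack_payload())  # ['gossip-0', 'gossip-1', 'gossip-2']
print(build_ack_payload())  # ['gossip-3', 'gossip-4']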
async def post(self): data_type_queries, join_query, exclude_from_auto_join = get_query_parts( self.request.body) if not data_type_queries: self.set_status(400) self.write( bad_request_error( "Invalid request format (missing body or data_type_queries)" )) return results = [] auth_header = get_auth_header(self.request.headers) try: # Try compiling each query to make sure it works. Any exceptions thrown will get caught below. test_queries(data_type_queries.values()) client = AsyncHTTPClient() # TODO: Handle pagination # TODO: Why fetch projects instead of datasets? Is it to avoid "orphan" datasets? Is that even possible? # Use Unix socket resolver projects = await peer_fetch(client, CHORD_URL, "api/metadata/api/projects", method="GET", auth_header=auth_header, extra_headers=DATASET_SEARCH_HEADERS) datasets_dict: Dict[str, dict] = { d["identifier"]: d for p in projects["results"] for d in p["datasets"] } dataset_objects_dict: Dict[str, Dict[str, list]] = { d: {} for d in datasets_dict } dataset_object_schema = {"type": "object", "properties": {}} dataset_join_queries: Dict[str, Query] = { d: None for d in datasets_dict } dataset_queue = Queue() for dataset in datasets_dict.values(): dataset_queue.put_nowait(dataset) # Spawn workers to handle asynchronous requests to various datasets search_workers = tornado.gen.multi([ self.search_worker( dataset_queue, dataset_object_schema, join_query, data_type_queries, exclude_from_auto_join, auth_header, dataset_objects_dict, dataset_join_queries, ) for _ in range(WORKERS) ]) await dataset_queue.join() print( f"[{SERVICE_NAME} {datetime.now()}] Done fetching individual service search results.", flush=True) # Aggregate datasets into results list if they satisfy the queries for dataset_id, dataset_results in dataset_objects_dict.items( ): # TODO: Worker results.extend( process_dataset_results(data_type_queries, dataset_join_queries[dataset_id], dataset_results, datasets_dict[dataset_id], dataset_object_schema, include_internal_data=False)) self.write({"results": results}) await self.finish() # Trigger exit for all search workers for _ in range(WORKERS): dataset_queue.put_nowait(None) # Wait for workers to exit await search_workers except HTTPError as e: # Metadata service error # TODO: Better message print( f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Error from service: {str(e)}", file=sys.stderr, flush=True) self.set_status(500) self.write(internal_server_error(f"Error from service: {str(e)}")) except (TypeError, ValueError, SyntaxError) as e: # errors from query processing # TODO: Better / more compliant error message # TODO: Move these up? # TODO: Not guaranteed to be actually query-processing errors self.set_status(400) self.write(bad_request_error( f"Query processing error: {str(e)}")) # TODO: Better message print( f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Encountered query processing error: {str(e)}", file=sys.stderr, flush=True) traceback.print_exc()
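Note the ordering in the handler above: the search workers are wrapped in tornado.gen.multi(...) right away so they start consuming, but that aggregate future is only awaited after dataset_queue.join() has confirmed every dataset was processed and the None exit signals have been queued. A tiny sketch of that start-now, await-later shape, with a toy worker in place of search_worker:

import asyncio
from tornado import gen
from tornado.queues import Queue

async def worker(q, out):
    while True:
        item = await q.get()
        try:
            if item is None:
                return
            out.append(item * 10)
        finally:
            q.task_done()

async def main(n_workers=3):
    q, out = Queue(), []
    for i in range(5):
        q.put_nowait(i)
    # gen.multi schedules the coroutines immediately and returns one future.
    workers = gen.multi([worker(q, out) for _ in range(n_workers)])
    await q.join()  # all real items handled
    for _ in range(n_workers):
        q.put_nowait(None)  # now tell the workers to exit
    await workers
    print(sorted(out))

asyncio.run(main())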
class Executor(object): """ Distributed executor with data dependencies This executor resembles executors in concurrent.futures but also allows Futures within submit/map calls. Provide center address on initialization >>> executor = Executor(('127.0.0.1', 8787)) # doctest: +SKIP Use ``submit`` method like normal >>> a = executor.submit(add, 1, 2) # doctest: +SKIP >>> b = executor.submit(add, 10, 20) # doctest: +SKIP Additionally, provide results of submit calls (futures) to further submit calls: >>> c = executor.submit(add, a, b) # doctest: +SKIP This allows for the dynamic creation of complex dependencies. """ def __init__(self, center, start=True, delete_batch_time=1): self.center = coerce_to_rpc(center) self.futures = dict() self.refcount = defaultdict(lambda: 0) self.dask = dict() self.restrictions = dict() self.loop = IOLoop() self.report_queue = Queue() self.scheduler_queue = Queue() self._shutdown_event = Event() self._delete_batch_time = delete_batch_time if start: self.start() def start(self): """ Start scheduler running in separate thread """ from threading import Thread self.loop.add_callback(self._go) self._loop_thread = Thread(target=self.loop.start) self._loop_thread.start() def __enter__(self): if not self.loop._running: self.start() return self def __exit__(self, type, value, traceback): self.shutdown() def _inc_ref(self, key): self.refcount[key] += 1 def _dec_ref(self, key): self.refcount[key] -= 1 if self.refcount[key] == 0: del self.refcount[key] self._release_key(key) def _release_key(self, key): """ Release key from distributed memory """ self.futures[key]['event'].clear() logger.debug("Release key %s", key) del self.futures[key] self.scheduler_queue.put_nowait({'op': 'release-held-data', 'key': key}) @gen.coroutine def report(self): """ Listen to scheduler """ while True: msg = yield self.report_queue.get() if msg['op'] == 'close': break if msg['op'] == 'task-finished': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'finished' self.futures[msg['key']]['event'].set() if msg['op'] == 'lost-data': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'lost' self.futures[msg['key']]['event'].clear() if msg['op'] == 'task-erred': if msg['key'] in self.futures: self.futures[msg['key']]['status'] = 'error' self.futures[msg['key']]['event'].set() @gen.coroutine def _shutdown(self): """ Send shutdown signal and wait until _go completes """ self.report_queue.put_nowait({'op': 'close'}) self.scheduler_queue.put_nowait({'op': 'close'}) yield self._shutdown_event.wait() def shutdown(self): """ Send shutdown signal and wait until scheduler terminates """ self.report_queue.put_nowait({'op': 'close'}) self.scheduler_queue.put_nowait({'op': 'close'}) self.loop.stop() self._loop_thread.join() @gen.coroutine def _go(self): """ Setup and run all other coroutines. Block until finished. 
""" self.who_has, self.has_what, self.ncores = yield [self.center.who_has(), self.center.has_what(), self.center.ncores()] self.waiting = {} self.processing = {} self.stacks = {} worker_queues = {worker: Queue() for worker in self.ncores} delete_queue = Queue() coroutines = ([ self.report(), scheduler(self.scheduler_queue, self.report_queue, worker_queues, delete_queue, self.who_has, self.has_what, self.ncores, self.dask, self.restrictions, self.waiting, self.stacks, self.processing), delete(self.scheduler_queue, delete_queue, self.center.ip, self.center.port, self._delete_batch_time)] + [worker(self.scheduler_queue, worker_queues[w], w, n) for w, n in self.ncores.items()]) results = yield All(coroutines) self._shutdown_event.set() def submit(self, func, *args, **kwargs): """ Submit a function application to the scheduler Parameters ---------- func: callable *args: **kwargs: pure: bool (defaults to True) Whether or not the function is pure. Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. Leave empty to default to all workers (common case) Examples -------- >>> c = executor.submit(add, a, b) # doctest: +SKIP Returns ------- Future See Also -------- distributed.executor.Executor.submit: """ if not callable(func): raise TypeError("First input to submit must be a callable function") key = kwargs.pop('key', None) pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) if key is None: if pure: key = funcname(func) + '-' + tokenize(func, kwargs, *args) else: key = funcname(func) + '-' + next(tokens) if key in self.futures: return Future(key, self) if kwargs: task = (apply, func, args, kwargs) else: task = (func,) + args if workers is not None: restrictions = {key: workers} else: restrictions = {} if key not in self.futures: self.futures[key] = {'event': Event(), 'status': 'waiting'} logger.debug("Submit %s(...), %s", funcname(func), key) self.scheduler_queue.put_nowait({'op': 'update-graph', 'dsk': {key: task}, 'keys': [key], 'restrictions': restrictions}) return Future(key, self) def map(self, func, *iterables, **kwargs): """ Map a function on a sequence of arguments Arguments can be normal objects or Futures Parameters ---------- func: callable iterables: Iterables pure: bool (defaults to True) Whether or not the function is pure. Set ``pure=False`` for impure functions like ``np.random.random``. workers: set, iterable of sets A set of worker hostnames on which computations may be performed. 
Leave empty to default to all workers (common case) Examples -------- >>> L = executor.map(func, sequence) # doctest: +SKIP Returns ------- list of futures See also -------- distributed.executor.Executor.submit """ pure = kwargs.pop('pure', True) workers = kwargs.pop('workers', None) if not callable(func): raise TypeError("First input to map must be a callable function") iterables = [list(it) for it in iterables] if pure: keys = [funcname(func) + '-' + tokenize(func, kwargs, *args) for args in zip(*iterables)] else: uid = str(uuid.uuid4()) keys = [funcname(func) + '-' + uid + '-' + next(tokens) for i in range(min(map(len, iterables)))] if not kwargs: dsk = {key: (func,) + args for key, args in zip(keys, zip(*iterables))} else: dsk = {key: (apply, func, args, kwargs) for key, args in zip(keys, zip(*iterables))} for key in dsk: if key not in self.futures: self.futures[key] = {'event': Event(), 'status': 'waiting'} if isinstance(workers, (list, set)): if workers and isinstance(first(workers), (list, set)): if len(workers) != len(keys): raise ValueError("You only provided %d worker restrictions" " for a sequence of length %d" % (len(workers), len(keys))) restrictions = dict(zip(keys, workers)) else: restrictions = {key: workers for key in keys} elif workers is None: restrictions = {} else: raise TypeError("Workers must be a list or set of workers or None") logger.debug("map(%s, ...)", funcname(func)) self.scheduler_queue.put_nowait({'op': 'update-graph', 'dsk': dsk, 'keys': keys, 'restrictions': restrictions}) return [Future(key, self) for key in keys] @gen.coroutine def _gather(self, futures): futures2, keys = unpack_remotedata(futures) keys = list(keys) while True: yield All([self.futures[key]['event'].wait() for key in keys]) try: data = yield _gather(self.center, keys) except KeyError as e: self.scheduler_queue.put_nowait({'op': 'missing-data', 'missing': e.args}) for key in e.args: self.futures[key]['event'].clear() else: break data = dict(zip(keys, data)) result = pack_data(futures2, data) raise gen.Return(result) def gather(self, futures): """ Gather futures from distributed memory Accepts a future or any nested core container of futures Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> x = e.submit(add, 1, 2) # doctest: +SKIP >>> e.gather(x) # doctest: +SKIP 3 >>> e.gather([x, [x], x]) # doctest: +SKIP [3, [3], 3] """ return sync(self.loop, self._gather, futures) @gen.coroutine def _get(self, dsk, keys, restrictions=None): flatkeys = list(flatten(keys)) for key in flatkeys: if key not in self.futures: self.futures[key] = {'event': Event(), 'status': None} futures = {key: Future(key, self) for key in flatkeys} self.scheduler_queue.put_nowait({'op': 'update-graph', 'dsk': dsk, 'keys': flatkeys, 'restrictions': restrictions or {}}) packed = pack_data(keys, futures) result = yield self._gather(packed) raise gen.Return(result) def get(self, dsk, keys, **kwargs): """ Gather futures from distributed memory Parameters ---------- dsk: dict keys: object, or nested lists of objects restrictions: dict (optional) A mapping of {key: {set of worker hostnames}} that restricts where jobs can take place Examples -------- >>> from operator import add # doctest: +SKIP >>> e = Executor('127.0.0.1:8787') # doctest: +SKIP >>> e.get({'x': (add, 1, 2)}, 'x') # doctest: +SKIP 3 """ return sync(self.loop, self._get, dsk, keys, **kwargs)
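# Usage sketch for the Executor above, assuming a center is already listening
# on 127.0.0.1:8787; `add` is just operator.add, nothing distributed-specific.
from operator import add

executor = Executor(('127.0.0.1', 8787))
a = executor.submit(add, 1, 2)     # Future for add(1, 2)
b = executor.submit(add, a, 10)    # futures can be passed as arguments
print(executor.gather(b))          # blocks until the result arrives -> 13
executor.shutdown()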
class CommandTopic(object): def __init__(self, topic_name, maxsize=10): self.topic_name = topic_name self.q_maxsize = maxsize self.q = Queue(maxsize=maxsize) self.sem = BoundedSemaphoreWithValue(1) self.mid_gen = _MidGenerator() self.gc_flag = False self.io_loop = ioloop.IOLoop.current() def issue(self, ft, cmd, block, timeout): if 'mid' not in cmd: cmd['mid'] = next(self.mid_gen) try: h_timeout = self.io_loop.call_later(timeout, self.cmd_timeout, ft, cmd) ft.add_done_callback( lambda _: self.io_loop.remove_timeout(h_timeout)) # [ft, cmd, block] self.q.put_nowait([ft, cmd, block]) except Exception as e: ft.set_exception(e) return ft def cmd_timeout(self, ft, cmd): ft.set_exception( gen.TimeoutError( 'Timeout in waiting response of command: {}'.format(str(cmd)))) @gen.coroutine def start_cmd(self): yield self.sem.acquire() item = yield self.q.get() if not item: self.sem.release() raise gen.Return(None) self.queue_item_under_process = item ft, cmd, block = item # a special case should be handled: # the network is very slow so the future timed out before semaphore acquirement is done if ft.done(): raise gen.Return(None) ft.add_done_callback(lambda _: self.sem.release()) if not block: # notification type cmd, dont wait response ft.set_result(None) raise gen.Return(cmd) def finish_cmd(self, mid, resp): ft, cmd, block = self.queue_item_under_process if not ft.done() and mid == cmd['mid']: ft.set_result(resp) def start_listen(self): if self.sem.value() == 0: self.q.put(None) self.gc_flag = False def stop_listen(self): if hasattr(self, 'pending_item'): ft, cmd, block = self.queue_item_under_process if not ft.done(): ft.set_result(None) self.q.put(None) self.gc_flag = True def is_marked_gc(self): return self.gc_flag
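# Sketch of the issue-with-timeout pattern used by CommandTopic above, in
# plain Tornado; `send` is a hypothetical stand-in for the real transport.
from tornado import gen
from tornado.concurrent import Future
from tornado.ioloop import IOLoop

def issue(send, cmd, timeout):
    ft = Future()
    loop = IOLoop.current()

    def on_timeout():
        if not ft.done():
            ft.set_exception(gen.TimeoutError('no response to %r' % (cmd,)))

    handle = loop.call_later(timeout, on_timeout)
    # Cancel the timer as soon as the future resolves, whichever way.
    ft.add_done_callback(lambda _: loop.remove_timeout(handle))
    send(cmd, ft)   # the responder eventually calls ft.set_result(response)
    return ft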
class BatchedStream(object): """ Mostly obsolete, see BatchedSend """ def __init__(self, stream, interval): self.stream = stream self.interval = interval / 1000. self.last_transmission = default_timer() self.send_q = Queue() self.recv_q = Queue() self._background_send_coroutine = self._background_send() self._background_recv_coroutine = self._background_recv() self._broken = None self.pc = PeriodicCallback(lambda: None, 100) self.pc.start() @gen.coroutine def _background_send(self): with log_errors(): while True: msg = yield self.send_q.get() if msg == 'close': break msgs = [msg] now = default_timer() wait_time = self.last_transmission + self.interval - now if wait_time > 0: yield gen.sleep(wait_time) while not self.send_q.empty(): msgs.append(self.send_q.get_nowait()) try: yield write(self.stream, msgs) except StreamClosedError: self.recv_q.put_nowait('close') self._broken = True break if len(msgs) > 1: logger.debug("Batched messages: %d", len(msgs)) for _ in msgs: self.send_q.task_done() @gen.coroutine def _background_recv(self): with log_errors(): while True: try: msgs = yield read(self.stream) except StreamClosedError: self.recv_q.put_nowait('close') self.send_q.put_nowait('close') self._broken = True break assert isinstance(msgs, list) if len(msgs) > 1: logger.debug("Batched messages: %d", len(msgs)) for msg in msgs: self.recv_q.put_nowait(msg) @gen.coroutine def flush(self): yield self.send_q.join() @gen.coroutine def send(self, msg): if self._broken: raise StreamClosedError('Batch Stream is Closed') else: self.send_q.put_nowait(msg) @gen.coroutine def recv(self): result = yield self.recv_q.get() if result == 'close': raise StreamClosedError('Batched Stream is Closed') else: raise gen.Return(result) @gen.coroutine def close(self): yield self.flush() raise gen.Return(self.stream.close()) def closed(self): return self.stream.closed()
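# Minimal sketch of the batching loop in BatchedStream above: wait for one
# message, sleep out the rest of the interval, then drain whatever else has
# queued up; `write` is a stand-in for the real stream writer coroutine.
from timeit import default_timer
from tornado import gen
from tornado.queues import Queue

send_q = Queue()

@gen.coroutine
def batched_send(write, interval):
    last = default_timer()
    while True:
        msg = yield send_q.get()
        wait = last + interval - default_timer()
        if wait > 0:
            yield gen.sleep(wait)      # let more messages accumulate
        msgs = [msg]
        while not send_q.empty():
            msgs.append(send_q.get_nowait())
        yield write(msgs)              # one write for the whole batch
        last = default_timer()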
class TornadoTransmission(): def __init__(self, max_concurrent_batches=10, block_on_send=False, block_on_response=False, max_batch_size=100, send_frequency=0.25, user_agent_addition=''): if not has_tornado: raise ImportError('TornadoTransmission requires tornado, but it was not found.') self.block_on_send = block_on_send self.block_on_response = block_on_response self.max_batch_size = max_batch_size self.send_frequency = send_frequency user_agent = "libhoney-py/" + VERSION if user_agent_addition: user_agent += " " + user_agent_addition self.http_client = AsyncHTTPClient( force_instance=True, defaults=dict(user_agent=user_agent)) # libhoney adds events to the pending queue for us to send self.pending = Queue(maxsize=1000) # we hand back responses from the API on the responses queue self.responses = Queue(maxsize=2000) self.batch_data = {} self.sd = statsd.StatsClient(prefix="libhoney") self.batch_sem = Semaphore(max_concurrent_batches) def start(self): ioloop.IOLoop.current().spawn_callback(self._sender) def send(self, ev): '''send accepts an event and queues it to be sent''' self.sd.gauge("queue_length", self.pending.qsize()) try: if self.block_on_send: self.pending.put(ev) else: self.pending.put_nowait(ev) self.sd.incr("messages_queued") except QueueFull: response = { "status_code": 0, "duration": 0, "metadata": ev.metadata, "body": "", "error": "event dropped; queue overflow", } if self.block_on_response: self.responses.put(response) else: try: self.responses.put_nowait(response) except QueueFull: # if the response queue is full when trying to add an event # queue is full response, just skip it. pass self.sd.incr("queue_overflow") # We're using the older decorator/yield model for compatibility with # Python versions before 3.5. # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await @gen.coroutine def _sender(self): '''_sender is the control loop that pulls events off the `self.pending` queue and submits batches for actual sending. ''' events = [] last_flush = time.time() while True: try: ev = yield self.pending.get(timeout=self.send_frequency) if ev is None: # signals shutdown yield self._flush(events) return events.append(ev) if (len(events) > self.max_batch_size or time.time() - last_flush > self.send_frequency): yield self._flush(events) events = [] except TimeoutError: yield self._flush(events) events = [] last_flush = time.time() @gen.coroutine def _flush(self, events): if not events: return for dest, group in group_events_by_destination(events).items(): yield self._send_batch(dest, group) @gen.coroutine def _send_batch(self, destination, events): ''' Makes a single batch API request with the given list of events. 
The `destination` argument contains the write key, API host and dataset name used to build the request.''' start = time.time() status_code = 0 try: # enforce max_concurrent_batches yield self.batch_sem.acquire() url = urljoin(urljoin(destination.api_host, "/1/batch/"), destination.dataset) payload = [] for ev in events: event_time = ev.created_at.isoformat() if ev.created_at.tzinfo is None: event_time += "Z" payload.append({ "time": event_time, "samplerate": ev.sample_rate, "data": ev.fields()}) req = HTTPRequest( url, method='POST', headers={ "X-Honeycomb-Team": destination.writekey, "Content-Type": "application/json", }, body=json.dumps(payload, default=json_default_handler), ) self.http_client.fetch(req, self._response_callback) # store the events that were sent so we can process responses later # it is important that we delete these eventually, or we'll run into memory issues self.batch_data[req] = {"start": start, "events": events} except Exception as e: # Catch all exceptions and hand them to the responses queue. self._enqueue_errors(status_code, e, start, events) finally: self.batch_sem.release() def _enqueue_errors(self, status_code, error, start, events): for ev in events: self.sd.incr("send_errors") self._enqueue_response(status_code, "", error, start, ev.metadata) def _response_callback(self, resp): # resp.request should be the same HTTPRequest object built by _send_batch # and mapped to values in batch_data events = self.batch_data[resp.request]["events"] start = self.batch_data[resp.request]["start"] try: status_code = resp.code resp.rethrow() statuses = [d["status"] for d in json.loads(resp.body)] for ev, status in zip(events, statuses): self._enqueue_response(status, "", None, start, ev.metadata) self.sd.incr("messages_sent") except Exception as e: self._enqueue_errors(status_code, e, start, events) self.sd.incr("send_errors") finally: # clean up the data for this batch del self.batch_data[resp.request] def _enqueue_response(self, status_code, body, error, start, metadata): resp = { "status_code": status_code, "body": body, "error": error, "duration": (time.time() - start) * 1000, "metadata": metadata } if self.block_on_response: self.responses.put(resp) else: try: self.responses.put_nowait(resp) except QueueFull: pass def close(self): '''call close to send all in-flight requests and shut down the senders nicely. Times out after max 20 seconds per sending thread plus 10 seconds for the response queue''' try: self.pending.put(None, 10) except QueueFull: pass # signal to the responses queue that nothing more is coming. try: self.responses.put(None, 10) except QueueFull: pass def get_response_queue(self): ''' return the responses queue on to which will be sent the response objects from each event send''' return self.responses
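# Sketch of the flush-on-size-or-interval loop used by _sender above, written
# with a timedelta deadline for Queue.get; `flush` is a stand-in coroutine and
# the defaults mirror the values shown in the class above.
import time
from datetime import timedelta
from tornado import gen
from tornado.queues import Queue

pending = Queue(maxsize=1000)

@gen.coroutine
def sender(flush, max_batch=100, frequency=0.25):
    batch, last = [], time.time()
    while True:
        try:
            ev = yield pending.get(timeout=timedelta(seconds=frequency))
            batch.append(ev)
        except gen.TimeoutError:
            pass                        # nothing new; fall through to the flush check
        if batch and (len(batch) >= max_batch or time.time() - last >= frequency):
            yield flush(batch)
            batch, last = [], time.time()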
def f(c, a, b): s = Scheduler((c.ip, c.port), loop=loop) yield s._sync_center() done = s.start() sched, report = Queue(), Queue(); s.handle_queues(sched, report) msg = yield report.get(); assert msg['op'] == 'stream-start' s.update_graph(dsk={'x-1': (inc, 1), 'x-2': (inc, 'x-1'), 'x-3': (inc, 'x-2'), 'y-1': (dec, 'x-3'), 'y-2': (dec, 'y-1'), 'e': (throws, 'y-2'), 'other': (inc, 123)}, keys=['e']) p = MultiProgressWidget(['e'], scheduler=s) assert p.keys == {'x': {'x-1', 'x-2', 'x-3'}, 'y': {'y-1', 'y-2'}, 'e': {'e'}} while True: msg = yield report.get() if msg['op'] == 'key-in-memory' and msg['key'] == 'x-3': break assert p.keys == {'x': set(), 'y': {'y-1', 'y-2'}, 'e': {'e'}} p._update() assert p.bars['x'].value == 1.0 assert p.bars['y'].value == 0.0 assert p.bars['e'].value == 0.0 assert '3 / 3' in p.texts['x'].value assert '0 / 2' in p.texts['y'].value assert '0 / 1' in p.texts['e'].value while True: msg = yield report.get() if msg['op'] == 'key-in-memory' and msg['key'] == 'y-2': break p._update() assert p.bars['x'].value == 1.0 assert p.bars['y'].value == 1.0 assert p.bars['e'].value == 0.0 assert p.keys == {'x': set(), 'y': set(), 'e': {'e'}} while True: msg = yield report.get() if msg['op'] == 'task-erred' and msg['key'] == 'e': break assert p.bars['x'].bar_style == 'success' assert p.bars['y'].bar_style == 'success' assert p.bars['e'].bar_style == 'danger' assert p.status == 'error' sched.put_nowait({'op': 'close'}) yield done
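# Illustrative sketch (not the widget's actual code) of the key grouping the
# MultiProgressWidget test above relies on: task keys such as 'x-1', 'x-2',
# 'y-1' are bucketed by prefix so one progress bar can track each group.
from collections import defaultdict

def group_by_prefix(keys):
    groups = defaultdict(set)
    for key in keys:
        prefix = key.rsplit('-', 1)[0] if '-' in key else key
        groups[prefix].add(key)
    return dict(groups)

print(group_by_prefix(['x-1', 'x-2', 'x-3', 'y-1', 'y-2', 'e']))
# -> {'x': {'x-1', 'x-2', 'x-3'}, 'y': {'y-1', 'y-2'}, 'e': {'e'}}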
class Kernel(SingletonConfigurable): #--------------------------------------------------------------------------- # Kernel interface #--------------------------------------------------------------------------- # attribute to override with a GUI eventloop = Any(None) @observe('eventloop') def _update_eventloop(self, change): """schedule call to eventloop from IOLoop""" loop = ioloop.IOLoop.current() if change.new is not None: loop.add_callback(self.enter_eventloop) session = Instance(Session, allow_none=True) profile_dir = Instance('IPython.core.profiledir.ProfileDir', allow_none=True) shell_stream = Instance(ZMQStream, allow_none=True) shell_streams = List( help="""Deprecated shell_streams alias. Use shell_stream .. versionchanged:: 6.0 shell_streams is deprecated. Use shell_stream. """) @default("shell_streams") def _shell_streams_default(self): warnings.warn( "Kernel.shell_streams is deprecated in yapkernel 6.0. Use Kernel.shell_stream", DeprecationWarning, stacklevel=2, ) if self.shell_stream is not None: return [self.shell_stream] else: return [] @observe("shell_streams") def _shell_streams_changed(self, change): warnings.warn( "Kernel.shell_streams is deprecated in yapkernel 6.0. Use Kernel.shell_stream", DeprecationWarning, stacklevel=2, ) if len(change.new) > 1: warnings.warn( "Kernel only supports one shell stream. Additional streams will be ignored.", RuntimeWarning, stacklevel=2, ) if change.new: self.shell_stream = change.new[0] control_stream = Instance(ZMQStream, allow_none=True) debug_shell_socket = Any() control_thread = Any() iopub_socket = Any() iopub_thread = Any() stdin_socket = Any() log = Instance(logging.Logger, allow_none=True) # identities: int_id = Integer(-1) ident = Unicode() @default('ident') def _default_ident(self): return str(uuid.uuid4()) # This should be overridden by wrapper kernels that implement any real # language. language_info = { 'name': 'Prolog (YAP)', 'mimetype': 'text/x-prolog', 'file_extension': '.yap', } # any links that should go in the help menu help_links = List() # Private interface _darwin_app_nap = Bool( True, help="""Whether to use appnope for compatibility with OS X App Nap. Only affects OS X >= 10.9. """).tag(config=True) # track associations with current request _allow_stdin = Bool(False) _parents = Dict({"shell": {}, "control": {}}) _parent_ident = Dict({'shell': b'', 'control': b''}) @property def _parent_header(self): warnings.warn( "Kernel._parent_header is deprecated in yapkernel 6. Use .get_parent()", DeprecationWarning, stacklevel=2, ) return self.get_parent(channel="shell") # Time to sleep after flushing the stdout/err buffers in each execute # cycle. While this introduces a hard limit on the minimal latency of the # execute cycle, it helps prevent output synchronization problems for # clients. # Units are in seconds. The minimum zmq latency on local host is probably # ~150 microseconds, set this to 500us for now. We may need to increase it # a little if it's not enough after more interactive testing. _execute_sleep = Float(0.0005).tag(config=True) # Frequency of the kernel's event loop. # Units are in seconds, kernel subclasses for GUI toolkits may need to # adapt to milliseconds. _poll_interval = Float(0.01).tag(config=True) stop_on_error_timeout = Float( 0.0, config=True, help="""time (in seconds) to wait for messages to arrive when aborting queued requests after an error. Requests that arrive within this window after an error will be cancelled. 
Increase in the event of unusually slow network causing significant delays, which can manifest as e.g. "Run all" in a notebook aborting some, but not all, messages after an error. """) # If the shutdown was requested over the network, we leave here the # necessary reply message so it can be sent by our registered atexit # handler. This ensures that the reply is only sent to clients truly at # the end of our shutdown process (which happens after the underlying # IPython shell's own shutdown). _shutdown_message = None # This is a dict of port number that the kernel is listening on. It is set # by record_ports and used by connect_request. _recorded_ports = Dict() # set of aborted msg_ids aborted = Set() # Track execution count here. For IPython, we override this to use the # execution count we store in the shell. execution_count = 0 msg_types = [ 'execute_request', 'complete_request', 'inspect_request', 'history_request', 'comm_info_request', 'kernel_info_request', 'connect_request', 'shutdown_request', 'is_complete_request', 'interrupt_request', # deprecated: 'apply_request', ] # add deprecated ipyparallel control messages control_msg_types = msg_types + [ 'clear_request', 'abort_request', 'debug_request' ] def __init__(self, **kwargs): super(Kernel, self).__init__(**kwargs) # Build dict of handlers for message types self.shell_handlers = {} for msg_type in self.msg_types: self.shell_handlers[msg_type] = getattr(self, msg_type) self.control_handlers = {} for msg_type in self.control_msg_types: self.control_handlers[msg_type] = getattr(self, msg_type) self.control_queue = Queue() def dispatch_control(self, msg): self.control_queue.put_nowait(msg) async def poll_control_queue(self): while True: msg = await self.control_queue.get() # handle tracers from _flush_control_queue if isinstance(msg, (concurrent.futures.Future, asyncio.Future)): msg.set_result(None) continue await self.process_control(msg) async def _flush_control_queue(self): """Flush the control queue, wait for processing of any pending messages""" if self.control_thread: control_loop = self.control_thread.io_loop # concurrent.futures.Futures are threadsafe # and can be used to await across threads tracer_future = concurrent.futures.Future() awaitable_future = asyncio.wrap_future(tracer_future) else: control_loop = self.io_loop tracer_future = awaitable_future = asyncio.Future() def _flush(): # control_stream.flush puts messages on the queue self.control_stream.flush() # put Future on the queue after all of those, # so we can wait for all queued messages to be processed self.control_queue.put(tracer_future) control_loop.add_callback(_flush) return awaitable_future async def process_control(self, msg): """dispatch control requests""" idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except Exception: self.log.error("Invalid Control Message", exc_info=True) return self.log.debug("Control received: %s", msg) # Set the parent message for side effects. 
self.set_parent(idents, msg, channel='control') self._publish_status('busy', 'control') header = msg['header'] msg_type = header['msg_type'] handler = self.control_handlers.get(msg_type, None) if handler is None: self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r", msg_type) else: try: result = handler(self.control_stream, idents, msg) if inspect.isawaitable(result): await result except Exception: self.log.error("Exception in control handler:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status('idle', 'control') # flush to ensure reply is sent self.control_stream.flush(zmq.POLLOUT) def should_handle(self, stream, msg, idents): """Check whether a shell-channel message should be handled Allows subclasses to prevent handling of certain messages (e.g. aborted requests). """ msg_id = msg['header']['msg_id'] if msg_id in self.aborted: # is it safe to assume a msg_id will not be resubmitted? self.aborted.remove(msg_id) self._send_abort_reply(stream, msg, idents) return False return True async def dispatch_shell(self, msg): """dispatch shell requests""" # flush control queue before handling shell requests await self._flush_control_queue() idents, msg = self.session.feed_identities(msg, copy=False) try: msg = self.session.deserialize(msg, content=True, copy=False) except Exception: self.log.error("Invalid Message", exc_info=True) return # Set the parent message for side effects. self.set_parent(idents, msg, channel='shell') self._publish_status('busy', 'shell') msg_type = msg['header']['msg_type'] # Only abort execute requests if self._aborting and msg_type == 'execute_request': self._send_abort_reply(self.shell_stream, msg, idents) self._publish_status('idle', 'shell') # flush to ensure reply is sent before # handling the next request self.shell_stream.flush(zmq.POLLOUT) return # Print some info about this message and leave a '--->' marker, so it's # easier to trace visually the message chain when debugging. Each # handler prints its message at the end. self.log.debug('\n*** MESSAGE TYPE:%s***', msg_type) self.log.debug(' Content: %s\n --->\n ', msg['content']) if not self.should_handle(self.shell_stream, msg, idents): return handler = self.shell_handlers.get(msg_type, None) if handler is None: self.log.warning("Unknown message type: %r", msg_type) else: self.log.debug("%s: %s", msg_type, msg) try: self.pre_handler_hook() except Exception: self.log.debug("Unable to signal in pre_handler_hook:", exc_info=True) try: result = handler(self.shell_stream, idents, msg) if inspect.isawaitable(result): await result except Exception: self.log.error("Exception in message handler:", exc_info=True) except KeyboardInterrupt: # Ctrl-c shouldn't crash the kernel here. 
self.log.error("KeyboardInterrupt caught in kernel.") finally: try: self.post_handler_hook() except Exception: self.log.debug("Unable to signal in post_handler_hook:", exc_info=True) sys.stdout.flush() sys.stderr.flush() self._publish_status('idle', 'shell') # flush to ensure reply is sent before # handling the next request self.shell_stream.flush(zmq.POLLOUT) def pre_handler_hook(self): """Hook to execute before calling message handler""" # ensure default_int_handler during handler call self.saved_sigint_handler = signal(SIGINT, default_int_handler) def post_handler_hook(self): """Hook to execute after calling message handler""" signal(SIGINT, self.saved_sigint_handler) def enter_eventloop(self): """enter eventloop""" self.log.info("Entering eventloop %s", self.eventloop) # record handle, so we can check when this changes eventloop = self.eventloop if eventloop is None: self.log.info("Exiting as there is no eventloop") return def advance_eventloop(): # check if eventloop changed: if self.eventloop is not eventloop: self.log.info("exiting eventloop %s", eventloop) return if self.msg_queue.qsize(): self.log.debug("Delaying eventloop due to waiting messages") # still messages to process, make the eventloop wait schedule_next() return self.log.debug("Advancing eventloop %s", eventloop) try: eventloop(self) except KeyboardInterrupt: # Ctrl-C shouldn't crash the kernel self.log.error("KeyboardInterrupt caught in kernel") pass if self.eventloop is eventloop: # schedule advance again schedule_next() def schedule_next(): """Schedule the next advance of the eventloop""" # flush the eventloop every so often, # giving us a chance to handle messages in the meantime self.log.debug("Scheduling eventloop advance") self.io_loop.call_later(0.001, advance_eventloop) # begin polling the eventloop schedule_next() async def do_one_iteration(self): """Process a single shell message Any pending control messages will be flushed as well .. versionchanged:: 5 This is now a coroutine """ # flush messages off of shell stream into the message queue self.shell_stream.flush() # process at most one shell message per iteration await self.process_one(wait=False) async def process_one(self, wait=True): """Process one request Returns None if no message was handled. 
""" if wait: t, dispatch, args = await self.msg_queue.get() else: try: t, dispatch, args = self.msg_queue.get_nowait() except asyncio.QueueEmpty: return None await dispatch(*args) async def dispatch_queue(self): """Coroutine to preserve order of message handling Ensures that only one message is processing at a time, even when the handler is async """ while True: try: await self.process_one() except Exception: self.log.exception("Error in message handler") _message_counter = Any(help="""Monotonic counter of messages """, ) @default('_message_counter') def _message_counter_default(self): return itertools.count() def schedule_dispatch(self, dispatch, *args): """schedule a message for dispatch""" idx = next(self._message_counter) self.msg_queue.put_nowait(( idx, dispatch, args, )) # ensure the eventloop wakes up self.io_loop.add_callback(lambda: None) def start(self): """register dispatchers for streams""" self.io_loop = ioloop.IOLoop.current() self.msg_queue = Queue() self.io_loop.add_callback(self.dispatch_queue) self.control_stream.on_recv(self.dispatch_control, copy=False) if self.control_thread: control_loop = self.control_thread.io_loop else: control_loop = self.io_loop asyncio.run_coroutine_threadsafe(self.poll_control_queue(), control_loop.asyncio_loop) self.shell_stream.on_recv( partial( self.schedule_dispatch, self.dispatch_shell, ), copy=False, ) # publish idle status self._publish_status('starting', 'shell') def record_ports(self, ports): """Record the ports that this kernel is using. The creator of the Kernel instance must call this methods if they want the :meth:`connect_request` method to return the port numbers. """ self._recorded_ports = ports #--------------------------------------------------------------------------- # Kernel request handlers #--------------------------------------------------------------------------- def _publish_execute_input(self, code, parent, execution_count): """Publish the code request on the iopub stream.""" self.session.send(self.iopub_socket, 'execute_input', { 'code': code, 'execution_count': execution_count }, parent=parent, ident=self._topic('execute_input')) def _publish_status(self, status, channel, parent=None): """send status (busy/idle) on IOPub""" self.session.send( self.iopub_socket, "status", {"execution_state": status}, parent=parent or self.get_parent(channel), ident=self._topic("status"), ) def _publish_debug_event(self, event): self.session.send( self.iopub_socket, "debug_event", event, parent=self.get_parent("control"), ident=self._topic("debug_event"), ) def set_parent(self, ident, parent, channel='shell'): """Set the current parent request Side effects (IOPub messages) and replies are associated with the request that caused them via the parent_header. The parent identity is used to route input_request messages on the stdin channel. """ self._parent_ident[channel] = ident self._parents[channel] = parent def get_parent(self, channel="shell"): """Get the parent request associated with a channel. .. versionadded:: 6 Parameters ---------- channel : str the name of the channel ('shell' or 'control') Returns ------- message : dict the parent message for the most recent request on the channel. """ return self._parents.get(channel, {}) def send_response(self, stream, msg_or_type, content=None, ident=None, buffers=None, track=False, header=None, metadata=None, channel='shell'): """Send a response to the message we're currently processing. This accepts all the parameters of :meth:`jupyter_client.session.Session.send` except ``parent``. 
This relies on :meth:`set_parent` having been called for the current message. """ return self.session.send( stream, msg_or_type, content, self.get_parent(channel), ident, buffers, track, header, metadata, ) def init_metadata(self, parent): """Initialize metadata. Run at the beginning of execution requests. """ # FIXME: `started` is part of ipyparallel # Remove for yapkernel 5.0 return { 'started': now(), } def finish_metadata(self, parent, metadata, reply_content): """Finish populating metadata. Run after completing an execution request. """ return metadata async def execute_request(self, stream, ident, parent): """handle an execute_request""" try: content = parent['content'] code = content['code'] silent = content['silent'] store_history = content.get('store_history', not silent) user_expressions = content.get('user_expressions', {}) allow_stdin = content.get('allow_stdin', False) except Exception: self.log.error("Got bad msg: ") self.log.error("%s", parent) return stop_on_error = content.get('stop_on_error', True) metadata = self.init_metadata(parent) # Re-broadcast our input for the benefit of listening clients, and # start computing output if not silent: self.execution_count += 1 self._publish_execute_input(code, parent, self.execution_count) reply_content = self.do_execute( code, silent, store_history, user_expressions, allow_stdin, ) if inspect.isawaitable(reply_content): reply_content = await reply_content # Flush output before sending the reply. sys.stdout.flush() sys.stderr.flush() # FIXME: on rare occasions, the flush doesn't seem to make it to the # clients... This seems to mitigate the problem, but we definitely need # to better understand what's going on. if self._execute_sleep: time.sleep(self._execute_sleep) # Send the reply. reply_content = json_clean(reply_content) metadata = self.finish_metadata(parent, metadata, reply_content) reply_msg = self.session.send(stream, 'execute_reply', reply_content, parent, metadata=metadata, ident=ident) self.log.debug("%s", reply_msg) if not silent and reply_msg['content'][ 'status'] == 'error' and stop_on_error: await self._abort_queues() def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False): """Execute user code. Must be overridden by subclasses. """ raise NotImplementedError async def complete_request(self, stream, ident, parent): content = parent['content'] code = content['code'] cursor_pos = content['cursor_pos'] matches = self.do_complete(code, cursor_pos) if inspect.isawaitable(matches): matches = await matches matches = json_clean(matches) self.session.send(stream, "complete_reply", matches, parent, ident) def do_complete(self, code, cursor_pos): """Override in subclasses to find completions. """ return { 'matches': [], 'cursor_end': cursor_pos, 'cursor_start': cursor_pos, 'metadata': {}, 'status': 'ok' } async def inspect_request(self, stream, ident, parent): content = parent['content'] reply_content = self.do_inspect( content['code'], content['cursor_pos'], content.get('detail_level', 0), ) if inspect.isawaitable(reply_content): reply_content = await reply_content # Before we send this object over, we scrub it for JSON usage reply_content = json_clean(reply_content) msg = self.session.send(stream, 'inspect_reply', reply_content, parent, ident) self.log.debug("%s", msg) def do_inspect(self, code, cursor_pos, detail_level=0): """Override in subclasses to allow introspection. 
""" return {'status': 'ok', 'data': {}, 'metadata': {}, 'found': False} async def history_request(self, stream, ident, parent): content = parent['content'] reply_content = self.do_history(**content) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) msg = self.session.send(stream, 'history_reply', reply_content, parent, ident) self.log.debug("%s", msg) def do_history(self, hist_access_type, output, raw, session=None, start=None, stop=None, n=None, pattern=None, unique=False): """Override in subclasses to access history. """ return {'status': 'ok', 'history': []} async def connect_request(self, stream, ident, parent): if self._recorded_ports is not None: content = self._recorded_ports.copy() else: content = {} content['status'] = 'ok' msg = self.session.send(stream, 'connect_reply', content, parent, ident) self.log.debug("%s", msg) @property def kernel_info(self): return { 'protocol_version': kernel_protocol_version, 'implementation': self.implementation, 'implementation_version': self.implementation_version, 'language_info': self.language_info, 'banner': self.banner, 'help_links': self.help_links, } async def kernel_info_request(self, stream, ident, parent): content = {'status': 'ok'} content.update(self.kernel_info) msg = self.session.send(stream, 'kernel_info_reply', content, parent, ident) self.log.debug("%s", msg) async def comm_info_request(self, stream, ident, parent): content = parent['content'] target_name = content.get('target_name', None) # Should this be moved to ipkernel? if hasattr(self, 'comm_manager'): comms = { k: dict(target_name=v.target_name) for (k, v) in self.comm_manager.comms.items() if v.target_name == target_name or target_name is None } else: comms = {} reply_content = dict(comms=comms, status='ok') msg = self.session.send(stream, 'comm_info_reply', reply_content, parent, ident) self.log.debug("%s", msg) async def interrupt_request(self, stream, ident, parent): pid = os.getpid() pgid = os.getpgid(pid) if os.name == "nt": self.log.error("Interrupt message not supported on Windows") else: # Prefer process-group over process if pgid and hasattr(os, "killpg"): try: os.killpg(pgid, SIGINT) return except OSError: pass try: os.kill(pid, SIGINT) except OSError: pass content = parent['content'] self.session.send(stream, 'interrupt_reply', content, parent, ident=ident) return async def shutdown_request(self, stream, ident, parent): content = self.do_shutdown(parent['content']['restart']) if inspect.isawaitable(content): content = await content self.session.send(stream, 'shutdown_reply', content, parent, ident=ident) # same content, but different msg_id for broadcasting on IOPub self._shutdown_message = self.session.msg('shutdown_reply', content, parent) self._at_shutdown() self.log.debug('Stopping control ioloop') control_io_loop = self.control_stream.io_loop control_io_loop.add_callback(control_io_loop.stop) self.log.debug('Stopping shell ioloop') shell_io_loop = self.shell_stream.io_loop shell_io_loop.add_callback(shell_io_loop.stop) def do_shutdown(self, restart): """Override in subclasses to do things when the frontend shuts down the kernel. 
""" return {'status': 'ok', 'restart': restart} async def is_complete_request(self, stream, ident, parent): content = parent['content'] code = content['code'] reply_content = self.do_is_complete(code) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) reply_msg = self.session.send(stream, 'is_complete_reply', reply_content, parent, ident) self.log.debug("%s", reply_msg) def do_is_complete(self, code): """Override in subclasses to find completions. """ return {'status': 'unknown'} async def debug_request(self, stream, ident, parent): content = parent['content'] reply_content = self.do_debug_request(content) if inspect.isawaitable(reply_content): reply_content = await reply_content reply_content = json_clean(reply_content) reply_msg = self.session.send(stream, 'debug_reply', reply_content, parent, ident) self.log.debug("%s", reply_msg) async def do_debug_request(self, msg): raise NotImplementedError #--------------------------------------------------------------------------- # Engine methods (DEPRECATED) #--------------------------------------------------------------------------- async def apply_request(self, stream, ident, parent): self.log.warning( "apply_request is deprecated in kernel_base, moving to ipyparallel." ) try: content = parent['content'] bufs = parent['buffers'] msg_id = parent['header']['msg_id'] except Exception: self.log.error("Got bad msg: %s", parent, exc_info=True) return md = self.init_metadata(parent) reply_content, result_buf = self.do_apply(content, bufs, msg_id, md) # flush i/o sys.stdout.flush() sys.stderr.flush() md = self.finish_metadata(parent, md, reply_content) self.session.send(stream, 'apply_reply', reply_content, parent=parent, ident=ident, buffers=result_buf, metadata=md) def do_apply(self, content, bufs, msg_id, reply_metadata): """DEPRECATED""" raise NotImplementedError #--------------------------------------------------------------------------- # Control messages (DEPRECATED) #--------------------------------------------------------------------------- async def abort_request(self, stream, ident, parent): """abort a specific msg by id""" self.log.warning( "abort_request is deprecated in kernel_base. It is only part of IPython parallel" ) msg_ids = parent['content'].get('msg_ids', None) if isinstance(msg_ids, str): msg_ids = [msg_ids] if not msg_ids: self._abort_queues() for mid in msg_ids: self.aborted.add(str(mid)) content = dict(status='ok') reply_msg = self.session.send(stream, 'abort_reply', content=content, parent=parent, ident=ident) self.log.debug("%s", reply_msg) async def clear_request(self, stream, idents, parent): """Clear our namespace.""" self.log.warning( "clear_request is deprecated in kernel_base. 
It is only part of IPython parallel" ) content = self.do_clear() self.session.send(stream, 'clear_reply', ident=idents, parent=parent, content=content) def do_clear(self): """DEPRECATED since 4.0.3""" raise NotImplementedError #--------------------------------------------------------------------------- # Protected interface #--------------------------------------------------------------------------- def _topic(self, topic): """prefixed topic for IOPub messages""" base = "kernel.%s" % self.ident return ("%s.%s" % (base, topic)).encode() _aborting = Bool(False) async def _abort_queues(self): self.shell_stream.flush() self._aborting = True def stop_aborting(): self.log.info("Finishing abort") self._aborting = False asyncio.get_event_loop().call_later(self.stop_on_error_timeout, stop_aborting) def _send_abort_reply(self, stream, msg, idents): """Send a reply to an aborted request""" self.log.info( f"Aborting {msg['header']['msg_id']}: {msg['header']['msg_type']}") reply_type = msg["header"]["msg_type"].rsplit("_", 1)[0] + "_reply" status = {"status": "aborted"} md = self.init_metadata(msg) md = self.finish_metadata(msg, md, status) md.update(status) self.session.send( stream, reply_type, metadata=md, content=status, parent=msg, ident=idents, ) def _no_raw_input(self): """Raise StdinNotImplementedError if active frontend doesn't support stdin.""" raise StdinNotImplementedError("raw_input was called, but this " "frontend does not support stdin.") def getpass(self, prompt='', stream=None): """Forward getpass to frontends Raises ------ StdinNotImplementedError if active frontend doesn't support stdin. """ if not self._allow_stdin: raise StdinNotImplementedError( "getpass was called, but this frontend does not support input requests." ) if stream is not None: import warnings warnings.warn( "The `stream` parameter of `getpass.getpass` will have no effect when using yapkernel", UserWarning, stacklevel=2, ) return self._input_request( prompt, self._parent_ident["shell"], self.get_parent("shell"), password=True, ) def raw_input(self, prompt=''): """Forward raw_input to frontends Raises ------ StdinNotImplementedError if active frontend doesn't support stdin. """ if not self._allow_stdin: raise StdinNotImplementedError( "raw_input was called, but this frontend does not support input requests." ) return self._input_request( str(prompt), self._parent_ident["shell"], self.get_parent("shell"), password=False, ) def _input_request(self, prompt, ident, parent, password=False): # Flush output before making the request. sys.stderr.flush() sys.stdout.flush() # flush the stdin socket, to purge stale replies while True: try: self.stdin_socket.recv_multipart(zmq.NOBLOCK) except zmq.ZMQError as e: if e.errno == zmq.EAGAIN: break else: raise # Send the input request. content = json_clean(dict(prompt=prompt, password=password)) self.session.send(self.stdin_socket, 'input_request', content, parent, ident=ident) # Await a response. while True: try: # Use polling with select() so KeyboardInterrupts can get # through; doing a blocking recv() means stdin reads are # uninterruptible on Windows. We need a timeout because # zmq.select() is also uninterruptible, but at least this # way reads get noticed immediately and KeyboardInterrupts # get noticed fairly quickly by human response time standards. 
rlist, _, xlist = zmq.select([self.stdin_socket], [], [self.stdin_socket], 0.01) if rlist or xlist: ident, reply = self.session.recv(self.stdin_socket) if (ident, reply) != (None, None): break except KeyboardInterrupt: # re-raise KeyboardInterrupt, to truncate traceback raise KeyboardInterrupt("Interrupted by user") from None except Exception: self.log.warning("Invalid Message:", exc_info=True) try: value = reply["content"]["value"] except Exception: self.log.error("Bad input_reply: %s", parent) value = '' if value == '\x04': # EOF raise EOFError return value def _at_shutdown(self): """Actions taken at shutdown by the kernel, called by python's atexit. """ if self._shutdown_message is not None: self.session.send(self.iopub_socket, self._shutdown_message, ident=self._topic('shutdown')) self.log.debug("%s", self._shutdown_message) self.control_stream.flush(zmq.POLLOUT)
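# Sketch of the single-consumer dispatch pattern the Kernel above builds on,
# in plain asyncio; the handler passed to schedule_dispatch is hypothetical.
import asyncio
import itertools

counter = itertools.count()
msg_queue = asyncio.Queue()

def schedule_dispatch(dispatch, *args):
    # Called from stream callbacks; the counter preserves arrival order.
    msg_queue.put_nowait((next(counter), dispatch, args))

async def dispatch_queue():
    # A single consumer guarantees handlers run one at a time, in order,
    # even when the handlers themselves are coroutines.
    while True:
        _, dispatch, args = await msg_queue.get()
        try:
            await dispatch(*args)
        except Exception as exc:
            print('error in message handler:', exc)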
def iterable_to_queue(iterable: Iterable) -> Queue:
    queue = Queue()
    for item in iterable:
        queue.put_nowait(item)
    return queue
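# Quick usage sketch for iterable_to_queue. It works with any Queue class that
# offers put_nowait/get_nowait/empty (tornado.queues.Queue, asyncio.Queue and
# queue.Queue all do); which one is imported is not shown in this snippet.
q = iterable_to_queue(["a", "b", "c"])
while not q.empty():
    print(q.get_nowait())  # prints a, b, c in insertion order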
class Crawler(Index):
    def __init__(self, openidList, max_tries=3, max_tasks=10, _loop=None):
        self.loop = _loop or asyncio.get_event_loop()  # event loop
        self.max_tries = max_tries  # number of retries on error
        self.max_tasks = max_tasks  # number of concurrent tasks
        self.urls_queue = Queue(loop=self.loop)  # queue of openids to process
        self.ClientSession = aiohttp.ClientSession(
            loop=self.loop)  # aiohttp session used for the GET requests
        for openid in openidList:  # put every openid into the queue
            self.urls_queue.put_nowait(openid)
        self.started_at = datetime.now()  # start timing
        self.end_at = None

    def close(self):
        # close the aiohttp session
        self.ClientSession.close()

    async def handle(self, openid, bid, wxClass, unionArr, proveinList,
                     citiesList, self_db, logger):
        tries = 0
        while tries < self.max_tries:  # retry up to max_tries times if the fetch fails
            try:
                url = ("https://api.weixin.qq.com/cgi-bin/user/info?access_token="
                       + wxClass.AccessToken + "&openid=" + openid + "&lang=zh_CN")
                # with aiohttp.Timeout(2):
                response = await self.ClientSession.get(
                    url, allow_redirects=False)  # fetch the data, redirects disabled
                jsonArr = await response.json()  # receive the JSON response asynchronously
                if 'errcode' not in jsonArr:
                    break
            except aiohttp.ClientError:
                # await response.release()  # release the connection asynchronously
                # break
                pass
            # time.sleep(2)
            tries += 1
        try:
            # text = await response.text()  # receive the raw response asynchronously
            print('------tries---------:%d' % tries)
            print(jsonArr)
            if 'errcode' in jsonArr:
                self.AppLogging.warning("get user info error: %s",
                                        jsonArr['errcode'])
            else:
                # weixinOpenidList[openid] = jsonArr
                self.doDBwork(openid, bid, jsonArr, unionArr, proveinList,
                              citiesList, self_db, logger)
        finally:
            await response.release()  # release the connection asynchronously

    async def work(self, bid, wxClass, unionArr, proveinList, citiesList,
                   self_db, logger):
        try:
            while True:
                openid = await self.urls_queue.get()  # take an openid from the queue
                await self.handle(openid, bid, wxClass, unionArr, proveinList,
                                  citiesList, self_db, logger)  # fetch and store its data
                time.sleep(sleep_interval)  # throttle between requests
                self.urls_queue.task_done()  # mark this queue item as done
        except asyncio.CancelledError:
            pass

    async def run(self, bid, wxClass, unionArr, proveinList, citiesList,
                  self_db, logger):
        # start several worker coroutines
        workers = [
            asyncio.Task(self.work(bid, wxClass, unionArr, proveinList,
                                   citiesList, self_db, logger),
                         loop=self.loop) for _ in range(self.max_tasks)
        ]
        self.started_at = datetime.now()  # start time
        await self.urls_queue.join()  # wait until every queued item is processed
        self.end_at = datetime.now()  # end time
        for w in workers:
            w.cancel()  # cancel the worker coroutines

    def doDBwork(self, wxOpenid, bid, tmplist, unionArr, proveinList,
                 citiesList, self_db, logger):
        wx = online_userinfo_weixin
        dbtype, vlist = self.dowork(tmplist, unionArr, proveinList, citiesList)
        print('---------vvvvvvv----------', dbtype)
        print(vlist)
        if dbtype == 1:  # insert
            dblist = {
                'bid': bid,
                'openid': vlist['openid'],
                'unionid': vlist['unionid'],
                'groupIds': vlist['groupIds'],
                'sex': vlist['sex'],
                'nickname': vlist['nickname'],
                'remark': vlist['remark'],
                'subscribe': vlist['subscribe'],
                'subscribe_time': vlist['subscribe_time'],
                'thumb': vlist['headimgurl'],
                'pid': vlist['pid'],
                'cid': vlist['cid'],
                'uptime': vlist['uptime'],
                'intime': vlist['intime'],
                'indate': vlist['indate'],
            }
            if 'userId' in vlist:
                dblist['userId'] = vlist['userId']
            global insertCount
            insertCount += 1
            print(dblist)
            # raise
            try:
                rr = self_db.execute(wx.__table__.insert(), dblist)
                self_db.commit()
                logger.info('bid=%s,insert to db %s', bid, dblist)
            except:
                print('----db insert error-----')
        elif dbtype == 2:  # update
            dblist = {
                'unionid': vlist['unionid'],
                'sex': vlist['sex'],
                'nickname': vlist['nickname'],
                'remark': vlist['remark'],
                'subscribe': vlist['subscribe'],
                'subscribe_time': vlist['subscribe_time'],
                'thumb': vlist['headimgurl'],
                'pid': vlist['pid'],
                'cid': vlist['cid'],
                'uptime': vlist['uptime'],
                'groupIds': vlist['groupIds'],
            }
            if 'userId' in vlist:
                dblist['userId'] = vlist['userId']
            global updateCount
            updateCount += 1
            try:
                updateUserId = bOpenidList[wxOpenid]
                rr = self_db.query(wx).filter(wx.id == updateUserId).update(
                    dblist, synchronize_session=False)
                self_db.commit()
                logger.info('bid=%s,update to db %s', bid, dblist)
            except:
                print('-----db update error-------')
            # global bOpenidList
            bOpenidList.pop(wxOpenid)
            # self_db.commit()
        else:
            pass

    def dowork(self, tmplist, unionArr, proveinList, citiesList):
        dbtype = 1  # 1 = insert, 2 = update, 3 = unsubscribed
        indate = int(
            time.mktime(
                time.strptime(time.strftime('%Y-%m-%d', time.localtime()),
                              '%Y-%m-%d')))
        intime = int(time.time())
        if not tmplist:
            return None, None
        if not isinstance(tmplist, dict):
            return None, None
        # subscribe
        if 'openid' not in tmplist:
            return None, None
        openid = tmplist['openid']
        tmplist['intime'] = intime
        tmplist['uptime'] = intime
        tmplist['indate'] = indate
        # nickname
        if 'nickname' not in tmplist:
            tmplist['nickname'] = ''
        else:
            pass
            # if isinstance(tmplist['nickname'], bytes):
            #     pass
            # else:
            #     tmplist['nickname'] = tmplist['nickname'].encode('unicode-escape')
        # headimgurl
        if 'headimgurl' not in tmplist:
            tmplist['headimgurl'] = ''
        # subscribe_time
        if 'subscribe_time' not in tmplist:
            tmplist['subscribe_time'] = 0
        # remark
        if 'remark' not in tmplist:
            tmplist['remark'] = ''
        # province id
        if 'province' in tmplist:
            tmplist['pid'] = self.defindName(tmplist['province'], proveinList)
        else:
            tmplist['pid'] = 0
        # look up the city
        if 'city' in tmplist:
            tmplist['cid'] = self.defindName(tmplist['city'], citiesList)
        else:
            tmplist['cid'] = 0
        # sex
        if 'sex' not in tmplist:
            tmplist['sex'] = 0
        # subscribe
        if 'subscribe' not in tmplist:
            tmplist['subscribe'] = 0
        # userId
        if 'unionid' in tmplist:
            findunionid = tmplist['unionid']
            if findunionid in unionArr:
                userId = unionArr[findunionid]
                if userId:
                    tmplist['userId'] = userId
        else:
            tmplist['unionid'] = ''
        # user tag groups
        if tmplist['subscribe'] == 0:
            # the user unsubscribed: clear some fields
            tmplist['groupId0'] = 0
            tmplist['groupId1'] = 0
            tmplist['groupId2'] = 0
            tmplist['groupIds'] = ''
            tmplist['subscribe'] = 0
            tmplist['subscribe_time'] = 0
            dbtype = 3  # unsubscribed
        else:
            h = ''
            for ts in tmplist['tagid_list']:
                # print(ts)
                h += str(ts) + ','
            tmplist['groupIds'] = h[:-1]
            # already known openid: update instead of insert
            if openid in bOpenidList:
                dbtype = 2
            else:
                dbtype = 1
        return dbtype, tmplist
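# A generic sketch of the worker-pool pattern Crawler.run() uses: N tasks drain
# an asyncio.Queue, the caller joins the queue and then cancels the workers.
# All names here are illustrative, not part of the Crawler API.
import asyncio


async def worker(queue, handle):
    while True:
        item = await queue.get()
        try:
            await handle(item)
        finally:
            queue.task_done()  # join() only returns once every item is marked done


async def run_pool(items, handle, max_tasks=10):
    queue = asyncio.Queue()
    for item in items:
        queue.put_nowait(item)
    workers = [asyncio.ensure_future(worker(queue, handle)) for _ in range(max_tasks)]
    await queue.join()  # wait until all items are processed
    for w in workers:
        w.cancel()      # the workers loop forever, so cancel them explicitly
    await asyncio.gather(*workers, return_exceptions=True)


if __name__ == '__main__':
    asyncio.run(run_pool(range(5), lambda i: asyncio.sleep(0)))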
class Rx(PrettyPrintable):
    def __init__(self, rx_tree, session_id, header_table=None, io_loop=None,
                 service_name=None, raw_headers=None, trace_id=None):
        if header_table is None:
            header_table = CocaineHeaders()

        if io_loop:
            warnings.warn('io_loop argument is deprecated.', DeprecationWarning)
        # If it's not the main thread
        # and a current IOloop doesn't exist here,
        # IOLoop.instance becomes self._io_loop
        self._io_loop = io_loop or IOLoop.current()
        self._queue = Queue()
        self._done = False
        self.session_id = session_id
        self.service_name = service_name
        self.rx_tree = rx_tree
        self.default_protocol = detect_protocol_type(rx_tree)
        self._headers = header_table
        self._current_headers = self._headers.merge(raw_headers)
        self.log = get_trace_adapter(log, trace_id)

    @coroutine
    def get(self, timeout=0, protocol=None):
        if self._done and self._queue.empty():
            raise ChokeEvent()

        # to pull various service errors
        if timeout <= 0:
            item = yield self._queue.get()
        else:
            deadline = datetime.timedelta(seconds=timeout)
            item = yield self._queue.get(deadline)

        if isinstance(item, Exception):
            raise item

        if protocol is None:
            protocol = self.default_protocol

        name, payload, raw_headers = item
        self._current_headers = self._headers.merge(raw_headers)

        res = protocol(name, payload)
        if isinstance(res, ProtocolError):
            raise ServiceError(self.service_name, res.reason, res.code,
                               res.category)
        else:
            raise Return(res)

    def done(self):
        self._done = True

    def push(self, msg_type, payload, raw_headers):
        dispatch = self.rx_tree.get(msg_type)
        self.log.debug("dispatch %s %.300s", dispatch, payload)
        if dispatch is None:
            raise InvalidMessageType(self.service_name,
                                     CocaineErrno.INVALIDMESSAGETYPE,
                                     "unexpected message type %s" % msg_type)
        name, rx = dispatch
        self.log.info("got message from `%s`: channel id: %s, type: %s",
                      self.service_name, self.session_id, name)
        self._queue.put_nowait((name, payload, raw_headers))
        if rx == {}:  # the last transition
            self.done()
        elif rx is not None:  # not a recursive transition
            self.rx_tree = rx

    def error(self, err):
        self._queue.put_nowait(err)

    def closed(self):
        return self._done

    def _format(self):
        return "name: %s, queue: %s, done: %s" % (self.service_name,
                                                  self._queue, self._done)

    @property
    def headers(self):
        return self._current_headers
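# A tiny generic sketch (not the cocaine API) of the queueing pattern Rx relies
# on: push() and error() both feed one tornado queue, and get() re-raises any
# exception object it pulls, so transport errors surface at the consumer.
from tornado import gen
from tornado.queues import Queue


class MiniRx(object):
    def __init__(self):
        self._queue = Queue()

    def push(self, name, payload):
        self._queue.put_nowait((name, payload))

    def error(self, exc):
        self._queue.put_nowait(exc)

    @gen.coroutine
    def get(self):
        item = yield self._queue.get()
        if isinstance(item, Exception):
            raise item          # propagate errors to whoever awaits get()
        raise gen.Return(item)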
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend """

    def __init__(self, stream, interval):
        self.stream = stream
        self.interval = interval / 1000.0
        self.last_transmission = default_timer()
        self.send_q = Queue()
        self.recv_q = Queue()
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self._broken = None

        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    @gen.coroutine
    def _background_send(self):
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == "close":
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())
                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self._broken = True
                    break
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for _ in msgs:
                    self.send_q.task_done()

    @gen.coroutine
    def _background_recv(self):
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self.send_q.put_nowait("close")
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        if self._broken:
            raise StreamClosedError("Batch Stream is Closed")
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        result = yield self.recv_q.get()
        if result == "close":
            raise StreamClosedError("Batched Stream is Closed")
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        return self.stream.closed()
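# A minimal sketch of the batching idea behind BatchedStream._background_send:
# take one message, wait out the remainder of the interval, drain whatever else
# arrived, and write the whole batch at once. `write` is a caller-supplied
# coroutine (hypothetical here), not distributed's real transport function.
from timeit import default_timer

from tornado import gen
from tornado.queues import Queue


@gen.coroutine
def batched_sender(send_q, write, interval=0.010):
    last = default_timer()
    while True:
        msg = yield send_q.get()
        if msg == "close":
            break
        msgs = [msg]
        wait_time = last + interval - default_timer()
        if wait_time > 0:
            yield gen.sleep(wait_time)      # let more messages accumulate
        while not send_q.empty():
            msgs.append(send_q.get_nowait())
        yield write(msgs)                   # one write for the whole batch
        last = default_timer()
        for _ in msgs:
            send_q.task_done()              # so a join()-based flush can finish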
class CommandTopic(object):
    def __init__(self, topic_name, maxsize=10):
        self.topic_name = topic_name
        self.q_maxsize = maxsize
        self.q = Queue(maxsize=maxsize)
        self.sem = BoundedSemaphoreWithValue(1)
        self.mid_gen = _MidGenerator()
        self.gc_flag = False
        self.io_loop = ioloop.IOLoop.current()

    def issue(self, ft, cmd, block, timeout):
        if 'mid' not in cmd:
            cmd['mid'] = next(self.mid_gen)
        try:
            h_timeout = self.io_loop.call_later(timeout, self.cmd_timeout, ft, cmd)
            ft.add_done_callback(lambda _: self.io_loop.remove_timeout(h_timeout))
            # [ft, cmd, block]
            self.q.put_nowait([ft, cmd, block])
        except Exception as e:
            ft.set_exception(e)
        return ft

    def cmd_timeout(self, ft, cmd):
        ft.set_exception(gen.TimeoutError(
            'Timeout in waiting response of command: {}'.format(str(cmd))))

    @gen.coroutine
    def start_cmd(self):
        yield self.sem.acquire()
        item = yield self.q.get()
        if not item:
            self.sem.release()
            raise gen.Return(None)

        self.queue_item_under_process = item
        ft, cmd, block = item
        # a special case should be handled:
        # the network is very slow, so the future timed out before the semaphore
        # acquisition was done
        if ft.done():
            raise gen.Return(None)

        ft.add_done_callback(lambda _: self.sem.release())
        if not block:
            # notification-type cmd, don't wait for a response
            ft.set_result(None)
        raise gen.Return(cmd)

    def finish_cmd(self, mid, resp):
        ft, cmd, block = self.queue_item_under_process
        if not ft.done() and mid == cmd['mid']:
            ft.set_result(resp)

    def start_listen(self):
        if self.sem.value() == 0:
            self.q.put(None)
        self.gc_flag = False

    def stop_listen(self):
        if hasattr(self, 'queue_item_under_process'):
            ft, cmd, block = self.queue_item_under_process
            if not ft.done():
                ft.set_result(None)
        self.q.put(None)
        self.gc_flag = True

    def is_marked_gc(self):
        return self.gc_flag
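# A minimal sketch of the timeout bookkeeping CommandTopic.issue() performs:
# schedule a call_later that fails the Future, and cancel that timer as soon as
# the Future completes. The helper name is illustrative.
from tornado import gen
from tornado.concurrent import Future
from tornado.ioloop import IOLoop


def add_command_timeout(ft, seconds, io_loop=None):
    io_loop = io_loop or IOLoop.current()

    def on_timeout():
        if not ft.done():
            ft.set_exception(gen.TimeoutError("no response within %.1fs" % seconds))

    handle = io_loop.call_later(seconds, on_timeout)
    # whichever happens first (result, exception, or timeout) cleans up the timer
    ft.add_done_callback(lambda _: io_loop.remove_timeout(handle))
    return ft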