class ServiceQueue:
    """Manager queue for one route.

    It starts the route's workers, hands incoming events to whichever worker
    has signalled that it is ready, and spools events to an on-disk
    ``ElasticQueue`` while no worker is available so that ordering is kept.

    Events arriving on ``queue`` are interpreted as follows:

    * ``None`` - stop signal; workers are told to stop and the disk queue is
      destroyed.
    * ``int``  - "ready" signal carrying a worker number.
    * ``dict`` - a data item to be delivered to a worker.
    """

    def __init__(self, loop, executor, queue, route, user_function, total_instances):
        """Set up the queues and schedule :meth:`listen` on *loop*.

        :param loop: asyncio event loop that runs the manager and its workers
        :param executor: executor passed through to each ``WorkerQueue``
        :param queue: manager queue receiving events and worker ready signals
        :param route: route name served by this queue
        :param user_function: function invoked by the workers for each event
        :param total_instances: number of workers; ``0`` marks an interceptor
            and any value below ``1`` results in a single (singleton) worker
        """
        self.platform = Platform()
        self.util = Utility()
        self.log = self.platform.log
        queue_dir = self.util.normalize_path(
            self.platform.work_dir + "/queues/" + self.platform.get_origin())
        self.disk_queue = ElasticQueue(queue_dir=queue_dir, queue_id=route)
        self._loop = loop
        self._executor = executor
        self.queue = queue
        self.route = route
        self.user_function = user_function
        self.ready_queue = self._new_queue(loop)
        self.worker_list = dict()
        self._peek_worker = None
        # start in buffering mode so that anything already in the disk queue
        # is drained before new items are sent directly to workers
        self._buffering = True
        self._interceptor = total_instances == 0
        self._singleton = total_instances < 1
        self._loop.create_task(self.listen(total_instances))

    @staticmethod
    def _new_queue(loop):
        """Return a new ``asyncio.Queue``, bound to *loop* when the runtime allows it."""
        try:
            return asyncio.Queue(loop=loop)
        except TypeError:
            # Python 3.10+ removed the "loop" argument from asyncio.Queue
            return asyncio.Queue()

    def _log_lifecycle(self, message):
        """Log a start/stop message, using debug level for interceptor inboxes."""
        # minimize logging for temporary inbox that starts with the "r" prefix
        if self._interceptor and self.util.is_inbox(self.route):
            self.log.debug(message)
        else:
            self.log.info(message)

    def peek_next_worker(self):
        """Return the next ready worker number without consuming it, or ``None``."""
        if self._peek_worker is None:
            self._peek_worker = self._fetch_next_worker()
        return self._peek_worker

    def get_next_worker(self):
        """Return and consume the next ready worker number, or ``None``."""
        if self._peek_worker is not None:
            # hand out the worker that was previously peeked
            result = self._peek_worker
            self._peek_worker = None
            return result
        return self._fetch_next_worker()

    def _fetch_next_worker(self):
        """Take one worker number from the ready queue; ``None`` when it is empty."""
        try:
            worker_number = self.ready_queue.get_nowait()
            if worker_number:
                self.ready_queue.task_done()
                return worker_number
        except QueueEmpty:
            return None

    def send_to_worker(self, item):
        """Deliver *item* to the next ready worker.

        The item is dropped, with an error logged, when no worker is ready or
        when the ready worker number has no registered worker queue.
        """
        worker_number = self.get_next_worker()
        if not worker_number:
            self.log.error("Event for " + self.route + " dropped because there are no workers available")
            return
        # use .get() so that an unknown worker number is reported instead of raising KeyError
        worker_queue = self.worker_list.get(worker_number)
        if worker_queue is None:
            self.log.error("Event for " + self.route + " dropped because worker #" +
                           str(worker_number) + " not found")
            return
        worker_queue.put_nowait(item)

    async def listen(self, total_instances):
        """Create the workers, then dispatch events until ``None`` is received.

        :param total_instances: requested number of workers; ignored when the
            service is a singleton, in which case one worker is created
        """
        # create concurrent workers
        total = 1 if self._singleton else total_instances
        for instance_number in range(1, total + 1):
            worker_queue = self._new_queue(self._loop)
            self.worker_list[instance_number] = worker_queue
            WorkerQueue(self._loop, self._executor, self.queue, worker_queue, self.route,
                        self.user_function, instance_number, self._singleton, self._interceptor)
            # populate the ready queue with an initial set of worker numbers
            await self.queue.put(instance_number)

        route_type = 'PRIVATE' if self.platform.route_is_private(self.route) else 'PUBLIC'
        plural = 's' if total > 1 else ''
        self._log_lifecycle(route_type + ' ' + self.route + " with " + str(total) +
                            " instance" + plural + " started")

        # listen for incoming events
        while True:
            event = await self.queue.get()
            self.queue.task_done()
            if event is None:
                break
            if isinstance(event, int):
                # ready signal from a worker
                await self.ready_queue.put(event)
                if self._buffering:
                    buffered = self.disk_queue.read()
                    if buffered:
                        self.send_to_worker(buffered)
                    else:
                        # nothing buffered in disk queue
                        self._buffering = False
                        self.disk_queue.close()
            elif isinstance(event, dict):
                # it is a data item
                if self._buffering:
                    # Once buffering is started, continue to spool items to disk
                    # to guarantee items in order
                    await self.disk_queue.write(event)
                elif self.peek_next_worker():
                    # Nothing buffered in disk queue. Find a worker to receive the item.
                    self.send_to_worker(event)
                else:
                    # start buffering because there are no available workers
                    self._buffering = True
                    await self.disk_queue.write(event)

        # tell workers to stop
        for worker_queue in self.worker_list.values():
            worker_queue.put_nowait(None)
        # destroy disk queue
        self.disk_queue.destroy()
        self._log_lifecycle(self.route + " stopped")
class WorkerQueue:
    """A single worker for a route.

    It waits for events on its own worker queue, runs the user function in the
    executor, sends the reply and tracing information, and then signals the
    manager queue that it is ready for the next event.
    """

    DISTRIBUTED_TRACING = 'distributed.tracing'

    def __init__(self, loop, executor, manager_queue, worker_queue, route, user_function,
                 instance, singleton, interceptor):
        """Store the configuration and schedule :meth:`listen` on *loop*.

        :param loop: asyncio event loop running this worker
        :param executor: executor used to run the user function
        :param manager_queue: queue that receives this worker's ready signal
        :param worker_queue: queue from which this worker takes events
        :param route: route name served by this worker
        :param user_function: function invoked for each event
        :param instance: worker number, sent back as the ready signal
        :param singleton: True when the user function takes (headers, body)
        :param interceptor: True when the user function takes the whole event envelope
        """
        self.platform = Platform()
        self.util = Utility()
        self.log = self.platform.log
        self._loop = loop
        self._executor = executor
        self.manager_queue = manager_queue
        self.worker_queue = worker_queue
        self.route = route
        # trace all routes except ws.outgoing (and interceptor inboxes)
        normal_service = not (interceptor and self.util.is_inbox(route))
        self.tracing = normal_service and route != 'ws.outgoing'
        self.user_function = user_function
        self.instance = instance
        self.singleton = singleton
        self.interceptor = interceptor
        self._loop.create_task(self.listen())
        self.log.debug(f'{self.route} #{self.instance} started')

    async def listen(self):
        """Take events from the worker queue until ``None`` (stop signal) arrives."""
        while True:
            event = await self.worker_queue.get()
            self.worker_queue.task_done()
            if event is None:
                break
            # 0 = interceptor, -1 = singleton, otherwise the worker number
            if self.interceptor:
                mode = 0
            elif self.singleton:
                mode = -1
            else:
                mode = self.instance
            # Execute the user function in parallel
            self._loop.run_in_executor(self._executor, self.handle_event, event, mode)
        self.log.debug(f'{self.route} #{self.instance} stopped')

    def handle_event(self, event, instance):
        """Process one event and always signal readiness to the manager afterwards.

        :param event: event as a dict (may contain 'headers', 'body', 'reply_to',
            'cid', 'extra', 'from', 'trace_id' and 'trace_path')
        :param instance: 0 for an interceptor, -1 for a singleton, otherwise
            the worker number passed to the user function
        """
        try:
            self._process_event(event, instance)
        finally:
            # The ready signal must be sent even if processing fails unexpectedly,
            # otherwise this worker would never be given another event.
            self._loop.call_soon_threadsafe(self._ack)

    def _process_event(self, event, instance):
        """Run the user function for *event*, then send the reply and trace info."""
        headers = dict() if 'headers' not in event else event['headers']
        body = None if 'body' not in event else event['body']
        result = None
        error_code = None
        error_msg = None
        # start distributed tracing if the event contains trace_id and trace_path
        if 'trace_id' in event and 'trace_path' in event:
            self.platform.start_tracing(self.route, trace_id=event['trace_id'],
                                        trace_path=event['trace_path'])
        else:
            self.platform.start_tracing(self.route)
        # execute user function
        begin = end = time.perf_counter()
        has_error = False
        try:
            if instance == 0:
                # service is an interceptor. e.g. inbox for RPC call
                self.user_function(EventEnvelope().from_map(event))
            elif instance == -1:
                # service is a singleton
                result = self.user_function(headers, body)
            else:
                # service with multiple instances
                result = self.user_function(headers, body, instance)
            end = time.perf_counter()
        except AppException as e:
            has_error = True
            error_code = e.get_status()
            error_msg = _normalize_exception('AppException', e)
        except ValueError as e:
            has_error = True
            error_code = 400
            error_msg = _normalize_exception('ValueError', e)
        except Exception as e:
            has_error = True
            error_code = 500
            error_msg = _normalize_exception(type(e).__name__, e)

        # execution time in milliseconds, rounded to 3 decimal points
        exec_time = round((end - begin) * 1000, 3)

        if error_code:
            if 'reply_to' in event:
                # set exception as result
                result = EventEnvelope().set_status(error_code).set_body(error_msg)
            else:
                self.log.warn(
                    f'Unhandled exception for {self.route} - code={error_code}, message={error_msg}')
        #
        # interceptor should not send regular response because it will forward
        # the request to another function.
        # However, if error_code exists, the system will send the exception response.
        # This allows interceptor to simply throw exception to indicate an error case.
        #
        if 'reply_to' in event and (error_code or not self.interceptor):
            reply_to = event['reply_to']
            # in case this is an RPC call from within
            if reply_to.startswith('->'):
                reply_to = reply_to[2:]
            response = EventEnvelope().set_to(reply_to)
            if not error_code:
                response.set_exec_time(exec_time)
            if 'extra' in event:
                response.set_extra(event['extra'])
            if has_error:
                # adding the 'exception' tag would throw exception to the caller
                response.add_tag('exception')
            if 'cid' in event:
                response.set_correlation_id(event['cid'])
            if 'trace_id' in event and 'trace_path' in event:
                response.set_trace(event['trace_id'], event['trace_path'])
            if isinstance(result, EventEnvelope):
                for h in result.get_headers():
                    response.set_header(h, result.get_header(h))
                response.set_body(result.get_body())
                response.set_status(result.get_status())
            else:
                response.set_body(result)
            try:
                self.platform.send_event(response.set_from(self.route))
            except Exception as e:
                self.log.warn(f'Event dropped because {e}')

        # send tracing info to distributed trace logger
        trace_info = self.platform.stop_tracing()
        if self.tracing and trace_info is not None and isinstance(trace_info, TraceInfo) \
                and trace_info.get_id() is not None and trace_info.get_path() is not None \
                and self.platform.has_route(self.DISTRIBUTED_TRACING):
            dt = EventEnvelope().set_to(self.DISTRIBUTED_TRACING).set_body(
                trace_info.get_annotations())
            dt.set_header('origin', self.platform.get_origin())
            dt.set_header('id', trace_info.get_id()).set_header('path', trace_info.get_path())
            dt.set_header('service', self.route).set_header('start', trace_info.get_start_time())
            if 'from' in event:
                dt.set_header('from', event['from'])
            if not error_code:
                dt.set_header('success', 'true')
                dt.set_header('exec_time', exec_time)
            else:
                dt.set_header('success', 'false')
                dt.set_header('status', error_code)
                dt.set_header('exception', error_msg)
            self.platform.send_event(dt)

    def _ack(self):
        """Tell the manager queue that this worker is ready for another event."""
        self.manager_queue.put_nowait(self.instance)