def main():
    """
    Demo entry point: connect to the cloud and publish ten events to 'hello.topic'.

    The platform is stopped before returning on every path, including when the
    Pub/Sub feature is not available, so the caller is never left with a
    platform that was started but not shut down.
    """
    platform = Platform()
    platform.connect_to_cloud()
    # wait until connected
    while not platform.cloud_ready():
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            # this allows us to stop the application while waiting for cloud connection
            platform.stop()
            return
    util = Utility()
    pubsub = PubSub()
    if pubsub.feature_enabled():
        # Publish an event
        # headers = optional parameters for the event
        # body = event payload
        for x in range(10):
            print("publishing event#", x)
            pubsub.publish("hello.topic",
                           headers={"some_parameter": "some_value", "n": x},
                           body="hello world " + util.get_iso_8601(time.time()))
    else:
        print("Pub/Sub feature not available from the underlying event stream")
        print("Did you start the language connector with Kafka?")
        print(
            "e.g. java -Dcloud.connector=kafka -Dcloud.services=kafka.reporter -jar language-connector-1.12.31.jar"
        )
    # quit application (needed on both paths, not only after a successful publish)
    platform.stop()
def __init__(self, data=None):
    """
    Wrap a dictionary as the working dataset.

    :param data: dict to wrap; an empty dict is created when omitted
    :raises ValueError: if data is given but is not a dict
    """
    self.util = Utility()
    self.normalized = False
    if data is None:
        self.dataset = dict()
    else:
        self.dataset = data
    if isinstance(self.dataset, dict):
        return
    raise ValueError('Invalid input - Expect: dict, Actual: ' + str(type(data)))
def __init__(self, config_file: str = None):
    """
    Start the platform from a configuration file.

    Loads configuration, sets up logging, creates the event loop, the network
    connector, the thread pool and the throttle, then runs the event loop in a
    new thread.

    :param config_file: configuration file handed to ConfigReader
    :raises RuntimeError: if the interpreter is older than Python 3.6
    """
    # compare (major, minor) so that Python 3.0 - 3.5 are rejected as the message promises
    if sys.version_info < (3, 6):
        python_version = str(sys.version_info.major) + "." + str(sys.version_info.minor)
        raise RuntimeError("Requires python 3.6 and above. Actual: " + python_version)

    self.origin = 'py' + (''.join(str(uuid.uuid4()).split('-')))
    self.config = ConfigReader(config_file)
    self.util = Utility()
    log_dir = self.config.get_property('log.directory')
    log_file = self.config.get_property('log.filename')
    log_level = self.config.get_property('log.level')
    self._max_threads = self.config.get('max.threads')
    self.work_dir = self.config.get_property('work.directory')
    self.log = LoggingService(log_dir=log_dir, log_file=log_file, log_level=log_level).get_logger()
    self._loop = asyncio.new_event_loop()
    # DO NOT CHANGE 'distributed.trace.processor' which is an optional user defined trace aggregator
    my_tracer = DistributedTrace(self, 'distributed.trace.processor')
    my_nc = self.config.get_property('network.connector')
    self._cloud = NetworkConnector(self, my_tracer, self._loop, my_nc, self.origin)
    self._function_queues = dict()
    self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=self._max_threads)
    self.log.info("Concurrent thread pool = " + str(self._max_threads))
    #
    # Before we figure out how to solve blocking file I/O, we will regulate event output rate.
    #
    my_test_dir = self.util.normalize_path(self.work_dir + "/test")
    if not os.path.exists(my_test_dir):
        # exist_ok tolerates another process creating the directory after the check
        os.makedirs(my_test_dir, exist_ok=True)
    self._throttle = Throttle(self.util.normalize_path(my_test_dir + "/to_be_deleted"), log=self.log)
    self._seq = 0
    self.util.cleanup_dir(my_test_dir)
    self.log.info("Estimated performance is " + format(self._throttle.get_tps(), ',d') + " events per second")
    self.running = True
    self.stopped = False
    # distributed trace sessions
    self._traces = {}

    # start event loop in a new thread to avoid blocking the main thread
    def main_event_loop():
        self.log.info("Event system started")
        self._loop.run_forever()
        self.log.info("Event system stopped")
        self._loop.close()

    threading.Thread(target=main_event_loop).start()
def __init__(self, log_dir='/tmp/log', log_file=None, log_level='INFO'):
    """
    Configure a logger that writes to the console and, optionally, to a rotating file.

    :param log_dir: directory for the log file; created when it does not exist
    :param log_file: logger name and log file name without the '.log' suffix;
                     when None only the console handler is attached
    :param log_level: DEBUG | INFO | WARN | ERROR | FATAL (case-insensitive);
                      any other value, including None, falls back to INFO
    """
    # automatically create log directory
    if not os.path.exists(log_dir):
        # exist_ok tolerates another process creating the directory after the check
        os.makedirs(log_dir, exist_ok=True)
    # DEBUG | INFO | WARN | ERROR | FATAL
    level_by_name = {
        'DEBUG': logging.DEBUG,
        'ERROR': logging.ERROR,
        'WARN': logging.WARNING,
        'FATAL': logging.CRITICAL,
    }
    # a missing (None) or non-string level must not crash on .upper()
    level_name = log_level.upper() if isinstance(log_level, str) else ''
    level = level_by_name.get(level_name, logging.INFO)
    self.logger = logging.getLogger(log_file)
    self.logger.setLevel(level)
    ch = logging.StreamHandler()
    ch.setLevel(level)
    formatter = logging.Formatter(
        fmt='%(asctime)s %(levelname)s %(filename)s:%(lineno)s %(message)s'
    )
    formatter.default_msec_format = '%s.%03d'
    ch.setFormatter(formatter)
    self.logger.addHandler(ch)
    if log_file is not None:
        filename = Utility().normalize_path(log_dir + '/' + log_file) + '.log'
        fh = RotatingFileHandler(filename, maxBytes=1024 * 1024, backupCount=10)
        fh.setLevel(level)
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)
def publish_some_events():
    """
    Publish ten demo events to 'hello.topic' when Pub/Sub is available,
    otherwise print a hint, then stop the platform in either case.
    """
    util = Utility()
    ps = PubSub()
    if not ps.feature_enabled():
        print('Pub/Sub feature is not available from the underlying event stream')
        print('Did you start the language connector with cloud.connector=Kafka or cloud.services=kafka.pubsub?')
        print('e.g. java -Dcloud.connector=kafka -Dcloud.services=kafka.reporter -jar language-connector.jar')
    else:
        # Publish an event
        # headers = optional parameters for the event
        # body = event payload
        for seq in range(10):
            log.info(f'publishing event#{seq}')
            event_headers = {"some_parameter": "some_value", "n": seq}
            event_body = "hello python - " + util.get_iso_8601(time.time())
            ps.publish('hello.topic', headers=event_headers, body=event_body)
    # quit application
    platform.stop()
def __init__(self, loop, executor, manager_queue, worker_queue, route, user_function,
             instance, singleton, interceptor):
    """
    Store the worker's collaborators and schedule its listener on the event loop.

    :param loop: event loop on which the listen() task is created
    :param executor: executor kept for later use by this worker
    :param manager_queue: queue shared with the manager
    :param worker_queue: queue owned by this worker
    :param route: route name served by this worker
    :param user_function: the function registered for the route
    :param instance: instance number, used in the start-up log line
    :param singleton: singleton flag, stored as given
    :param interceptor: interceptor flag; together with an inbox route it disables tracing
    """
    self.platform = Platform()
    self.util = Utility()
    self.log = self.platform.log
    self._loop, self._executor = loop, executor
    self.manager_queue, self.worker_queue = manager_queue, worker_queue
    self.route = route
    # trace all routes except ws.outgoing (and except an interceptor serving an inbox route)
    if interceptor and self.util.is_inbox(route):
        self.tracing = False
    else:
        self.tracing = route != 'ws.outgoing'
    self.user_function = user_function
    self.instance = instance
    self.singleton = singleton
    self.interceptor = interceptor
    self._loop.create_task(self.listen())
    self.log.debug(f'{self.route} #{self.instance} started')
def __init__(self, platform, distributed_trace, loop, url_list, origin):
    """
    Initialise the connector state; no connection is opened here.

    :param platform: owning platform; its logger is reused
    :param distributed_trace: distributed trace helper, stored as given
    :param loop: event loop, also handed to the cache
    :param url_list: URLs as one string, split on comma and space characters
    :param origin: origin ID of this application instance
    """
    self.platform = platform
    self.log = platform.log
    self.origin = origin
    self._distributed_trace = distributed_trace
    self._loop = loop
    self.util = Utility()

    # connection state flags
    self.normal = True
    self.started = False
    self.ready = False
    self.ws = None
    self.close_code = 1000
    self.close_message = 'OK'
    self.last_active = time.time()
    self.max_ws_payload = 32768

    # candidate URLs
    self.urls = self.util.multi_split(url_list, ', ')
    self.next_url = 1

    self.cache = SimpleCache(loop, self.log, timeout_seconds=30)
    self.api_key = self._get_api_key()
def __init__(self, queue_dir: str = None, queue_id: str = None):
    """
    Prepare a queue stored under ``queue_dir/queue_id``.

    :param queue_dir: parent directory; created when it does not exist
    :param queue_id: queue name, also used as the sub-directory name
    :raises ValueError: if either argument is missing
    """
    if queue_id is None or queue_dir is None:
        raise ValueError('Missing queue_dir or queue_id')
    self.queue_id = queue_id
    # automatically create queue directory
    if not os.path.exists(queue_dir):
        os.makedirs(queue_dir, exist_ok=True)
    self.util = Utility()
    self._dir = self.util.normalize_path(f'{queue_dir}/{queue_id}')
    # state that initialize() inspects or resets
    self._empty = False
    self._create_dir = False
    self._memory = list()
    self._read_file_no = self._write_file_no = 1
    self._read_counter = self._write_counter = 0
    self._file = None
    self._peeked = None
    self.initialize()
def __init__(self, loop, executor, queue, route, user_function, total_instances):
    """
    Create the disk-backed queue for a route and schedule its listener.

    :param loop: event loop on which the listen() task is created
    :param executor: executor kept for later use
    :param queue: incoming queue for the route
    :param route: route name, also used as the disk queue ID
    :param user_function: the function registered for the route
    :param total_instances: 0 marks an interceptor; any value below 1 marks a singleton
    """
    self.platform = Platform()
    self.util = Utility()
    self.log = self.platform.log
    origin_queues = self.platform.work_dir + "/queues/" + self.platform.get_origin()
    self.disk_queue = ElasticQueue(queue_dir=self.util.normalize_path(origin_queues), queue_id=route)
    self._loop = loop
    self._executor = executor
    self.queue = queue
    self.route = route
    self.user_function = user_function
    self.ready_queue = asyncio.Queue(loop=self._loop)
    self.worker_list = dict()
    self._peek_worker = None
    self._buffering = True
    self._interceptor = total_instances == 0
    self._singleton = total_instances < 1
    self._loop.create_task(self.listen(total_instances))
def __init__(self):
    """
    Set up an empty subscription table and register the private
    'pub.sub.sync' service that replays all known subscriptions.
    """
    self.platform = Platform()
    self.po = PostOffice()
    self.util = Utility()
    self.subscription = dict()

    def subscription_sync(headers: dict, body: any):
        # only react to subscription_sync requests
        if headers.get('type') != 'subscription_sync':
            return
        if not self.subscription:
            self.platform.log.info('No subscription to update')
            return
        for topic, route_map in self.subscription.items():
            for route, parameters in route_map.items():
                self.platform.log.info('Update subscription ' + topic + ' -> ' + route)
                self.subscribe(topic, route, parameters)

    self.platform.register('pub.sub.sync', subscription_sync, 1, is_private=True)
def __init__(self, route: str = None, expiry_seconds: int = 1800):
    """
    Open an existing stream or ask the stream manager to create a new one.

    :param route: route of an existing stream (must start with 'stream.' and contain '@');
                  when None a new stream is requested
    :param expiry_seconds: expiry sent with the create request for a new stream
    :raises ValueError: if route is not a valid stream route or expiry_seconds is not int
    :raises IOError: if the stream manager does not return a valid stream route
    """
    self.platform = Platform()
    self.po = PostOffice()
    self.util = Utility()
    self.route = None
    self.input_stream = None
    self.output_stream = None
    self.eof = False
    self.input_closed = False
    self.output_closed = False

    if route is None:
        # create a new stream
        if not isinstance(expiry_seconds, int):
            raise ValueError('expiry_seconds must be int')
        create_request = {'type': 'create', 'expiry_seconds': expiry_seconds}
        result = self.po.request(self.STREAM_IO_MANAGER, 6.0, headers=create_request)
        if isinstance(result, EventEnvelope):
            if isinstance(result.get_body(), str) and result.get_status() == 200:
                created = result.get_body()
                if created.startswith('stream.') and '@' in created:
                    self.route = created
        if self.route is None:
            raise IOError('Stream manager is not responding correctly')
    else:
        # open an existing stream
        if isinstance(route, str) and route.startswith('stream.') and '@' in route:
            self.route = route
        if self.route is None:
            raise ValueError('Invalid stream route')
def __init__(self, work_dir: str = None, log_file: str = None, log_level: str = None,
             max_threads: int = None, network_connector: str = None):
    """
    Start the platform; every argument left as None falls back to AppConfig.

    Sets up logging, creates the event loop, the network connector, the thread
    pool and the throttle, then runs the event loop in a new thread.

    :param work_dir: working directory (default: AppConfig.WORK_DIRECTORY)
    :param log_file: log file name (default: AppConfig.LOG_FILE when defined, else None)
    :param log_level: log level (default: AppConfig.LOG_LEVEL)
    :param max_threads: thread pool size (default: AppConfig.MAX_THREADS)
    :param network_connector: connector setting (default: AppConfig.NETWORK_CONNECTOR)
    :raises RuntimeError: if the interpreter is older than Python 3.6
    """
    # compare (major, minor) so that Python 3.0 - 3.5 are rejected as the message promises
    if sys.version_info < (3, 6):
        python_version = str(sys.version_info.major) + "." + str(sys.version_info.minor)
        raise RuntimeError("Requires python 3.6 and above. Actual: " + python_version)

    self.util = Utility()
    self.origin = 'py' + (''.join(str(uuid.uuid4()).split('-')))
    config = AppConfig()
    my_log_file = (config.LOG_FILE if hasattr(config, 'LOG_FILE') else None) if log_file is None else log_file
    my_log_level = config.LOG_LEVEL if log_level is None else log_level
    self._max_threads = config.MAX_THREADS if max_threads is None else max_threads
    self.work_dir = config.WORK_DIRECTORY if work_dir is None else work_dir
    self.log = LoggingService(log_dir=self.util.normalize_path(self.work_dir + "/log"),
                              log_file=my_log_file, log_level=my_log_level).get_logger()
    self._loop = asyncio.new_event_loop()
    my_distributed_trace = DistributedTrace(self, config.DISTRIBUTED_TRACE_PROCESSOR)
    my_connector = config.NETWORK_CONNECTOR if network_connector is None else network_connector
    self._cloud = NetworkConnector(self, my_distributed_trace, self._loop, my_connector, self.origin)
    self._function_queues = dict()
    self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=self._max_threads)
    self.log.info("Concurrent thread pool = " + str(self._max_threads))
    #
    # Before we figure out how to solve blocking file I/O, we will regulate event output rate.
    #
    my_test_dir = self.util.normalize_path(self.work_dir + "/test")
    if not os.path.exists(my_test_dir):
        # exist_ok tolerates another process creating the directory after the check
        os.makedirs(my_test_dir, exist_ok=True)
    self._throttle = Throttle(self.util.normalize_path(my_test_dir + "/to_be_deleted"), log=self.log)
    self._seq = 0
    self.util.cleanup_dir(my_test_dir)
    self.log.debug("Estimated processing rate is " + format(self._throttle.get_tps(), ',d') +
                   " events per second for this computer")
    self.running = True
    self.stopped = False
    # distributed trace sessions
    self._traces = {}

    # start event loop in a new thread to avoid blocking the main thread
    def main_event_loop():
        self.log.info("Event system started")
        self._loop.run_forever()
        self.log.info("Event system stopped")
        self._loop.close()

    threading.Thread(target=main_event_loop).start()
class Platform:
    """
    Event platform: owns the event loop thread, the registry of routes and the
    connection to the cloud, and delivers events to local or remote routes.
    """

    SERVICE_QUERY = 'system.service.query'

    def __init__(self, work_dir: str = None, log_file: str = None, log_level: str = None,
                 max_threads: int = None, network_connector: str = None):
        """
        Start the platform; every argument left as None falls back to AppConfig.

        :param work_dir: working directory (default: AppConfig.WORK_DIRECTORY)
        :param log_file: log file name (default: AppConfig.LOG_FILE when defined, else None)
        :param log_level: log level (default: AppConfig.LOG_LEVEL)
        :param max_threads: thread pool size (default: AppConfig.MAX_THREADS)
        :param network_connector: connector setting (default: AppConfig.NETWORK_CONNECTOR)
        :raises RuntimeError: if the interpreter is older than Python 3.6
        """
        # compare (major, minor) so that Python 3.0 - 3.5 are rejected as the message promises
        if sys.version_info < (3, 6):
            python_version = str(sys.version_info.major) + "." + str(sys.version_info.minor)
            raise RuntimeError("Requires python 3.6 and above. Actual: " + python_version)

        self.util = Utility()
        self.origin = 'py' + (''.join(str(uuid.uuid4()).split('-')))
        config = AppConfig()
        my_log_file = (config.LOG_FILE if hasattr(config, 'LOG_FILE') else None) if log_file is None else log_file
        my_log_level = config.LOG_LEVEL if log_level is None else log_level
        self._max_threads = config.MAX_THREADS if max_threads is None else max_threads
        self.work_dir = config.WORK_DIRECTORY if work_dir is None else work_dir
        self.log = LoggingService(log_dir=self.util.normalize_path(self.work_dir + "/log"),
                                  log_file=my_log_file, log_level=my_log_level).get_logger()
        self._loop = asyncio.new_event_loop()
        my_distributed_trace = DistributedTrace(self, config.DISTRIBUTED_TRACE_PROCESSOR)
        my_connector = config.NETWORK_CONNECTOR if network_connector is None else network_connector
        self._cloud = NetworkConnector(self, my_distributed_trace, self._loop, my_connector, self.origin)
        self._function_queues = dict()
        self._executor = concurrent.futures.ThreadPoolExecutor(max_workers=self._max_threads)
        self.log.info("Concurrent thread pool = " + str(self._max_threads))
        #
        # Before we figure out how to solve blocking file I/O, we will regulate event output rate.
        #
        my_test_dir = self.util.normalize_path(self.work_dir + "/test")
        if not os.path.exists(my_test_dir):
            # exist_ok tolerates another process creating the directory after the check
            os.makedirs(my_test_dir, exist_ok=True)
        self._throttle = Throttle(self.util.normalize_path(my_test_dir + "/to_be_deleted"), log=self.log)
        self._seq = 0
        self.util.cleanup_dir(my_test_dir)
        self.log.debug("Estimated processing rate is " + format(self._throttle.get_tps(), ',d') +
                       " events per second for this computer")
        self.running = True
        self.stopped = False
        # distributed trace sessions
        self._traces = {}

        # start event loop in a new thread to avoid blocking the main thread
        def main_event_loop():
            self.log.info("Event system started")
            self._loop.run_forever()
            self.log.info("Event system stopped")
            self._loop.close()

        threading.Thread(target=main_event_loop).start()

    def get_origin(self):
        """
        get the origin ID of this application instance

        :return: origin ID
        """
        return self.origin

    def get_trace_id(self) -> str:
        """
        get trace ID for a transaction

        :return: trace ID, or None when the current thread has no trace
        """
        trace_info = self.get_trace()
        return trace_info.get_id() if trace_info is not None else None

    def get_trace(self) -> TraceInfo:
        """
        get trace info for a transaction

        :return: TraceInfo recorded for the current thread, or None
        """
        return self._traces.get(threading.get_ident())

    def annotate_trace(self, key: str, value: str):
        """
        Annotate a trace at a point of a transaction

        :param key: any key
        :param value: any value
        :return: None
        """
        trace_info = self.get_trace()
        if trace_info is not None and isinstance(trace_info, TraceInfo):
            trace_info.annotate(key, value)

    def start_tracing(self, route: str, trace_id: str = None, trace_path: str = None):
        """
        IMPORTANT: This method is reserved for system use.
        DO NOT call this from a user application.

        :param route: route name
        :param trace_id: id
        :param trace_path: path such as URI
        :return: None
        """
        thread_id = threading.get_ident()
        self._traces[thread_id] = TraceInfo(route, trace_id, trace_path)

    def stop_tracing(self):
        """
        IMPORTANT: This method is reserved for system use.
        DO NOT call this from a user application.

        :return: TraceInfo that was recorded for the current thread, or None
        """
        return self._traces.pop(threading.get_ident(), None)

    def run_forever(self):
        """
        Tell the platform to run in the background until user presses CTL-C or the application is stopped by admin

        :return: None
        :raises ValueError: if called from a thread other than the main thread
        """

        def graceful_shutdown(signum, frame):
            self.log.warning("Control-C detected" if signal.SIGINT == signum else "KILL signal detected")
            self.running = False

        if threading.current_thread() is not threading.main_thread():
            raise ValueError(
                'Unable to register Control-C and KILL signals because this is not the main thread'
            )
        signal.signal(signal.SIGTERM, graceful_shutdown)
        signal.signal(signal.SIGINT, graceful_shutdown)
        # keep the main thread running so CTL-C can be detected
        self.log.info("To stop this application, press Control-C")
        while self.running:
            time.sleep(0.1)
        # exit forever loop and ask platform to end event loop
        self.stop()

    def register(self, route: str, user_function: any, total_instances: int, is_private: bool = False) -> None:
        """
        Register a user function

        :param route: ID of the function
        :param user_function: the lambda function given by you
        :param total_instances: 1 for singleton or more for concurrency
        :param is_private: true if internal function within this application instance
        :return: None
        :raises ValueError: if the route is already registered, total_instances is not an int
                            between 1 and max threads, or the function signature is not supported
        """
        self.util.validate_service_name(route)
        if route in self._function_queues:
            raise ValueError("route " + route + " already registered")
        if not isinstance(total_instances, int):
            raise ValueError("Expect total_instances to be int, actual: " + str(type(total_instances)))
        if total_instances < 1:
            raise ValueError("total_instances must be at least 1")
        if total_instances > self._max_threads:
            raise ValueError("total_instances must not exceed max threads of " + str(self._max_threads))
        function_type = self.util.get_function_type(user_function)
        if function_type == FunctionType.NOT_SUPPORTED:
            raise ValueError("Function signature should be (headers: dict, body: any, instance: int) or " +
                             "(headers: dict, body: any) or (event: EventEnvelope)")

        queue = asyncio.Queue(loop=self._loop)
        if function_type == FunctionType.INTERCEPTOR:
            self._function_queues[route] = {'queue': queue, 'private': is_private, 'instances': 1}
            # zero instances tells the service queue that this is an interceptor
            ServiceQueue(self._loop, self._executor, queue, route, user_function, 0)
        elif function_type == FunctionType.REGULAR:
            self._function_queues[route] = {'queue': queue, 'private': is_private, 'instances': total_instances}
            ServiceQueue(self._loop, self._executor, queue, route, user_function, total_instances)
        else:
            # function_type == FunctionType.SINGLETON
            self._function_queues[route] = {'queue': queue, 'private': is_private, 'instances': 1}
            ServiceQueue(self._loop, self._executor, queue, route, user_function, -1)
        # advertise the new route to the network
        if self._cloud.is_ready() and not is_private:
            self._cloud.send_payload({'type': 'add', 'route': route})

    def cloud_ready(self):
        """
        :return: whatever the network connector reports from is_ready()
        """
        return self._cloud.is_ready()

    def release(self, route: str) -> None:
        """
        Un-register a route

        :param route: ID of the function
        :return: None
        :raises ValueError: if route is not a str or is not registered
        """
        if not isinstance(route, str):
            raise ValueError("Expect route to be str, actual: " + str(type(route)))
        if route not in self._function_queues:
            raise ValueError("route " + route + " not found")
        # advertise the deleted route to the network
        # (only public routes are advertised by register(), so only those are withdrawn)
        if self._cloud.is_ready() and not self.route_is_private(route):
            self._cloud.send_payload({'type': 'remove', 'route': route})
        self._remove_route(route)

    def has_route(self, route: str) -> bool:
        """
        :param route: ID of the function
        :return: True if the route is registered in this application instance
        :raises ValueError: if route is not a str
        """
        if not isinstance(route, str):
            raise ValueError("Expect route to be str, actual: " + str(type(route)))
        return route in self._function_queues

    def get_routes(self, options: str = 'all'):
        """
        :param options: 'all', 'public' or 'private'
        :return: list of matching route names; empty list for any other option
        """
        if 'public' == options:
            return [route for route in self._function_queues if not self.route_is_private(route)]
        if 'private' == options:
            return [route for route in self._function_queues if self.route_is_private(route)]
        if 'all' == options:
            return list(self._function_queues.keys())
        return list()

    def route_is_private(self, route: str) -> bool:
        """
        :param route: a registered route (KeyError otherwise)
        :return: the 'private' flag stored at registration, False when absent
        """
        config = self._function_queues[route]
        if config and 'private' in config:
            return config['private']
        return False

    def route_instances(self, route: str) -> int:
        """
        :param route: a registered route (KeyError otherwise)
        :return: the number of instances stored at registration, 0 when absent
        """
        config = self._function_queues[route]
        if config and 'instances' in config:
            return config['instances']
        return 0

    def parallel_request(self, events: list, timeout_seconds: float):
        """
        Send several requests and wait until all responses have arrived

        :param events: non-empty list of EventEnvelope
        :param timeout_seconds: positive waiting time applied to each response
        :return: list of responses in order of arrival
        :raises ValueError: on invalid arguments or an unreachable route
        :raises TimeoutError: if a response does not arrive in time
        """
        timeout_value = self.util.get_float(timeout_seconds)
        if timeout_value <= 0:
            raise ValueError("timeout value in seconds must be positive number")
        if not isinstance(events, list):
            raise ValueError("events must be a list of EventEnvelope")
        if len(events) == 0:
            raise ValueError("event list is empty")
        if len(events) == 1:
            result = list()
            result.append(self.request(events[0], timeout_value))
            return result
        for evt in events:
            if not isinstance(evt, EventEnvelope):
                raise ValueError("events must be a list of EventEnvelope")

        # retrieve distributed tracing info if any
        trace_info = self.get_trace()
        # emulate RPC
        inbox = Inbox(self)
        temp_route = inbox.get_route()
        inbox_queue = inbox.get_queue()
        try:
            for evt in events:
                # restore distributed tracing info from current thread
                if trace_info:
                    if trace_info.get_route() is not None and evt.get_from() is None:
                        evt.set_from(trace_info.get_route())
                    if trace_info.get_id() is not None and trace_info.get_path() is not None:
                        evt.set_trace(trace_info.get_id(), trace_info.get_path())
                route = evt.get_to()
                evt.set_reply_to(temp_route, me=True)
                if route in self._function_queues:
                    self._loop.call_soon_threadsafe(self._send, route, evt.to_map())
                else:
                    if self._cloud.is_connected():
                        self._cloud.send_payload({'type': 'event', 'event': evt.to_map()})
                    else:
                        raise ValueError("route " + route + " not found")

            total_requests = len(events)
            result_list = list()
            while True:
                try:
                    # wait until all response events are delivered to the inbox
                    result_list.append(inbox_queue.get(True, timeout_value))
                    if len(result_list) == len(events):
                        return result_list
                except Empty:
                    raise TimeoutError('Requests timeout for ' + format(timeout_value, '.3f') +
                                       " seconds. Expect: " + str(total_requests) +
                                       " responses, actual: " + str(len(result_list)))
        finally:
            inbox.close()

    def request(self, event: EventEnvelope, timeout_seconds: float):
        """
        Send a request and wait for its response

        :param event: request event
        :param timeout_seconds: positive waiting time
        :return: the response taken from the temporary inbox
        :raises ValueError: on invalid arguments or an unreachable route
        :raises TimeoutError: if the response does not arrive in time
        """
        timeout_value = self.util.get_float(timeout_seconds)
        if timeout_value <= 0:
            raise ValueError("timeout value in seconds must be positive number")
        if not isinstance(event, EventEnvelope):
            raise ValueError("event object must be an EventEnvelope")
        # restore distributed tracing info from current thread
        trace_info = self.get_trace()
        if trace_info:
            if trace_info.get_route() is not None and event.get_from() is None:
                event.set_from(trace_info.get_route())
            if trace_info.get_id() is not None and trace_info.get_path() is not None:
                event.set_trace(trace_info.get_id(), trace_info.get_path())
        # emulate RPC
        inbox = Inbox(self)
        temp_route = inbox.get_route()
        inbox_queue = inbox.get_queue()
        try:
            route = event.get_to()
            event.set_reply_to(temp_route, me=True)
            if route in self._function_queues:
                self._loop.call_soon_threadsafe(self._send, route, event.to_map())
            else:
                if self._cloud.is_connected():
                    self._cloud.send_payload({'type': 'event', 'event': event.to_map()})
                else:
                    raise ValueError("route " + route + " not found")
            # wait until response event is delivered to the inbox
            return inbox_queue.get(True, timeout_value)
        except Empty:
            raise TimeoutError('Route ' + event.get_to() + ' timeout for ' +
                               format(timeout_value, '.3f') + " seconds")
        finally:
            inbox.close()

    def send_event(self, event: EventEnvelope, broadcast=False) -> None:
        """
        Send an event without waiting for a response

        :param event: event to deliver
        :param broadcast: when True the event is flagged as a broadcast
        :return: None
        :raises ValueError: on invalid arguments, when route equals reply_to,
                            or when the route is unreachable
        """
        if not isinstance(event, EventEnvelope):
            raise ValueError("event object must be an EventEnvelope class")
        # restore distributed tracing info from current thread
        trace_info = self.get_trace()
        if trace_info:
            if trace_info.get_route() is not None and event.get_from() is None:
                event.set_from(trace_info.get_route())
            if trace_info.get_id() is not None and trace_info.get_path() is not None:
                event.set_trace(trace_info.get_id(), trace_info.get_path())
        # regulate rate for best performance
        self._seq += 1
        self._throttle.regulate_rate(self._seq)
        route = event.get_to()
        if broadcast:
            event.set_broadcast(True)
        reply_to = event.get_reply_to()
        if reply_to:
            # strip the '->' prefix before comparing with the target route
            target = reply_to[2:] if reply_to.startswith('->') else reply_to
            if route == target:
                raise ValueError("route and reply_to must not be the same")
        if route in self._function_queues:
            if event.is_broadcast() and self._cloud.is_connected():
                self._cloud.send_payload({'type': 'event', 'event': event.to_map()})
            else:
                self._loop.call_soon_threadsafe(self._send, route, event.to_map())
        else:
            if self._cloud.is_connected():
                self._cloud.send_payload({'type': 'event', 'event': event.to_map()})
            else:
                raise ValueError("route " + route + " not found")

    def exists(self, routes: any):
        """
        Check whether one or more routes can be reached

        :param routes: a route name or a list of route names
        :return: True when every route is registered locally; otherwise the body
                 returned by the service query when the cloud is ready; else False
        """
        if isinstance(routes, str):
            single_route = routes
            if self.has_route(single_route):
                return True
            if self.cloud_ready():
                event = EventEnvelope()
                event.set_to(self.SERVICE_QUERY).set_header('type', 'find').set_header('route', single_route)
                result = self.request(event, 8.0)
                if isinstance(result, EventEnvelope):
                    if result.get_body() is not None:
                        return result.get_body()
        if isinstance(routes, list):
            if len(routes) > 0:
                # this object is the platform itself - there is no self.platform attribute
                remote_routes = [r for r in routes if not self.has_route(r)]
                if len(remote_routes) == 0:
                    return True
                if self.cloud_ready():
                    # tell service query to use the route list in body
                    event = EventEnvelope()
                    event.set_to(self.SERVICE_QUERY).set_header('type', 'find')
                    event.set_header('route', '*').set_body(routes)
                    result = self.request(event, 8.0)
                    if isinstance(result, EventEnvelope) and result.get_body() is not None:
                        return result.get_body()
        return False

    def _remove_route(self, route):
        # a None event is put on the route's queue before the route is dropped
        if route in self._function_queues:
            self._send(route, None)
            self._function_queues.pop(route)

    def _send(self, route, event):
        # put the event on the queue of a local route; unknown routes are ignored
        if route in self._function_queues:
            config = self._function_queues[route]
            if 'queue' in config:
                config['queue'].put_nowait(event)

    def connect_to_cloud(self):
        """
        Start the cloud connection using the thread pool
        """
        self._loop.run_in_executor(self._executor, self._cloud.start_connection)

    def stop(self):
        """
        Stop the platform; only the first call has any effect
        """
        #
        # to allow user application to invoke the "stop" method from a registered service,
        # the system must start a new thread so that the service can finish first.
        #
        if not self.stopped:
            self.log.info('Bye')
            # guarantee this stop function to execute only once
            self.stopped = True
            # exit the run_forever loop if any
            self.running = False
            # in case the calling function has just send an event asynchronously
            time.sleep(0.5)
            threading.Thread(target=self._bye).start()

    def _bye(self):
        # remove all routes, clean up this instance's queue directory and stop the event loop

        def stopping():
            for route in list(self.get_routes()):
                self._remove_route(route)
            self._loop.create_task(full_stop())

        async def full_stop():
            # give time for registered services to stop
            await asyncio.sleep(1.0)
            queue_dir = self.util.normalize_path(self.work_dir + "/queues/" + self.get_origin())
            self.util.cleanup_dir(queue_dir)
            self._loop.stop()

        self._cloud.close_connection(1000, 'bye', stop_engine=True)
        self._loop.call_soon_threadsafe(stopping)
class ElasticQueue:
    """
    FIFO queue that keeps the first MEMORY_BUFFER items of a run in memory and
    appends the rest to numbered files under ``queue_dir/queue_id``.

    Each record on disk is: one DATA byte, the block length encoded by
    Utility.int_to_bytes (read back as 4 bytes), then the msgpack block.
    An EOF byte marks the end of a file that has grown past MAX_FILE_SIZE.
    """

    DATA = b'\x01'
    EOF = b'\x00'
    QUEUE = "data-"
    MEMORY_BUFFER = 10
    MAX_FILE_SIZE = 10 * 1024 * 1024

    def __init__(self, queue_dir: str = None, queue_id: str = None):
        """
        :param queue_dir: parent directory; created when it does not exist
        :param queue_id: queue name, also used as the sub-directory name
        :raises ValueError: if either argument is missing
        """
        # automatically create queue directory
        if queue_dir is None or queue_id is None:
            raise ValueError('Missing queue_dir or queue_id')
        self.queue_id = queue_id
        if not os.path.exists(queue_dir):
            # exist_ok tolerates another queue creating the directory after the check
            os.makedirs(queue_dir, exist_ok=True)
        self.util = Utility()
        self._dir = self.util.normalize_path(queue_dir + '/' + queue_id)
        self._empty = False
        self._create_dir = False
        self._memory = list()
        self._read_file_no = 1
        self._write_file_no = 1
        self._read_counter = 0
        self._write_counter = 0
        self._file = None
        self._peeked = None
        self.initialize()

    def get_id(self):
        """
        :return: the queue ID given at construction
        """
        return self.queue_id

    def initialize(self):
        """
        Reset counters and the memory buffer unless the queue is already empty.
        Files left in an existing queue directory are cleaned up; a missing
        directory is created lazily by the first write that goes to disk.
        """
        if not self._empty:
            self._empty = True
            if os.path.exists(self._dir):
                self.util.cleanup_dir(self._dir, clear_dir=False)
                self._create_dir = False
            else:
                self._create_dir = True
            self._memory = list()
            self._read_file_no = 1
            self._write_file_no = 1
            self._read_counter = 0
            self._write_counter = 0

    def close(self):
        """
        Close the file being read, if any, and reset the queue
        """
        if self._file is not None:
            self._file.close()
            self._file = None
        self.initialize()

    def is_closed(self):
        """
        :return: True when no file is open and nothing has been written since the last reset
        """
        return self._file is None and self._write_counter == 0

    def destroy(self):
        """
        Close the queue and clean up its directory
        """
        self.close()
        if self.is_closed():
            self.util.cleanup_dir(self._dir)

    async def write(self, data: dict):
        """
        Append an item to the queue

        :param data: item to store; items beyond the memory buffer are packed with msgpack
        """
        if self._write_counter < self.MEMORY_BUFFER:
            self._memory.append(data)
            self._write_counter += 1
            self._empty = False
        else:
            if self._create_dir:
                self._create_dir = False
                os.makedirs(self._dir, exist_ok=True)
            filename = self.util.normalize_path(self._dir + '/' + self.QUEUE + str(self._write_file_no))
            if not os.path.exists(filename):
                # the file must exist before its size can be measured
                open(filename, 'w').close()
            # pack data as bytes
            block = msgpack.packb(data, use_bin_type=True)
            file_size = os.path.getsize(filename)
            with open(filename, 'ab') as f:
                buffer = io.BytesIO()
                buffer.write(self.DATA)
                buffer.write(self.util.int_to_bytes(len(block)))
                buffer.write(block)
                file_size += len(block)
                if file_size > self.MAX_FILE_SIZE:
                    # seal this file; the next write starts a new one
                    buffer.write(self.EOF)
                    self._write_file_no += 1
                f.write(buffer.getvalue())
            self._write_counter += 1
            self._empty = False

    def peek(self):
        """
        :return: the next item without consuming it, or None when nothing is available
        """
        if self._peeked is not None:
            return self._peeked
        self._peeked = self.read()
        return self._peeked

    def read(self):
        """
        :return: the next item, or None when nothing is available
        :raises ValueError: if a record on disk is malformed
        """
        if self._peeked is not None:
            result = self._peeked
            self._peeked = None
            return result
        if self._read_counter >= self._write_counter:
            # catch up with writes and thus nothing to read
            self.close()
            return None
        if self._read_counter < self.MEMORY_BUFFER:
            data = self._memory.pop(0)
            if data is not None:
                self._read_counter += 1
                return data
        filename = self.util.normalize_path(self._dir + '/' + self.QUEUE + str(self._read_file_no))
        if self._file is None:
            if not os.path.exists(filename):
                return None
            self._file = open(filename, 'rb')
        # read control indicator
        ctl = self._file.read(1)
        if not ctl:
            # read() returns b'' (never None) at end of file - nothing to read yet
            return None
        if ctl == self.EOF:
            # EOF - drop file and increment read sequence
            self._file.close()
            self._file = None
            os.remove(filename)
            self._read_file_no += 1
            return self.read()
        if ctl != self.DATA:
            raise ValueError("Corrupted queue for " + self.queue_id)
        # read data block size
        size = self._file.read(4)
        if size is None or len(size) != 4:
            raise ValueError("Corrupted queue for " + self.queue_id)
        block_size = self.util.bytes_to_int(size)
        block = self._file.read(block_size)
        if block is None or len(block) != block_size:
            raise ValueError("Corrupted queue for " + self.queue_id)
        self._read_counter += 1
        # unpack from bytes into the original data
        return msgpack.unpackb(block, raw=False)
def __init__(self):
    """
    Obtain the platform and utility helpers used by this object.
    """
    self.platform, self.util = Platform(), Utility()
class PostOffice:
    """
    Convenient class for making RPC, async and callback.
    """

    DEFERRED_DELIVERY = 'system.deferred.delivery'

    def __init__(self):
        self.platform = Platform()
        self.util = Utility()

    def get_route(self):
        """
        Obtain my route name for the currently running service.
        This is useful for Role Based Access Control (RBAC) to restrict certain user roles for a service.
        Note that RBAC is the responsibility of the user application.

        :return: route name, or "?" when the current thread has no trace info
        """
        trace_info = self.get_trace()
        return "?" if trace_info is None else trace_info.get_route()

    def get_trace_id(self):
        """
        :return: trace ID reported by the platform
        """
        return self.platform.get_trace_id()

    def get_trace(self):
        """
        :return: trace info reported by the platform
        """
        return self.platform.get_trace()

    def annotate_trace(self, key: str, value: str):
        """
        Annotate the current trace

        :param key: any key
        :param value: any value
        """
        self.platform.annotate_trace(key, value)

    def broadcast(self, route: str, headers: dict = None, body: any = None) -> None:
        """
        Send an event flagged as a broadcast

        :param route: target route
        :param headers: optional parameters; values are converted to str
        :param body: optional payload
        :raises ValueError: if both headers and body are missing or headers is not a dict
        """
        self.util.validate_service_name(route)
        if headers is None and body is None:
            raise ValueError('Unable to broadcast because both headers and body are missing')
        event = EventEnvelope().set_to(route)
        if headers is not None:
            if not isinstance(headers, dict):
                raise ValueError('headers must be dict')
            for key, value in headers.items():
                event.set_header(key, str(value))
        if body is not None:
            event.set_body(body)
        self.platform.send_event(event, True)

    def send_later(self, route: str, headers: dict = None, body: any = None, seconds: float = 1.0) -> None:
        """
        Hand an event to the deferred delivery service

        :param route: target route
        :param headers: optional parameters; keys and values are converted to str
        :param body: optional payload
        :param seconds: delay forwarded to the deferred delivery service
        :raises ValueError: if seconds is not int or float, or headers is not a dict
        """
        self.util.validate_service_name(route, True)
        if not isinstance(seconds, (float, int)):
            raise ValueError('delay in seconds must be int or float')
        relay = {'route': route}
        if headers is not None:
            # same validation as send(), broadcast() and request()
            if not isinstance(headers, dict):
                raise ValueError('headers must be dict')
            relay['headers'] = {str(key): str(value) for key, value in headers.items()}
        if body is not None:
            relay['body'] = body
        relay['seconds'] = seconds
        self.send(self.DEFERRED_DELIVERY, body=relay)

    def send(self, route: str, headers: dict = None, body: any = None, reply_to: str = None, me=True) -> None:
        """
        Send an event without waiting for a response

        :param route: target route
        :param headers: optional parameters; keys and values are converted to str
        :param body: optional payload
        :param reply_to: optional route that should receive the reply
        :param me: passed to set_reply_to together with reply_to
        :raises ValueError: if both headers and body are missing, or an argument has the wrong type
        """
        self.util.validate_service_name(route, True)
        if headers is None and body is None:
            raise ValueError('Unable to send because both headers and body are missing')
        event = EventEnvelope().set_to(route)
        if headers is not None:
            if not isinstance(headers, dict):
                raise ValueError('headers must be dict')
            for key, value in headers.items():
                event.set_header(str(key), str(value))
        if body is not None:
            event.set_body(body)
        if reply_to is not None:
            if not isinstance(reply_to, str):
                raise ValueError('reply_to must be str')
            # encode 'me' in the "call back" if replying to this instance
            event.set_reply_to(reply_to, me)
        self.platform.send_event(event)

    def request(self, route: str, timeout_seconds: float,
                headers: dict = None, body: any = None,
                correlation_id: str = None) -> EventEnvelope:
        """
        Make an RPC call

        :param route: target route
        :param timeout_seconds: positive waiting time
        :param headers: optional parameters; values are converted to str
        :param body: optional payload
        :param correlation_id: optional correlation ID, converted to str
        :return: whatever the platform's request() returns
        :raises ValueError: if both headers and body are missing, the timeout is not positive,
                            or headers is not a dict
        """
        self.util.validate_service_name(route, True)
        if headers is None and body is None:
            raise ValueError('Unable to make RPC call because both headers and body are missing')
        timeout_value = self.util.get_float(timeout_seconds)
        if timeout_value <= 0:
            raise ValueError("timeout value in seconds must be positive number")
        event = EventEnvelope().set_to(route)
        if headers is not None:
            if not isinstance(headers, dict):
                raise ValueError('headers must be dict')
            for key, value in headers.items():
                event.set_header(key, str(value))
        if body is not None:
            event.set_body(body)
        if correlation_id is not None:
            event.set_correlation_id(str(correlation_id))
        return self.platform.request(event, timeout_seconds)

    def parallel_request(self, events: list, timeout_seconds: float) -> list:
        """
        :param events: list of request events
        :param timeout_seconds: waiting time
        :return: whatever the platform's parallel_request() returns
        """
        return self.platform.parallel_request(events, timeout_seconds)

    def exists(self, routes: any):
        """
        :param routes: a route name or a list of route names
        :return: whatever the platform's exists() returns
        """
        return self.platform.exists(routes)
class PostOffice:
    """
    Convenient class for making RPC, async and callback.

    Each method checks its arguments, wraps them in an EventEnvelope where needed
    and delegates the actual delivery to the Platform instance.
    """

    def __init__(self):
        self.platform = Platform()
        self.util = Utility()

    def get_route(self):
        """
        Obtain my route name for the currently running service.
        This is useful for Role Based Access Control (RBAC) to restrict certain user roles for a service.
        Note that RBAC is the responsibility of the user application.

        Returns: route name, or "?" when there is no trace info for the caller

        """
        current_trace = self.get_trace()
        if current_trace is None:
            return "?"
        return current_trace.get_route()

    def get_trace_id(self):
        """
        Returns: the trace ID reported by the platform

        """
        return self.platform.get_trace_id()

    def get_trace(self):
        """
        Returns: the trace info reported by the platform

        """
        return self.platform.get_trace()

    def annotate_trace(self, key: str, value: str):
        """
        Forward a key-value annotation to the platform.

        Args:
            key: annotation key
            value: annotation value

        """
        self.platform.annotate_trace(key, value)

    @staticmethod
    def _fill(envelope, headers, body, stringify_keys: bool):
        # Copy headers (values always as str, keys as str only on request) and body
        # into the envelope, then hand the same envelope back.
        if headers is not None:
            if not isinstance(headers, dict):
                raise ValueError('headers must be dict')
            for name, value in headers.items():
                envelope.set_header(str(name) if stringify_keys else name,
                                    str(value))
        if body is not None:
            envelope.set_body(body)
        return envelope

    @staticmethod
    def _set_reply_to(envelope, reply_to, me) -> None:
        # Attach the optional reply-to route after checking its type.
        if reply_to is None:
            return
        if not isinstance(reply_to, str):
            raise ValueError('reply_to must be str')
        # encode 'me' in the "call back" if replying to this instance
        envelope.set_reply_to(reply_to, me)

    @staticmethod
    def _unwrap(response):
        # Return a normal response; turn one tagged 'exception' into AppException.
        if not isinstance(response, EventEnvelope):
            raise ValueError(
                f'Expect response is EventEnvelope, actual: ({response})')
        if response.get_tag('exception') is not None:
            raise AppException(response.get_status(), response.get_body())
        return response

    def broadcast(self, route: str, headers: dict = None, body: any = None) -> None:
        """
        Send an event with the broadcast flag turned on.

        Args:
            route: target route name
            headers: optional parameters (values are converted to str)
            body: optional event payload

        Returns: None

        """
        self.util.validate_service_name(route)
        if body is None and headers is None:
            raise ValueError(
                'Unable to broadcast because both headers and body are missing'
            )
        envelope = self._fill(EventEnvelope().set_to(route), headers, body, False)
        self.platform.send_event(envelope, True)

    def send_later(self, route: str, headers: dict = None, body: any = None,
                   reply_to: str = None, me=True, seconds: float = 1.0) -> None:
        """
        Send an event after a delay.

        Args:
            route: target route name
            headers: optional parameters (keys and values are converted to str)
            body: optional event payload
            reply_to: optional route that should receive the response
            me: passed to EventEnvelope.set_reply_to together with reply_to
            seconds: delay, must be int or float and larger than zero

        Returns: None

        """
        self.util.validate_service_name(route, True)
        if not isinstance(seconds, (float, int)):
            raise ValueError('delay in seconds must be int or float')
        if not seconds > 0:
            raise ValueError('delay in seconds must be larger than zero')
        if body is None and headers is None:
            raise ValueError(
                'Unable to send because both headers and body are missing'
            )
        envelope = self._fill(EventEnvelope().set_to(route), headers, body, True)
        self._set_reply_to(envelope, reply_to, me)
        self.platform.send_event_later(envelope, seconds)

    def send_event_later(self, event: EventEnvelope, seconds: float = 1.0, me=True):
        """
        Send a ready-made event after a delay.

        Args:
            event: the event to send
            seconds: delay passed to the platform
            me: re-applied to the event's reply-to route when one is set

        """
        reply_to = event.get_reply_to()
        if reply_to is not None:
            event.set_reply_to(reply_to, me)
        self.platform.send_event_later(event, seconds)

    def send(self, route: str, headers: dict = None, body: any = None,
             reply_to: str = None, me=True) -> None:
        """
        Send an event asynchronously.

        Args:
            route: target route name
            headers: optional parameters (keys and values are converted to str)
            body: optional event payload
            reply_to: optional route that should receive the response
            me: passed to EventEnvelope.set_reply_to together with reply_to

        Returns: None

        """
        self.util.validate_service_name(route, True)
        if body is None and headers is None:
            raise ValueError(
                'Unable to send because both headers and body are missing')
        envelope = self._fill(EventEnvelope().set_to(route), headers, body, True)
        self._set_reply_to(envelope, reply_to, me)
        self.platform.send_event(envelope)

    def send_event(self, event: EventEnvelope, me=True):
        """
        Send a ready-made event asynchronously.

        Args:
            event: the event to send
            me: re-applied to the event's reply-to route when one is set

        """
        reply_to = event.get_reply_to()
        if reply_to is not None:
            event.set_reply_to(reply_to, me)
        self.platform.send_event(event)

    def request(self, route: str, timeout_seconds: float,
                headers: dict = None, body: any = None,
                correlation_id: str = None) -> EventEnvelope:
        """
        Make an RPC call and wait for the response.

        Args:
            route: target route name
            timeout_seconds: maximum time to wait, must be positive
            headers: optional parameters (values are converted to str)
            body: optional event payload
            correlation_id: optional correlation ID (converted to str)

        Returns: the response envelope; a response tagged 'exception' is raised
                 as AppException instead

        """
        self.util.validate_service_name(route, True)
        if body is None and headers is None:
            raise ValueError(
                'Unable to make RPC call because both headers and body are missing'
            )
        if self.util.get_float(timeout_seconds) <= 0:
            raise ValueError(
                'timeout value in seconds must be positive number')
        envelope = self._fill(EventEnvelope().set_to(route), headers, body, False)
        if correlation_id is not None:
            envelope.set_correlation_id(str(correlation_id))
        return self._unwrap(self.platform.request(envelope, timeout_seconds))

    def single_request(self, event: EventEnvelope, timeout_seconds: float):
        """
        Make an RPC call with a ready-made event.

        Args:
            event: the request event
            timeout_seconds: timeout passed to the platform

        Returns: the response envelope; a response tagged 'exception' is raised
                 as AppException instead

        """
        return self._unwrap(self.platform.request(event, timeout_seconds))

    def parallel_request(self, events: list, timeout_seconds: float) -> list:
        """
        Delegate a batch of RPC requests to Platform.parallel_request.

        Args:
            events: list of events to send
            timeout_seconds: timeout passed to the platform

        Returns: the result of Platform.parallel_request

        """
        return self.platform.parallel_request(events, timeout_seconds)

    def exists(self, routes: any):
        """
        Delegate a route existence check to Platform.exists.

        Args:
            routes: route name(s) to check

        Returns: the result of Platform.exists

        """
        return self.platform.exists(routes)
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from mercury.system.singleton import Singleton from mercury.platform import Platform from mercury.system.po import PostOffice from mercury.system.models import EventEnvelope, AppException from mercury.system.utility import Utility platform = Platform() log = platform.get_logger() po = PostOffice() util = Utility() @Singleton class PubSub: def __init__(self, domain: str = 'system'): if not isinstance(domain, str): raise ValueError('Pub/sub domain must be str. e.g. system or user') value = domain.strip() self.domain = 'system' if value == '' else value self.subscription = dict() def feature_enabled(self): result = po.request('pub.sub.controller', 10.0, headers={'type': 'feature', 'domain': self.domain}) return self._normalize_result(result, True)
class MultiLevelDict:
    """
    Wrapper around a nested dict that is addressed with composite paths.

    A composite path is made of segments separated by '.' or '/'. A segment may carry
    one or more list indexes, e.g. 'hello.world[0][1].name'.
    """

    def __init__(self, data=None):
        """
        Args:
            data: optional dict to wrap; an empty dict is created when omitted

        Raises:
            ValueError: if data is given and is not a dict
        """
        self.util = Utility()
        self.normalized = False
        self.dataset = dict() if data is None else data
        if not isinstance(self.dataset, dict):
            raise ValueError('Invalid input - Expect: dict, Actual: ' + str(type(data)))

    def get_dict(self):
        """
        Returns: the wrapped dict (not a copy)
        """
        return self.dataset

    @staticmethod
    def is_digits(n: str):
        """
        Returns: True if every character is an ASCII digit (also True for an empty string)
        """
        return all('0' <= c <= '9' for c in n)

    @staticmethod
    def is_list_element(item: str):
        """
        Returns: True if the segment looks like 'name[...]', i.e. it contains '[',
                 ends with ']' and does not start with '['
        """
        return '[' in item and item.endswith(']') and (not item.startswith('['))

    def set_element(self, composite_path: str, value: any, source_data: dict = None):
        """
        Set a value at the given composite path, creating intermediate dicts and
        lists as needed.

        Args:
            composite_path: path such as 'a.b[0].c'
            value: value to store
            source_data: dict to update; defaults to the wrapped dataset

        Raises:
            ValueError: if the path is missing or malformed, or the target is not a dict
        """
        if composite_path is None:
            raise ValueError('Missing composite_path')
        self.validate_composite_path_syntax(composite_path)
        data = self.dataset if source_data is None else source_data
        if not isinstance(data, dict):
            raise ValueError(
                f'Invalid input - Expect: dict, Actual: {type(data)}')
        segments = self.util.multi_split(composite_path, './')
        if len(segments) == 0:
            return
        current = data
        size = len(segments)
        # 'composite' accumulates the path walked so far so that existing nodes can be looked up
        composite = ''
        for n, p in enumerate(segments, start=1):
            if self.is_list_element(p):
                sep = p.index('[')
                indexes = self._get_indexes(p[sep:])
                element = p[0:sep]
                parent = self.get_element(composite + element, source_data)
                if n == size:
                    # last segment: store the value in an existing or a new list
                    if isinstance(parent, list):
                        self._set_list_element(indexes, parent, value)
                    else:
                        new_list = list()
                        self._set_list_element(indexes, new_list, value)
                        current[element] = new_list
                    break
                if isinstance(parent, list):
                    next_dict = self.get_element(composite + p, source_data)
                    if isinstance(next_dict, dict):
                        current = next_dict
                    else:
                        m = dict()
                        self._set_list_element(indexes, parent, m)
                        current = m
                else:
                    next_map = dict()
                    new_list = list()
                    self._set_list_element(indexes, new_list, next_map)
                    current[element] = new_list
                    current = next_map
            else:
                if n == size:
                    current[p] = value
                    break
                if p in current and isinstance(current[p], dict):
                    current = current[p]
                else:
                    # a missing or non-dict node is replaced with a new dict
                    next_map = dict()
                    current[p] = next_map
                    current = next_map
            composite = composite + p + '.'

    def _set_list_element(self, indexes: list, source_data: list, value: any):
        """
        Store value at the (possibly multi-dimensional) index position, expanding
        the list(s) first so that every index exists.
        """
        current = self._expand_list(indexes, source_data)
        last = len(indexes) - 1
        for i, idx in enumerate(indexes):
            if i == last:
                current[idx] = value
            else:
                o = current[idx]
                if isinstance(o, list):
                    current = o

    @staticmethod
    def _expand_list(indexes: list, source_data: list):
        """
        Pad source_data (and nested lists) with None so that each index is addressable.

        Returns: source_data itself
        """
        current = source_data
        last = len(indexes) - 1
        for i, idx in enumerate(indexes):
            if idx >= len(current):
                # grow the list to length idx + 1
                current.extend([None] * (idx - len(current) + 1))
            if i == last:
                break
            o = current[idx]
            if isinstance(o, list):
                current = o
            else:
                # a non-list item at an intermediate index is replaced with a new list
                new_list = list()
                current[idx] = new_list
                current = new_list
        return source_data

    @staticmethod
    def _is_composite(path: str):
        """
        Returns: True if the path contains any of the characters '.', '/', '[' or ']'
        """
        return any(c in path for c in './[]')

    def _get_indexes(self, index_segment: str):
        """
        Convert an index segment such as '[0][1]' into a list of int.
        A part that is not all digits is reported as -1.
        """
        parts = self.util.multi_split(index_segment, '[]')
        return [int(i) if self.is_digits(i) else -1 for i in parts]

    @staticmethod
    def _get_list_element(indexes: list, source_data: list):
        """
        Walk nested lists using the given indexes.

        Returns: the addressed item, or None if the inputs are unusable, an index is
                 out of range or an intermediate item is not a list
        """
        if (not isinstance(indexes, list)) or (not isinstance(source_data, list)) \
                or len(indexes) == 0 or len(source_data) == 0:
            return None
        current = source_data
        size = len(indexes)
        for n, i in enumerate(indexes, start=1):
            if not isinstance(i, int):
                return None
            if i < 0 or i >= len(current):
                break
            o = current[i]
            if n == size:
                return o
            if isinstance(o, list):
                current = o
            else:
                break
        return None

    def get_element(self, composite_path: str, source_data: dict = None):
        """
        Retrieve the value at the given composite path.

        Args:
            composite_path: path such as 'a.b[0].c'
            source_data: dict to read; defaults to the wrapped dataset

        Returns: the value, or None if the path does not resolve

        Raises:
            ValueError: if the data to read is not a dict
        """
        if composite_path is None:
            return None
        data = self.dataset if source_data is None else source_data
        if not isinstance(data, dict):
            raise ValueError(
                f'Invalid input - Expect: dict, Actual: {type(data)}')
        if len(data) == 0:
            return None
        # special case for top level element that is using composite itself
        if composite_path in data:
            return data[composite_path]
        if not self._is_composite(composite_path):
            return None
        parts = self.util.multi_split(composite_path, './')
        current = dict(data)
        size = len(parts)
        for n, p in enumerate(parts, start=1):
            if self.is_list_element(p):
                start = p.index('[')
                # find() returns -1 when missing; index() would raise and make the guard dead code
                end = p.find(']', start)
                if end == -1:
                    break
                key = p[0:start]
                index = p[start + 1:end].strip()
                if len(index) == 0 or not self.is_digits(index):
                    break
                if key in current:
                    next_list = current[key]
                    if isinstance(next_list, list):
                        indexes = self._get_indexes(p[start:])
                        next_result = self._get_list_element(indexes, next_list)
                        if n == size:
                            return next_result
                        if isinstance(next_result, dict):
                            current = next_result
                            continue
            else:
                if p in current:
                    next_dict = current[p]
                    if n == size:
                        return next_dict
                    elif isinstance(next_dict, dict):
                        current = next_dict
                        continue
            # item not found
            break
        return None

    def normalize_map(self):
        """
        Rebuild the dataset once by flattening it and setting every flat key again
        with set_element.
        """
        if not self.normalized:
            # do only once
            self.normalized = True
            flat_map = self.get_flat_map(self.dataset)
            result = dict()
            for k in flat_map:
                self.set_element(k, flat_map[k], result)
            self.dataset = result

    def get_flat_map(self, data: dict = None):
        """
        Flatten a nested dict into a single-level dict keyed by composite path.

        Args:
            data: dict to flatten; defaults to the wrapped dataset

        Returns: a new dict such as {'a.b[0]': 1}

        Raises:
            ValueError: if the data to flatten is not a dict
        """
        if data is None:
            # same fallback as get_element and set_element
            data = self.dataset
        if not isinstance(data, dict):
            raise ValueError(
                f'Invalid input - Expect: dict, Actual: {type(data)}')
        result = dict()
        self._get_flat_map(None, data, result)
        return result

    def _get_flat_map(self, prefix: any, src: dict, target: dict):
        """
        Recursively copy the leaves of src into target using 'prefix.key' as key.
        """
        for k in src:
            v = src[k]
            # f-string so that a non-str nested key does not raise TypeError
            key = k if prefix is None else f'{prefix}.{k}'
            if isinstance(v, dict):
                self._get_flat_map(key, v, target)
            elif isinstance(v, list):
                self._get_flat_list(key, v, target)
            else:
                target[key] = v

    def _get_flat_list(self, prefix: str, src: list, target: dict):
        """
        Recursively copy the leaves of src into target using 'prefix[n]' as key.
        """
        for n, v in enumerate(src):
            key = f'{prefix}[{n}]'
            if isinstance(v, dict):
                self._get_flat_map(key, v, target)
            elif isinstance(v, list):
                self._get_flat_list(key, v, target)
            else:
                target[key] = v

    def validate_composite_path_syntax(self, path: str):
        """
        Check that brackets in every path segment are balanced and that indexes are digits.

        Raises:
            ValueError: if the path has no segments or a segment has malformed brackets
        """
        segments = self.util.multi_split(path, './')
        if len(segments) == 0:
            raise ValueError('Missing composite path')
        for s in segments:
            if '[' in s or ']' in s:
                if '[' not in s:
                    raise ValueError(
                        'Invalid composite path - missing start bracket')
                if not s.endswith(']'):
                    raise ValueError(
                        'Invalid composite path - missing end bracket')
                sep1 = s.index('[')
                sep2 = s.index(']')
                if sep2 < sep1:
                    raise ValueError(
                        'Invalid composite path - missing start bracket')
                # 'start' is True while scanning between '[' and the matching ']'
                start = False
                for c in s[sep1:]:
                    if c == '[':
                        if start:
                            raise ValueError(
                                'Invalid composite path - missing end bracket')
                        start = True
                    elif c == ']':
                        if not start:
                            raise ValueError(
                                'Invalid composite path - duplicated end bracket'
                            )
                        start = False
                    elif start:
                        if c < '0' or c > '9':
                            raise ValueError(
                                'Invalid composite path - indexes must be digits'
                            )
                    else:
                        raise ValueError(
                            'Invalid composite path - invalid indexes')
class WorkerQueue:
    """
    One worker instance of a registered route.

    It waits for events on its own worker queue, runs the user function in the
    executor and then reports itself as available again on the manager queue.
    """

    DISTRIBUTED_TRACING = 'distributed.tracing'

    def __init__(self, loop, executor, manager_queue, worker_queue, route,
                 user_function, instance, singleton, interceptor):
        """
        Args:
            loop: event loop on which the listener task is created
            executor: executor that runs the user function
            manager_queue: queue that receives this worker's instance number when it is ready
            worker_queue: queue from which this worker receives events (None means stop)
            route: route name served by this worker
            user_function: the function to execute for each event
            instance: instance number of this worker
            singleton: True if the user function takes (headers, body)
            interceptor: True if the user function takes the event envelope itself
        """
        self.platform = Platform()
        self.util = Utility()
        self.log = self.platform.log
        self._loop = loop
        self._executor = executor
        self.manager_queue = manager_queue
        self.worker_queue = worker_queue
        self.route = route
        # trace all routes except ws.outgoing
        normal_service = not (interceptor and self.util.is_inbox(route))
        self.tracing = normal_service and route != 'ws.outgoing'
        self.user_function = user_function
        self.instance = instance
        self.singleton = singleton
        self.interceptor = interceptor
        self._loop.create_task(self.listen())
        self.log.debug(f'{self.route} #{self.instance} started')

    async def listen(self):
        """
        Dispatch events from the worker queue to the executor until None is received.
        """
        while True:
            event = await self.worker_queue.get()
            self.worker_queue.task_done()
            if event is None:
                break
            # Execute the user function in parallel.
            # 0 selects the interceptor call style, -1 the singleton style.
            if self.interceptor:
                instance = 0
            elif self.singleton:
                instance = -1
            else:
                instance = self.instance
            self._loop.run_in_executor(self._executor, self.handle_event, event, instance)
        self.log.debug(f'{self.route} #{self.instance} stopped')

    def handle_event(self, event, instance):
        """
        Process one event and always acknowledge it to the manager queue.

        Args:
            event: the event as a dict
            instance: 0 for interceptor, -1 for singleton, otherwise the instance number
        """
        try:
            self._process_event(event, instance)
        finally:
            # Acknowledge even when response or trace delivery fails. Otherwise this
            # instance would never be handed another event.
            self._loop.call_soon_threadsafe(self._ack)

    def _process_event(self, event, instance):
        # Run the user function, send the response (if any) and report trace info.
        headers = dict() if 'headers' not in event else event['headers']
        body = None if 'body' not in event else event['body']
        result = None
        error_code = None
        error_msg = None
        # start distributed tracing if the event contains trace_id and trace_path
        if 'trace_id' in event and 'trace_path' in event:
            self.platform.start_tracing(self.route,
                                        trace_id=event['trace_id'],
                                        trace_path=event['trace_path'])
        else:
            self.platform.start_tracing(self.route)
        # execute user function
        begin = end = time.perf_counter()
        has_error = False
        try:
            if instance == 0:
                # service is an interceptor. e.g. inbox for RPC call
                self.user_function(EventEnvelope().from_map(event))
            elif instance == -1:
                # service is a singleton
                result = self.user_function(headers, body)
            else:
                # service with multiple instances
                result = self.user_function(headers, body, instance)
            end = time.perf_counter()
        except AppException as e:
            has_error = True
            error_code = e.get_status()
            error_msg = _normalize_exception('AppException', e)
        except ValueError as e:
            has_error = True
            error_code = 400
            error_msg = _normalize_exception('ValueError', e)
        except Exception as e:
            has_error = True
            error_code = 500
            error_msg = _normalize_exception(type(e).__name__, e)
        # execution time is rounded to 3 decimal points
        exec_time = round((end - begin) * 1000, 3)
        if error_code:
            if 'reply_to' in event:
                # set exception as result
                result = EventEnvelope().set_status(error_code).set_body(error_msg)
            else:
                self.log.warn(
                    f'Unhandled exception for {self.route} - code={error_code}, message={error_msg}'
                )
        #
        # interceptor should not send regular response because it will forward the request to another function.
        # However, if error_code exists, the system will send the exception response.
        # This allows interceptor to simply throw exception to indicate an error case.
        #
        if 'reply_to' in event and (error_code or not self.interceptor):
            reply_to = event['reply_to']
            # in case this is an RPC call from within
            if reply_to.startswith('->'):
                reply_to = reply_to[2:]
            response = EventEnvelope().set_to(reply_to)
            if not error_code:
                response.set_exec_time(exec_time)
            if 'extra' in event:
                response.set_extra(event['extra'])
            if has_error:
                # adding the 'exception' tag would throw exception to the caller
                response.add_tag('exception')
            if 'cid' in event:
                response.set_correlation_id(event['cid'])
            if 'trace_id' in event and 'trace_path' in event:
                response.set_trace(event['trace_id'], event['trace_path'])
            if isinstance(result, EventEnvelope):
                for h in result.get_headers():
                    response.set_header(h, result.get_header(h))
                response.set_body(result.get_body())
                response.set_status(result.get_status())
            else:
                response.set_body(result)
            try:
                self.platform.send_event(response.set_from(self.route))
            except Exception as e:
                self.log.warn(f'Event dropped because {e}')
        # send tracing info to distributed trace logger
        trace_info = self.platform.stop_tracing()
        if self.tracing and trace_info is not None and isinstance(trace_info, TraceInfo) \
                and trace_info.get_id() is not None and trace_info.get_path() is not None \
                and self.platform.has_route(self.DISTRIBUTED_TRACING):
            dt = EventEnvelope().set_to(self.DISTRIBUTED_TRACING).set_body(
                trace_info.get_annotations())
            dt.set_header('origin', self.platform.get_origin())
            dt.set_header('id', trace_info.get_id()).set_header('path', trace_info.get_path())
            dt.set_header('service', self.route).set_header('start', trace_info.get_start_time())
            if 'from' in event:
                dt.set_header('from', event['from'])
            if not error_code:
                dt.set_header('success', 'true')
                dt.set_header('exec_time', exec_time)
            else:
                dt.set_header('success', 'false')
                dt.set_header('status', error_code)
                dt.set_header('exception', error_msg)
            self.platform.send_event(dt)

    def _ack(self):
        # Tell the manager queue that this instance is ready for the next event.
        self.manager_queue.put_nowait(self.instance)
class Platform:
    """
    Event system platform instance

    The constructor starts an asyncio event loop in its own thread. Public methods are
    therefore called from other threads and hand work to the loop in a thread-safe way.
    """

    SERVICE_QUERY = 'system.service.query'

    def __init__(self, config_file: str = None):
        """
        Args:
            config_file: optional configuration file passed to ConfigReader

        Raises:
            RuntimeError: if the python major version is less than 3
        """
        if sys.version_info.major < 3:
            python_version = f'{sys.version_info.major}.{sys.version_info.minor}'
            raise RuntimeError(
                f'Requires python 3.6 and above. Actual: {python_version}')
        self.origin = 'py-' + (''.join(str(uuid.uuid4()).split('-')))
        self.config = ConfigReader(config_file)
        self.util = Utility()
        log_level = self.config.get_property('log.level')
        self._max_threads = self.config.get('max.threads')
        self.work_dir = self.config.get_property('work.directory')
        self.log = LoggingService(log_level).get_logger()
        self._loop = asyncio.new_event_loop()
        # DO NOT CHANGE 'distributed.trace.processor' which is an optional user defined trace aggregator
        my_tracer = DistributedTrace(self, 'distributed.trace.processor')
        my_nc = self.config.get_property('network.connector')
        self._cloud = NetworkConnector(self, my_tracer, self._loop, my_nc, self.origin)
        self._function_queues = dict()
        self._executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self._max_threads)
        self.log.info(f'Concurrent thread pool = {self._max_threads}')
        #
        # Before we figure out how to solve blocking file I/O, we will regulate event output rate.
        #
        my_test_dir = self.util.normalize_path(
            f'{self.work_dir}/safe_to_delete_when_apps_stop')
        if not os.path.exists(my_test_dir):
            os.makedirs(my_test_dir, exist_ok=True)
        self._throttle = Throttle(self.util.normalize_path(f'{my_test_dir}/' + self.origin),
                                  log=self.log)
        self._seq = 0
        self.log.info(
            f'Estimated performance is {format(self._throttle.get_tps(), ",d")} events per second'
        )
        self.running = True
        self.stopped = False
        # distributed trace sessions, keyed by thread ID
        self._traces = {}
        self.trace_aggregation = True

        # start event loop in a new thread to avoid blocking the main thread
        def main_event_loop():
            self.log.info('Event system started')
            self._loop.run_forever()
            self.log.info('Event system stopped')
            self._loop.close()

        threading.Thread(target=main_event_loop).start()

    def get_origin(self):
        """
        Get the origin ID of this application instance

        Returns: origin ID

        """
        return self.origin

    def get_logger(self):
        """
        Get Logger

        Returns: logger instance

        """
        return self.log

    def is_trace_supported(self):
        """
        Returns: True if trace aggregation is turned on

        """
        return self.trace_aggregation

    def set_trace_support(self, enabled: bool = True):
        """
        Turn trace aggregation on or off

        Args:
            enabled: True to turn on

        """
        self.trace_aggregation = enabled
        status = 'ON' if enabled else 'OFF'
        self.log.info(f'Trace aggregation is {status}')

    def get_trace_id(self) -> str:
        """
        Get trace ID for a transaction

        Returns: trace ID, or None when the current thread has no trace

        """
        trace_info = self.get_trace()
        return trace_info.get_id() if trace_info is not None else None

    def get_trace(self) -> TraceInfo:
        """
        Get trace info for a transaction

        Returns: trace info of the current thread, or None

        """
        return self._traces.get(threading.get_ident())

    def annotate_trace(self, key: str, value: str) -> None:
        """
        Annotate a trace at the current point of a transaction

        Args:
            key: any key
            value: any value

        Returns: None

        """
        trace_info = self.get_trace()
        if trace_info is not None and isinstance(trace_info, TraceInfo):
            trace_info.annotate(key, value)

    def start_tracing(self, route: str, trace_id: str = None, trace_path: str = None) -> None:
        """
        This method is reserved for system use. DO NOT call this from a user application.

        Args:
            route: route name
            trace_id: id
            trace_path: path such as Method and URI

        Returns: None

        """
        thread_id = threading.get_ident()
        self._traces[thread_id] = TraceInfo(route, trace_id, trace_path)

    def stop_tracing(self) -> TraceInfo:
        """
        This method is reserved for system use. DO NOT call this from a user application.

        Returns: trace info of the current thread, or None if tracing was not started

        """
        # a single pop both fetches and removes the entry
        return self._traces.pop(threading.get_ident(), None)

    def run_forever(self) -> None:
        """
        Tell the platform to run in the background until user presses CTL-C or the application is stopped by admin

        Returns: None

        Raises:
            ValueError: if this is not called from the main thread

        """

        def graceful_shutdown(signum, frame):
            self.running = False
            if frame is not None:
                self.log.warn('Control-C detected' if signal.SIGINT == signum else 'KILL signal detected')

        if threading.current_thread() is not threading.main_thread():
            raise ValueError(
                'Unable to register Control-C and KILL signals because this is not the main thread'
            )
        signal.signal(signal.SIGTERM, graceful_shutdown)
        signal.signal(signal.SIGINT, graceful_shutdown)
        # keep the main thread running so CTL-C can be detected
        self.log.info('To stop this application, press Control-C')
        while self.running:
            time.sleep(0.1)
        # exit forever loop and ask platform to end event loop
        self.stop()

    def register(self, route: str, user_function: any, total_instances: int = 1, is_private: bool = False) -> None:
        """
        Register a user function

        Args:
            route: ID of the function
            user_function: the lambda function given by you
            total_instances: 1 for singleton or more for concurrency
            is_private: true if internal function within this application instance

        Returns: None

        Raises:
            ValueError: if total_instances is invalid or the function signature is not supported

        """
        self.util.validate_service_name(route)
        if not isinstance(total_instances, int):
            raise ValueError(
                f'Expect total_instances to be int, actual: {type(total_instances)}'
            )
        if total_instances < 1:
            raise ValueError('total_instances must be at least 1')
        if total_instances > self._max_threads:
            raise ValueError(
                f'total_instances must not exceed max threads of {self._max_threads}'
            )
        function_type = self.util.get_function_type(user_function)
        if function_type == FunctionType.NOT_SUPPORTED:
            raise ValueError(
                'Function signature should be (headers: dict, body: any, instance: int) or ' +
                '(headers: dict, body: any) or (event: EventEnvelope)')
        if route in self._function_queues:
            self.log.warn(f'{route} will be reloaded')
            self.release(route)
        queue = asyncio.Queue()
        if function_type == FunctionType.INTERCEPTOR:
            self._function_queues[route] = {
                'queue': queue,
                'private': is_private,
                'instances': 1
            }
            ServiceQueue(self._loop, self._executor, queue, route, user_function, 0)
        elif function_type == FunctionType.REGULAR:
            self._function_queues[route] = {
                'queue': queue,
                'private': is_private,
                'instances': total_instances
            }
            ServiceQueue(self._loop, self._executor, queue, route, user_function, total_instances)
        else:
            # function_type == FunctionType.SINGLETON
            self._function_queues[route] = {
                'queue': queue,
                'private': is_private,
                'instances': 1
            }
            ServiceQueue(self._loop, self._executor, queue, route, user_function, -1)
        # advertise the new route to the network
        if self._cloud.is_ready() and not is_private:
            self._cloud.send_payload({'type': 'add', 'route': route})

    def cloud_ready(self):
        """
        Returns: the ready status reported by the network connector

        """
        return self._cloud.is_ready()

    def subscribe_life_cycle(self, callback: str):
        """
        Forward a life cycle subscription to the network connector

        Args:
            callback: passed to the network connector as-is

        """
        self._cloud.subscribe_life_cycle(callback)

    def unsubscribe_life_cycle(self, callback: str):
        """
        Forward a life cycle un-subscription to the network connector

        Args:
            callback: passed to the network connector as-is

        """
        self._cloud.unsubscribe_life_cycle(callback)

    def release(self, route: str) -> None:
        """
        Un-register a route

        Args:
            route: route name

        Returns: None

        Raises:
            ValueError: if route is not a str or is not registered

        """
        if not isinstance(route, str):
            raise ValueError(f'Expect route to be str, actual: {type(route)}')
        if route not in self._function_queues:
            raise ValueError(f'route {route} not found')
        # advertise the deleted route to the network.
        # Only public routes are advertised by register(), so only those are withdrawn.
        if self._cloud.is_ready() and not self.route_is_private(route):
            self._cloud.send_payload({'type': 'remove', 'route': route})
        self._remove_route(route)

    def has_route(self, route: str) -> bool:
        """
        Args:
            route: route name

        Returns: True if the route is registered in this application instance

        Raises:
            ValueError: if route is not a str

        """
        if not isinstance(route, str):
            raise ValueError(f'Expect route to be str, actual: {type(route)}')
        return route in self._function_queues

    def get_routes(self, options: str = 'all'):
        """
        Args:
            options: 'public', 'private' or 'all'

        Returns: list of matching route names; an empty list for any other option

        """
        if 'public' == options:
            return [r for r in self._function_queues if not self.route_is_private(r)]
        if 'private' == options:
            return [r for r in self._function_queues if self.route_is_private(r)]
        if 'all' == options:
            return list(self._function_queues.keys())
        return list()

    def route_is_private(self, route: str) -> bool:
        """
        Args:
            route: a registered route name

        Returns: the 'private' flag stored at registration, or False if absent

        """
        config = self._function_queues[route]
        if config and 'private' in config:
            return config['private']
        return False

    def route_instances(self, route: str) -> int:
        """
        Args:
            route: a registered route name

        Returns: the number of instances stored at registration, or 0 if absent

        """
        config = self._function_queues[route]
        if config and 'instances' in config:
            return config['instances']
        return 0

    def _restore_trace(self, event: EventEnvelope) -> None:
        # restore distributed tracing info from current thread
        trace_info = self.get_trace()
        if trace_info:
            if trace_info.get_route() is not None and event.get_from() is None:
                event.set_from(trace_info.get_route())
            if trace_info.get_id() is not None and trace_info.get_path() is not None:
                event.set_trace(trace_info.get_id(), trace_info.get_path())

    def _send_request(self, event: EventEnvelope, reply_to: str) -> None:
        # Deliver an RPC request locally when the route is registered here, otherwise via the cloud.
        route = event.get_to()
        event.set_reply_to(reply_to, me=True)
        if route in self._function_queues:
            self._loop.call_soon_threadsafe(self._send, route, event.to_map())
        elif self._cloud.is_connected():
            self._cloud.send_payload({
                'type': 'event',
                'event': event.to_map()
            })
        else:
            raise ValueError(f'route {route} not found')

    def parallel_request(self, events: list, timeout_seconds: float):
        """
        Send several RPC requests and wait for all responses

        Args:
            events: non-empty list of EventEnvelope
            timeout_seconds: maximum time to wait for each response, must be positive

        Returns: list of responses in the order they arrive

        Raises:
            ValueError: if an argument is invalid or a route is not found
            TimeoutError: if a response does not arrive in time

        """
        timeout_value = self.util.get_float(timeout_seconds)
        if timeout_value <= 0:
            raise ValueError(
                'timeout value in seconds must be positive number')
        if not isinstance(events, list):
            raise ValueError('events must be a list of EventEnvelope')
        if len(events) == 0:
            raise ValueError('event list is empty')
        if len(events) == 1:
            return [self.request(events[0], timeout_value)]
        for evt in events:
            if not isinstance(evt, EventEnvelope):
                raise ValueError('events must be a list of EventEnvelope')
        # emulate RPC
        inbox = Inbox(self)
        temp_route = inbox.get_route()
        inbox_queue = inbox.get_queue()
        try:
            for evt in events:
                self._restore_trace(evt)
                self._send_request(evt, temp_route)
            total_requests = len(events)
            result_list = list()
            while True:
                try:
                    # wait until all response events are delivered to the inbox
                    result_list.append(inbox_queue.get(True, timeout_value))
                    if len(result_list) == len(events):
                        return result_list
                except Empty:
                    raise TimeoutError(
                        f'Requests timeout for {round(timeout_value, 3)} seconds. '
                        f'Expect: {total_requests} responses, actual: {len(result_list)}'
                    )
        finally:
            inbox.close()

    def request(self, event: EventEnvelope, timeout_seconds: float):
        """
        Send an RPC request and wait for the response

        Args:
            event: the request
            timeout_seconds: maximum time to wait, must be positive

        Returns: the item delivered to the temporary inbox

        Raises:
            ValueError: if an argument is invalid or the route is not found
            TimeoutError: if the response does not arrive in time

        """
        timeout_value = self.util.get_float(timeout_seconds)
        if timeout_value <= 0:
            raise ValueError(
                'timeout value in seconds must be positive number')
        if not isinstance(event, EventEnvelope):
            raise ValueError('event object must be an EventEnvelope')
        self._restore_trace(event)
        # emulate RPC
        inbox = Inbox(self)
        temp_route = inbox.get_route()
        inbox_queue = inbox.get_queue()
        try:
            self._send_request(event, temp_route)
            # wait until response event is delivered to the inbox
            return inbox_queue.get(True, timeout_value)
        except Empty:
            raise TimeoutError(
                f'Route {event.get_to()} timeout for {round(timeout_value, 3)} seconds'
            )
        finally:
            inbox.close()

    def send_event(self, event: EventEnvelope, broadcast=False) -> None:
        """
        Send an event asynchronously

        Args:
            event: the event to send
            broadcast: True to turn on the broadcast flag of the event

        Returns: None

        Raises:
            ValueError: if event is not an EventEnvelope, route equals reply_to
                        or the route is not found

        """
        if not isinstance(event, EventEnvelope):
            raise ValueError('event object must be an EventEnvelope class')
        self._restore_trace(event)
        # regulate rate for best performance
        self._seq += 1
        self._throttle.regulate_rate(self._seq)
        route = event.get_to()
        if broadcast:
            event.set_broadcast(True)
        reply_to = event.get_reply_to()
        if reply_to:
            target = reply_to[2:] if reply_to.startswith('->') else reply_to
            if route == target:
                raise ValueError('route and reply_to must not be the same')
        if route in self._function_queues:
            if event.is_broadcast() and self._cloud.is_connected():
                self._cloud.send_payload({
                    'type': 'event',
                    'event': event.to_map()
                })
            else:
                self._loop.call_soon_threadsafe(self._send, route, event.to_map())
        elif self._cloud.is_connected():
            self._cloud.send_payload({
                'type': 'event',
                'event': event.to_map()
            })
        else:
            raise ValueError(f'route {route} not found')

    def send_event_later(self, event: EventEnvelope, delay_in_seconds: float) -> None:
        """
        Schedule an event to be sent after a delay

        Args:
            event: the event to send
            delay_in_seconds: delay before send_event is invoked on the event loop

        Returns: None

        """
        # loop.call_later is not thread-safe and this method is called from other threads,
        # so the timer is registered from within the event loop thread.
        self._loop.call_soon_threadsafe(self._loop.call_later, delay_in_seconds,
                                        self.send_event, event)

    def exists(self, routes: any):
        """
        Check if one or more routes exist

        Args:
            routes: a route name (str) or a list of route names

        Returns: True if all routes are registered locally; otherwise the body of the
                 service query response when the cloud is ready, or False

        """
        if isinstance(routes, str):
            single_route = routes
            if self.has_route(single_route):
                return True
            if self.cloud_ready():
                event = EventEnvelope()
                event.set_to(self.SERVICE_QUERY).set_header('type', 'find').set_header('route', single_route)
                result = self.request(event, 8.0)
                if isinstance(result, EventEnvelope):
                    if result.get_body() is not None:
                        return result.get_body()
        if isinstance(routes, list):
            if len(routes) > 0:
                remote_routes = [r for r in routes if not self.has_route(r)]
                if len(remote_routes) == 0:
                    return True
                if self.cloud_ready():
                    # tell service query to use the route list in body
                    event = EventEnvelope()
                    event.set_to(self.SERVICE_QUERY).set_header('type', 'find')
                    event.set_header('route', '*').set_body(routes)
                    result = self.request(event, 8.0)
                    if isinstance(result, EventEnvelope) and result.get_body() is not None:
                        return result.get_body()
        return False

    def _remove_route(self, route):
        # None tells the service queue of the route to stop
        if route in self._function_queues:
            self._send(route, None)
            self._function_queues.pop(route)

    def _send(self, route, event):
        # Put an event into the queue of a registered route; unknown routes are ignored
        if route in self._function_queues:
            config = self._function_queues[route]
            if 'queue' in config:
                config['queue'].put_nowait(event)

    def connect_to_cloud(self):
        """
        Start the network connection using the executor

        """
        self._loop.run_in_executor(self._executor, self._cloud.start_connection)

    def stop(self):
        """
        Stop the platform. Only the first call has any effect.

        """
        #
        # to allow user application to invoke the "stop" method from a registered service,
        # the system must start a new thread so that the service can finish first.
        #
        if not self.stopped:
            self.log.info('Bye')
            # guarantee this stop function to execute only once
            self.stopped = True
            # exit the run_forever loop if any
            self.running = False
            # in case the calling function has just send an event asynchronously
            time.sleep(1.0)
            threading.Thread(target=self._bye).start()

    def _bye(self):
        # Close the cloud connection, remove all routes and stop the event loop

        def stopping():
            # copy the route names first because _remove_route changes the dict
            for route in list(self.get_routes()):
                self._remove_route(route)
            self._loop.create_task(full_stop())

        async def full_stop():
            # give time for registered services to stop
            await asyncio.sleep(1.0)
            queue_dir = self.util.normalize_path(
                f'{self.work_dir}/queues/{self.get_origin()}')
            self.util.cleanup_dir(queue_dir)
            self._loop.stop()

        self._cloud.close_connection(
            1000,
            f'Application {self.get_origin()} is stopping',
            stop_engine=True)
        self._loop.call_soon_threadsafe(stopping)
class ServiceQueue:
    """Event dispatcher for one route.

    A ServiceQueue listens to the route's input queue and hands each data item
    (a dict) to the next worker that has signalled readiness (an int worker
    number). When no worker is available, items are spooled to an ElasticQueue
    on disk and replayed in order as workers become ready again.
    """

    def __init__(self, loop, executor, queue, route, user_function, total_instances):
        """Set up the queues and schedule the listener on the event loop.

        Args:
            loop: asyncio event loop that runs the listener
            executor: executor passed on to each WorkerQueue
            queue: input queue of the route (data items and worker ready signals)
            route: route name, also used as the disk queue id
            user_function: function passed on to each WorkerQueue
            total_instances: number of workers; 0 marks an interceptor and any
                value below 1 results in a single worker
        """
        self.platform = Platform()
        self.util = Utility()
        self.log = self.platform.log
        queue_dir = self.util.normalize_path(self.platform.work_dir + "/queues/" + self.platform.get_origin())
        self.disk_queue = ElasticQueue(queue_dir=queue_dir, queue_id=route)
        self._loop = loop
        self._executor = executor
        self.queue = queue
        self.route = route
        self.user_function = user_function
        self.ready_queue = self._new_queue(self._loop)
        self.worker_list = dict()
        self._peek_worker = None
        # start in buffering mode so that the first ready signal checks the disk queue
        self._buffering = True
        self._interceptor = total_instances == 0
        self._singleton = total_instances < 1
        self._loop.create_task(self.listen(total_instances))

    @staticmethod
    def _new_queue(loop):
        """Create an asyncio.Queue that works before and after Python 3.10.

        The "loop" parameter of asyncio.Queue was removed in Python 3.10 and
        passing it raises TypeError. Older versions still accept it.
        """
        try:
            return asyncio.Queue(loop=loop)
        except TypeError:
            return asyncio.Queue()

    def peek_next_worker(self):
        """Return the next ready worker number without consuming it, or None."""
        if self._peek_worker is None:
            self._peek_worker = self._fetch_next_worker()
        return self._peek_worker

    def get_next_worker(self):
        """Return and consume the next ready worker number, or None."""
        if self._peek_worker is not None:
            result = self._peek_worker
            self._peek_worker = None
            return result
        return self._fetch_next_worker()

    def _fetch_next_worker(self):
        """Take a worker number from the ready queue; None when nothing is ready."""
        try:
            worker_number = self.ready_queue.get_nowait()
            if worker_number:
                self.ready_queue.task_done()
                return worker_number
        except QueueEmpty:
            return None

    def send_to_worker(self, item):
        """Hand an item to the next ready worker.

        The item is dropped with an error log entry when no worker is ready or
        when the worker number is not in the worker list.
        """
        worker_number = self.get_next_worker()
        if worker_number:
            # use get() so that an unknown worker number is logged instead of raising KeyError
            wq = self.worker_list.get(worker_number)
            if wq is not None:
                wq.put_nowait(item)
            else:
                self.log.error("Event for " + self.route + " dropped because worker #" + str(worker_number) +
                               " not found")
        else:
            self.log.error("Event for " + self.route + " dropped because there are no workers available")

    async def listen(self, total_instances):
        """Create the workers and dispatch events until a None event arrives.

        Args:
            total_instances: requested number of workers (a singleton uses one)
        """
        # create concurrent workers
        total = 1 if self._singleton else total_instances
        for instance_number in range(1, total + 1):
            worker_queue = self._new_queue(self._loop)
            self.worker_list[instance_number] = worker_queue
            WorkerQueue(self._loop, self._executor, self.queue, worker_queue, self.route,
                        self.user_function, instance_number, self._singleton, self._interceptor)
            # populate the ready queue with an initial set of worker numbers
            await self.queue.put(instance_number)

        route_type = 'PRIVATE' if self.platform.route_is_private(self.route) else 'PUBLIC'
        started = route_type + ' ' + self.route + " with " + str(total) + " instance" + \
            ('s' if total > 1 else '') + " started"
        # minimize logging for temporary inbox that starts with the "r" prefix
        quiet = self._interceptor and self.util.is_inbox(self.route)
        if quiet:
            self.log.debug(started)
        else:
            self.log.info(started)

        # listen for incoming events
        while True:
            event = await self.queue.get()
            self.queue.task_done()
            if event is None:
                break
            if isinstance(event, int):
                # ready signal from a worker
                await self.ready_queue.put(event)
                if self._buffering:
                    buffered = self.disk_queue.read()
                    if buffered:
                        self.send_to_worker(buffered)
                    else:
                        # nothing buffered in disk queue
                        self._buffering = False
                        self.disk_queue.close()
            if isinstance(event, dict):
                # it is a data item
                if self._buffering:
                    # Once buffering is started, continue to spool items to disk to guarantee items in order
                    await self.disk_queue.write(event)
                elif self.peek_next_worker():
                    # Nothing buffered in disk queue. Find a worker to receive the item.
                    self.send_to_worker(event)
                else:
                    # start buffered because there are no available workers
                    self._buffering = True
                    await self.disk_queue.write(event)

        # tell workers to stop
        for wq in self.worker_list.values():
            wq.put_nowait(None)
        # destroy disk queue
        self.disk_queue.destroy()
        # minimize logging for temporary inbox that starts with the "r" prefix
        if quiet:
            self.log.debug(self.route + " stopped")
        else:
            self.log.info(self.route + " stopped")
class NetworkConnector:
    """Websocket client that connects this application to the Mercury language connector.

    Incoming and outgoing websocket traffic is handled by private routes that
    are registered with the platform in start_connection. Event payloads larger
    than max_ws_payload are split into blocks and reassembled by the receiver.
    """

    INCOMING_WS_PATH = "ws.incoming"
    OUTGOING_WS_PATH = "ws.outgoing"
    SYSTEM_ALERT = "system.alerts"
    SERVER_CONFIG = "system.config"
    MAX_PAYLOAD = "max.payload"
    DISTRIBUTED_TRACING = "distributed.tracing"

    def __init__(self, platform, distributed_trace, loop, url_list, origin):
        """
        :param platform: platform object used to register routes, send events and log
        :param distributed_trace: object whose "logger" function serves the distributed tracing route
        :param loop: asyncio event loop that runs the connection
        :param url_list: websocket URL(s), split with the separators ", "
        :param origin: unique ID of this application, appended to the URL path
        """
        self.platform = platform
        self._distributed_trace = distributed_trace
        self._loop = loop
        self.log = platform.log
        self.normal = True
        self.started = False
        self.ready = False
        self.ws = None
        self.close_code = 1000
        self.close_message = 'OK'
        self.last_active = time.time()
        # default block size; replaced by the "max.payload" value of the server configuration
        self.max_ws_payload = 32768
        self.util = Utility()
        self.urls = self.util.multi_split(url_list, ', ')
        self.next_url = 1
        self.origin = origin
        self.cache = SimpleCache(loop, self.log, timeout_seconds=30)
        self.api_key = self._get_api_key()

    def _get_api_key(self):
        """
        Obtain the API key from the environment or from a file in /tmp/config.
        A new key is generated and saved when neither exists.

        :return: API key
        """
        config = AppConfig()
        if config.API_KEY_LOCATION in os.environ:
            self.log.info('Found API key in environment variable ' + config.API_KEY_LOCATION)
            return os.environ[config.API_KEY_LOCATION]
        # check temp file system because API key not in environment
        temp_dir = '/tmp/config'
        # exist_ok avoids a failure when another process creates the folder at the same time
        os.makedirs(temp_dir, exist_ok=True)
        api_key_file = temp_dir + "/lang-api-key.txt"
        if os.path.exists(api_key_file):
            with open(api_key_file) as f:
                self.log.info('Reading API key from ' + api_key_file)
                return f.read().strip()
        else:
            with open(api_key_file, 'w') as f:
                self.log.info('Generating new API key in ' + api_key_file +
                              ' because it is not found in environment variable ' + config.API_KEY_LOCATION)
                # uuid4().hex is the UUID without hyphens
                value = uuid.uuid4().hex
                f.write(value + '\n')
                return value

    def _get_next_url(self):
        """Return the URL selected by next_url."""
        # index starts from 1
        return self.urls[self.next_url - 1]

    def _skip_url(self):
        """Advance to the next URL, wrapping around at the end of the list."""
        self.next_url += 1
        if self.next_url > len(self.urls):
            self.next_url = 1

    def send_keep_alive(self):
        """Send a timestamped keep-alive text message through the outgoing route."""
        message = "Keep-Alive " + self.util.get_iso_8601(time.time(), show_ms=True)
        envelope = EventEnvelope()
        envelope.set_to(self.OUTGOING_WS_PATH).set_header('type', 'text').set_body(message)
        self.platform.send_event(envelope)

    @staticmethod
    def _segment_count(payload_len: int, block_size: int) -> int:
        """Return the number of blocks needed to carry payload_len bytes (ceiling division)."""
        return -(-payload_len // block_size)

    def send_payload(self, data: dict):
        """
        Serialize a payload with msgpack and send it through the outgoing route.
        An event larger than max_ws_payload is sent as a series of blocks.

        :param data: payload map with a 'type' key
        :return: None
        """
        payload = msgpack.packb(data, use_bin_type=True)
        payload_len = len(payload)
        oversized_event = data.get('type') == 'event' and 'event' in data and payload_len > self.max_ws_payload
        if not oversized_event:
            envelope = EventEnvelope()
            envelope.set_to(self.OUTGOING_WS_PATH).set_header('type', 'bytes').set_body(payload)
            self.platform.send_event(envelope)
            return
        evt = data['event']
        if 'id' not in evt:
            # blocks are correlated by event id, so the event cannot be segmented without one
            self.log.error('Event dropped because it exceeds max payload and has no id for segmentation')
            return
        msg_id = evt['id']
        # exact number of blocks - a payload that is a multiple of the block size has no trailing empty block
        total = self._segment_count(payload_len, self.max_ws_payload)
        buffer = io.BytesIO(payload)
        for count in range(1, total + 1):
            block = EventEnvelope()
            block.set_header('id', msg_id)
            block.set_header('count', count)
            block.set_header('total', total)
            block.set_body(buffer.read(self.max_ws_payload))
            block_payload = msgpack.packb({'type': 'block', 'block': block.to_map()}, use_bin_type=True)
            envelope = EventEnvelope()
            envelope.set_to(self.OUTGOING_WS_PATH).set_header('type', 'bytes').set_body(block_payload)
            self.platform.send_event(envelope)

    def _get_server_config(self, headers: dict, body: any):
        """
        Handle the server configuration and the "ready" acknowledgement.

        :param headers: 'type' is system.config or ready
        :param body: configuration map when type is system.config
        :return: None
        """
        if 'type' in headers:
            # at this point, login is successful
            if headers['type'] == 'system.config' and isinstance(body, dict):
                if self.MAX_PAYLOAD in body:
                    self.max_ws_payload = body[self.MAX_PAYLOAD]
                    self.log.info(f'Automatic segmentation when event payload exceeds {self.max_ws_payload:,d}')
                # advertise public routes to language connector
                for r in self.platform.get_routes('public'):
                    self.send_payload({'type': 'add', 'route': r})
                # tell server that I am ready
                self.send_payload({'type': 'ready'})
            # server acknowledges my ready signal
            if headers['type'] == 'ready':
                self.ready = True
                self.log.info('Ready')
                # redo subscription if any
                if self.platform.has_route('pub.sub.sync'):
                    event = EventEnvelope()
                    event.set_to('pub.sub.sync').set_header('type', 'subscription_sync')
                    self.platform.send_event(event)

    def _alert(self, headers: dict, body: any):
        """
        Log a system alert. Status '200' is logged as info and anything else as a warning.

        :param headers: alert headers with a 'status' value
        :param body: alert message
        :return: None
        """
        if 'status' in headers:
            if headers['status'] == '200':
                self.log.info(str(body))
            else:
                # str() keeps logging working when the status is not a string
                self.log.warn(str(body) + ", status=" + str(headers['status']))

    def _incoming(self, headers: dict, body: any):
        """
        This function handles incoming messages from the websocket connection with the Mercury language connector.
        It must be invoked using events. It should not be called directly to guarantee proper event sequencing.

        :param headers: type is open, close, text or bytes
        :param body: string or bytes
        :return: None
        """
        if self.ws and 'type' in headers:
            if headers['type'] == 'open':
                self.ready = False
                self.log.info("Login to language connector")
                self.send_payload({'type': 'login', 'api_key': self.api_key})
            if headers['type'] == 'close':
                self.ready = False
                self.log.info("Closed")
            if headers['type'] == 'text':
                self.log.debug(body)
            if headers['type'] == 'bytes':
                event = msgpack.unpackb(body, raw=False)
                if 'type' in event:
                    event_type = event['type']
                    if event_type == 'block' and 'block' in event:
                        envelope = EventEnvelope()
                        inner_event = envelope.from_map(event['block'])
                        inner_headers = inner_event.get_headers()
                        if 'id' in inner_headers and 'count' in inner_headers and 'total' in inner_headers:
                            msg_id = inner_headers['id']
                            msg_count = inner_headers['count']
                            msg_total = inner_headers['total']
                            data = inner_event.get_body()
                            if isinstance(data, bytes):
                                # blocks of the same message are accumulated in a cached buffer
                                buffer = self.cache.get(msg_id)
                                if buffer is None:
                                    buffer = io.BytesIO()
                                buffer.write(data)
                                self.cache.put(msg_id, buffer)
                                if msg_count == msg_total:
                                    buffer.seek(0)
                                    # reconstruct event for processing
                                    event = msgpack.unpackb(buffer.read(), raw=False)
                                    # the restored payload is handled by the 'event' branch below
                                    event_type = 'event'
                                    self.cache.remove(msg_id)
                    if event_type == 'event' and 'event' in event:
                        envelope = EventEnvelope()
                        inner_event = envelope.from_map(event['event'])
                        if self.platform.has_route(inner_event.get_to()):
                            self.platform.send_event(inner_event)
                        else:
                            self.log.warn('Incoming event dropped because ' + str(inner_event.get_to()) +
                                          ' not found')

    def _outgoing(self, headers: dict, body: any):
        """
        This function handles sending outgoing messages to the websocket connection with the Mercury language connector.
        It must be invoked using events. It should not be called directly to guarantee proper event sequencing.

        :param headers: type is close, text or bytes
        :param body: string or bytes
        :return: None
        """
        if 'type' in headers:
            if headers['type'] == 'close':
                code = 1000 if 'code' not in headers else headers['code']
                reason = 'OK' if 'reason' not in headers else headers['reason']
                self.close_connection(code, reason)
            if headers['type'] == 'text':
                self._send_text(body)
            if headers['type'] == 'bytes':
                self._send_bytes(body)

    def _send_text(self, body: str):
        """Schedule a text frame on the event loop when the websocket is connected."""

        def send(data: str):
            async def async_send(d: str):
                await self.ws.send_str(d)

            self._loop.create_task(async_send(data))

        if self.is_connected():
            self._loop.call_soon_threadsafe(send, body)

    def _send_bytes(self, body: bytes):
        """Schedule a binary frame on the event loop when the websocket is connected."""

        def send(data: bytes):
            async def async_send(d: bytes):
                await self.ws.send_bytes(d)

            self._loop.create_task(async_send(data))

        if self.is_connected():
            self._loop.call_soon_threadsafe(send, body)

    def is_connected(self):
        """Return True when the connector has started and holds a websocket."""
        return bool(self.started and self.ws)

    def is_ready(self):
        """Return True when connected and the server has acknowledged the ready signal."""
        return self.is_connected() and self.ready

    def start_connection(self):
        """
        Register the connector's private routes and start the connection worker.
        Only the first invocation has any effect.

        :return: None
        """

        async def worker():
            while self.normal:
                await self._loop.create_task(self.connection_handler(self._get_next_url()))
                # check again because the handler may have run for a while
                if self.normal:
                    # retry connection in 5 seconds
                    for _ in range(10):
                        await asyncio.sleep(0.5)
                        if not self.normal:
                            break
                else:
                    break

        if not self.started:
            self.started = True
            self.platform.register(self.DISTRIBUTED_TRACING, self._distributed_trace.logger, 1, is_private=True)
            self.platform.register(self.INCOMING_WS_PATH, self._incoming, 1, is_private=True)
            self.platform.register(self.OUTGOING_WS_PATH, self._outgoing, 1, is_private=True)
            self.platform.register(self.SYSTEM_ALERT, self._alert, 1, is_private=True)
            self.platform.register(self.SERVER_CONFIG, self._get_server_config, 1, is_private=True)
            # this method may run outside the event loop thread and create_task is not thread-safe,
            # so the task is created on the loop thread like close_connection does
            self._loop.call_soon_threadsafe(self._loop.create_task, worker())

    def close_connection(self, code, reason, stop_engine=False):
        """
        Ask the connection handler to close the websocket.

        :param code: close status code to be used by the handler
        :param reason: close message to be used by the handler
        :param stop_engine: when True, stop reconnection attempts and the block cache
        :return: None
        """

        async def async_close(rc, msg):
            if self.is_connected():
                # this only send a "closing signal" to the handler - it does not actually close the connection.
                self.close_code = rc
                self.close_message = msg
                await self.ws.close()

        def closing(rc, msg):
            self._loop.create_task(async_close(rc, msg))

        if stop_engine:
            self.normal = False
            self.cache.stop()
        self._loop.call_soon_threadsafe(closing, code, reason)

    async def connection_handler(self, url):
        """
        Connect to the given URL and relay websocket messages to the incoming route until the
        connection is closed or the connector is stopped. An unreachable URL is skipped.

        :param url: websocket URL of the language connector (the origin is appended)
        :return: None
        """
        try:
            async with aiohttp.ClientSession(loop=self._loop, timeout=aiohttp.ClientTimeout(total=10)) as session:
                full_path = url + '/' + self.origin
                self.ws = await session.ws_connect(full_path)
                envelope = EventEnvelope()
                envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'open')
                self.platform.send_event(envelope)
                self.log.info("Connected to " + full_path)
                closed = False
                self.last_active = time.time()

                while self.normal:
                    try:
                        msg = await self.ws.receive(timeout=1)
                    except asyncio.TimeoutError:
                        if not self.normal:
                            break
                        else:
                            # idle - send keep-alive
                            now = time.time()
                            if self.is_connected() and now - self.last_active > 30:
                                self.last_active = now
                                self.send_keep_alive()
                            continue

                    # receive incoming event
                    self.last_active = time.time()
                    if msg.type == aiohttp.WSMsgType.TEXT:
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'text').set_body(msg.data)
                            self.platform.send_event(envelope)
                        else:
                            break
                    elif msg.type == aiohttp.WSMsgType.BINARY:
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'bytes').set_body(msg.data)
                            self.platform.send_event(envelope)
                        else:
                            break
                    else:
                        if msg.type == aiohttp.WSMsgType.ERROR:
                            self.log.error("Unexpected connection error")
                        if msg.type == aiohttp.WSMsgType.CLOSING:
                            # closing signal received - close the connection now
                            self.log.info("Disconnected, status=" + str(self.close_code) +
                                          ", message=" + self.close_message)
                            await self.ws.close(code=self.close_code, message=bytes(self.close_message, 'utf-8'))
                            if self.platform.has_route(self.INCOMING_WS_PATH):
                                envelope = EventEnvelope()
                                envelope.set_to(self.INCOMING_WS_PATH).set_body(self.close_message)\
                                    .set_header('type', 'close').set_header('status', self.close_code)
                                self.platform.send_event(envelope)
                            closed = True
                        if msg.type == aiohttp.WSMsgType.CLOSE or msg.type == aiohttp.WSMsgType.CLOSED:
                            self.close_code = 1001 if msg.data is None else msg.data
                            self.close_message = 'OK' if msg.extra is None else str(msg.extra)
                            self.log.info("Disconnected, status=" + str(self.close_code) +
                                          ", message=" + self.close_message)
                            if self.platform.has_route(self.INCOMING_WS_PATH):
                                envelope = EventEnvelope()
                                envelope.set_to(self.INCOMING_WS_PATH).set_body(self.close_message)\
                                    .set_header('type', 'close').set_header('status', self.close_code)
                                self.platform.send_event(envelope)
                            closed = True
                        # any message other than text or binary ends the receive loop
                        break

                if not closed:
                    # the loop ended without a close handshake, so close the connection locally
                    await self.ws.close(code=1000, message=b'OK')
                    self.ws = None
                    if self.platform.has_route(self.INCOMING_WS_PATH):
                        envelope = EventEnvelope()
                        envelope.set_to(self.INCOMING_WS_PATH).set_body('OK')\
                            .set_header('type', 'close').set_header('status', 1000)
                        self.platform.send_event(envelope)

        except aiohttp.ClientConnectorError:
            self._skip_url()
            self.log.warn("Unreachable " + url)
class NetworkConnector:
    """Websocket client that connects this application to the Mercury language connector.

    Incoming and outgoing websocket traffic is handled by private routes that
    are registered with the platform in start_connection. Event payloads larger
    than max_ws_payload are split into blocks and reassembled by the receiver.
    Connection life cycle events are relayed to subscribed routes.
    """

    INCOMING_WS_PATH = "ws.incoming"
    OUTGOING_WS_PATH = "ws.outgoing"
    SYSTEM_ALERT = "system.alerts"
    SERVER_CONFIG = "system.config"
    MAX_PAYLOAD = "max.payload"
    TRACE_AGGREGATION = "trace.aggregation"
    DISTRIBUTED_TRACING = "distributed.tracing"
    CONNECTOR_LIFECYCLE = 'cloud.connector.lifecycle'
    # payload segmentation reserved tags (from v1.13.0 onwards)
    MSG_ID = '_id_'
    COUNT = '_blk_'
    TOTAL = '_max_'

    def __init__(self, platform, distributed_trace, loop, url_list, origin):
        """Initialize the connector state and obtain the API key.

        Args:
            platform: platform object used to register routes, send events, log and read config
            distributed_trace: object whose "logger" function serves the distributed tracing route
            loop: asyncio event loop that runs the connection
            url_list: websocket URL(s), split with the separators ", "
            origin: unique ID of this application, appended to the URL path
        """
        self.platform = platform
        # route names that receive life cycle events
        self._subscription = list()
        self._distributed_trace = distributed_trace
        self._loop = loop
        self.log = platform.log
        self.config = platform.config
        self.normal = True
        self.started = False
        self.ready = False
        self.ws = None
        self.close_code = 1000
        self.close_message = 'OK'
        self.last_active = time.time()
        # default block size; replaced by the "max.payload" value of the server configuration
        self.max_ws_payload = 32768
        self.util = Utility()
        self.urls = self.util.multi_split(url_list, ', ')
        self.next_url = 1
        self.origin = origin
        self.cache = SimpleCache(loop, self.log, timeout_seconds=30)
        self.api_key = self._get_api_key()

    def _get_api_key(self):
        """Obtain the API key from the environment or from a file in /tmp/config.

        The environment variable name comes from the 'language.pack.key'
        property. A new key is generated and saved when neither source exists.

        Returns:
            API key
        """
        api_key_env_var = self.config.get_property('language.pack.key', default_value='LANGUAGE_PACK_KEY')
        if api_key_env_var in os.environ:
            self.log.info(f'Found API key in environment variable {api_key_env_var}')
            return os.environ[api_key_env_var]
        # check temp file system because API key not in environment
        temp_dir = '/tmp/config'
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir, exist_ok=True)
            self.log.info(f'Folder {temp_dir} created')
        api_key_file = temp_dir + "/lang-api-key.txt"
        if os.path.exists(api_key_file):
            with open(api_key_file) as f:
                self.log.info(f'Reading language API key from {api_key_file}')
                return f.read().strip()
        else:
            with open(api_key_file, 'w') as f:
                self.log.info(f'Generating new language API key in {api_key_file}')
                # uuid4().hex is the UUID without hyphens
                value = uuid.uuid4().hex
                f.write(value + '\n')
                return value

    def _get_next_url(self):
        """Return the URL selected by next_url."""
        # index starts from 1
        return self.urls[self.next_url - 1]

    def _skip_url(self):
        """Advance to the next URL, wrapping around at the end of the list."""
        self.next_url += 1
        if self.next_url > len(self.urls):
            self.next_url = 1

    def send_keep_alive(self):
        """Send a timestamped keep-alive text message through the outgoing route."""
        message = "Keep-Alive " + self.util.get_iso_8601(time.time(), show_ms=True)
        envelope = EventEnvelope()
        envelope.set_to(self.OUTGOING_WS_PATH).set_header('type', 'text').set_body(message)
        self.platform.send_event(envelope)

    @staticmethod
    def _segment_count(payload_len: int, block_size: int) -> int:
        """Return the number of blocks needed to carry payload_len bytes (ceiling division)."""
        return -(-payload_len // block_size)

    def _queue_outgoing_bytes(self, payload: bytes):
        """Send a binary payload to the outgoing websocket route."""
        envelope = EventEnvelope()
        envelope.set_to(self.OUTGOING_WS_PATH).set_header('type', 'bytes').set_body(payload)
        self.platform.send_event(envelope)

    def send_payload(self, data: dict):
        """Serialize a payload with msgpack and send it through the outgoing route.

        For an event, the event map is packed on its own and relayed as bytes.
        When the packed event is larger than max_ws_payload, it is sent as a
        series of blocks tagged with the message id, block number and total.

        Args:
            data: payload map with a 'type' key

        Raises:
            KeyError: if an oversized event has no 'id'
        """
        if 'type' in data and data['type'] == 'event' and 'event' in data:
            evt = data['event']
            payload = msgpack.packb(evt, use_bin_type=True)
            payload_len = len(payload)
            if payload_len > self.max_ws_payload:
                msg_id = evt['id']
                # exact number of blocks - a payload that is a multiple of the block size
                # has no trailing empty block
                total = self._segment_count(payload_len, self.max_ws_payload)
                buffer = io.BytesIO(payload)
                for count in range(1, total + 1):
                    block = EventEnvelope()
                    block.set_header(self.MSG_ID, msg_id)
                    block.set_header(self.COUNT, count)
                    block.set_header(self.TOTAL, total)
                    block.set_body(buffer.read(self.max_ws_payload))
                    block_map = {'type': 'block', 'block': block.to_map()}
                    self._queue_outgoing_bytes(msgpack.packb(block_map, use_bin_type=True))
            else:
                relay_map = {'type': 'event', 'event': payload}
                self._queue_outgoing_bytes(msgpack.packb(relay_map, use_bin_type=True))
        else:
            self._queue_outgoing_bytes(msgpack.packb(data, use_bin_type=True))

    def _get_server_config(self, headers: dict, body: any):
        """Handle the server configuration and the "ready" acknowledgement.

        Args:
            headers: 'type' is system.config or ready
            body: configuration map when type is system.config

        Returns: None

        """
        if 'type' in headers:
            # at this point, login is successful
            if headers['type'] == 'system.config' and isinstance(body, dict):
                if self.MAX_PAYLOAD in body:
                    self.max_ws_payload = body[self.MAX_PAYLOAD]
                    self.log.info('Authenticated')
                    self._send_life_cycle_event({'type': 'authenticated'})
                    self.log.info(f'Automatic payload segmentation at {self.max_ws_payload:,d} bytes')
                if self.TRACE_AGGREGATION in body:
                    self.platform.set_trace_support(body[self.TRACE_AGGREGATION])
                # advertise public routes to language connector
                for r in self.platform.get_routes('public'):
                    self.send_payload({'type': 'add', 'route': r})
                # tell server that I am ready
                self.send_payload({'type': 'ready'})
            # server acknowledges my ready signal
            if headers['type'] == 'ready':
                self.ready = True
                self.log.info('Ready')
                self._send_life_cycle_event({'type': 'ready'})

    def subscribe_life_cycle(self, callback: str):
        """Add a route to the list of life cycle event subscribers.

        Args:
            callback: route name that will receive life cycle events

        Raises:
            ValueError: if callback is not a str
        """
        if not isinstance(callback, str):
            raise ValueError('callback route name must be str')
        if callback not in self._subscription:
            self._subscription.append(callback)

    def unsubscribe_life_cycle(self, callback: str):
        """Remove a route from the list of life cycle event subscribers.

        Args:
            callback: route name to be removed

        Raises:
            ValueError: if callback is not a str
        """
        if not isinstance(callback, str):
            raise ValueError('callback route name must be str')
        if callback in self._subscription:
            self._subscription.remove(callback)

    def _send_life_cycle_event(self, headers: dict):
        """Send the given headers as an event to the life cycle route."""
        event = EventEnvelope()
        event.set_to(self.CONNECTOR_LIFECYCLE).set_headers(headers)
        self.platform.send_event(event)

    def _life_cycle(self, headers: dict, body: any):
        """Relay a life cycle event to every subscriber.

        A subscriber that cannot be reached is logged and skipped.

        Args:
            headers: life cycle event headers
            body: life cycle event body

        Returns: None

        """
        for subscriber in self._subscription:
            try:
                event = EventEnvelope()
                event.set_to(subscriber).set_headers(headers).set_body(body)
                self.platform.send_event(event)
            except ValueError as e:
                self.log.warn(f'Unable to relay life cycle event {headers} to {subscriber} - {e}')

    def _alert(self, headers: dict, body: any):
        """Log a system alert. Status '200' is logged as info and anything else as a warning.

        Args:
            headers: alert headers with a 'status' value
            body: alert message

        Returns: None

        """
        if 'status' in headers:
            if headers['status'] == '200':
                self.log.info(str(body))
            else:
                # str() keeps logging working when the status is not a string
                self.log.warn(str(body) + ", status=" + str(headers['status']))

    def _deliver(self, unpacked):
        """Rebuild an event from its map and send it to its target route if it exists."""
        restored = EventEnvelope().from_map(unpacked)
        target = restored.get_to()
        if self.platform.has_route(target):
            self.platform.send_event(restored)
        else:
            self.log.warn(f'Incoming event dropped because {target} not found')

    def _incoming(self, headers: dict, body: any):
        """
        This function handles incoming messages from the websocket connection with the Mercury language connector.
        It must be invoked using events. It should not be called directly to guarantee proper event sequencing.

        Args:
            headers: type is open, close, text or bytes
            body: string or bytes

        Returns: None

        """
        if self.ws and 'type' in headers:
            if headers['type'] == 'open':
                self.ready = False
                self.log.info("Login to language connector")
                self.send_payload({'type': 'login', 'api_key': self.api_key})
                self._send_life_cycle_event(headers)
            if headers['type'] == 'close':
                self.ready = False
                self.log.info("Closed")
                self._send_life_cycle_event(headers)
            if headers['type'] == 'text':
                self.log.debug(body)
            if headers['type'] == 'bytes':
                event = msgpack.unpackb(body, raw=False)
                if 'type' in event:
                    event_type = event['type']
                    if event_type == 'block' and 'block' in event:
                        envelope = EventEnvelope()
                        inner_event = envelope.from_map(event['block'])
                        inner_headers = inner_event.get_headers()
                        if self.MSG_ID in inner_headers and self.COUNT in inner_headers \
                                and self.TOTAL in inner_headers:
                            msg_id = inner_headers[self.MSG_ID]
                            msg_count = inner_headers[self.COUNT]
                            msg_total = inner_headers[self.TOTAL]
                            data = inner_event.get_body()
                            if isinstance(data, bytes):
                                # blocks of the same message are accumulated in a cached buffer
                                buffer = self.cache.get(msg_id)
                                if buffer is None:
                                    buffer = io.BytesIO()
                                buffer.write(data)
                                if msg_count == msg_total:
                                    self.cache.remove(msg_id)
                                    # reconstruct event for processing
                                    self._deliver(msgpack.unpackb(buffer.getvalue(), raw=False))
                                else:
                                    self.cache.put(msg_id, buffer)
                    if event_type == 'event' and 'event' in event:
                        # the event map is packed separately inside the relay map
                        self._deliver(msgpack.unpackb(event['event'], raw=False))

    def _outgoing(self, headers: dict, body: any):
        """
        This function handles sending outgoing messages to the websocket connection with the Mercury language connector.
        It must be invoked using events. It should not be called directly to guarantee proper event sequencing.

        Args:
            headers: type is close, text or bytes
            body: string or bytes

        Returns: None

        """
        if 'type' in headers:
            if headers['type'] == 'close':
                code = 1000 if 'code' not in headers else headers['code']
                reason = 'OK' if 'reason' not in headers else headers['reason']
                self.close_connection(code, reason)
            if headers['type'] == 'text':
                self._send_text(body)
            if headers['type'] == 'bytes':
                self._send_bytes(body)

    def _send_text(self, body: str):
        """Schedule a text frame on the event loop when the websocket is connected."""

        def send(data: str):
            async def async_send(d: str):
                await self.ws.send_str(d)

            self._loop.create_task(async_send(data))

        if self.is_connected():
            self._loop.call_soon_threadsafe(send, body)

    def _send_bytes(self, body: bytes):
        """Schedule a binary frame on the event loop when the websocket is connected."""

        def send(data: bytes):
            async def async_send(d: bytes):
                await self.ws.send_bytes(d)

            self._loop.create_task(async_send(data))

        if self.is_connected():
            self._loop.call_soon_threadsafe(send, body)

    def is_connected(self):
        """Return True when the connector has started and holds a websocket."""
        return bool(self.started and self.ws)

    def is_ready(self):
        """Return True when connected and the server has acknowledged the ready signal."""
        return self.is_connected() and self.ready

    def start_connection(self):
        """Register the connector's private routes and start the connection worker.

        Only the first invocation has any effect.
        """

        async def worker():
            while self.normal:
                await self._loop.create_task(self.connection_handler(self._get_next_url()))
                # check again because the handler may have run for a while
                if self.normal:
                    # retry connection in 5 seconds
                    for _ in range(10):
                        await asyncio.sleep(0.5)
                        if not self.normal:
                            break
                else:
                    break

        if not self.started:
            self.started = True
            self.platform.register(self.DISTRIBUTED_TRACING, self._distributed_trace.logger, 1, is_private=True)
            self.platform.register(self.INCOMING_WS_PATH, self._incoming, 1, is_private=True)
            self.platform.register(self.OUTGOING_WS_PATH, self._outgoing, 1, is_private=True)
            self.platform.register(self.SYSTEM_ALERT, self._alert, 1, is_private=True)
            self.platform.register(self.SERVER_CONFIG, self._get_server_config, 1, is_private=True)
            self.platform.register(self.CONNECTOR_LIFECYCLE, self._life_cycle, 1, is_private=True)
            # this method may run outside the event loop thread and create_task is not thread-safe,
            # so the task is created on the loop thread like close_connection does
            self._loop.call_soon_threadsafe(self._loop.create_task, worker())

    def close_connection(self, code, reason, stop_engine=False):
        """Ask the connection handler to close the websocket.

        Args:
            code: close status code to be used by the handler
            reason: close message to be used by the handler
            stop_engine: when True, stop reconnection attempts and the block cache
        """

        async def async_close(rc, msg):
            if self.is_connected():
                # this only send a "closing signal" to the handler - it does not actually close the connection.
                self.close_code = rc
                self.close_message = msg
                await self.ws.close()

        def closing(rc, msg):
            self._loop.create_task(async_close(rc, msg))

        if stop_engine:
            self.normal = False
            self.cache.stop()
        self._loop.call_soon_threadsafe(closing, code, reason)

    async def connection_handler(self, url):
        """Connect to the given URL and relay websocket messages to the incoming route.

        The handler runs until the connection is closed or the connector is
        stopped. An unreachable URL is skipped.

        Args:
            url: websocket URL of the language connector (the origin is appended)
        """
        try:
            async with aiohttp.ClientSession(loop=self._loop, timeout=aiohttp.ClientTimeout(total=10)) as session:
                full_path = f'{url}/{self.origin}'
                self.ws = await session.ws_connect(full_path)
                envelope = EventEnvelope()
                envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'open').set_header('url', full_path)
                self.platform.send_event(envelope)
                self.log.info(f'Connected to {full_path}')
                closed = False
                self.last_active = time.time()

                while self.normal:
                    try:
                        msg = await self.ws.receive(timeout=1)
                    except asyncio.TimeoutError:
                        if not self.normal:
                            break
                        else:
                            # idle - send keep-alive
                            now = time.time()
                            if self.is_connected() and now - self.last_active > 30:
                                self.last_active = now
                                self.send_keep_alive()
                            continue

                    # receive incoming event
                    self.last_active = time.time()
                    if msg.type == aiohttp.WSMsgType.TEXT:
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'text').set_body(msg.data)
                            self.platform.send_event(envelope)
                        else:
                            break
                    elif msg.type == aiohttp.WSMsgType.BINARY:
                        if self.platform.has_route(self.INCOMING_WS_PATH):
                            envelope = EventEnvelope()
                            envelope.set_to(self.INCOMING_WS_PATH).set_header('type', 'bytes').set_body(msg.data)
                            self.platform.send_event(envelope)
                        else:
                            break
                    else:
                        if msg.type == aiohttp.WSMsgType.ERROR:
                            self.log.error('Unexpected connection error')
                        if msg.type == aiohttp.WSMsgType.CLOSING:
                            # closing signal received - close the connection now
                            self.log.info(f'Disconnected, status={self.close_code}, message={self.close_message}')
                            await self.ws.close(code=self.close_code, message=bytes(self.close_message, 'utf-8'))
                            if self.platform.has_route(self.INCOMING_WS_PATH):
                                envelope = EventEnvelope()
                                envelope.set_to(self.INCOMING_WS_PATH).set_body(self.close_message)\
                                    .set_header('type', 'close').set_header('status', self.close_code)
                                self.platform.send_event(envelope)
                            closed = True
                        if msg.type == aiohttp.WSMsgType.CLOSE or msg.type == aiohttp.WSMsgType.CLOSED:
                            self.close_code = 1001 if msg.data is None else msg.data
                            self.close_message = 'OK' if msg.extra is None else str(msg.extra)
                            self.log.info(f'Disconnected, status={self.close_code}, message={self.close_message}')
                            if self.platform.has_route(self.INCOMING_WS_PATH):
                                envelope = EventEnvelope()
                                envelope.set_to(self.INCOMING_WS_PATH).set_header('message', self.close_message) \
                                    .set_header('type', 'close').set_header('status', self.close_code)
                                self.platform.send_event(envelope)
                            closed = True
                        # any message other than text or binary ends the receive loop
                        break

                if not closed:
                    # the loop ended without a close handshake, so close the connection locally
                    await self.ws.close(code=1000, message=b'OK')
                    self.ws = None
                    if self.platform.has_route(self.INCOMING_WS_PATH):
                        envelope = EventEnvelope()
                        envelope.set_to(self.INCOMING_WS_PATH).set_body('OK')\
                            .set_header('type', 'close').set_header('status', 1000)
                        self.platform.send_event(envelope)

        except aiohttp.ClientConnectorError:
            self._skip_url()
            self.log.warn(f'Unreachable {url}')