def __init__(self, args_dict):
    """Set up a function worker from its argument dictionary.

    The constructor stores the polling limits, lets ``_set_args`` and
    ``_setup_loggers`` populate the instance, builds the ``StateUtils``
    helper, prepares the runtime (thrift API for "java", user code import
    for "python 3.6"), creates the local queue client and finally marks
    the worker as not running.

    Args:
        args_dict: handed unchanged to ``self._set_args``; the attributes
            read below (sandbox id, workflow id, function folder, ...) are
            presumably populated from it -- ``_set_args`` is not visible
            here.
    """
    # polling limits used when retrieving messages
    self._POLL_MAX_NUM_MESSAGES = 500
    self._POLL_TIMEOUT = py3utils.ensure_long(10000)

    self._set_args(args_dict)

    # "<sandboxid>-<workflowid>-"
    self._prefix = "-".join((self._sandboxid, self._workflowid)) + "-"
    self._wf_local = {}

    self._setup_loggers()

    self._state_utils = StateUtils(
        self._function_state_type,
        self._function_state_name,
        self._function_state_info,
        self._function_runtime,
        self._logger,
        self._workflowid,
        self._sandboxid,
        self._function_topic,
        self._datalayer,
        self._storage_userid,
        self._internal_endpoint)

    # runtime specific preparation
    runtime = self._function_runtime
    if runtime == "java":
        self._api_thrift = thriftpy2.load(
            "/opt/mfn/FunctionWorker/MicroFunctionsAPI.thrift",
            module_name="mfnapi_thrift")
    elif runtime == "python 3.6":
        # make the user's folder importable, then load the user code
        sys.path.insert(1, self._function_folder)
        if self._state_utils.isTaskState():
            try:
                # remember the worker's own directory so that we can go back
                # to it once the user module has been imported from its folder
                worker_dir = os.path.dirname(__file__)
                os.chdir(self._function_folder)
                # FIXME: need to fix this part for python3 compatibility
                self.code = imp.load_source(self._function_name,
                                            self._function_path)
                os.chdir(worker_dir)
            except Exception as load_error:
                # without the user code there is nothing to run:
                # log, flush and terminate the process immediately
                self._logger.exception("Exception loading user code: %s",
                                       str(load_error))
                sys.stdout.flush()
                os._exit(1)

    # client used for retrieving new messages
    self.local_queue_client = LocalQueueClient(connect=self._queue)

    # ignore SIGCHLD
    signal(SIGCHLD, SIG_IGN)

    # change into the function folder once here
    # rather than at every forked process
    if self._state_utils.isTaskState():
        os.chdir(self._function_folder)

    self._is_running = False
def run(self):
    """Poll the local communication topic in batches and keep the heartbeat alive.

    The loop runs until ``self._is_running`` becomes false. Each iteration
    fetches up to 1000 messages from ``self._local_topic_communication``,
    hands every non-``None`` message to ``self._process_message`` and, when
    the heartbeat is enabled, sends a heartbeat as soon as at least
    ``self._heartbeat_interval`` milliseconds have passed since the last one.
    ``self._cleanup`` is called once the loop has finished.
    """
    self._is_running = True
    max_num_messages = 1000

    # initially, it is the heartbeat_interval
    poll_timeout = self._local_poll_timeout

    # Always bind the timestamp (in milliseconds), even when the heartbeat
    # is disabled at this point: if the heartbeat gets enabled while the
    # loop is running (presumably via an update message handled in
    # _process_message -- not visible here), the checks below read it and
    # would otherwise fail with an UnboundLocalError.
    last_heartbeat_time = time.time() * 1000.0
    if self._heartbeat_enabled:
        self._send_heartbeat()

    # _XXX_: our location is stored as part of our metadata
    # so that the remote functions can
    # look it up and send their message via that location

    # first, create local topic
    self._local_queue_client.addTopic(self._local_topic_communication)

    while self._is_running:
        # wait until the polling interval finishes
        # the polling interval depends on the heartbeat interval and on when
        # we actually receive a message; if we get a message earlier, the
        # polling interval is updated to (heartbeat_interval - passed_time)
        lqm_list = self._local_queue_client.getMultipleMessages(
            self._local_topic_communication, max_num_messages, poll_timeout)

        # double check we are still running
        # if the long-running function finished while we were polling,
        # there is no need to send another heartbeat
        if not self._is_running:
            break

        for lqm in lqm_list:
            if lqm is not None:
                self._process_message(lqm)

            if self._heartbeat_enabled:
                # the check is part of the message loop, so that the
                # heartbeat stays precise even if processing the batch
                # takes longer than the heartbeat interval
                t_cur = time.time() * 1000.0
                if (t_cur - last_heartbeat_time) >= self._heartbeat_interval:
                    self._send_heartbeat()
                    last_heartbeat_time = t_cur

        if self._heartbeat_enabled:
            # even if there were no messages, a heartbeat might be due
            t_cur = time.time() * 1000.0
            if (t_cur - last_heartbeat_time) >= self._heartbeat_interval:
                self._send_heartbeat()
                last_heartbeat_time = t_cur

            # update the poll time
            # if we sent a heartbeat just now, last_heartbeat_time and t_cur
            # cancel each other out
            poll_timeout = py3utils.ensure_long(
                last_heartbeat_time + self._local_poll_timeout - t_cur)

    self._cleanup()
def __init__(self, helper_params, logger, pubutils, sessutils, queueservice, datalayer):
    """Prepare the session helper thread; the thread is not started here.

    Args:
        helper_params: dict providing "sandboxid", "workflowid",
            "session_function_id", "session_id", "heartbeat_parameters"
            (passed to ``_init_heartbeat_parameters``) and
            "communication_parameters" (which must contain
            "local_topic_communication").
        logger: logger kept as ``self._logger``.
        pubutils: kept as ``self._publication_utils``.
        sessutils: kept as ``self._session_utils``.
        queueservice: connection info handed to ``LocalQueueClient``.
        datalayer: kept as ``self._datalayer``.
    """
    # The base class constructor has to run before anything else is done to
    # the thread object (see the threading.Thread documentation).
    threading.Thread.__init__(self)

    self._logger = logger

    self._publication_utils = pubutils
    self._session_utils = sessutils

    self._queue_service = queueservice
    self._datalayer = datalayer

    self._sandboxid = helper_params["sandboxid"]
    self._workflowid = helper_params["workflowid"]
    self._session_function_id = helper_params["session_function_id"]
    self._session_id = helper_params["session_id"]

    # set up heartbeat parameters; the defaults below describe a disabled
    # heartbeat and are handed over to _init_heartbeat_parameters afterwards
    self._heartbeat_enabled = False
    self._heartbeat_method = None
    # our own local queue client to be used when sending a heartbeat
    # TODO: double check if we can just reuse the one we're polling
    # probably yes
    self._local_queue_client_heartbeat = None
    self._heartbeat_function = None
    self._heartbeat_data_layer_key = None
    self._data_layer_client_heartbeat = None

    self._init_heartbeat_parameters(helper_params["heartbeat_parameters"])

    # set up communication parameters
    self._communication_params = helper_params["communication_parameters"]
    # similar to the data layer rendezvous point for message delivery, we
    # listen to a local topic, allowing us to queue messages and deliver
    # multiple messages to the session function if desired
    self._local_topic_communication = self._communication_params["local_topic_communication"]
    # by default, assign a simple poll timeout
    # if the heartbeat is specified, it will be updated to the heartbeat
    # to ensure we can send regular heartbeats
    self._local_poll_timeout = py3utils.ensure_long(10000)

    # use a queue to keep the incoming update messages for blocking and/or
    # non-blocking get_update_messages() requests
    self._message_queue = queue.Queue()

    self._local_queue_client = LocalQueueClient(connect=self._queue_service)

    # messages with a special meaning for the helper thread itself
    self._special_messages = {
        "--stop": True,
        "--update-heartbeat": True,
    }

    self._is_running = False
def run(self):
    """Poll the local communication topic one message at a time and keep the heartbeat alive.

    The loop runs until ``self._is_running`` becomes false. Each iteration
    waits for a single message on ``self._local_topic_communication``,
    hands it to ``self._process_message`` when it is not ``None`` and, when
    the heartbeat is enabled, sends a heartbeat as soon as at least
    ``self._heartbeat_interval`` milliseconds have passed since the last one.
    ``self._cleanup`` is called once the loop has finished.
    """
    self._is_running = True

    # initially, it is the heartbeat_interval / 2
    poll_timeout = self._local_poll_timeout

    # Always bind the timestamp (in milliseconds), even when the heartbeat
    # is disabled at this point: if the heartbeat gets enabled while the
    # loop is running (presumably via an update message handled in
    # _process_message -- not visible here), the check below reads it and
    # would otherwise fail with an UnboundLocalError.
    last_heartbeat_time = time.time() * 1000.0
    if self._heartbeat_enabled:
        self._send_heartbeat()

    # _XXX_: our location is stored as part of our metadata
    # so that the remote functions can
    # look it up and send their message via that location

    # first, create local topic
    self._local_queue_client.addTopic(self._local_topic_communication)

    while self._is_running:
        # wait until the polling interval finishes
        # the polling interval depends on the heartbeat interval and on when
        # we actually receive a message; if we get a message earlier, the
        # polling interval is updated to (heartbeat_interval - passed_time)
        lqm = self._local_queue_client.getMessage(
            self._local_topic_communication, poll_timeout)

        # double check we are still running
        # if the long-running function finished while we were polling,
        # there is no need to send another heartbeat
        if not self._is_running:
            break

        if lqm is not None:
            self._process_message(lqm)

        if self._heartbeat_enabled:
            # even if there was no message, a heartbeat might be due
            t_cur = time.time() * 1000.0
            if (t_cur - last_heartbeat_time) >= self._heartbeat_interval:
                self._send_heartbeat()
                last_heartbeat_time = t_cur

            # update the poll time
            # if we sent a heartbeat just now, last_heartbeat_time and t_cur
            # cancel each other out
            poll_timeout = py3utils.ensure_long(
                last_heartbeat_time + self._local_poll_timeout - t_cur)

            if poll_timeout <= 0:
                # we just missed a deadline; send a heartbeat right away
                t_cur = time.time() * 1000.0
                self._send_heartbeat()
                last_heartbeat_time = t_cur
                # reset the poll timeout accordingly
                poll_timeout = self._local_poll_timeout

    self._cleanup()
import logging_helpers
import process_utils

# extend the module search path with the FunctionWorker's python folder
# (relative to sys.path[0]) before the imports that follow
sys.path.insert(1, os.path.join(sys.path[0], '../FunctionWorker/python'))
import py3utils
from DataLayerClient import DataLayerClient
from LocalQueueClient import LocalQueueClient
from LocalQueueClientMessage import LocalQueueClientMessage

LOG_FILENAME = '/opt/mfn/logs/sandboxagent.log'
FLUENTBIT_FOLDER = '/opt/mfn/LoggingService/fluent-bit' # this is a symbolic link to the actual fluent-bit folder location inside the sandbox container
ELASTICSEARCH_INDEX_WF = 'mfnwf'
ELASTICSEARCH_INDEX_FE = 'mfnfe'

# NOTE(review): presumably a poll timeout in milliseconds -- confirm against its users
POLL_TIMEOUT = py3utils.ensure_long(60000)

#ELASTICSEARCH_INDEX = 'wf' # index name will be: 'wf' + [the first character of the workflow name (in lower case)]

class SandboxAgent:
    def __init__(self, hostname, queue, datalayer, sandboxid, userid, workflowid, elasticsearch, workflowname, endpoint_key):
        # The portion of the constructor visible here records the creation
        # time and the interpreter version and stores the hostname, queue,
        # data layer and elasticsearch arguments; the remaining parameters
        # are not consumed in this portion.
        self._start = time.time()

        self._python_version = sys.version_info

        self._hostname = hostname
        self._queue = queue
        self._datalayer = datalayer
        self._elasticsearch = elasticsearch
def __init__(self, helper_params, logger, pubutils, sessutils, queue, datalayer):
    """Prepare the session helper thread; the thread is not started here.

    Args:
        helper_params: dict providing "sandboxid", "workflowid",
            "session_function_id", "session_id", "heartbeat_parameters"
            (passed to ``_init_heartbeat_parameters``) and
            "communication_parameters" (which must contain
            "local_topic_communication").
        logger: logger kept as ``self._logger``.
        pubutils: kept as ``self._publication_utils``.
        sessutils: kept as ``self._session_utils``.
        queue: connection info handed to ``LocalQueueClient``.
        datalayer: connection info handed to ``DataLayerClient``.
    """
    # The base class constructor has to run before anything else is done to
    # the thread object (see the threading.Thread documentation).
    threading.Thread.__init__(self)

    self._logger = logger

    self._publication_utils = pubutils
    self._session_utils = sessutils

    self._queue = queue
    self._datalayer = datalayer

    self._sandboxid = helper_params["sandboxid"]
    self._workflowid = helper_params["workflowid"]
    self._session_function_id = helper_params["session_function_id"]
    self._session_id = helper_params["session_id"]

    # need a separate backup data layer client from the publication utils;
    # otherwise, we run into concurrent modification problems from Thrift
    # locality = -1 means that the writes happen to the local data layer
    # first and then asynchronously to the global data layer
    self._backup_data_layer_client = DataLayerClient(
        locality=-1, for_mfn=True, sid=self._sandboxid, connect=self._datalayer)

    # set up heartbeat parameters; the defaults below describe a disabled
    # heartbeat and are handed over to _init_heartbeat_parameters afterwards
    self._heartbeat_enabled = False
    self._heartbeat_method = None
    # our own local queue client to be used when sending a heartbeat
    # TODO: double check if we can just reuse the one we're polling
    # probably yes
    self._local_queue_client_heartbeat = None
    self._heartbeat_function = None
    self._heartbeat_data_layer_key = None
    self._data_layer_client_heartbeat = None

    self._init_heartbeat_parameters(helper_params["heartbeat_parameters"])

    # set up communication parameters
    self._communication_params = helper_params["communication_parameters"]
    # similar to the data layer rendezvous point for message delivery, we
    # listen to a local topic, allowing us to queue messages and deliver
    # multiple messages to the session function if desired
    self._local_topic_communication = self._communication_params["local_topic_communication"]
    # by default, assign a simple poll timeout
    # if the heartbeat is specified, it will be updated to the heartbeat
    # to ensure we can send regular heartbeats
    self._local_poll_timeout = py3utils.ensure_long(10000)

    # use a deque to keep the list of messages
    # updating the list and retrieving the list would be done by two threads
    # this should be safe without lock because of the global interpreter
    # lock in python
    self._message_queue = deque()

    self._local_queue_client = LocalQueueClient(connect=self._queue)

    # messages with a special meaning for the helper thread itself
    self._special_messages = {
        "--stop": True,
        "--update-heartbeat": True,
    }

    self._is_running = False