def __init__(self, context, address, receive_queue):
    """
    Create the REP socket for a resilient server and bind it.

    context
        zeromq context used to create the socket
    address
        a single zeromq address, or a list/tuple of addresses; the
        socket is bound to every one of them
    receive_queue
        stored on the instance as ``_receive_queue``
    """
    self._log = logging.getLogger("ResilientServer-%s" % (address, ))

    self._context = context

    self._rep_socket = context.socket(zmq.REP)
    self._rep_socket.setsockopt(zmq.LINGER, 1000)

    # a server can bind to multiple zeromq addresses
    # isinstance (not an exact type match) so that list/tuple subclasses
    # are also treated as a collection of addresses
    if isinstance(address, (list, tuple)):
        addresses = address
    else:
        addresses = [address, ]

    for bind_address in addresses:
        # we need a valid path for IPC sockets
        if bind_address.startswith("ipc://"):
            prepare_ipc_path(bind_address)

        self._log.debug("binding to %s" % (bind_address, ))
        self._rep_socket.bind(bind_address)

    self._receive_queue = receive_queue

    # handlers looked up by string key
    # NOTE(review): the keys are presumably incoming message types --
    # confirm against the code that consults this table
    self._dispatch_table = {
        "ping":
            self._handle_ping,
        "resilient-server-handshake":
            self._handle_resilient_server_handshake,
        "resilient-server-signoff":
            self._handle_resilient_server_signoff,
    }

    self._active_clients = dict()
def _create_sub_socket(zeromq_context):
    """
    Create a SUB socket, connect it to _skeeter_pub_socket_uri and
    subscribe it to _cache_update_channel.

    zeromq_context
        zeromq context used to create the socket

    returns the connected, subscribed socket
    """
    logger = logging.getLogger("_create_sub_socket")
    uri = _skeeter_pub_socket_uri

    # IPC endpoints get their path prepared before we connect
    if uri.startswith("ipc://"):
        prepare_ipc_path(uri)

    subscriber = zeromq_context.socket(zmq.SUB)

    logger.info("connecting to {0}".format(uri))
    subscriber.connect(uri)

    logger.info("subscribing to {0}".format(_cache_update_channel))
    subscriber.setsockopt(zmq.SUBSCRIBE, _cache_update_channel)

    return subscriber
def __init__(self, context, address):
    """
    Create a PUB socket and bind it to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    """
    logger_name = "PUBServer-%s" % (address, )
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    pub_socket = self._pub_socket = context.socket(zmq.PUB)
    pub_socket.setsockopt(zmq.LINGER, 1000)

    self._log.debug("binding")
    pub_socket.bind(address)
def _create_sub_socket(zeromq_context):
    """
    Create a SUB socket, connect it to _skeeter_pub_socket_uri and
    subscribe it to _cache_update_channel.

    zeromq_context
        zeromq context used to create the socket

    returns the connected, subscribed socket
    """
    # the logger is named after this function; it previously used
    # "_create_pub_socket", which misattributed these log records
    log = logging.getLogger("_create_sub_socket")

    # we need a valid path for IPC sockets
    if _skeeter_pub_socket_uri.startswith("ipc://"):
        prepare_ipc_path(_skeeter_pub_socket_uri)

    sub_socket = zeromq_context.socket(zmq.SUB)

    log.info("connecting to {0}".format(_skeeter_pub_socket_uri))
    sub_socket.connect(_skeeter_pub_socket_uri)

    log.info("subscribing to {0}".format(_cache_update_channel))
    sub_socket.setsockopt(zmq.SUBSCRIBE, _cache_update_channel)

    return sub_socket
def __init__(self, context, address, receive_queue):
    """
    Create a PULL socket and bind it to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    receive_queue
        stored on the instance as ``_receive_queue``
    """
    logger_name = "PULLServer-%s" % (address, )
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    pull_socket = self._pull_socket = context.socket(zmq.PULL)

    self._log.debug("binding")
    pull_socket.bind(address)

    self._receive_queue = receive_queue
def _bind_rep_socket(zeromq_context):
    """
    Create a REP socket and bind it to _zfec_server_address.

    zeromq_context
        zeromq context used to create the socket

    returns the bound socket
    """
    logger = logging.getLogger("_bind_rep_socket")
    bind_address = _zfec_server_address

    # IPC endpoints get their path prepared before binding
    if bind_address.startswith("ipc://"):
        prepare_ipc_path(bind_address)

    reply_socket = zeromq_context.socket(zmq.REP)
    reply_socket.setsockopt(zmq.LINGER, 1000)

    logger.info("binding to {0}".format(bind_address))
    reply_socket.bind(bind_address)

    return reply_socket
def __init__(self, context, address, receive_queue):
    """
    Create a REP socket and bind it to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    receive_queue
        stored on the instance as ``_receive_queue``
    """
    logger_name = "REPServer-{0}".format(address)
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    rep_socket = self._rep_socket = context.socket(zmq.REP)
    rep_socket.setsockopt(zmq.LINGER, 1000)

    self._log.debug("binding")
    rep_socket.bind(address)

    self._receive_queue = receive_queue
def __init__(self, context, address, reply_function):
    """
    Create a PULL socket with a receive high water mark and bind it
    to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    reply_function
        stored on the instance as ``_reply_function``
    """
    logger_name = "ReplyPULLServer-%s" % (address, )
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    pull_socket = self._pull_socket = context.socket(zmq.PULL)
    # the receive high water mark comes from the module-level _pull_hwm
    pull_socket.setsockopt(zmq.RCVHWM, _pull_hwm)

    self._log.debug("binding")
    pull_socket.bind(address)

    self._reply_function = reply_function
def __init__(self, context, address, receive_queue):
    """
    Create an XREP (router) socket and bind it to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    receive_queue
        stored on the instance as ``_receive_queue``
    """
    logger_name = "RouterServer-%s" % (address, )
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    router_socket = self._router_socket = context.socket(zmq.XREP)
    router_socket.setsockopt(zmq.LINGER, 1000)

    self._log.debug("binding")
    router_socket.bind(address)

    # the send queue starts out empty
    self._send_queue = deque()
    self._receive_queue = receive_queue
def __init__(self, context, address, deliverator):
    """
    Initialize the Greenlet base, then create a PULL socket and bind
    it to address.

    context
        zeromq context used to create the socket
    address
        the zeromq address to bind to
    deliverator
        stored on the instance as ``_deliverator``
    """
    Greenlet.__init__(self)

    logger_name = "PULLServer-%s" % (address, )
    self._log = logging.getLogger(logger_name)

    # IPC endpoints get their path prepared before binding
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    pull_socket = self._pull_socket = context.socket(zmq.PULL)

    self._log.debug("binding")
    pull_socket.bind(address)

    self._deliverator = deliverator
def __init__(self,
             zmq_context,
             address,
             client_tag,
             client_address,
             halt_event):
    """
    Create a REQ socket and connect it to address.

    zmq_context
        zeromq context used to create the socket
    address
        the zeromq address to connect to; also used as the name of
        this object and of its logger
    client_tag, client_address, halt_event
        stored on the instance under the same names with a leading
        underscore
    """
    self._name = address
    self._log = logging.getLogger(self._name)

    req_socket = self._socket = zmq_context.socket(zmq.REQ)

    # IPC endpoints get their path prepared before we connect
    is_ipc_address = address.startswith("ipc://")
    if is_ipc_address:
        prepare_ipc_path(address)

    self._log.info("connecting to {0}".format(address))
    req_socket.connect(address)

    self._client_tag = client_tag
    self._client_address = client_address
    self._halt_event = halt_event
def __init__(self, context, address, receive_queue):
    """
    Create the REP socket for a resilient server and bind it.

    context
        zeromq context used to create the socket
    address
        a single zeromq address, or a list/tuple of addresses; the
        socket is bound to every one of them
    receive_queue
        stored on the instance as ``_receive_queue``
    """
    self._log = logging.getLogger("ResilientServer-%s" % (address, ))

    self._context = context

    self._rep_socket = context.socket(zmq.REP)
    self._rep_socket.setsockopt(zmq.LINGER, 1000)

    # a server can bind to multiple zeromq addresses
    # isinstance (not an exact type match) so that list/tuple subclasses
    # are also treated as a collection of addresses
    if isinstance(address, (list, tuple)):
        addresses = address
    else:
        addresses = [address, ]

    for bind_address in addresses:
        # we need a valid path for IPC sockets
        if bind_address.startswith("ipc://"):
            prepare_ipc_path(bind_address)

        self._log.debug("binding to %s" % (bind_address, ))
        self._rep_socket.bind(bind_address)

    self._receive_queue = receive_queue

    # handlers looked up by string key
    # NOTE(review): the keys are presumably incoming message types --
    # confirm against the code that consults this table
    self._dispatch_table = {
        "ping":
            self._handle_ping,
        "resilient-server-handshake":
            self._handle_resilient_server_handshake,
        "resilient-server-signoff":
            self._handle_resilient_server_signoff,
    }

    self._active_clients = dict()
def main():
    """
    Program entry point for the retrieve source process.

    Launches the database pool controller and io controller
    subprocesses, then answers requests arriving on the REP socket until
    the halt event is set.

    returns 0 for normal termination (usually SIGTERM), 1 when the
    request loop ends with an error
    """
    exit_code = 0

    log_dir = os.environ["NIMBUSIO_LOG_DIR"]
    initialize_logging(_log_path_template.format(log_dir, _local_node_name))
    logger = logging.getLogger("main")
    logger.info("program starts")

    for socket_uri in internal_socket_uri_list:
        prepare_ipc_path(socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = \
        _connect_db_controller_push_socket(zeromq_context)
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # only the rep socket is polled for readability; the push client
    # sockets are assumed to be always writable
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    last_report_time = 0.0
    request_count = 0
    try:
        while not halt_event.is_set():
            for controller in (database_pool_controller, io_controller):
                poll_subprocess(controller)

            # a single socket is registered, so an 'if' would do, but the
            # 'for' keeps the same form as the other places using a poller
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    logger.error(error_message)
                    raise PollError(error_message)

                assert active_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)
                request_count += 1

            # report the request count once per reporting interval,
            # then start counting again from zero
            current_time = time.time()
            if current_time - last_report_time > _reporting_interval:
                report_message = "{0:,} requests".format(request_count)
                logger.info(report_message)
                event_push_client.info("request_count",
                                       report_message,
                                       request_count=request_count)
                last_report_time = current_time
                request_count = 0

    except KeyboardInterrupt:  # convenience for testing
        logger.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        # an interrupted system call after the halt event was set is the
        # normal way out; anything else is reported as an error
        halted_normally = \
            is_interrupted_system_call(zmq_error) and halt_event.is_set()
        if not halted_normally:
            logger.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            exit_code = 1
        else:
            logger.info(
                "program teminates normally with interrupted system call")
    except Exception as instance:
        logger.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        exit_code = 1
    else:
        logger.info("program teminates normally")
    finally:
        for controller in (database_pool_controller, io_controller):
            terminate_subprocess(controller)
        rep_socket.close()
        db_controller_push_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return exit_code
def main():
    """
    Program entry point for the service availability monitor.

    Starts the ping processes, then handles the messages arriving on
    the PULL socket until the halt event is set.

    returns 0 for normal termination (usually SIGTERM), 1 when the
    message loop ends with an error
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    prepare_ipc_path(_pull_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    pull_socket = _bind_pull_socket(zeromq_context)

    event_push_client = EventPushClient(zeromq_context,
                                        "service_availability")
    event_push_client.info("program-starts",
                           "service availability monitor starts")

    message_count = 0
    # bound before the try block so the cleanup code can tell whether the
    # ping processes were ever started
    ping_process_dict = None
    try:
        ping_process_dict = _start_ping_processes(halt_event)

        while not halt_event.is_set():
            # check on the subprocesses once for every
            # len(ping_process_dict) messages handled
            if message_count % len(ping_process_dict) == 0:
                for ping_process in ping_process_dict.values():
                    poll_subprocess(ping_process.process)

            message = pull_socket.recv_pyobj()
            assert not pull_socket.rcvmore

            _process_one_message(message,
                                 ping_process_dict,
                                 event_push_client)

            message_count += 1

    except KeyboardInterrupt:  # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program terminating normally; interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminating normally")
    finally:
        # cleanup lives in 'finally' so the subprocesses, sockets and
        # context are released however the loop ends, including when
        # _start_ping_processes itself fails
        if ping_process_dict is not None:
            log.debug("terminating subprocesses")
            _terminate_ping_processes(ping_process_dict)
        pull_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value
def main():
    """
    Program entry point for the retrieve source process.

    Launches the database pool controller and io controller
    subprocesses, then answers requests arriving on the REP socket until
    the halt event is set.

    returns 0 for normal termination (usually SIGTERM), 1 when the
    request loop ends with an error
    """
    exit_code = 0

    log_dir = os.environ["NIMBUSIO_LOG_DIR"]
    initialize_logging(_log_path_template.format(log_dir, _local_node_name))
    logger = logging.getLogger("main")
    logger.info("program starts")

    for socket_uri in internal_socket_uri_list:
        prepare_ipc_path(socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = \
        _connect_db_controller_push_socket(zeromq_context)
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # only the rep socket is polled for readability; the push client
    # sockets are assumed to be always writable
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    last_report_time = 0.0
    request_count = 0
    try:
        while not halt_event.is_set():
            for controller in (database_pool_controller, io_controller):
                poll_subprocess(controller)

            # a single socket is registered, so an 'if' would do, but the
            # 'for' keeps the same form as the other places using a poller
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    logger.error(error_message)
                    raise PollError(error_message)

                assert active_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)
                request_count += 1

            # report the request count once per reporting interval,
            # then start counting again from zero
            current_time = time.time()
            if current_time - last_report_time > _reporting_interval:
                report_message = "{0:,} requests".format(request_count)
                logger.info(report_message)
                event_push_client.info("request_count",
                                       report_message,
                                       request_count=request_count)
                last_report_time = current_time
                request_count = 0

    except KeyboardInterrupt:  # convenience for testing
        logger.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        # an interrupted system call after the halt event was set is the
        # normal way out; anything else is reported as an error
        halted_normally = \
            is_interrupted_system_call(zmq_error) and halt_event.is_set()
        if not halted_normally:
            logger.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            exit_code = 1
        else:
            logger.info(
                "program teminates normally with interrupted system call")
    except Exception as instance:
        logger.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        exit_code = 1
    else:
        logger.info("program teminates normally")
    finally:
        for controller in (database_pool_controller, io_controller):
            terminate_subprocess(controller)
        rep_socket.close()
        db_controller_push_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return exit_code
def process_segment_rows(halt_event,
                         zeromq_context,
                         args,
                         node_dict,
                         node_databases,
                         raw_segment_rows):
    """
    process handoffs of segment rows

    Binds a REP socket, starts args.worker_count worker processes and
    hands one segment row to each worker request until the rows run out
    and no handoff is pending, or until halt_event is set.

    halt_event
        checked on every pass of the loop; when set, the loop ends
    zeromq_context
        zeromq context used to create the REP socket
    args
        supplies node_name and worker_count, and is passed on to each
        worker process
    node_dict
        sent to each worker along with its first work message
    node_databases
        passed to the tombstone and purge helpers
    raw_segment_rows
        passed to _generate_segment_rows to produce the work items

    The workers are terminated and the socket is closed on every exit
    path, including when an exception propagates to the caller.
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir,
                                    args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    workers = list()
    try:
        rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
        rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
        log.info("binding rep socket to {0}".format(rep_socket_uri))
        rep_socket.bind(rep_socket_uri)

        log.debug("starting workers")
        for index in range(args.worker_count):
            worker_id = str(index + 1)
            workers.append(_start_worker_process(worker_id,
                                                 args,
                                                 rep_socket_uri))

        # loop until all handoffs have been accomplished
        log.debug("start handoffs")
        work_generator = _generate_segment_rows(raw_segment_rows)
        pending_handoff_count = 0
        while not halt_event.is_set():

            # get a segment row to process.
            # If we are at EOF, segment_row = None
            try:
                source_node_names, segment_row = next(work_generator)
            except StopIteration:
                if pending_handoff_count == 0:
                    break
                else:
                    source_node_names, segment_row = None, None

            # if we have a segment row, and it is a tombstone, we can act
            # directly on the node database(s) without sending it to a
            # worker
            if segment_row is not None:
                if segment_row["status"] == segment_status_tombstone:
                    _process_tombstone(node_databases,
                                       source_node_names,
                                       segment_row)
                    _purge_handoff_from_source_nodes(
                        node_databases,
                        source_node_names,
                        segment_row["collection_id"],
                        segment_row["key"],
                        segment_row["unified_id"],
                        segment_row["conjoined_part"],
                        segment_row["handoff_node_id"],
                        segment_status_tombstone)
                    continue
                assert segment_row["status"] == segment_status_final, \
                    segment_row["status"]

            # at this point we either have a segment row in final status,
            # or None, indicating no more data

            # block until we have a ready worker
            try:
                request = rep_socket.recv_pyobj()
            except zmq.ZMQError as zmq_error:
                if is_interrupted_system_call(zmq_error) and \
                        halt_event.is_set():
                    log.warning("breaking due to halt_event")
                    break
                raise
            assert not rep_socket.rcvmore

            # see how the worker handled the previous segment (if any)
            initial_request = False
            if request["message-type"] == "start":
                log.info("{0} initial request".format(request["worker-id"]))
                initial_request = True
            elif request["handoff-successful"]:
                log.info("{0} handoff ({1}, {2}) successful".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1
                _purge_handoff_from_source_nodes(
                    node_databases,
                    request["source-node-names"],
                    request["collection-id"],
                    request["key"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["handoff-node-id"],
                    segment_status_final)
            else:
                log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["error-message"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1

            if segment_row is None:
                # if we have no more work, tell the worker to stop
                work_message = {"message-type": "stop"}
            else:
                # otherwise, send the segment to the worker
                work_message = {"message-type": "work",
                                "source-node-names": source_node_names,
                                "segment-row": segment_row}
                # if this is the worker's first request, send him the
                # node_dict
                if initial_request:
                    work_message["node-dict"] = node_dict
                pending_handoff_count += 1

            rep_socket.send_pyobj(work_message)

        log.debug("end of handoffs")
    finally:
        # release the subprocesses and the socket however we leave
        for worker in workers:
            terminate_subprocess(worker)
        rep_socket.close()
def process_segment_rows(halt_event,
                         zeromq_context,
                         args,
                         node_dict,
                         node_databases,
                         raw_segment_rows):
    """
    process handoffs of segment rows

    Binds a REP socket, starts args.worker_count worker processes and
    hands one segment row to each worker request until the rows run out
    and no handoff is pending, or until halt_event is set.

    halt_event
        checked on every pass of the loop; when set, the loop ends
    zeromq_context
        zeromq context used to create the REP socket
    args
        supplies node_name and worker_count, and is passed on to each
        worker process
    node_dict
        sent to each worker along with its first work message
    node_databases
        passed to the tombstone and purge helpers
    raw_segment_rows
        passed to _generate_segment_rows to produce the work items

    The workers are terminated and the socket is closed on every exit
    path, including when an exception propagates to the caller.
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir,
                                    args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    workers = list()
    try:
        rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
        rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
        log.info("binding rep socket to {0}".format(rep_socket_uri))
        rep_socket.bind(rep_socket_uri)

        log.debug("starting workers")
        for index in range(args.worker_count):
            worker_id = str(index + 1)
            workers.append(_start_worker_process(worker_id,
                                                 args,
                                                 rep_socket_uri))

        # loop until all handoffs have been accomplished
        log.debug("start handoffs")
        work_generator = _generate_segment_rows(raw_segment_rows)
        pending_handoff_count = 0
        while not halt_event.is_set():

            # get a segment row to process.
            # If we are at EOF, segment_row = None
            try:
                source_node_names, segment_row = next(work_generator)
            except StopIteration:
                if pending_handoff_count == 0:
                    break
                else:
                    source_node_names, segment_row = None, None

            # if we have a segment row, and it is a tombstone, we can act
            # directly on the node database(s) without sending it to a
            # worker
            if segment_row is not None:
                if segment_row["status"] == segment_status_tombstone:
                    _process_tombstone(node_databases,
                                       source_node_names,
                                       segment_row)
                    _purge_handoff_from_source_nodes(
                        node_databases,
                        source_node_names,
                        segment_row["collection_id"],
                        segment_row["key"],
                        segment_row["unified_id"],
                        segment_row["conjoined_part"],
                        segment_row["handoff_node_id"],
                        segment_status_tombstone)
                    continue
                assert segment_row["status"] == segment_status_final, \
                    segment_row["status"]

            # at this point we either have a segment row in final status,
            # or None, indicating no more data

            # block until we have a ready worker
            try:
                request = rep_socket.recv_pyobj()
            except zmq.ZMQError as zmq_error:
                if is_interrupted_system_call(zmq_error) and \
                        halt_event.is_set():
                    log.warning("breaking due to halt_event")
                    break
                raise
            assert not rep_socket.rcvmore

            # see how the worker handled the previous segment (if any)
            initial_request = False
            if request["message-type"] == "start":
                log.info("{0} initial request".format(request["worker-id"]))
                initial_request = True
            elif request["handoff-successful"]:
                log.info("{0} handoff ({1}, {2}) successful".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1
                _purge_handoff_from_source_nodes(
                    node_databases,
                    request["source-node-names"],
                    request["collection-id"],
                    request["key"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["handoff-node-id"],
                    segment_status_final)
            else:
                log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["error-message"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1

            if segment_row is None:
                # if we have no more work, tell the worker to stop
                work_message = {"message-type": "stop"}
            else:
                # otherwise, send the segment to the worker
                work_message = {"message-type": "work",
                                "source-node-names": source_node_names,
                                "segment-row": segment_row}
                # if this is the worker's first request, send him the
                # node_dict
                if initial_request:
                    work_message["node-dict"] = node_dict
                pending_handoff_count += 1

            rep_socket.send_pyobj(work_message)

        log.debug("end of handoffs")
    finally:
        # release the subprocesses and the socket however we leave
        for worker in workers:
            terminate_subprocess(worker)
        rep_socket.close()