def publishPoint(self, point):
    """Ensure every stream derived from ``point`` has a bound, announced socket.

    Four stream names are built from ``self.prefix`` and
    ``point["pointname"]``: the per-point stream, the ``allpoints`` stream,
    and the ``.multipart`` variant of each.  For every name that is not yet
    a key of ``self.streams`` a PUB socket is bound to a random TCP port on
    ``self.ip``, stored in ``self.streams`` and announced through
    ``self.ns.publishService``.

    Args:
        point: Mapping describing the point; only ``point["pointname"]`` is
            read by this code.

    Raises:
        nnslib.NameServerException: For any name-server failure other than
            the "<type> is an unknown data type" error, which is handled by
            registering the type.

    NOTE(review): nothing here sends ``point`` on the sockets -- confirm
    whether the actual publishing happens elsewhere.
    """
    stream_names = [str(name) for name in (
        "%s.%s" % (self.prefix, point["pointname"]),
        "%s.%s.multipart" % (self.prefix, point["pointname"]),
        "%s.allpoints" % self.prefix,
        "%s.allpoints.multipart" % self.prefix,
    )]

    for stream in stream_names:
        if stream in self.streams:
            continue

        # NOTE(review): each stream gets its own zmq.Context; pyzmq normally
        # expects one shared context per process -- consider reusing one.
        socket = zmq.Context().socket(zmq.PUB)
        socket.setsockopt(HIGHWATER, 15000)
        port = socket.bind_to_random_port("tcp://%s" % self.ip)
        self.streams[stream] = socket

        # Multipart streams are announced with a different data type than
        # the plain (JSON) ones; everything else is identical.
        if "multipart" in stream:
            type_name, encoding = "ZmqMultipartPoint", "ZmqMultipart"
        else:
            type_name, encoding = "Point", "JSON"

        try:
            self.ns.publishService(stream,
                                   "tcp://%s:%s" % (self.ip, port),
                                   self.publishTimeout,
                                   "pub/sub",
                                   type_name)
        except nnslib.NameServerException as e:
            if str(e) != "%s is an unknown data type" % type_name:
                raise
            # The name server does not know this type yet: register it.
            # NOTE(review): the service is not re-published after the type is
            # added (same as before) -- confirm something else retries.
            self.ns.addDataType(type_name, encoding,
                                "host,time,pointname,val,unit", "")
def server(pid, ep):
    """
    Worker for listening socket for communication with web server.

    Binds a ZMQ REP socket to a random TCP port on ``0.0.0.0``, reports that
    port through ``ep`` and then waits for a ``KILL`` request.  On ``KILL``
    it replies ``OK``, closes the socket, sends ``None`` through ``ep`` and
    delivers SIGINT to process ``pid``.

    Parameters
    ----------
    pid: int
        Process id. Used to send SIGINT.
    ep: mp.Pipe endpoint.
        Used to communicate with model processor. Receives the bound port
        first and ``None`` once a ``KILL`` request has been handled.
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    port = socket.bind_to_random_port("tcp://0.0.0.0")
    ep.send(port)

    def sig_handler(signum, frame):
        # Close the listening socket when the worker receives SIGTERM.
        socket.close()

    signal.signal(signal.SIGTERM, sig_handler)

    while True:
        message = socket.recv()
        # recv() returns bytes and send() requires bytes on Python 3; bytes
        # literals behave exactly like the old str literals on Python 2.
        if message == b"KILL":
            socket.send(b"OK")
            socket.close()
            ep.send(None)
            p = psutil.Process(pid)
            p.send_signal(signal.SIGINT)
            return
        # NOTE(review): a REP socket must answer each request before it can
        # receive again, so any non-KILL message makes the next recv() fail.
        # Decide what reply (if any) the web server expects here.
def wait_for_ack(ip=local_ip, port=None, socket_type=zmq.PAIR, **additional_info):
    """Bind a socket and return ``(port, waiter)`` for receiving one ack.

    A socket of ``socket_type`` is created from the module-level ``ctx`` and
    registered with a poller for incoming messages.  It is bound to
    ``tcp://ip:port`` when ``port`` is truthy, otherwise to a random port on
    ``ip``.  A falsy ``ip`` is replaced by ``get_local_ip()``.

    The returned ``waiter(timeout=3000)`` polls for up to ``timeout``
    (passed straight to ``Poller.poll``).  If nothing arrives it returns
    ``None`` and leaves the socket open.  Otherwise it reads one JSON
    message, closes the socket, asserts that ``message['act'] == 'ack'`` and
    returns ``True`` only if every ``additional_info`` item is present in
    the message with an equal value (``False`` otherwise).
    """
    socket = ctx.socket(socket_type)
    poller = zmq.Poller()
    poller.register(socket, zmq.POLLIN)

    ip = ip or get_local_ip()
    if port:
        socket.bind('tcp://%s:%s' % (ip, port))
    else:
        port = socket.bind_to_random_port('tcp://%s' % ip)

    def _(timeout=3 * 1000):
        if not poller.poll(timeout):
            # Timed out: nothing to inspect, socket stays open.
            return None

        reply = socket.recv_json()
        socket.close()

        assert 'act' in reply
        assert 'ack' == reply['act']

        for key, expected in additional_info.items():
            if key not in reply:
                return False
            if expected != reply[key]:
                return False
        return True

    return port, _
def _create_and_bind_socket(self, type):
    """Create a socket of ``type`` and bind it to a random TCP port.

    The socket is bound on ``self.address``; the endpoint string handed
    back is built from ``self._server_address`` and the chosen port.

    Returns a ``(ZMQStream, endpoint)`` tuple, where the stream wraps the
    new socket on ``self._io_loop``.
    """
    sock = self._zmq_ctx.socket(type)
    bound_port = sock.bind_to_random_port('tcp://{0}'.format(self.address))
    stream = ZMQStream(sock, self._io_loop)
    endpoint = 'tcp://{0}:{1}'.format(self._server_address, bound_port)
    return stream, endpoint
def bind_port(socket, ip, port):
    """Bind ``socket`` over TCP on ``ip`` and return the port in use.

    A ``port`` of zero (or any non-positive value) asks the socket to pick
    a random port via ``bind_to_random_port``; a positive ``port`` is bound
    explicitly and returned unchanged.
    """
    endpoint = 'tcp://%s' % ip
    if port <= 0:
        return socket.bind_to_random_port(endpoint)

    socket.bind(endpoint + ':%i' % port)
    return port
def server_loop(devnum):
    """Answer requests forever on a REP socket bound to a random port.

    The chosen port is stored in ``_global_port[0]`` and logged.  Inside
    the ``cuda.gpus[devnum]`` context each received message is passed to
    ``_handle_request`` and its result is sent back as the reply.  The
    loop never exits on its own.
    """
    logger.info("server loop starts")

    context = zmq.Context()
    rep_socket = context.socket(zmq.REP)
    port = rep_socket.bind_to_random_port("tcp://*")
    _global_port[0] = port
    logger.info('bind to port: %s', port)

    with cuda.gpus[devnum]:
        while True:
            request = rep_socket.recv()
            rep_socket.send(_handle_request(request))
def _bind_to_random_ports(self, port_range: Tuple[int, int], num_connections: int) -> None:
    """Bind ``num_connections`` REP sockets to random ports in ``port_range``.

    Each socket is created from ``self.context`` and bound on ``tcp://*``
    with ``min_port=port_range[0]`` and ``max_port=port_range[1]``.  On
    success the port is appended to ``self.ports`` and the socket to
    ``self.sockets``.

    Args:
        port_range: ``(min_port, max_port)`` handed to
            ``bind_to_random_port``.
        num_connections: Number of sockets to create; checked with
            ``check.lt`` against the size of ``port_range``.

    Raises:
        det.errors.InternalException: If a socket cannot be bound within
            ``port_range`` (chained from ``zmq.error.ZMQBindError``).
    """
    import zmq
    import zmq.error

    check.lt(num_connections, port_range[1] - port_range[0])

    for _ in range(num_connections):
        socket = self.context.socket(zmq.REP)  # type: ignore
        try:
            selected_port = socket.bind_to_random_port(
                addr="tcp://*", min_port=port_range[0], max_port=port_range[1]
            )
        except zmq.error.ZMQBindError as e:
            # The socket never makes it into self.sockets, so nobody else
            # can close it: release it here instead of leaking it.
            socket.close()
            raise det.errors.InternalException(
                f"Failed to bind to port range {port_range}."
            ) from e
        self.ports.append(selected_port)
        self.sockets.append(socket)
def _reply_heartbeat(self, target):
    """Answer the master's heartbeats and shut the worker down when they stop.

    A REP socket is bound to a random port and its address is stored in
    ``self.master_heartbeat_address`` (also used to derive the log
    directory).  While both ``self.master_is_alive`` and
    ``self.worker_is_alive`` hold, each received heartbeat is answered with
    the heartbeat tag followed by the four fields of
    ``self._get_worker_status()``.  A receive timeout marks the master as
    dead; a terminated context leaves the loop.  Afterwards the socket is
    closed, the worker status is cleared, the log server process is killed
    and the worker exits.

    ``target`` is not used by this method.
    """
    socket = self.ctx.socket(zmq.REP)
    socket.linger = 0
    socket.setsockopt(zmq.RCVTIMEO,
                      remote_constants.HEARTBEAT_RCVTIMEO_S * 1000)
    port = socket.bind_to_random_port("tcp://*")
    self.master_heartbeat_address = "{}:{}".format(self.worker_ip, port)

    log_dir = '~/.parl_data/worker/{}'.format(
        self.master_heartbeat_address.replace(':', '_'))
    logger.set_dir(os.path.expanduser(log_dir))

    self.heartbeat_socket_initialized.set()
    logger.info("[Worker] Connect to the master node successfully. "
                "({} CPUs)".format(self.cpu_num))

    while self.master_is_alive and self.worker_is_alive:
        try:
            # The request payload is irrelevant; receiving it is the signal.
            socket.recv_multipart()
            status = self._get_worker_status()
            reply = [
                remote_constants.HEARTBEAT_TAG,
                to_byte(str(status[0])),
                to_byte(str(status[1])),
                to_byte(status[2]),
                to_byte(str(status[3])),
            ]
            socket.send_multipart(reply)
        except zmq.error.Again:
            # No heartbeat within the receive timeout.
            self.master_is_alive = False
        except zmq.error.ContextTerminated:
            break

    socket.close(0)
    logger.warning(
        "[Worker] lost connection with the master, will exit reply heartbeat for master."
    )
    self.worker_status.clear()
    self.log_server_proc.kill()
    self.log_server_proc.wait()

    # exit the worker
    self.worker_is_alive = False
    self.exit()
def publishPoint(self, point):
    """Ensure every stream derived from ``point`` has a bound, announced socket.

    Four stream names are built from ``self.prefix`` and
    ``point["pointname"]``: the per-point stream, the ``allpoints`` stream,
    and the ``.multipart`` variant of each.  For every name that is not yet
    a key of ``self.streams`` a PUB socket is bound to a random TCP port on
    ``self.ip``, stored in ``self.streams`` and announced through
    ``self.ns.publishService``.

    Args:
        point: Mapping describing the point; only ``point["pointname"]`` is
            read by this code.

    Raises:
        nnslib.NameServerException: For any name-server failure other than
            the "<type> is an unknown data type" error, which is handled by
            registering the type.

    NOTE(review): nothing here sends ``point`` on the sockets -- confirm
    whether the actual publishing happens elsewhere.
    """
    stream_names = [str(name) for name in (
        "%s.%s" % (self.prefix, point["pointname"]),
        "%s.%s.multipart" % (self.prefix, point["pointname"]),
        "%s.allpoints" % self.prefix,
        "%s.allpoints.multipart" % self.prefix,
    )]

    for stream in stream_names:
        if stream in self.streams:
            continue

        # NOTE(review): each stream gets its own zmq.Context; pyzmq normally
        # expects one shared context per process -- consider reusing one.
        socket = zmq.Context().socket(zmq.PUB)
        socket.setsockopt(HIGHWATER, 15000)
        port = socket.bind_to_random_port("tcp://%s" % self.ip)
        self.streams[stream] = socket

        # Multipart streams are announced with a different data type than
        # the plain (JSON) ones; everything else is identical.
        if "multipart" in stream:
            type_name, encoding = "ZmqMultipartPoint", "ZmqMultipart"
        else:
            type_name, encoding = "Point", "JSON"

        try:
            self.ns.publishService(stream,
                                   "tcp://%s:%s" % (self.ip, port),
                                   self.publishTimeout,
                                   "pub/sub",
                                   type_name)
        except nnslib.NameServerException as e:
            if str(e) != "%s is an unknown data type" % type_name:
                raise
            # The name server does not know this type yet: register it.
            # NOTE(review): the service is not re-published after the type is
            # added (same as before) -- confirm something else retries.
            self.ns.addDataType(type_name, encoding,
                                "host,time,pointname,val,unit", "")
def _reply_heartbeat(self):
    """Reply heartbeat signals to the master node.

    A REP socket is bound to a random port and its address is stored in
    ``self.reply_master_heartbeat_address``.  While both
    ``self.client_is_alive`` and ``self.master_is_alive`` hold, every
    received heartbeat is answered with the heartbeat tag, the executable
    path, the actor count, the elapsed time since ``self.start_time`` and
    the log monitor URL.  A receive timeout logs a warning and marks the
    master as dead, which ends the loop; the socket is then closed.
    """
    socket = self.ctx.socket(zmq.REP)
    socket.linger = 0
    socket.setsockopt(zmq.RCVTIMEO,
                      remote_constants.HEARTBEAT_RCVTIMEO_S * 1000)

    reply_master_heartbeat_port = socket.bind_to_random_port(addr="tcp://*")
    self.reply_master_heartbeat_address = "{}:{}".format(
        get_ip_address(), reply_master_heartbeat_port)
    self.heartbeat_socket_initialized.set()

    # Becomes True once at least one heartbeat has been received; it selects
    # which hint is logged when the master later stops responding.
    connected = False
    while self.client_is_alive and self.master_is_alive:
        try:
            socket.recv_multipart()
            # Previously this flag was never set, so the "still alive" hint
            # below could not be reached.
            connected = True
            elapsed_time = datetime.timedelta(
                seconds=int(time.time() - self.start_time))
            socket.send_multipart([
                remote_constants.HEARTBEAT_TAG,
                to_byte(self.executable_path),
                to_byte(str(self.actor_num)),
                to_byte(str(elapsed_time)),
                to_byte(str(self.log_monitor_url)),
            ])  # TODO: remove additional information
        except zmq.error.Again:
            if connected:
                logger.warning("[Client] Cannot connect to the master."
                               "Please check if it is still alive.")
            else:
                logger.warning(
                    "[Client] Cannot connect to the master."
                    "Please check the firewall between client and master.(e.g., ping the master IP)"
                )
            self.master_is_alive = False

    socket.close(0)
    logger.warning("Client exit replying heartbeat for master.")