def init(r_process, backlog_size, timeout, verbose):
    """Set up the class-wide HostConnection state and its ROUTER socket.

    ``r_process`` is stored as the relayer process; its ``zmq_context`` is
    used to create the socket and its ``register_poller()`` connects the
    socket with ``HostConnection.get_result`` for incoming data.
    ``backlog_size`` is used for the send / receive high-water marks and for
    the backlog of the socket, ``timeout`` and ``verbose`` are only stored.
    """
    hc = HostConnection
    hc.relayer_process = r_process
    # 2 queues for 0MQ and tcp, 0MQ is (True, conn_str), TCP is (False, conn_str)
    hc.hc_dict = {}
    # lut to map message_ids to host_connections
    hc.message_lut = {}
    hc.backlog_size, hc.timeout, hc.verbose = backlog_size, timeout, verbose
    hc.g_log(
        "backlog size is {:d}, timeout is {:d}, verbose is {}".format(
            backlog_size,
            timeout,
            str(verbose),
        )
    )
    # router socket, identity is derived from the machine name
    router_options = {
        "identity": "relayer_rtr_{}".format(process_tools.get_machine_name()),
        "linger": 0,
        "sndhwm": backlog_size,
        "rcvhwm": backlog_size,
        "backlog": backlog_size,
        "immediate": True,
    }
    router = process_tools.get_socket(
        r_process.zmq_context,
        "ROUTER",
        **router_options
    )
    hc.zmq_socket = router
    r_process.register_poller(router, zmq.POLLIN, hc.get_result)
def init(cls, r_process, backlog_size, timeout, zmq_discovery):
    """Set up the class-wide connection state and the shared ROUTER socket.

    ``r_process`` is stored as the relayer process; its ``zmq_context`` is
    used to create the socket and its ``register_poller()`` connects the
    socket with ``cls.get_result`` for incoming data.  ``backlog_size`` is
    used for the send / receive high-water marks and for the backlog of the
    socket.  ``timeout`` and ``zmq_discovery`` are stored on the class.
    """
    cls.relayer_process = r_process
    # 2 queues for 0MQ and tcp, 0MQ is (True, conn_str), TCP is (False, conn_str)
    cls.hc_dict = {}
    # lut to map message_ids to host_connections
    cls.message_lut = {}
    cls.backlog_size, cls.timeout = backlog_size, timeout
    cls.g_log(
        "backlog size is {:d}, timeout is {:d}".format(backlog_size, timeout)
    )
    # router socket, identity is derived from the machine name
    router_options = {
        "identity": "relayer_rtr_{}".format(process_tools.get_machine_name()),
        "linger": 0,
        "sndhwm": backlog_size,
        "rcvhwm": backlog_size,
        "backlog": backlog_size,
        "immediate": True,
    }
    router = process_tools.get_socket(
        r_process.zmq_context,
        "ROUTER",
        **router_options
    )
    cls.zmq_socket = router
    r_process.register_poller(router, zmq.POLLIN, cls.get_result)
    # ZMQDiscovery instance
    cls.zmq_discovery = zmq_discovery
def main():
    """Send one passive check result to the server and exit.

    The command line is parsed with ``_get_parser()``; a
    ``passive_check_result`` command built from the options is sent via a
    DEALER socket to ``tcp://<host>:<port>``.  The reply is awaited for
    ``opts.timeout`` seconds.  The log string of the reply (or a timeout
    message) is printed and the process exits with the second element of
    ``get_log_tuple()`` of the reply, or with 1 on timeout.

    Socket and context are released in a ``finally`` clause so that an
    error while sending or while parsing the reply no longer leaves them
    open.
    """
    opts = _get_parser().parse_args()
    conn_str = "tcp://{}:{:d}".format(opts.host, opts.port)
    _context = zmq.Context()
    _sender = None
    try:
        _sender = process_tools.get_socket(
            _context,
            "DEALER",
            identity="spcc_{:d}".format(os.getpid()),
        )
        _com = server_command.srv_command(
            command="passive_check_result",
            device=opts.device,
            check=opts.check,
            state=opts.state,
            output=opts.output,
        )
        _sender.connect(conn_str)
        _sender.send_unicode(str(_com))
        # poll() takes milliseconds, the option is given in seconds
        if _sender.poll(opts.timeout * 1000):
            recv_str = server_command.srv_command(source=_sender.recv())
            _str, _ret = recv_str.get_log_tuple()
            print(_str)
        else:
            print(
                "error timeout in receive() from {} after {}".format(
                    conn_str,
                    logging_tools.get_plural("second", opts.timeout)
                )
            )
            _ret = 1
    finally:
        # NOTE(review): if get_socket() does not set LINGER, term() may block
        # while an undelivered request is still queued - confirm
        if _sender is not None:
            _sender.close()
        _context.term()
    sys.exit(_ret)
def _init_network_sockets(self):
    """Create the local PULL socket, the command socket and the forwarder.

    * A PULL socket is bound to the address ``io_stream_helper`` yields for
      each of the handles ``py_out``, ``py_err`` and ``py_log`` below
      ``/var/lib/logging-server``; the handle files are made accessible
      (rwx) for user, group and others.  The socket is stored as
      ``self.std_client`` and polled via ``self._recv_data``.
    * ``self.network_bind()`` opens the command port.
    * When ``log.forward.address`` is configured, a PUSH socket connected to
      that address is stored in ``self.net_forwarder`` (``None`` otherwise
      or when connecting fails).
    """
    _log_base = "/var/lib/logging-server"
    _handle_names = [
        os.path.join(_log_base, "py_{}".format(_type))
        for _type in ["out", "err", "log"]
    ]
    # remember socket names and plain names, both are handed to _remove_handles()
    self.__open_handles = [
        io_stream_helper.zmq_socket_name(h_name) for h_name in _handle_names
    ] + list(_handle_names)
    self._remove_handles()
    client = self.zmq_context.socket(zmq.PULL)
    for h_name in _handle_names:
        client.bind(
            io_stream_helper.zmq_socket_name(h_name, check_ipc_prefix=True)
        )
        os.chmod(
            io_stream_helper.zmq_socket_name(h_name),
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        )
    self.network_bind(
        bind_port=global_config["COMMAND_PORT"],
        bind_to_localhost=True,
        pollin=self._recv_data,
        client_type=icswServiceEnum.logging_server,
    )
    _fwd_string = self.CC.CS["log.forward.address"].strip()
    self.__only_forward = self.CC.CS["log.forward.exclusive"]
    _forward = None
    if _fwd_string:
        # get_socket() expects the 0MQ context as first argument
        _forward = process_tools.get_socket(
            self.zmq_context,
            "PUSH",
            identity=uuid_tools.get_uuid().get_urn(),
        )
        self.log("connecting forward socket to {}".format(_fwd_string))
        try:
            _forward.connect(_fwd_string)
        except Exception:
            self.log(
                " ... problem: {}".format(process_tools.get_except_info()),
                logging_tools.LOG_LEVEL_ERROR
            )
            # do not leak the unusable socket
            _forward.close()
            _forward = None
    self.net_forwarder = _forward
    self.register_poller(client, zmq.POLLIN, self._recv_data)
    self.std_client = client
def _init_network_sockets(self):
    """Create the local PULL socket, the command socket and the forwarder.

    * A PULL socket is bound to the address ``icswIOStream`` yields for one
      handle per member of ``icswLogHandleTypes`` below ``ICSW_LOG_BASE``;
      the handle files are made accessible (rwx) for user, group and others.
      The socket is stored as ``self.std_client`` and polled via
      ``self._recv_data``.
    * ``self.network_bind()`` opens the command socket of the logging server.
    * When ``log.forward.address`` is configured, a PUSH socket connected to
      that address is stored in ``self.net_forwarder`` (``None`` otherwise
      or when connecting fails).
    """
    _handle_names = [
        os.path.join(ICSW_LOG_BASE, _type.value)
        for _type in icswLogHandleTypes
    ]
    # remember socket names and plain names, both are handed to _remove_handles()
    self.__open_handles = [
        io_stream_helper.icswIOStream.zmq_socket_name(h_name)
        for h_name in _handle_names
    ] + list(_handle_names)
    self._remove_handles()
    client = self.zmq_context.socket(zmq.PULL)
    for h_name in _handle_names:
        client.bind(
            io_stream_helper.icswIOStream.zmq_socket_name(
                h_name,
                check_ipc_prefix=True
            )
        )
        os.chmod(
            io_stream_helper.icswIOStream.zmq_socket_name(h_name),
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        )
    self.network_bind(
        bind_to_localhost=True,
        pollin=self._recv_data,
        service_type_enum=icswServiceEnum.logging_server,
        client_type=icswServiceEnum.logging_server,
    )
    _fwd_string = self.CC.CS["log.forward.address"].strip()
    self.__only_forward = self.CC.CS["log.forward.exclusive"]
    _forward = None
    if _fwd_string:
        # get_socket() expects the 0MQ context as first argument
        _forward = process_tools.get_socket(
            self.zmq_context,
            "PUSH",
            identity=uuid_tools.get_uuid().urn,
        )
        self.log("connecting forward socket to {}".format(_fwd_string))
        try:
            _forward.connect(_fwd_string)
        except Exception:
            self.log(
                " ... problem: {}".format(process_tools.get_except_info()),
                logging_tools.LOG_LEVEL_ERROR
            )
            # do not leak the unusable socket
            _forward.close()
            _forward = None
    self.net_forwarder = _forward
    self.register_poller(client, zmq.POLLIN, self._recv_data)
    self.std_client = client
def _get_package_server_id_from_server(self):
    """Ask the server at ``self.srv_conn_str`` for its 0MQ id.

    A temporary DEALER socket sends a ``get_0mq_id`` command and the reply
    is awaited for at most 10 seconds; a warning is logged roughly every
    half second while waiting.

    Returns the reply as ``srv_command`` or ``None`` on timeout.  The
    temporary socket is always closed, even when connecting, sending or
    parsing the reply raises.
    """
    _timeout = 10
    check_sock = process_tools.get_socket(
        self.zmq_context,
        "DEALER",
        identity="{}:ptest:".format(uuid_tools.get_uuid().urn),
    )
    my_poller = zmq.Poller()
    my_poller.register(check_sock, zmq.POLLIN)
    _result = None
    try:
        check_sock.connect(self.srv_conn_str)
        self.log(
            "fetch srv_id socket, connected to {}".format(self.srv_conn_str)
        )
        check_sock.send_unicode(
            str(server_command.srv_command(command="get_0mq_id"))
        )
        s_time = time.time()
        _last_log = s_time
        while True:
            # short poll (milliseconds), deadline and logging are handled below
            if my_poller.poll(2):
                _result = server_command.srv_command(
                    source=check_sock.recv_unicode()
                )
                break
            cur_time = time.time()
            if cur_time > s_time + _timeout:
                self.log("timeout, exiting ...", logging_tools.LOG_LEVEL_ERROR)
                break
            # abs() keeps the log cadence sane if the wall clock jumps back
            if abs(cur_time - _last_log) > 0.5:
                _last_log = cur_time
                self.log(
                    "timeout, still waiting ({:.2f} of {:.2f})".format(
                        abs(cur_time - s_time),
                        _timeout,
                    ),
                    logging_tools.LOG_LEVEL_WARN
                )
    finally:
        my_poller.unregister(check_sock)
        check_sock.close()
    return _result
def _bind_external(self):
    """Bind one ROUTER socket per entry of ``self.zmq_id_dict``.

    ``self.zmq_id_dict`` maps a bind IP to ``(0MQ identity, is_virtual)``.
    For a non-virtual IP binding is retried once per second until it
    succeeds.  For a virtual IP a failed bind is logged, the socket is
    closed and the IP is skipped (previously the loop kept retrying on the
    closed socket forever).

    Successfully bound sockets are registered for ``self._recv_command`` and
    collected in ``self.socket_list``; ``self.connection_info_dict`` is
    reset to an empty dict.
    """
    self.socket_list = []
    for bind_ip in sorted(self.zmq_id_dict):
        bind_0mq_id, is_virtual = self.zmq_id_dict[bind_ip]
        client = process_tools.get_socket(
            self.zmq_context,
            "ROUTER",
            linger=0,
            identity=bind_0mq_id,
            sndhwm=16,
            rcvhwm=16,
        )
        _conn_str = "tcp://{}:{:d}".format(
            bind_ip,
            self.global_config["COMMAND_PORT"]
        )
        _bound = False
        # number of bind attempts, one per loop iteration
        _count = 0
        while not _bound:
            _count += 1
            try:
                client.bind(_conn_str)
            except zmq.ZMQError:
                self.log(
                    "error binding to {}{}: {}".format(
                        "virtual " if is_virtual else "",
                        _conn_str,
                        process_tools.get_except_info()
                    ),
                    logging_tools.LOG_LEVEL_CRITICAL
                )
                if is_virtual:
                    # virtual IPs are optional: give up instead of retrying
                    client.close()
                    break
                time.sleep(1)
            else:
                _bound = True
        if not _bound:
            continue
        self.register_poller(client, zmq.POLLIN, self._recv_command)
        self.socket_list.append(client)
        self.log(
            "bind to {} sucessfull after {}".format(
                _conn_str,
                logging_tools.get_plural("iteration", _count)
            )
        )
    # dict, id -> latest connection
    self.connection_info_dict = {}
def _send_to_rms_server(self, srv_com):
    """Send ``srv_com`` to the RMS server, connecting on first use.

    The DEALER socket is created lazily from the 0MQ context of the main
    process and connected to the address built from the ``SGE_SERVER`` and
    ``SGE_SERVER_PORT`` entries of ``self._config_dict``.  Errors while
    sending are logged and not propagated.
    """
    sock = self.__server_socket
    if not sock:
        sock = process_tools.get_socket(
            self.module_info.main_proc.zmq_context,
            "DEALER",
            linger=10,
            identity="afm_{}_{:d}".format(
                process_tools.get_machine_name(),
                os.getpid()
            ),
            immediate=False,
        )
        # store the socket before connecting so it is reused on the next call
        self.__server_socket = sock
        target = "tcp://{}:{:d}".format(
            self._config_dict["SGE_SERVER"],
            self._config_dict["SGE_SERVER_PORT"],
        )
        sock.connect(target)
        self.log("connected to {}".format(target))
    try:
        sock.send_unicode(str(srv_com))
    except:
        # best effort: only report the problem
        err_msg = "error sending affinity info: {}".format(
            process_tools.get_except_info()
        )
        self.log(err_msg, logging_tools.LOG_LEVEL_ERROR)
def add_connection(self, conn_str, command, **kwargs):
    """Open a DEALER socket to ``conn_str`` and send ``command`` over it.

    Keyword arguments read here:
      immediate -- passed on to ``process_tools.get_socket()`` (default False)
      multi     -- when true the call does not run the loop itself

    Returns ``self.loop()[0]`` when ``multi`` is not set and no external
    poller is in use, otherwise the index of this connection
    (``self.num_connections - 1``).

    Connect and send errors are not raised; a ``srv_command`` carrying a
    critical result is stored in the result table instead.
    """
    # count how often the same conn_str was requested
    if conn_str not in self.__mult_dict:
        self.__mult_dict[conn_str] = 0
    else:
        self.__mult_dict[conn_str] += 1
    # handle connection to same conn_str during one run
    # (the counter is appended to the identity for every repeated use)
    id_str = "{}{}".format(
        self.identity,
        "{:d}".format(self.__mult_dict[conn_str]) if self.__mult_dict[conn_str] else "")
    self.num_connections += 1
    new_sock = process_tools.get_socket(
        self.context,
        "DEALER",
        linger=self.__linger_time,
        identity=id_str,
        immediate=kwargs.get("immediate", False),
    )
    # remember whether the command is a srv_command ("sc") or something else
    if isinstance(command, server_command.srv_command):
        c_type = "sc"
    else:
        c_type = None
    try:
        new_sock.connect(conn_str)
    except:
        # no usable socket: take the next value of the decreasing dummy
        # counter as key and store an error result under it
        self.__dummy_fd -= 1
        cur_fd = self.__dummy_fd
        self.__add_list.append((cur_fd, c_type))
        _result = server_command.srv_command(source=unicode(command))
        _result.set_result(
            "error connecting: {}".format(process_tools.get_except_info()),
            server_command.SRV_REPLY_STATE_CRITICAL)
        self.__results[cur_fd] = unicode(_result)
    else:
        # self.register_poller(new_sock, zmq.POLLOUT, self.__show)
        # the value of the zmq.FD socket option is the key for all tables
        sock_fd = new_sock.getsockopt(zmq.FD)
        if self.__ext_poller:
            # external poller: remember the start time and register there
            self.start_time = time.time()
            self.__poller_base.register_poller(new_sock, zmq.POLLIN, self.__receive)
        else:
            self.register_poller(new_sock, sock_fd, zmq.POLLIN, self.__receive)
            # self.register_poller(new_sock, sock_fd, zmq.POLLERR, self.__show)
        self.__add_list.append((sock_fd, c_type))
        self.__socket_dict[sock_fd] = new_sock
        try:
            new_sock.send_unicode(unicode(command))
        except:
            # sending failed: store an error result and close the socket
            _result = server_command.srv_command(source=unicode(command))
            _result.set_result(
                "error sending to {}: {}".format(
                    conn_str,
                    process_tools.get_except_info(),
                ),
                server_command.SRV_REPLY_STATE_CRITICAL)
            self.__results[sock_fd] = unicode(_result)
            new_sock.close()
        else:
            # request is on its way, the result is still pending
            self.__results[sock_fd] = None
            self.__pending.add(sock_fd)
    if not kwargs.get("multi", False) and not self.__ext_poller:
        # single-shot mode: run the loop and hand back the first result
        return self.loop()[0]
    else:
        return self.num_connections - 1
def read_config(self):
    """(Re)load the machvector configuration and set up the senders.

    All sockets in the socket table are closed first.  If the config store
    ``MACHVECTOR_CS_NAME`` exists it is loaded; otherwise a new store is
    written, filled either from the legacy XML file (one entry per
    ``mv_target`` element plus the ``mv_flag`` flags) or with a single
    default entry ``"0"``.

    For every enabled dict entry of the store a timer for
    ``self._send_vector`` is registered on the main process and a PUSH
    socket connected to the configured target is stored under the entry id.
    """
    # close sockets
    for _send_id, sock in self.__socket_dict.iteritems():
        self.log("closing socket with id {}".format(_send_id))
        sock.close()
    self.__socket_dict = {}
    # location of the legacy XML configuration
    _conf_name = "/etc/sysconfig/host-monitoring.d/machvector.xml"
    if config_store.ConfigStore.exists(MACHVECTOR_CS_NAME):
        self.cs = config_store.ConfigStore(
            MACHVECTOR_CS_NAME,
            log_com=self.log,
            prefix="mv",
            access_mode=config_store.AccessModeEnum.LOCAL,
            fix_access_mode=True,
        )
    else:
        if os.path.isfile(_conf_name):
            # migrate old config
            # NOTE(review): file() is a Python 2 builtin and the handle is
            # never closed explicitly
            try:
                xml_struct = etree.fromstring(
                    file(_conf_name, "r").read())
            except:
                self.log(
                    "cannot read {}: {}".format(
                        _conf_name, process_tools.get_except_info()),
                    logging_tools.LOG_LEVEL_ERROR)
                xml_struct = None
        else:
            xml_struct = None
        # defaults, used for the dummy config and to fill migrated entries
        _def_config = {
            "target": "localhost",
            "send_every": 30,
            "enabled": True,
            "immediate": False,
            "send_name": "",
            "full_info_every": 10,
            "port": 8002,
            "format": "xml",
        }
        if xml_struct is not None:
            # rewrite current config
            _cs = config_store.ConfigStore(
                MACHVECTOR_CS_NAME,
                log_com=self.log,
                prefix="mv",
                read=False,
                access_mode=config_store.AccessModeEnum.LOCAL,
            )
            for mv_idx, mv_target in enumerate(
                    xml_struct.xpath(".//mv_target", smart_strings=False)):
                _attr = mv_target.attrib
                self.log("migrating old machvector {:d} ({})".format(
                    mv_idx, str(_attr)))
                # boolean attributes count as true when their first character
                # is one of 1 / t / y (case-insensitive)
                # NOTE(review): [0] raises IndexError for an empty attribute
                _dict = {
                    "target": _attr.get("target", "localhost"),
                    "send_every": int(_attr.get("send_every", "30")),
                    "enabled": True if _attr.get("enabled", "yes")[0].lower() in ["1", "t", "y"] else False,
                    "immediate": True if _attr.get("immediate", "no")[0].lower() in ["1", "t", "y"] else False,
                    "send_name": _attr.get("send_name", ""),
                    "full_info_every": int(_attr.get("full_info_every", "10")),
                    "port": int(_attr.get("port", "8002")),
                    "format": _attr.get("format", "xml"),
                }
                # add every default key the migrated entry is missing
                for _key, _value in _def_config.iteritems():
                    if _key not in _dict:
                        _dict[_key] = _value
                # entries are stored under their index as string
                _cs["{:d}".format(mv_idx)] = _dict
            # flags
            for _flag in xml_struct.xpath(".//mv_flags/mv_flag"):
                _cs[_flag.attrib["name"]] = True if _flag.attrib.get(
                    "enabled", "yes")[0].lower() in ["1", "t", "y"
                                                     ] else False
        else:
            # create new dummy config
            _cs = config_store.ConfigStore(MACHVECTOR_CS_NAME, log_com=self.log, prefix="mv", read=False)
            _cs["0"] = _def_config
        self.cs = _cs
        self.cs.write()
    p_pool = self.module.main_proc
    for send_id in self.cs.keys():
        _struct = self.cs[send_id]
        # only dict entries describe targets, other entries (flags) are skipped
        if isinstance(_struct, dict):
            if _struct["enabled"]:
                # counter of the entry, starts at zero
                _struct["sent"] = 0
                p_pool.register_timer(
                    self._send_vector,
                    _struct.get("send_every", 30),
                    data=send_id,
                    instant=_struct.get("immediate", False),
                )
                # zmq sending, to collectd
                t_sock = process_tools.get_socket(
                    p_pool.zmq_context,
                    "PUSH",
                    linger=0,
                    sndhwm=16,
                    backlog=4,
                    # to stop 0MQ trashing the target socket
                    reconnect_ivl=1000,
                    reconnect_ivl_max=30000)
                target_str = "tcp://{}:{:d}".format(
                    _struct.get("target", "127.0.0.1"),
                    _struct.get("port", 8002),
                )
                self.log(
                    "creating zmq.PUSH socket for {}".format(target_str))
                try:
                    t_sock.connect(target_str)
                    self.__socket_dict[send_id] = t_sock
                except:
                    # the socket is not stored when connecting fails
                    self.log(
                        "error connecting to {}: {}".format(
                            target_str,
                            process_tools.get_except_info(),
                        ),
                        logging_tools.LOG_LEVEL_ERROR)
def _init_ipc_sockets(self):
    """Create the IPC receiver / sender sockets and the client ROUTER socket.

    The message counter and the tables for non-host-monitoring (nhm)
    connections are reset.  For ``receiver`` (PULL) and ``sender`` (PUB) an
    IPC socket is bound after a stale socket file has been removed; the
    sockets are stored as ``self.receiver_socket`` / ``self.sender_socket``
    and the PULL socket is polled via ``self._recv_command_ipc``.  A bind
    error is logged and re-raised.  Finally ``self.client_socket`` is
    created and polled via ``self._recv_nhm_result``.
    """
    self.__num_messages = 0
    # nhm (not host monitoring) dictionary for timeout
    self.__nhm_dict = {}
    # raw_nhm (not host monitoring) dictionary for timeout, raw connections (no XML)
    self.__raw_nhm_dict = {}
    self.__nhm_connections = set()
    # also used in md-sync-server/server, ToDo: Refactor
    # (name, 0MQ socket type, high-water mark)
    ipc_specs = (
        ("receiver", zmq.PULL, 2),
        ("sender", zmq.PUB, 1024),
    )
    # make sure both attributes exist, even if binding fails below
    for spec_name, _unused_type, _unused_hwm in ipc_specs:
        setattr(self, "{}_socket".format(spec_name), None)
    for spec_name, spec_type, spec_hwm in ipc_specs:
        sock_name = process_tools.get_zmq_ipc_name(spec_name, s_name="collrelay")
        # path of the socket file: socket name without its first 5 characters
        file_name = sock_name[5:]
        self.log(
            "init {} ipc_socket '{}' (HWM: {:d})".format(
                spec_name,
                sock_name,
                spec_hwm,
            )
        )
        if os.path.exists(file_name):
            self.log("removing previous file")
            try:
                os.unlink(file_name)
            except:
                self.log(
                    "... {}".format(process_tools.get_except_info()),
                    logging_tools.LOG_LEVEL_ERROR
                )
        # give the file up to 100 * 0.1 seconds to disappear
        waited = 0
        while os.path.exists(file_name) and waited < 100:
            self.log("socket {} still exists, waiting".format(sock_name))
            time.sleep(0.1)
            waited += 1
        ipc_socket = self.zmq_context.socket(spec_type)
        try:
            process_tools.bind_zmq_socket(ipc_socket, sock_name)
        except zmq.ZMQError:
            self.log(
                "error binding {}: {}".format(
                    spec_name,
                    process_tools.get_except_info()
                ),
                logging_tools.LOG_LEVEL_CRITICAL
            )
            raise
        # only reached after a successful bind
        setattr(self, "{}_socket".format(spec_name), ipc_socket)
        os.chmod(file_name, 0o777)
        ipc_socket.setsockopt(zmq.LINGER, 0)
        for hwm_option in (zmq.SNDHWM, zmq.RCVHWM):
            ipc_socket.setsockopt(hwm_option, spec_hwm)
        if spec_type == zmq.PULL:
            self.register_poller(ipc_socket, zmq.POLLIN, self._recv_command_ipc)
    client_identity = "ccollclient:{}".format(process_tools.get_machine_name())
    self.client_socket = process_tools.get_socket(
        self.zmq_context,
        "ROUTER",
        identity=client_identity,
        linger=0,
        sndhwm=2,
        rcvhwm=2,
        immediate=True,
    )
    self.register_poller(self.client_socket, zmq.POLLIN, self._recv_nhm_result)
def network_bind(self, **kwargs):
    """Create and bind the main socket and the virtual sockets.

    Keyword arguments read here:
      server_type / service_type_enum / bind_port
          source of the port to bind to (checked in this order); the first
          two look the port up via InstanceXML
      client_type          -- build the bind id from the local uuid
      simple_server_bind   -- skip device recognition (server case only)
      bind_to_localhost    -- add a 127.0.0.1 bind if no 127.x IP is present
      socket_type          -- 0MQ socket type, default "ROUTER"
      pollin, ext_call     -- callback and flag passed to register_poller()
      immediate            -- passed to process_tools.get_socket()
      main_socket_name / virtual_sockets_name
          attribute names used to store the resulting sockets
      need_all_binds       -- raise if any bind failed

    Raises KeyError when no port source is given and ValueError when
    need_all_binds is set and at least one bind failed.
    """
    _need_all_binds = kwargs.get("need_all_binds", False)
    pollin = kwargs.get("pollin", None)
    ext_call = kwargs.get("ext_call", False)
    immediate = kwargs.get("immediate", True)
    # determine the port to bind to
    if "server_type" in kwargs:
        _inst = InstanceXML(log_com=self.log)
        _srv_type = kwargs["server_type"]
        bind_port = _inst.get_port_dict(_srv_type, ptype="command")
    elif "service_type_enum" in kwargs:
        _inst = InstanceXML(log_com=self.log)
        _srv_type = kwargs["service_type_enum"]
        bind_port = _inst.get_port_dict(_srv_type, ptype="command")
    elif "bind_port" in kwargs:
        bind_port = kwargs["bind_port"]
    else:
        raise KeyError("neither bind_port, service_type_enum nor server_type defined in kwargs")
    main_socket_name = kwargs.get("main_socket_name", "main_socket")
    virtual_sockets_name = kwargs.get("virtual_sockets_name", "virtual_sockets")
    bind_to_localhost = kwargs.get("bind_to_localhost", False)
    _sock_type = kwargs.get("socket_type", "ROUTER")
    if "client_type" in kwargs:
        # client: bind id is "<urn>:<uuid postfix of the client type>:"
        uuid = uuid_tools.get_uuid().get_urn()
        if not uuid.startswith("urn"):
            uuid = "urn:uuid:{}".format(uuid)
        self.bind_id = "{}:{}:".format(
            uuid,
            InstanceXML(quiet=True).get_uuid_postfix(kwargs["client_type"]),
        )
        dev_r = None
    else:
        # server: these modules are only imported when this branch is taken
        # NOTE(review): _srv_type is only assigned in the server_type /
        # service_type_enum branches above; with bind_port alone and no
        # client_type this branch would hit an unbound name - confirm callers
        from initat.tools import cluster_location
        from initat.cluster.backbone.routing import get_server_uuid
        self.bind_id = get_server_uuid(_srv_type)
        if kwargs.get("simple_server_bind", False):
            dev_r = None
        else:
            # device recognition
            dev_r = cluster_location.DeviceRecognition()
    # virtual sockets (an already existing list on self is extended)
    if hasattr(self, virtual_sockets_name):
        _virtual_sockets = getattr(self, virtual_sockets_name)
    else:
        _virtual_sockets = []
    # main socket
    _main_socket = None
    # create bind list, entries are
    # (is master bind, list of bind strings, bind id, device or None)
    if dev_r and dev_r.device_dict:
        # all IPs known to the device recognition (local and per-device lists)
        _bind_ips = set(
            list(dev_r.local_ips) + sum(
                [
                    _list for _dev, _list in dev_r.ip_r_lut.iteritems()
                ],
                []
            )
        )
        # complex bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://{}:{:d}".format(_local_ip, bind_port) for _local_ip in dev_r.local_ips
                ],
                self.bind_id,
                None,
            )
        ]
        _virt_list = []
        for _dev, _ip_list in dev_r.ip_r_lut.iteritems():
            # ignore local device
            if _dev.pk != dev_r.device.pk:
                _virt_list.append(
                    (
                        False,
                        [
                            "tcp://{}:{:d}".format(_virtual_ip, bind_port) for _virtual_ip in _ip_list
                        ],
                        get_server_uuid(_srv_type, _dev.uuid),
                        _dev,
                    )
                )
            else:
                self.log(
                    "ignoring virtual IP list ({}) (same device)".format(
                        ", ".join(sorted(_ip_list)),
                    )
                )
        master_bind_list.extend(_virt_list)
        # we have to bind to localhost but localhost is not present in bind_list, add master_bind
        if bind_to_localhost and not any([_ip.startswith("127.") for _ip in _bind_ips]):
            self.log(
                "bind_to_localhost is set but not IP in range 127.0.0.0/8 found in list, adding virtual_bind",
                logging_tools.LOG_LEVEL_WARN
            )
            master_bind_list.append(
                (
                    False,
                    [
                        "tcp://127.0.0.1:{:d}".format(bind_port)
                    ],
                    self.bind_id,
                    None,
                )
            )
    else:
        # simple bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://*:{:d}".format(bind_port)
                ],
                self.bind_id,
                None,
            )
        ]
    # bind strings for which bind() failed
    _errors = []
    # pprint.pprint(master_bind_list)
    # bind strings already handled, each one is bound only once
    bound_list = set()
    for master_bind, bind_list, bind_id, bind_dev in master_bind_list:
        # one socket per bind-list entry, possibly bound to several addresses
        client = process_tools.get_socket(
            self.zmq_context,
            _sock_type,
            identity=bind_id,
            immediate=immediate
        )
        for _bind_str in bind_list:
            if _bind_str in bound_list:
                self.log(
                    "bind_str '{}' (for {}) already used, skipping ...".format(
                        _bind_str,
                        " device '{}'".format(bind_dev) if bind_dev is not None else " master device",
                    ),
                    logging_tools.LOG_LEVEL_ERROR
                )
            else:
                bound_list.add(_bind_str)
                try:
                    client.bind(_bind_str)
                except zmq.ZMQError:
                    # a failed bind is recorded, the remaining binds still run
                    self.log(
                        "error binding to {}: {}".format(
                            _bind_str,
                            process_tools.get_except_info(),
                        ),
                        logging_tools.LOG_LEVEL_CRITICAL
                    )
                    _errors.append(_bind_str)
                else:
                    self.log("bound {} to {} with id {}".format(_sock_type, _bind_str, bind_id))
        if pollin:
            self.register_poller(client, zmq.POLLIN, pollin, ext_call=ext_call, bind_id=bind_id)
        if master_bind:
            _main_socket = client
        else:
            _virtual_sockets.append(client)
    # publish the sockets under the requested attribute names
    setattr(self, main_socket_name, _main_socket)
    setattr(self, virtual_sockets_name, _virtual_sockets)
    if _errors and _need_all_binds:
        raise ValueError("{} went wrong: {}".format(logging_tools.get_plural("bind", len(_errors)), ", ".join(_errors)))