def _check_uuid(self):
    """
    Align the UUID stored for this server in the database with the local
    one and mark every hosted virtual device.

    The local UUID is taken from the third colon-separated field of the URN
    returned by ``uuid_tools.get_uuid()``. If it differs from the ``uuid``
    attribute of the device with primary key ``SERVER_IDX``, the database
    entry is overwritten and saved. Afterwards a ``DeviceRecognition``
    instance is stored in ``self.device_r`` and each device it lists gets
    the ``is_virtual`` device variable set to 1.
    """
    self.log("uuid checking")
    self.log(" - cluster_device_uuid is '{}'".format(uuid_tools.get_uuid().get_urn()))
    server_dev = device.objects.get(Q(pk=global_config["SERVER_IDX"]))
    local_uuid = uuid_tools.get_uuid().get_urn().split(":")[2]
    if local_uuid != server_dev.uuid:
        mismatch_msg = "UUID differs from DB entry ({} [file] != {} [DB]), correcting DB entry".format(
            local_uuid,
            server_dev.uuid,
        )
        self.log(mismatch_msg, logging_tools.LOG_LEVEL_ERROR)
        server_dev.uuid = local_uuid
        server_dev.save()
    # find out which (virtual) devices are hosted on this machine
    recognition = cluster_location.DeviceRecognition()
    self.device_r = recognition
    if not recognition.device_dict:
        return
    plural_str = logging_tools.get_plural(
        "virtual device",
        len(recognition.device_dict.keys()),
    )
    hosted_names = [hosted.name for hosted in recognition.device_dict.itervalues()]
    hosted_names.sort()
    self.log(" - i am also host for {}: {}".format(plural_str, ", ".join(hosted_names)))
    for hosted in recognition.device_dict.itervalues():
        cluster_location.db_device_variable(
            hosted,
            "is_virtual",
            description="Flag set for Virtual Machines",
            value=1,
        )
def main(opt_ns):
    """
    Renew the local UUID and restart the services on success.

    The UUID is read, renewed and read again. When the re-read value still
    equals the value from before the renewal an error is printed and the
    process exits with -1; otherwise the change is reported,
    ``restart_services()`` is called and the process exits with 0.

    ``opt_ns`` is accepted but not used by this function.
    """
    previous_uuid = uuid_tools.get_uuid()
    renewed_uuid = uuid_tools.get_uuid(renew=True)
    # read once more to see what is actually stored after the renewal
    reread_uuid = uuid_tools.get_uuid()
    unchanged = reread_uuid == previous_uuid
    if not unchanged:
        print("changed uuid from {} to {}".format(previous_uuid, renewed_uuid))
        restart_services()
        sys.exit(0)
    print(" *** error changing uuid from {} to {}".format(previous_uuid, renewed_uuid))
    sys.exit(-1)
def get_server_uuid(srv_type=None, uuid=None):
    """
    Build the UUID string for a server, optionally with a service postfix.

    :param srv_type: service type handed to ``_INSTANCE.get_uuid_postfix``;
        when None no postfix is appended
    :param uuid: UUID to use; when None the URN of ``uuid_tools.get_uuid()``
        is taken
    :return: the UUID, prefixed with ``urn:uuid:`` unless it already starts
        with ``urn``, followed by ``:<postfix>:`` when ``srv_type`` is given
    """
    base_urn = uuid_tools.get_uuid().get_urn() if uuid is None else uuid
    if not base_urn.startswith("urn"):
        base_urn = "urn:uuid:{}".format(base_urn)
    if srv_type is None:
        return base_urn
    return "{}:{}:".format(
        base_urn,
        _INSTANCE.get_uuid_postfix(srv_type),
    )
def _register_local_syncer(self):
    """
    Compute identity and address of the local syncer and connect to it.

    The identity is ``urn:uuid:<local uuid>:<postfix>:`` with the postfix
    for ``icswServiceEnum.monitor_slave``; the address is the command port
    of the same service on 127.0.0.1. Both are stored on the instance and
    ``self.main_socket`` is connected to the address.
    """
    instance_xml = InstanceXML(log_com=self.log)
    syncer_uuid = "urn:uuid:{}:{}:".format(
        uuid_tools.get_uuid(),
        instance_xml.get_uuid_postfix(icswServiceEnum.monitor_slave),
    )
    self.__local_syncer_uuid = syncer_uuid
    syncer_port = instance_xml.get_port_dict(icswServiceEnum.monitor_slave, command=True)
    self.__local_syncer_addr = "tcp://127.0.0.1:{:d}".format(syncer_port)
    connect_msg = "connecting to local syncer {} (uuid={})".format(
        self.__local_syncer_addr,
        self.__local_syncer_uuid,
    )
    self.log(connect_msg)
    self.main_socket.connect(self.__local_syncer_addr)
def send_to_server(self, target_server, target_port, srv_com):
    """
    Send ``srv_com`` to ``tcp://<target_server>:<target_port>``.

    A DEALER socket is created per target on first use and cached in the
    instance's target dictionary; later calls reuse the cached socket.

    :param target_server: host name or address of the target
    :param target_port: integer TCP port of the target
    :param srv_com: command object; ``str(srv_com)`` is what gets sent
    """
    targ_str = "tcp://{}:{:d}".format(target_server, target_port)
    if targ_str not in self.__target_dict:
        send_socket = self.zmq_context.socket(zmq.DEALER)
        try:
            send_socket.setsockopt(zmq.LINGER, 0)
            send_socket.setsockopt_string(
                zmq.IDENTITY,
                "{}_csin".format(uuid_tools.get_uuid().urn),
            )
            # ZeroMQ applies these options only to connections established
            # after the option was set, so they have to precede connect()
            for _option, _value in [
                (zmq.SNDHWM, 16),
                (zmq.RCVHWM, 16),
                (zmq.RECONNECT_IVL_MAX, 500),
                (zmq.RECONNECT_IVL, 200),
                (zmq.TCP_KEEPALIVE, 1),
                (zmq.TCP_KEEPALIVE_IDLE, 300),
            ]:
                send_socket.setsockopt(_option, _value)
            send_socket.connect(targ_str)
        except Exception:
            # do not leak a half-configured socket, the error is re-raised
            send_socket.close()
            raise
        self.log("init connection to {}".format(targ_str))
        self.__target_dict[targ_str] = send_socket
    self.__target_dict[targ_str].send_unicode(str(srv_com))
def _init_network_sockets(self):
    """
    Create the sockets of the logging server.

    * one PULL socket bound to the ``py_out``, ``py_err`` and ``py_log``
      handles below ``/var/lib/logging-server``; the socket files are made
      accessible for everyone (rwx for user, group and others)
    * the command socket via ``self.network_bind`` on ``COMMAND_PORT``
    * an optional PUSH socket connected to ``log.forward.address``

    Sets ``self.net_forwarder`` (forward socket or None) and
    ``self.std_client`` (the PULL socket).
    """
    _log_base = "/var/lib/logging-server"
    _handle_names = [
        os.path.join(_log_base, "py_{}".format(_type))
        for _type in ["out", "err", "log"]
    ]
    # remember socket names and plain names for _remove_handles()
    self.__open_handles = [
        io_stream_helper.zmq_socket_name(h_name) for h_name in _handle_names
    ] + list(_handle_names)
    self._remove_handles()
    client = self.zmq_context.socket(zmq.PULL)
    for h_name in _handle_names:
        client.bind(
            io_stream_helper.zmq_socket_name(h_name, check_ipc_prefix=True)
        )
        os.chmod(
            io_stream_helper.zmq_socket_name(h_name),
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        )
    self.network_bind(
        bind_port=global_config["COMMAND_PORT"],
        bind_to_localhost=True,
        pollin=self._recv_data,
        client_type=icswServiceEnum.logging_server,
    )
    _fwd_string = self.CC.CS["log.forward.address"].strip()
    self.__only_forward = self.CC.CS["log.forward.exclusive"]
    _forward = None
    if _fwd_string:
        # NOTE(review): other call sites of process_tools.get_socket pass the
        # zmq context as first argument - confirm the signature used here
        _forward = process_tools.get_socket(
            "PUSH",
            identity=uuid_tools.get_uuid().get_urn()
        )
        self.log("connecting forward socket to {}".format(_fwd_string))
        try:
            _forward.connect(_fwd_string)
        except Exception:
            self.log(
                " ... problem: {}".format(process_tools.get_except_info()),
                logging_tools.LOG_LEVEL_ERROR
            )
            # forwarding is disabled, so release the unusable socket
            _forward.close()
            _forward = None
    self.net_forwarder = _forward
    self.register_poller(client, zmq.POLLIN, self._recv_data)
    self.std_client = client
def _init_network_sockets(self):
    """
    Create the sockets of the logging server.

    * one PULL socket bound to one handle per ``icswLogHandleTypes`` member
      below ``ICSW_LOG_BASE``; the socket files are made accessible for
      everyone (rwx for user, group and others)
    * the command socket via ``self.network_bind``
    * an optional PUSH socket connected to ``log.forward.address``

    Sets ``self.net_forwarder`` (forward socket or None) and
    ``self.std_client`` (the PULL socket).
    """
    _handle_names = [
        os.path.join(ICSW_LOG_BASE, _type.value)
        for _type in icswLogHandleTypes
    ]
    # remember socket names and plain names for _remove_handles()
    self.__open_handles = [
        io_stream_helper.icswIOStream.zmq_socket_name(h_name)
        for h_name in _handle_names
    ] + list(_handle_names)
    self._remove_handles()
    client = self.zmq_context.socket(zmq.PULL)
    for h_name in _handle_names:
        client.bind(
            io_stream_helper.icswIOStream.zmq_socket_name(
                h_name,
                check_ipc_prefix=True
            )
        )
        os.chmod(
            io_stream_helper.icswIOStream.zmq_socket_name(h_name),
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
        )
    self.network_bind(
        bind_to_localhost=True,
        pollin=self._recv_data,
        service_type_enum=icswServiceEnum.logging_server,
        client_type=icswServiceEnum.logging_server,
    )
    _fwd_string = self.CC.CS["log.forward.address"].strip()
    self.__only_forward = self.CC.CS["log.forward.exclusive"]
    _forward = None
    if _fwd_string:
        # NOTE(review): other call sites of process_tools.get_socket pass the
        # zmq context as first argument - confirm the signature used here
        _forward = process_tools.get_socket(
            "PUSH",
            identity=uuid_tools.get_uuid().urn
        )
        self.log("connecting forward socket to {}".format(_fwd_string))
        try:
            _forward.connect(_fwd_string)
        except Exception:
            self.log(
                " ... problem: {}".format(process_tools.get_except_info()),
                logging_tools.LOG_LEVEL_ERROR
            )
            # forwarding is disabled, so release the unusable socket
            _forward.close()
            _forward = None
    self.net_forwarder = _forward
    self.register_poller(client, zmq.POLLIN, self._recv_data)
    self.std_client = client
def _get_package_server_id_from_server(self):
    """
    Ask the server at ``self.srv_conn_str`` for its 0MQ id.

    A temporary DEALER socket sends a ``get_0mq_id`` command and waits up to
    10 seconds for the answer, logging a warning roughly every half second
    while waiting.

    :return: the reply as ``server_command.srv_command`` or None on timeout
    """
    check_sock = process_tools.get_socket(
        self.zmq_context,
        "DEALER",
        identity="{}:ptest:".format(uuid_tools.get_uuid().urn),
    )
    my_poller = zmq.Poller()
    my_poller.register(check_sock, zmq.POLLIN)
    _result = None
    _timeout = 10
    try:
        check_sock.connect(self.srv_conn_str)
        self.log(
            "fetch srv_id socket, connected to {}".format(self.srv_conn_str)
        )
        check_sock.send_unicode(
            str(server_command.srv_command(command="get_0mq_id"))
        )
        s_time = time.time()
        _last_log = s_time
        while True:
            # pyzmq poll timeouts are given in milliseconds
            if my_poller.poll(2):
                _result = server_command.srv_command(
                    source=check_sock.recv_unicode()
                )
                break
            cur_time = time.time()
            if cur_time > s_time + _timeout:
                self.log("timeout, exiting ...", logging_tools.LOG_LEVEL_ERROR)
                _result = None
                break
            if abs(cur_time - _last_log) > 0.5:
                _last_log = cur_time
                self.log(
                    "timeout, still waiting ({:.2f} of {:.2f})".format(
                        abs(cur_time - s_time),
                        _timeout,
                    ),
                    logging_tools.LOG_LEVEL_WARN
                )
    finally:
        # release the temporary socket even if send / receive / parse failed
        my_poller.unregister(check_sock)
        check_sock.close()
    return _result
def _recv_discovery(self, sock):
    """
    Handle the answer to a discovery request.

    The reply read from ``sock`` carries the discovery id, the 0MQ id of the
    peer, its connection string and the broadcast command. The 0MQ id is
    stored for the connection string, the discovery socket registered under
    the discovery id is unregistered, closed and forgotten, and
    ``self.main_socket`` is connected to the peer unless the peer's 0MQ id
    equals the local URN.

    :param sock: socket the discovery reply is read from
    """
    result = server_command.srv_command(source=sock.recv_unicode())
    discovery_id = result["discovery_id"].text
    t_0mq_id = result["zmq_id"].text
    conn_str = result["conn_str"].text
    bc_com = result["broadcast_command"].text
    self.log(
        "got 0MQ_id '{}' for discovery_id '{}' (connection string {}, bc_command {})".format(
            t_0mq_id,
            discovery_id,
            conn_str,
            bc_com
        )
    )
    self.__connection_dict[conn_str] = t_0mq_id
    self.log("closing discovery socket for {}".format(conn_str))
    discovery_sock = self.__discovery_dict[discovery_id]
    self.unregister_poller(discovery_sock, zmq.POLLIN)
    discovery_sock.close()
    del self.__discovery_dict[discovery_id]
    connected = False
    try:
        if self.__connection_dict[conn_str] != uuid_tools.get_uuid().get_urn():
            self.main_socket.connect(conn_str)
            connected = True
        else:
            self.log(
                "no connection to self",
                logging_tools.LOG_LEVEL_WARN
            )
    except Exception:
        self.log(
            "error connecting to {}: {}".format(
                conn_str,
                process_tools.get_except_info()
            ),
            logging_tools.LOG_LEVEL_ERROR
        )
    else:
        # only report success when a connect was actually issued
        if connected:
            self.log("connected to {}".format(conn_str))
def _call(self, cur_inst):
    """
    Report the local UUID through the command of ``cur_inst``.

    The URN is stored under the ``uuid`` key of ``cur_inst.srv_com`` and
    also placed into the result string of the command.
    """
    srv_com = cur_inst.srv_com
    srv_com["uuid"] = uuid_tools.get_uuid().urn
    result_str = "uuid is {}".format(uuid_tools.get_uuid().urn)
    cur_inst.srv_com.set_result(
        result_str,
    )
def migrate_uuid():
    """
    Call ``uuid_tools.get_uuid()`` once and discard the result.

    NOTE(review): presumably the call is made for its side effect of
    creating or migrating the stored UUID - confirm in uuid_tools.
    """
    # imported locally, exactly where it is needed
    from initat.tools.uuid_tools import get_uuid

    get_uuid()
def __init__(self):
    """
    Set up the main process pool of the mother server.

    The statements run in a fixed order: initialise the process pool,
    register the exception handlers, initialise and check the configuration,
    extend ``global_config`` with derived directories, run the local
    preparation steps (directories, netboot, NFS exports, syslog, DHCP,
    status entries) and finally open the network sockets. Only when
    ``_init_network_sockets()`` returns a true value are the worker
    processes added and the initial commands sent; otherwise
    ``_int_error("bind problem")`` is called.
    """
    # only the short machine name is used below (SERVER_SHORT_NAME)
    _long_host_name, mach_name = process_tools.get_fqdn()
    threading_tools.icswProcessPool.__init__(self, "main")
    # both exception names are routed to the same handler
    self.register_exception("int_error", self._int_error)
    self.register_exception("term_error", self._int_error)
    self.CC.init(icswServiceEnum.mother_server, global_config)
    self.CC.check_config()
    # close db connection (for daemonizing)
    db_tools.close_connection()
    self.debug = global_config["DEBUG"]
    self.srv_helper = service_tools.ServiceHelper(self.log)
    # command ports of the hoststatus and host-monitoring services
    self.__hs_port = InstanceXML(quiet=True).get_port_dict(
        icswServiceEnum.hoststatus, command=True)
    self.__hm_port = InstanceXML(quiet=True).get_port_dict(
        icswServiceEnum.host_monitoring, command=True)
    # config variables handed to read_config_from_db
    # (the given values are presumably defaults - TODO confirm)
    self.CC.read_config_from_db([
        ("TFTP_LINK", configfile.StringConfigVar("/tftpboot")),
        ("TFTP_DIR", configfile.StringConfigVar(
            os.path.join(CLUSTER_DIR, "system", "tftpboot"))),
        ("CLUSTER_DIR", configfile.StringConfigVar(CLUSTER_DIR)),
        # in 10th of seconds
        ("NODE_BOOT_DELAY", configfile.IntegerConfigVar(50)),
        ("FANCY_PXE_INFO", configfile.BoolConfigVar(False)),
        ("SERVER_SHORT_NAME", configfile.StringConfigVar(mach_name)),
        ("WRITE_DHCP_CONFIG", configfile.BoolConfigVar(True)),
        ("DHCP_AUTHORITATIVE", configfile.BoolConfigVar(False)),
        ("DHCP_ONLY_BOOT_NETWORKS", configfile.BoolConfigVar(True)),
        ("MODIFY_NFS_CONFIG", configfile.BoolConfigVar(True)),
        ("NEED_ALL_NETWORK_BINDS", configfile.BoolConfigVar(True)),
    ])
    # entries derived from TFTP_DIR / CLUSTER_DIR, which therefore have to
    # be present in global_config at this point
    global_config.add_config_entries([
        ("CONFIG_DIR", configfile.StringConfigVar(
            os.path.join(global_config["TFTP_DIR"], "config"))),
        ("ETHERBOOT_DIR", configfile.StringConfigVar(
            os.path.join(global_config["TFTP_DIR"], "etherboot"))),
        ("KERNEL_DIR", configfile.StringConfigVar(
            os.path.join(global_config["TFTP_DIR"], "kernels"))),
        ("SHARE_DIR", configfile.StringConfigVar(
            os.path.join(global_config["CLUSTER_DIR"], "share", "mother"))),
        ("NODE_SOURCE_IDX", configfile.IntegerConfigVar(LogSource.new("node").pk)),
    ])
    self.CC.log_config()
    self.CC.re_insert_config()
    # prepare directories
    self._prepare_directories()
    # check netboot functionality
    self._check_netboot_functionality()
    # check nfs exports
    self._check_nfs_exports()
    # modify syslog config
    self._enable_syslog_config()
    # dhcp config
    self.write_dhcp_config()
    # check status entries
    self._check_status_entries()
    self.register_func("contact_hoststatus", self._contact_hoststatus)
    self.register_func("contact_hostmonitor", self._contact_hostmonitor)
    my_uuid = uuid_tools.get_uuid()
    self.log("cluster_device_uuid is '{}'".format(my_uuid.urn))
    # worker processes are only started when the sockets could be set up
    if self._init_network_sockets():
        self.add_process(initat.mother.kernel.KernelSyncProcess("kernel"), start=True)
        self.add_process(
            initat.mother.command.ExternalCommandProcess("command"), start=True)
        self.add_process(
            initat.mother.control.NodeControlProcess("control"), start=True)
        self.add_process(initat.mother.control.ICMPProcess("icmp"), start=True)
        db_tools.close_connection()
        # subset of the global config passed on to the SNMP process
        conf_dict = {
            key: global_config[key]
            for key in ["LOG_NAME", "LOG_DESTINATION", "VERBOSE"]
        }
        self.add_process(SNMPProcess("snmp_process", conf_dict=conf_dict), start=True)
        # send initial commands
        self.send_to_process(
            "kernel", "srv_command",
            str(
                server_command.srv_command(command="check_kernel_dir",
                                           insert_all_found="1")))
        # restart hoststatus
        self.send_to_process("command", "delay_command",
                             "/etc/init.d/hoststatus restart", delay_time=5)
        self.send_to_process("control", "refresh", refresh=False)
    else:
        self._int_error("bind problem")
def _init_network_sockets(self):
    """
    Set up the 0MQ id mapping and the local IPC sockets of host-monitoring.

    Steps:

    1. Load the bind-address -> (uuid, virtual) mapping from the config
       store ``ZMQ_ID_MAP_STORE``. When the store does not exist it is
       created, seeded from the legacy XML file
       ``/etc/sysconfig/host-monitoring.d/0mq_id`` if that file is present.
    2. Make sure entry ``0`` carries the local URN and write the store when
       it was created or changed.
    3. Unless only the wildcard address is configured, replace the wildcard
       entry by one entry per local IPv4 address that has none yet.
    4. Store the mapping in ``self.zmq_id_dict``, the global id in
       ``self.zeromq_id`` and call ``self._bind_external()``.
    5. Create the ``vector``, ``command`` and ``result`` IPC sockets and
       store them as ``self.<name>_socket``.

    :raises zmq.ZMQError: when one of the IPC sockets cannot be bound
    """
    zmq_id_name = "/etc/sysconfig/host-monitoring.d/0mq_id"
    my_0mq_id = uuid_tools.get_uuid().get_urn()
    if not config_store.ConfigStore.exists(ZMQ_ID_MAP_STORE):
        create_0mq_cs = True
        zmq_id_dict = {}
        if os.path.exists(zmq_id_name):
            try:
                # context manager so the legacy file handle gets closed
                with open(zmq_id_name, "r") as _id_file:
                    _id_tree = etree.fromstring(_id_file.read())
                zmq_id_dict = {
                    cur_el.attrib["bind_address"]: (
                        cur_el.text,
                        "virtual" in cur_el.attrib,
                    )
                    for cur_el in _id_tree.xpath(
                        ".//zmq_id[@bind_address]",
                        smart_strings=False
                    )
                }
            except Exception:
                self.log(
                    "error reading from {}: {}".format(
                        zmq_id_name,
                        process_tools.get_except_info()
                    ),
                    logging_tools.LOG_LEVEL_ERROR
                )
                zmq_id_dict = {}
        if "*" not in zmq_id_dict:
            zmq_id_dict["*"] = (my_0mq_id, False)
        _cs = config_store.ConfigStore(
            ZMQ_ID_MAP_STORE,
            log_com=self.log,
            read=False,
            prefix="bind",
            access_mode=config_store.AccessModeEnum.LOCAL,
            fix_access_mode=True,
        )
        # the wildcard always gets index 0, the rest follows sorted
        _ordered_keys = ["*"] + sorted(
            _key for _key in zmq_id_dict if _key != "*"
        )
        for _idx, _key in enumerate(_ordered_keys):
            _cs["{:d}".format(_idx)] = {
                "address": _key,
                "uuid": zmq_id_dict[_key][0],
                "virtual": zmq_id_dict[_key][1]
            }
    else:
        # read from cs
        _cs = config_store.ConfigStore(
            ZMQ_ID_MAP_STORE,
            log_com=self.log,
            prefix="bind",
            access_mode=config_store.AccessModeEnum.LOCAL,
            fix_access_mode=True,
        )
        create_0mq_cs = False
    if "0" not in _cs:
        _cs["0"] = {
            "address": "*",
            "virtual": False,
            "uuid": my_0mq_id,
        }
    if _cs["0"]["uuid"] != my_0mq_id:
        # log exactly the value that was compared above
        self.log(
            "0MQ id from cluster ({}) differs from host-monitoring 0MQ id ({})".format(
                my_0mq_id,
                _cs["0"]["uuid"],
            )
        )
        # code snippet to update value
        _cur = _cs["0"]
        _cur["uuid"] = my_0mq_id
        _cs["0"] = _cur
        create_0mq_cs = True
    if create_0mq_cs:
        _cs.write()
    # rebuild the mapping from the store: address -> (uuid, virtual)
    zmq_id_dict = {}
    for _idx in _cs.keys():
        _bind = _cs[_idx]
        zmq_id_dict[_bind["address"]] = (
            _bind["uuid"],
            _bind["virtual"],
        )
    # get all ipv4 interfaces with their ip addresses,
    # dict: interfacename -> first IPv4 address
    # (key 2 is presumably AF_INET in the netifaces result - confirm)
    ipv4_dict = {}
    for if_name in netifaces.interfaces():
        _if_info = netifaces.ifaddresses(if_name)
        if 2 in _if_info:
            ipv4_dict[if_name] = _if_info[2][0]["addr"]
    ipv4_addresses = list(ipv4_dict.values())
    # list() keeps the comparison meaningful for dict key views as well
    if list(zmq_id_dict) != ["*"] and "*" in zmq_id_dict:
        # not a pure wildcard bind: expand the wildcard to all local IPs
        wc_urn, wc_virtual = zmq_id_dict.pop("*")
        for target_ip in ipv4_addresses:
            if target_ip not in zmq_id_dict:
                zmq_id_dict[target_ip] = (wc_urn, wc_virtual)
    ref_id = "*" if "*" in zmq_id_dict else "127.0.0.1"
    self.zeromq_id = zmq_id_dict[ref_id][0].split(":")[-1]
    self.log("0MQ bind info (global 0MQ id is {})".format(self.zeromq_id))
    for key in sorted(zmq_id_dict):
        self.log(
            "bind address {:<15s}: {}{}".format(
                key,
                zmq_id_dict[key][0],
                " is virtual" if zmq_id_dict[key][1] else ""
            )
        )
    self.zmq_id_dict = zmq_id_dict
    self._bind_external()
    # (protocol, short name, socket type, HWM, pollin handler, identity)
    sock_list = [
        ("ipc", "vector", zmq.PULL, 512, None, ""),
        ("ipc", "command", zmq.PULL, 512, self._recv_ext_command, ""),
        (
            "ipc", "result", zmq.ROUTER, 512, None,
            process_tools.zmq_identity_str("host_monitor")
        ),
    ]
    for _sock_proto, short_sock_name, sock_type, hwm_size, dst_func, zmq_id in sock_list:
        sock_name = process_tools.get_zmq_ipc_name(short_sock_name, s_name="collserver")
        # strip the first 5 characters (presumably the "ipc:/" part of the
        # name - confirm) to get the path of the socket file
        file_name = sock_name[5:]
        self.log(
            "init {} ipc_socket '{}' (HWM: {:d})".format(
                short_sock_name,
                sock_name,
                hwm_size
            )
        )
        if os.path.exists(file_name):
            self.log("removing previous file")
            try:
                os.unlink(file_name)
            except Exception:
                self.log(
                    "... {}".format(process_tools.get_except_info()),
                    logging_tools.LOG_LEVEL_ERROR
                )
            # wait up to 100 * 0.1 seconds for the old file to vanish
            wait_iter = 0
            while os.path.exists(file_name) and wait_iter < 100:
                self.log("socket {} still exists, waiting".format(sock_name))
                time.sleep(0.1)
                wait_iter += 1
        cur_socket = self.zmq_context.socket(sock_type)
        if zmq_id:
            cur_socket.setsockopt_string(zmq.IDENTITY, zmq_id)
        try:
            process_tools.bind_zmq_socket(cur_socket, sock_name)
        except zmq.ZMQError:
            self.log(
                "error binding {}: {}".format(
                    short_sock_name,
                    process_tools.get_except_info()
                ),
                logging_tools.LOG_LEVEL_CRITICAL
            )
            # release the unbound socket before propagating the error
            cur_socket.close()
            raise
        else:
            setattr(self, "{}_socket".format(short_sock_name), cur_socket)
            # NOTE(review): the value is read but not used below - confirm
            # whether it should be applied to the socket or dropped
            _backlog_size = self.CC.CS["hm.socket.backlog.size"]
            # 0o777 is valid octal syntax on Python 2.6+ and Python 3
            os.chmod(file_name, 0o777)
            cur_socket.setsockopt(zmq.LINGER, 0)
            cur_socket.setsockopt(zmq.SNDHWM, hwm_size)
            cur_socket.setsockopt(zmq.RCVHWM, hwm_size)
            if dst_func:
                self.register_poller(cur_socket, zmq.POLLIN, dst_func)
def __call__(self, srv_com, cur_ns):
    """
    Store the URN of the local UUID under the ``uuid`` key of ``srv_com``.

    ``cur_ns`` is accepted but not used.
    """
    local_urn = uuid_tools.get_uuid().urn
    srv_com["uuid"] = local_urn
def network_bind(self, **kwargs):
    """
    Create and bind the main socket and, if needed, the virtual sockets.

    Recognised keyword arguments:

    * ``server_type`` / ``service_type_enum``: service whose command port
      is looked up via ``InstanceXML``; alternatively ``bind_port`` gives
      the port directly (one of the three is required)
    * ``client_type``: when given, the bind id is built from the local URN
      and the uuid postfix of that type and no device recognition is done
    * ``simple_server_bind`` (False): skip device recognition
    * ``bind_to_localhost`` (False): add a 127.0.0.1 bind when no address
      from 127.0.0.0/8 is part of a complex bind
    * ``pollin`` (None), ``ext_call`` (False): poller callback settings
    * ``immediate`` (True), ``socket_type`` ("ROUTER"): socket settings
    * ``main_socket_name`` ("main_socket"), ``virtual_sockets_name``
      ("virtual_sockets"): attribute names the sockets are stored under
    * ``need_all_binds`` (False): raise when any bind failed

    :raises KeyError: when no port source is given
    :raises ValueError: when binds failed and ``need_all_binds`` is set
    """
    _need_all_binds = kwargs.get("need_all_binds", False)
    pollin = kwargs.get("pollin", None)
    ext_call = kwargs.get("ext_call", False)
    immediate = kwargs.get("immediate", True)
    # stays None for a plain bind_port, so the name is always bound when
    # get_server_uuid() is called further down
    _srv_type = None
    if "server_type" in kwargs or "service_type_enum" in kwargs:
        if "server_type" in kwargs:
            _srv_type = kwargs["server_type"]
        else:
            _srv_type = kwargs["service_type_enum"]
        _inst = InstanceXML(log_com=self.log)
        bind_port = _inst.get_port_dict(_srv_type, ptype="command")
    elif "bind_port" in kwargs:
        bind_port = kwargs["bind_port"]
    else:
        raise KeyError("neither bind_port, service_type_enum nor server_type defined in kwargs")
    main_socket_name = kwargs.get("main_socket_name", "main_socket")
    virtual_sockets_name = kwargs.get("virtual_sockets_name", "virtual_sockets")
    bind_to_localhost = kwargs.get("bind_to_localhost", False)
    _sock_type = kwargs.get("socket_type", "ROUTER")
    if "client_type" in kwargs:
        _urn = uuid_tools.get_uuid().get_urn()
        if not _urn.startswith("urn"):
            _urn = "urn:uuid:{}".format(_urn)
        self.bind_id = "{}:{}:".format(
            _urn,
            InstanceXML(quiet=True).get_uuid_postfix(kwargs["client_type"]),
        )
        dev_r = None
    else:
        from initat.tools import cluster_location
        from initat.cluster.backbone.routing import get_server_uuid
        self.bind_id = get_server_uuid(_srv_type)
        if kwargs.get("simple_server_bind", False):
            dev_r = None
        else:
            # device recognition
            dev_r = cluster_location.DeviceRecognition()
    # virtual sockets, reuse an already existing list
    _virtual_sockets = getattr(self, virtual_sockets_name, [])
    # main socket
    _main_socket = None
    # create bind list, entries are
    # (is master bind, list of bind strings, bind id, device or None)
    if dev_r and dev_r.device_dict:
        _bind_ips = set(dev_r.local_ips)
        for _ip_list in dev_r.ip_r_lut.values():
            _bind_ips.update(_ip_list)
        # complex bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://{}:{:d}".format(_local_ip, bind_port)
                    for _local_ip in dev_r.local_ips
                ],
                self.bind_id,
                None,
            )
        ]
        for _dev, _ip_list in dev_r.ip_r_lut.items():
            if _dev.pk != dev_r.device.pk:
                master_bind_list.append(
                    (
                        False,
                        [
                            "tcp://{}:{:d}".format(_virtual_ip, bind_port)
                            for _virtual_ip in _ip_list
                        ],
                        get_server_uuid(_srv_type, _dev.uuid),
                        _dev,
                    )
                )
            else:
                # ignore local device
                self.log(
                    "ignoring virtual IP list ({}) (same device)".format(
                        ", ".join(sorted(_ip_list)),
                    )
                )
        # we have to bind to localhost but localhost is not present in
        # the bind list, add an extra bind
        if bind_to_localhost and not any(_ip.startswith("127.") for _ip in _bind_ips):
            self.log(
                "bind_to_localhost is set but not IP in range 127.0.0.0/8 found in list, adding virtual_bind",
                logging_tools.LOG_LEVEL_WARN
            )
            master_bind_list.append(
                (
                    False,
                    [
                        "tcp://127.0.0.1:{:d}".format(bind_port)
                    ],
                    self.bind_id,
                    None,
                )
            )
    else:
        # simple bind
        master_bind_list = [
            (
                True,
                [
                    "tcp://*:{:d}".format(bind_port)
                ],
                self.bind_id,
                None,
            )
        ]
    _errors = []
    # bind strings already used, each one is bound only once
    bound_list = set()
    for master_bind, bind_list, bind_id, bind_dev in master_bind_list:
        client = process_tools.get_socket(
            self.zmq_context,
            _sock_type,
            identity=bind_id,
            immediate=immediate
        )
        for _bind_str in bind_list:
            if _bind_str in bound_list:
                self.log(
                    "bind_str '{}' (for {}) already used, skipping ...".format(
                        _bind_str,
                        " device '{}'".format(bind_dev) if bind_dev is not None else " master device",
                    ),
                    logging_tools.LOG_LEVEL_ERROR
                )
                continue
            bound_list.add(_bind_str)
            try:
                client.bind(_bind_str)
            except zmq.ZMQError:
                self.log(
                    "error binding to {}: {}".format(
                        _bind_str,
                        process_tools.get_except_info(),
                    ),
                    logging_tools.LOG_LEVEL_CRITICAL
                )
                _errors.append(_bind_str)
            else:
                self.log("bound {} to {} with id {}".format(_sock_type, _bind_str, bind_id))
                if pollin:
                    self.register_poller(client, zmq.POLLIN, pollin, ext_call=ext_call, bind_id=bind_id)
        if master_bind:
            _main_socket = client
        else:
            _virtual_sockets.append(client)
    setattr(self, main_socket_name, _main_socket)
    setattr(self, virtual_sockets_name, _virtual_sockets)
    if _errors and _need_all_binds:
        raise ValueError(
            "{} went wrong: {}".format(
                logging_tools.get_plural("bind", len(_errors)),
                ", ".join(_errors)
            )
        )