def cobbler_install(config_path=None):
    """Install and configure Cobbler in container.

    This function must be called within the container 'pup-venv'
    python virtual environment. Cobbler will be installed within
    this environment.

    If ``cobbler check`` succeeds the function returns immediately.
    Otherwise the Cobbler source is cloned and patched, ``make install``
    is run, the dnsmasq / Apache2 / NTP / Cobbler configuration files are
    edited, and the related services are restarted.

    Args:
        config_path (str): Path handed to ``Config``. Defaults to None.

    Raises:
        ValueError: No deployer client network of type 'pxe' is defined,
            so the PXE addresses required by the configuration steps are
            not available.
        SystemExit: The user answered 'T' when asked whether a
            non-working Cobbler should be re-installed.
    """
    cfg = Config(config_path)
    log = logger.getlogger()

    # Check to see if cobbler is already installed
    try:
        util.bash_cmd('cobbler check')
        log.info("Cobbler is already installed")
        return
    except util.CalledProcessError as error:
        if error.returncode == 127:
            log.debug("'cobbler' command not found, continuing with "
                      "installation")
        else:
            log.warning("Cobbler is installed but not working:")
            log.warning(error.output)
            print("\nPress enter to remove Cobbler and attempt to ")
            print("re-install, or 'T' to terminate.")
            resp = input("\nEnter or 'T': ")
            log.debug("User response = \'{}\'".format(resp))
            if resp == 'T':
                sys.exit('POWER-Up stopped at user request')

    # Clone cobbler github repo
    cobbler_url = URL
    cobbler_branch = BRANCH
    install_dir = gen.get_cobbler_install_dir()
    if os.path.exists(install_dir):
        log.info(
            "Removing Cobbler source directory \'{}\'".format(install_dir))
        util.bash_cmd('rm -rf %s' % install_dir)
    log.info("Cloning Cobbler branch \'%s\' from \'%s\'" %
             (cobbler_branch, cobbler_url))
    repo = Repo.clone_from(cobbler_url,
                           install_dir,
                           branch=cobbler_branch,
                           single_branch=True)
    log.info("Cobbler branch \'%s\' cloned into \'%s\'" %
             (repo.active_branch, repo.working_dir))

    # Modify the Cobbler script that writes DHCP reservations so that the
    # lease time is included.
    dhcp_lease_time = cfg.get_globals_dhcp_lease_time()
    util.replace_regex(
        MANAGE_DNSMASQ, r'systxt \= systxt \+ \"\\\\n\"',
        "systxt = systxt + \",{}\\\\n\"".format(dhcp_lease_time))

    # Use non-secure http to download network boot-loaders
    util.replace_regex(COBBLER_DLCONTENT, 'https://cobbler.github.io',
                       'http://cobbler.github.io')

    # Use non-secure http to download signatures
    util.replace_regex(COBBLER_SETTINGS_PY, 'https://cobbler.github.io',
                       'http://cobbler.github.io')

    # Run cobbler make install
    util.bash_cmd('cd %s; make install' % install_dir)

    # Backup original files
    util.backup_file(DNSMASQ_TEMPLATE)
    util.backup_file(MODULES_CONF)
    util.backup_file(COBBLER_WEB_SETTINGS)
    util.backup_file(COBBLER_CONF_ORIG)
    util.backup_file(COBBLER_WEB_CONF_ORIG)
    util.backup_file(COBBLER_SETTINGS)
    util.backup_file(PXEDEFAULT_TEMPLATE)
    util.backup_file(KICKSTART_DONE)
    util.backup_file(NTP_CONF)
    util.backup_file(APACHE2_CONF)

    # Create tftp root directory
    if not os.path.exists(TFTPBOOT):
        mode = 0o755
        os.mkdir(TFTPBOOT, mode)

    # Set IP address range to use for unrecognized DHCP clients
    dhcp_range = 'dhcp-range=%s,%s,%s # %s'
    util.remove_line(DNSMASQ_TEMPLATE, 'dhcp-range')
    dhcp_pool_start = gen.get_dhcp_pool_start()
    # Bound up front so a configuration without a 'pxe' network is detected
    # explicitly instead of surfacing later as an UnboundLocalError.
    cont_pxe_ipaddr = None
    cont_pxe_netmask = None
    bridge_pxe_ipaddr = None
    for index, netw_type in enumerate(cfg.yield_depl_netw_client_type()):
        depl_netw_client_ip = cfg.get_depl_netw_client_cont_ip(index)
        depl_netw_client_netmask = cfg.get_depl_netw_client_netmask(index)

        network = IPNetwork(depl_netw_client_ip + '/' +
                            depl_netw_client_netmask)

        entry = dhcp_range % (str(network.network + dhcp_pool_start),
                              str(network.network + network.size - 1),
                              str(dhcp_lease_time),
                              str(network.cidr))
        util.append_line(DNSMASQ_TEMPLATE, entry)

        # Save PXE client network information for later
        if netw_type == 'pxe':
            cont_pxe_ipaddr = depl_netw_client_ip
            cont_pxe_netmask = depl_netw_client_netmask
            bridge_pxe_ipaddr = cfg.get_depl_netw_client_brg_ip(index)

    if cont_pxe_ipaddr is None:
        # Every remaining step needs the PXE container address; stop here
        # with a clear message rather than part-way through configuration.
        raise ValueError("No deployer client network of type 'pxe' found; "
                         "unable to configure Cobbler")

    # Configure dnsmasq to enable TFTP server
    util.append_line(DNSMASQ_TEMPLATE, 'enable-tftp')
    util.append_line(DNSMASQ_TEMPLATE, 'tftp-root=%s' % TFTPBOOT)
    util.append_line(DNSMASQ_TEMPLATE, 'user=root')

    # Configure dnsmasq to use deployer as gateway
    if cfg.get_depl_gateway():
        util.remove_line(DNSMASQ_TEMPLATE, 'dhcp-option')
        util.append_line(DNSMASQ_TEMPLATE,
                         'dhcp-option=3,%s' % bridge_pxe_ipaddr)

    # Cobbler modules configuration
    util.replace_regex(MODULES_CONF, 'module = manage_bind',
                       'module = manage_dnsmasq')
    util.replace_regex(MODULES_CONF, 'module = manage_isc',
                       'module = manage_dnsmasq')

    # Copy cobbler.conf into apache2/conf-available
    copy2(COBBLER_CONF_ORIG, COBBLER_CONF)

    # Copy cobbler_web.conf into apache2/conf-available
    copy2(COBBLER_WEB_CONF_ORIG, COBBLER_WEB_CONF)

    # Apache2 configuration
    util.bash_cmd('%s cobbler cobbler_web' % A2ENCONF)
    util.bash_cmd('%s proxy' % A2ENMOD)
    util.bash_cmd('%s proxy_http' % A2ENMOD)

    # Set secret key in web settings
    secret_key = _generate_random_characters()
    util.replace_regex(COBBLER_WEB_SETTINGS, '^SECRET_KEY = .*',
                       'SECRET_KEY = "%s"' % secret_key)

    # Remove "Order allow,deny" lines from cobbler configuration
    regex = '.*Order allow,deny'
    util.remove_line(COBBLER_CONF, regex)
    util.remove_line(COBBLER_WEB_CONF, regex)

    # Replace "Allow from all" with "Require all granted" in the
    # cobbler configuration
    regex = 'Allow from all'
    replace = 'Require all granted'
    util.replace_regex(COBBLER_CONF, regex, replace)
    util.replace_regex(COBBLER_WEB_CONF, regex, replace)

    # chown www-data WEBUI_SESSIONS
    uid = pwd.getpwnam("www-data").pw_uid
    gid = -1  # unchanged
    os.chown(WEBUI_SESSIONS, uid, gid)

    # Cobbler settings
    util.replace_regex(COBBLER_SETTINGS, '127.0.0.1', cont_pxe_ipaddr)
    util.replace_regex(COBBLER_SETTINGS, 'manage_dhcp: 0', 'manage_dhcp: 1')
    util.replace_regex(COBBLER_SETTINGS, 'manage_dns: 0', 'manage_dns: 1')
    util.replace_regex(COBBLER_SETTINGS, 'pxe_just_once: 0',
                       'pxe_just_once: 1')
    globals_env_variables = cfg.get_globals_env_variables()
    if globals_env_variables and 'http_proxy' in globals_env_variables:
        util.replace_regex(
            COBBLER_SETTINGS, 'proxy_url_ext: ""',
            'proxy_url_ext: %s' % globals_env_variables['http_proxy'])
    util.replace_regex(
        COBBLER_SETTINGS, 'default_password_crypted:',
        'default_password_crypted: '
        '$1$clusterp$/gd3ep3.36A2808GGdHUz.')

    # Create link from PY_DIST_PKGS to the cobbler package under
    # LOCAL_PY_DIST_PKGS
    if not os.path.exists(PY_DIST_PKGS):
        util.bash_cmd('ln -s %s/cobbler %s' %
                      (LOCAL_PY_DIST_PKGS, PY_DIST_PKGS))

    # Set PXE timeout to maximum
    util.replace_regex(PXEDEFAULT_TEMPLATE, r'TIMEOUT \d+', 'TIMEOUT 35996')
    util.replace_regex(PXEDEFAULT_TEMPLATE, r'TOTALTIMEOUT \d+',
                       'TOTALTIMEOUT 35996')

    # Fix line break escape in kickstart_done snippet
    util.replace_regex(KICKSTART_DONE, "\\\\nwget", "wget")
    util.replace_regex(KICKSTART_DONE, r"\$saveks",
                       "$saveks + \"; \\\\\\\"\n")
    util.replace_regex(KICKSTART_DONE, r"\$runpost",
                       "$runpost + \"; \\\\\\\"\n")

    # Copy authorized_keys ssh key file to web repo directory
    copy2(ROOT_AUTH_KEYS, WWW_AUTH_KEYS)
    os.chmod(WWW_AUTH_KEYS, 0o444)

    # Add mgmt subnet to NTP service configuration
    cont_pxe_broadcast = str(
        IPNetwork(cont_pxe_ipaddr + '/' + cont_pxe_netmask).broadcast)
    util.append_line(NTP_CONF, 'broadcast %s' % cont_pxe_broadcast)

    # Add 'required-stop' line to cobblerd init.d to avoid warning
    util.replace_regex(INITD + 'cobblerd', '### END INIT INFO',
                       '# Required-Stop:\n### END INIT INFO')

    # Set Apache2 'ServerName'
    util.append_line(APACHE2_CONF, "ServerName localhost")

    # Restart services
    _restart_service('ntp')
    _restart_service('cobblerd')
    _restart_service('apache2')

    # Update Cobbler boot-loader files
    util.bash_cmd('%s get-loaders' % COBBLER)

    # Update cobbler list of OS signatures
    util.bash_cmd('%s signature update' % COBBLER)

    # Run Cobbler sync
    util.bash_cmd('%s sync' % COBBLER)

    # Restart services (again)
    _restart_service('apache2')
    _restart_service('cobblerd')
    _restart_service('dnsmasq')

    # Set services to start on boot
    _service_start_on_boot('cobblerd')
    _service_start_on_boot('ntp')
def validate_pxe(self, bootdev='default', persist=True):
    """Discover cluster node PXE ports by watching DHCP traffic.

    A network namespace is created on the PXE bridge, dnsmasq is started
    inside it as DHCP server, and tcpdump captures traffic on port 67.
    The capture file is read repeatedly and MAC addresses are extracted
    with ``self._get_macs`` until the expected number of PXE ports has
    been seen or the user chooses to continue or terminate.

    Args:
        bootdev (str): When equal to 'network', discovered nodes are
            power cycled and their boot device set to 'network' after
            the scan.
        persist (bool): Not referenced in this method body.
            NOTE(review): confirm whether callers still rely on it.

    Raises:
        UserCriticalException: ``self.node_table_ipmi`` is empty.
        UserException: The scan ended without all expected PXE ports
            being found.
        SystemExit: The user confirmed termination at the prompt.
    """
    # Disabled pre-check, kept for reference:
    # if self.inv.check_all_nodes_pxe_macs():
    #     self.log.info("Inventory exists with PXE MACs populated.")
    #     if not self.ran_ipmi:
    #         return
    #     print("\nPress Enter to continue cluster deployment without "
    #           "running PXE hardware validation.")
    #     print("Type 'C' to validate cluster nodes defined in current "
    #           "'config.yml'")
    #     resp = input("Type 'T' to terminate Power-Up ")
    #     if resp == 'T':
    #         resp = input("Type 'y' to confirm ")
    #         if resp == 'y':
    #             self.log.info("'{}' entered. Terminating Power-Up at user "
    #                           "request".format(resp))
    #             sys.exit(1)
    #     elif resp == 'C':
    #         self.log.info("'{}' entered. Continuing with hardware "
    #                       "validation".format(resp))
    #     else:
    #         print()
    #         return
    # if not self.ran_ipmi:
    #     return
    if not self.node_table_ipmi:
        raise UserCriticalException('No BMCs discovered')
    self.log.debug("Checking PXE networks and client PXE"
                   " ports ________\n")
    self.log.debug('Boot device: {}'.format(bootdev))
    ipmi_cnt, pxe_cnt = self._get_port_cnts()
    pxe_addr, bridge_addr, pxe_prefix, pxe_vlan = self._get_network('pxe')
    pxe_network = pxe_addr + '/' + str(pxe_prefix)
    addr = IPNetwork(bridge_addr + '/' + str(pxe_prefix))
    netmask = str(addr.netmask)
    # The namespace interface address is the bridge address shifted by
    # NAME_SPACE_OFFSET_ADDR.
    addr.value += NAME_SPACE_OFFSET_ADDR
    addr = str(addr)
    foundall = False
    dhcp_st = get_dhcp_pool_start()
    pxe_ns = NetNameSpace('pxe-ns-', 'br-pxe-' + str(pxe_vlan), addr)

    # setup DHCP. save start and end addr raw numeric values
    self.log.debug('Installing DHCP server in network namespace')
    addr_st = self._add_offset_to_address(pxe_network, dhcp_st)
    # Range end is the pool start plus the expected port count plus 2.
    addr_end = self._add_offset_to_address(pxe_network,
                                           dhcp_st + pxe_cnt + 2)

    dns_list, stderr, rc = sub_proc_exec('pgrep dnsmasq')
    dns_list = dns_list.splitlines()

    # Remove the lease file left by a previous run.
    if os.path.exists(self.dhcp_pxe_leases_file):
        os.remove(self.dhcp_pxe_leases_file)

    # delete any remnant dnsmasq processes
    for pid in dns_list:
        ns_name, stderr, rc = sub_proc_exec(
            'ip netns identify {}'.format(pid))
        if pxe_ns._get_name_sp_name() in ns_name:
            self.log.debug('Killing dnsmasq. pid {}'.format(pid))
            stdout, stderr, rc = sub_proc_exec('kill -15 ' + pid)

    cmd = (f'dnsmasq --dhcp-leasefile={self.dhcp_pxe_leases_file} '
           f'--interface={pxe_ns._get_name_sp_ifc_name()} '
           f'--dhcp-range={addr_st},{addr_end},{netmask},3600')
    stdout, stderr, rc = pxe_ns._exec_cmd(cmd)
    if rc != 0:
        self.log.warning(f'Error configuring dnsmasq. rc: {rc}')

    # Remove the capture file left by a previous run.
    if os.path.exists(self.tcp_dump_file):
        os.remove(self.tcp_dump_file)

    tcpdump_list, stderr, rc = sub_proc_exec('pgrep tcpdump')
    tcpdump_list = tcpdump_list.splitlines()

    # delete any remnant tcpdump processes
    for pid in tcpdump_list:
        ns_name, stderr, rc = sub_proc_exec('ip netns identify ' + pid)
        if pxe_ns._get_name_sp_name() in ns_name:
            self.log.debug('Killing tcpdump. pid {}'.format(pid))
            stdout, stderr, rc = sub_proc_exec('kill -15 ' + pid)

    cmd = (f'sudo tcpdump -X -U -i {pxe_ns._get_name_sp_ifc_name()} '
           f'-w {self.tcp_dump_file} --immediate-mode port 67')
    proc = pxe_ns._launch_cmd(cmd)
    # NOTE(review): every Python value is an instance of ``object``, so this
    # condition is always False and the error is never logged. Confirm what
    # _launch_cmd returns on failure and test for that instead.
    if not isinstance(proc, object):
        self.log.error(
            f'Failure to launch process of tcpdump monitor {proc}')

    # Scan up to 25 times. Delay 10 seconds between scans
    # Allow infinite number of retries
    self.log.info('Scanning pxe network on 10 s intervals.')
    cnt = 0
    cnt_prev = 0
    cnt_down = 25
    mac_list = []
    dump = ''
    while cnt < pxe_cnt:
        print()
        cmd = 'sudo tcpdump -r {} -xx'.format(self.tcp_dump_file)
        for i in range(cnt_down):
            print('\r{} of {} nodes requesting PXE boot. Scan cnt: {} '.
                  format(cnt, pxe_cnt, cnt_down - i), end="")
            sys.stdout.flush()
            time.sleep(10)
            # read the tcpdump file if size is not 0
            if os.path.exists(self.tcp_dump_file) and os.path.getsize(
                    self.tcp_dump_file):
                dump, stderr, rc = sub_proc_exec(cmd)
                if rc != 0:
                    self.log.warning(
                        f'Error reading tcpdump file. rc: {rc}')
                if 'reading' not in stderr:
                    self.log.warning(
                        f'Failure reading tcpdump file - {stderr}')
            mac_list = self._get_macs(mac_list, dump)
            cnt = len(mac_list)
            if cnt > cnt_prev:
                cnt_prev = cnt
                # Pause briefly for in flight DHCP to complete and lease file to update
                time.sleep(5)
                self._build_port_table_pxe(mac_list)
            if cnt >= pxe_cnt:
                foundall = True
                print(
                    '\r{} of {} nodes requesting PXE boot. Scan count: {} '
                    .format(cnt, pxe_cnt, cnt_down - i), end="")
                break
        self.log.debug('Table of found PXE ports: {}'.format(
            self.node_table_pxe))
        for switch in self.node_table_pxe:
            print('\n\nSwitch: {}'.format(switch))
            print(
                tabulate(self.node_table_pxe[switch],
                         headers=('port', 'MAC address', 'IP address')))
            print()

        if cnt >= pxe_cnt:
            break
        # A scan round ended without finding every port: ask the user how
        # to proceed. Any other answer (including Enter) starts a new round.
        print('\n\nPress Enter to continue scanning for cluster nodes.')
        print(
            "Or enter 'C' to continue cluster deployment with a subset of nodes"
        )
        print("Or enter 'R' to cycle power to missing nodes")
        resp = input("Or enter 'T' to terminate Power-Up ")
        if resp == 'T':
            resp = input("Enter 'y' to confirm ")
            if resp == 'y':
                self.log.info("'{}' entered. Terminating Power-Up at user"
                              " request".format(resp))
                self._teardown_ns(self.ipmi_ns)
                self._teardown_ns(pxe_ns)
                sys.exit(1)
        elif resp == 'R':
            self._reset_unfound_nodes()
        elif resp == 'C':
            print('\nNot all nodes have been discovered')
            resp = input("Enter 'y' to confirm continuation of"
                         " deployment without all nodes ")
            if resp == 'y':
                self.log.info(
                    "'{}' entered. Continuing Power-Up".format(resp))
                break
    if cnt < pxe_cnt:
        self.log.warning('Failed to validate expected number of nodes')

    self._teardown_ns(pxe_ns)

    # Cycle power on all discovered nodes if bootdev set to 'network'
    if bootdev == 'network':
        self.log.debug('\nCycling power to discovered nodes.\n')
        set_power_clients('off', clients=self.bmc_ai)
        set_power_clients('on', clients=self.bmc_ai)
        set_bootdev_clients('network', clients=self.bmc_ai)

    self._teardown_ns(self.ipmi_ns)

    # Reset BMCs to insure they acquire a new address from container
    # during inv_add_ports. Avoids conflicting addresses during redeploy
    self._reset_existing_bmcs(self.node_list, self._get_cred_list())

    self.log.info('Cluster nodes validation complete')
    # Raised only after teardown and BMC reset so cleanup always happens.
    if not foundall:
        raise UserException('Not all node PXE ports validated')
def inv_set_ipmi_pxe_ip(config_path):
    """Configure DHCP IP reservations for IPMI and PXE interfaces

    IP addresses are assigned sequentially within the appropriate
    client networks starting with the DHCP pool start offset defined
    in 'lib.genesis'.

    For every inventory node a dnsmasq 'dhcp-host' reservation is written
    for the IPMI MAC, the PXE address is updated in the inventory, and a
    Cobbler sync is run. BMCs whose address changed are cold reset and
    then polled at their new address until they respond or WAIT_TIME
    seconds have elapsed.

    Args:
        config_path (str): Config file path handed to ``Config`` and
            ``Inventory``.

    Raises:
        UserException:
            - No IPMI or PXE client networks defined within the 'config.yml'
            - Unable to connect to BMC at new IPMI IP address
    """
    log = logger.getlogger()
    cfg = Config(config_path)
    inv = Inventory(cfg_file=config_path)

    ipmiNetwork = None
    pxeNetwork = None
    nodes_list = []

    # All nodes should be powered off before starting
    ipmi_set_power('off', config_path, wait=POWER_WAIT)

    # Create IPManager object for IPMI and/or PXE networks
    start_offset = gen.get_dhcp_pool_start()
    for index, netw_type in enumerate(cfg.yield_depl_netw_client_type()):
        ip = cfg.get_depl_netw_client_cont_ip(index)
        netmask = cfg.get_depl_netw_client_netmask(index)
        if netw_type == 'ipmi':
            ipmiNetwork = IPManager(IPNetwork(ip + '/' + netmask),
                                    start_offset)
        elif netw_type == 'pxe':
            pxeNetwork = IPManager(IPNetwork(ip + '/' + netmask),
                                   start_offset)

    # If only one network is defined use the same IPManager for both
    if ipmiNetwork is None and pxeNetwork is not None:
        ipmiNetwork = pxeNetwork
    elif ipmiNetwork is not None and pxeNetwork is None:
        pxeNetwork = ipmiNetwork
    elif ipmiNetwork is None and pxeNetwork is None:
        raise UserException('No IPMI or PXE client network found')

    # Modify IP addresses for each node
    dhcp_lease_time = cfg.get_globals_dhcp_lease_time()
    for index, hostname in enumerate(inv.yield_nodes_hostname()):
        # IPMI reservations are written directly to the dnsmasq template
        ipmi_ipaddr = inv.get_nodes_ipmi_ipaddr(0, index)
        ipmi_mac = inv.get_nodes_ipmi_mac(0, index)
        ipmi_new_ipaddr = ipmiNetwork.get_next_ip()
        util.remove_line(DNSMASQ_TEMPLATE, "^dhcp-host=" + ipmi_mac + ".*")
        util.append_line(
            DNSMASQ_TEMPLATE, 'dhcp-host=%s,%s-bmc,%s,%s\n' %
            (ipmi_mac, hostname, ipmi_new_ipaddr, dhcp_lease_time))
        _adjust_dhcp_pool(ipmiNetwork.network,
                          ipmiNetwork.get_next_ip(reserve=False),
                          dhcp_lease_time)

        # PXE reservations are handled by Cobbler
        pxe_ipaddr = inv.get_nodes_pxe_ipaddr(0, index)
        pxe_mac = inv.get_nodes_pxe_mac(0, index)
        pxe_new_ipaddr = pxeNetwork.get_next_ip()
        log.info('Modifying Inventory PXE IP - Node: %s MAC: %s '
                 'Original IP: %s New IP: %s' %
                 (hostname, pxe_mac, pxe_ipaddr, pxe_new_ipaddr))
        inv.set_nodes_pxe_ipaddr(0, index, pxe_new_ipaddr)
        _adjust_dhcp_pool(pxeNetwork.network,
                          pxeNetwork.get_next_ip(reserve=False),
                          dhcp_lease_time)

        # Run Cobbler sync to process DNSMASQ template
        cobbler_server = xmlrpclib.Server("http://127.0.0.1/cobbler_api")
        token = cobbler_server.login(COBBLER_USER, COBBLER_PASS)
        cobbler_server.sync(token)
        log.debug("Running Cobbler sync")

        # Save info to verify connection come back up
        ipmi_userid = inv.get_nodes_ipmi_userid(index)
        ipmi_password = inv.get_nodes_ipmi_password(index)
        # No need to reset and check if the IP does not change
        if ipmi_new_ipaddr != ipmi_ipaddr:
            nodes_list.append({
                'hostname': hostname,
                'index': index,
                'ipmi_userid': ipmi_userid,
                'ipmi_password': ipmi_password,
                'ipmi_new_ipaddr': ipmi_new_ipaddr,
                'ipmi_ipaddr': ipmi_ipaddr,
                'ipmi_mac': ipmi_mac
            })

    # Issue MC cold reset to force refresh of IPMI interfaces
    for node in nodes_list:
        ipmi_userid = node['ipmi_userid']
        ipmi_password = node['ipmi_password']
        ipmi_ipaddr = node['ipmi_ipaddr']
        ipmi_cmd = ipmi_command.Command(bmc=ipmi_ipaddr,
                                        userid=ipmi_userid,
                                        password=ipmi_password)
        ipmi_cmd.reset_bmc()
        del ipmi_cmd
        # Report the node being reset, not the last hostname left over
        # from the reservation loop above.
        log.debug('BMC Cold Reset Issued - Node: %s - IP: %s' %
                  (node['hostname'], ipmi_ipaddr))

    # Check connections for set amount of time
    end_time = time() + WAIT_TIME
    while time() < end_time and len(nodes_list) > 0:
        success_list = []
        for list_index, node in enumerate(nodes_list):
            hostname = node['hostname']
            index = node['index']
            ipmi_userid = node['ipmi_userid']
            ipmi_password = node['ipmi_password']
            ipmi_new_ipaddr = node['ipmi_new_ipaddr']
            ipmi_ipaddr = node['ipmi_ipaddr']
            ipmi_mac = node['ipmi_mac']

            # Attempt to connect to new IPMI IP address
            try:
                ipmi_cmd = ipmi_command.Command(bmc=ipmi_new_ipaddr,
                                                userid=ipmi_userid,
                                                password=ipmi_password)
                status = ipmi_cmd.get_power()
            except pyghmi_exception.IpmiException as error:
                log.debug('BMC connection failed - Node: %s IP: %s, %s '
                          '(Retrying for %s seconds)' %
                          (hostname, ipmi_new_ipaddr, str(error), WAIT_TIME))
                continue

            # If connection successful modify inventory
            if status.get('powerstate') in ['on', 'off']:
                log.debug('BMC connection success - Node: %s IP: %s' %
                          (hostname, ipmi_new_ipaddr))
                log.info('Modifying Inventory IPMI IP - Node: %s MAC: %s '
                         'Original IP: %s New IP: %s' %
                         (hostname, ipmi_mac, ipmi_ipaddr, ipmi_new_ipaddr))
                inv.set_nodes_ipmi_ipaddr(0, index, ipmi_new_ipaddr)
                success_list.append(list_index)

        # Remove nodes that connected successfully; highest index first so
        # the remaining indices stay valid while deleting.
        for remove_index in sorted(success_list, reverse=True):
            del nodes_list[remove_index]

    # Report each node that never answered, using that node's own data.
    for node in nodes_list:
        log.error('Unable to connect to BMC at new IPMI IP address- Node: %s '
                  'MAC: %s Original IP: %s New IP: %s' %
                  (node['hostname'], node['ipmi_mac'], node['ipmi_ipaddr'],
                   node['ipmi_new_ipaddr']))

    if len(nodes_list) > 0:
        raise UserException('%d BMC(s) not responding after IP modification' %
                            len(nodes_list))
def validate_ipmi(self):
    """Discover cluster node BMCs on the IPMI network.

    A network namespace is created on the IPMI bridge. Addresses already
    answering in the client range are found with fping and handed to
    ``self._reset_existing_bmcs``. dnsmasq is started in the namespace
    unless a dnsmasq process is already identified there. The DHCP range
    is then scanned with fping on 5 s intervals until the expected number
    of nodes answers or the user chooses to continue or terminate.
    Responding nodes are stored in ``self.node_list``; when credentials
    are available the nodes are powered off and on and their boot device
    is set to 'network'.

    Raises:
        UserException: The scan ended without all expected IPMI ports
            responding.
        SystemExit: The user confirmed termination at the prompt, or no
            BMC communication could be established.
    """
    self.log.info("Discover and validate cluster nodes")
    ipmi_cnt, _ = self._get_port_cnts()
    ipmi_addr, bridge_addr, ipmi_prefix, ipmi_vlan = self._get_network(
        'ipmi')
    ipmi_network = ipmi_addr + '/' + str(ipmi_prefix)
    addr = IPNetwork(bridge_addr + '/' + str(ipmi_prefix))
    netmask = str(addr.netmask)
    ipmi_size = addr.size
    # The namespace interface address is the bridge address shifted by
    # NAME_SPACE_OFFSET_ADDR.
    addr.value += NAME_SPACE_OFFSET_ADDR
    addr = str(addr)
    cred_list = self._get_cred_list()
    # Dedicated "all nodes found" flag. It must not share a name with the
    # subprocess return codes below, otherwise the final check is decided
    # by the exit status of the last command that happened to run.
    foundall = False
    dhcp_st = get_dhcp_pool_start()
    self.ipmi_ns = NetNameSpace('ipmi-ns-', 'br-ipmi-' + str(ipmi_vlan),
                                addr)

    # setup DHCP, unless already running in namespace
    # save start and end addr raw numeric values
    self.log.debug('Installing DHCP server in network namespace')
    addr_st = self._add_offset_to_address(ipmi_network, dhcp_st)
    addr_end = self._add_offset_to_address(ipmi_network, ipmi_size - 2)
    dhcp_end = self._add_offset_to_address(ipmi_network,
                                           dhcp_st + ipmi_cnt + 2)

    # scan ipmi network for nodes with pre-existing ip addresses
    cmd = 'fping -r0 -a -g {} {}'.format(addr_st, addr_end)
    node_list, stderr, rc = sub_proc_exec(cmd)
    if rc not in (0, 1):
        self.log.warning(f'Error scanning IPMI network. rc: {rc}')
    self.log.debug('Pre-existing node list: \n{}'.format(node_list))
    node_list = node_list.splitlines()

    self._reset_existing_bmcs(node_list, cred_list)

    if len(node_list) > 0:
        print('Pause 60s for BMCs to begin reset')
        time.sleep(60)

    dns_list, stderr, rc = sub_proc_exec('pgrep dnsmasq')
    if rc not in [0, 1]:
        self.log.warning(f'Error looking for dnsmasq. rc: {rc}')
    dns_list = dns_list.splitlines()

    # Start dnsmasq only when none of the running dnsmasq processes
    # belongs to this namespace (the for/else runs when no break occurred).
    for pid in dns_list:
        ns_name, stderr, rc = sub_proc_exec(
            'ip netns identify {}'.format(pid))
        if self.ipmi_ns._get_name_sp_name() in ns_name:
            self.log.debug('DHCP already running in {}'.format(ns_name))
            break
    else:
        cmd = (f'dnsmasq --dhcp-leasefile={self.dhcp_ipmi_leases_file} '
               f'--interface={self.ipmi_ns._get_name_sp_ifc_name()} '
               f'--dhcp-range={addr_st},{dhcp_end},{netmask},600')
        stdout, stderr, rc = self.ipmi_ns._exec_cmd(cmd)
        if rc != 0:
            self.log.warning(f'Error setting up dnsmasq. rc: {rc}')
            print(stderr)

    # Scan up to 25 times. Delay 5 seconds between scans
    # Allow infinite number of retries
    self.log.info('Scanning BMC network on 5 s intervals')
    cnt = 0
    cnt_down = 25
    while cnt < ipmi_cnt:
        print()
        for i in range(cnt_down):
            print(
                '\r{} of {} nodes requesting DHCP address. Scan count: {} '
                .format(cnt, ipmi_cnt, cnt_down - i), end="")
            sys.stdout.flush()
            time.sleep(5)
            cmd = 'fping -r0 -a -g {} {}'.format(addr_st, dhcp_end)
            stdout, stderr, rc = sub_proc_exec(cmd)
            node_list = stdout.splitlines()
            cnt = len(node_list)
            if cnt >= ipmi_cnt:
                foundall = True
                print(
                    '\r{} of {} nodes requesting DHCP address. Scan count: {} '
                    .format(cnt, ipmi_cnt, cnt_down - i), end="")
                break

        self._get_port_table_ipmi(node_list)
        self.log.debug('Table of found IPMI ports: {}'.format(
            self.node_table_ipmi))
        for switch in self.node_table_ipmi:
            print('\n\nSwitch: {} '.format(switch))
            print(
                tabulate(self.node_table_ipmi[switch],
                         headers=('port', 'MAC address', 'IP address')))
            print()

        if cnt >= ipmi_cnt:
            break
        # A scan round ended without finding every node: ask the user how
        # to proceed. Any other answer (including Enter) starts a new round.
        print(
            '\n\nPress Enter to continue scanning for cluster nodes.\nOr')
        print(
            "Or enter 'C' to continue cluster deployment with a subset of nodes"
        )
        resp = input("Or Enter 'T' to terminate Power-Up ")
        if resp == 'T':
            resp = input("Enter 'y' to confirm ")
            if resp == 'y':
                self.log.info(
                    "'{}' entered. Terminating Power-Up at user request".
                    format(resp))
                self._teardown_ns(self.ipmi_ns)
                sys.exit(1)
        elif resp == 'C':
            print('\nNot all nodes have been discovered')
            resp = input("Enter 'y' to confirm continuation of"
                         " deployment without all nodes ")
            if resp == 'y':
                self.log.info(
                    "'{}' entered. Continuing PowerUp".format(resp))
                break
    self.node_list = node_list
    if cnt < ipmi_cnt:
        self.log.warning('Failed to validate expected number of nodes')

    if len(node_list) > 0 and len(cred_list) > 0:
        # Verify and power off nodes
        self.bmc_ai = self._get_credentials(node_list, cred_list)
        if not self.bmc_ai:
            self.log.error(
                'Critical error. Unable to establish BMC communication '
                'with any cluster nodes.\n.')
            sys.exit('Exiting.')

        # set_power_clients('off') has built in 60 s delay
        self.log.info('\nPowering off cluster nodes')
        set_power_clients('off', clients=self.bmc_ai)

        set_power_clients('on', clients=self.bmc_ai)

        self.log.debug('\nSetting "network" boot device on all nodes')
        set_bootdev_clients('network', clients=self.bmc_ai)

    self.log.debug('Cluster nodes IPMI validation complete')
    self.ran_ipmi = True
    # Raised last so the discovered subset is still recorded and prepared.
    if not foundall:
        raise UserException('Not all node IPMI ports validated')
def inv_set_ipmi_pxe_ip(config_path):
    """Configure DHCP IP reservations for IPMI and PXE interfaces

    IP addresses are assigned sequentially within the appropriate
    client networks starting with the DHCP pool start offset defined
    in 'lib.genesis'.

    For every inventory node a dnsmasq 'dhcp-host' reservation is written
    for the IPMI MAC, the PXE address is updated in the inventory, and a
    Cobbler sync is run. BMCs whose address changed are cold reset and,
    after a one minute pause, polled at their new address until they
    respond or WAIT_TIME seconds have elapsed.

    Args:
        config_path (str): Config file path handed to ``Config`` and
            ``Inventory``.

    Raises:
        UserException:
            - No IPMI or PXE client networks defined within the 'config.yml'
            - Unable to connect to BMC at new IPMI IP address
    """
    log = logger.getlogger()
    cfg = Config(config_path)
    inv = Inventory(cfg_file=config_path)

    ipmiNetwork = None
    pxeNetwork = None
    nodes_list = []

    # All nodes should be powered off before starting
    set_power_clients('off', config_path, wait=POWER_WAIT)

    # Create IPManager object for IPMI and/or PXE networks
    start_offset = gen.get_dhcp_pool_start()
    for index, netw_type in enumerate(cfg.yield_depl_netw_client_type()):
        ip = cfg.get_depl_netw_client_cont_ip(index)
        netmask = cfg.get_depl_netw_client_netmask(index)
        if netw_type == 'ipmi':
            ipmiNetwork = IPManager(IPNetwork(ip + '/' + netmask),
                                    start_offset)
        elif netw_type == 'pxe':
            pxeNetwork = IPManager(IPNetwork(ip + '/' + netmask),
                                   start_offset)

    # If only one network is defined use the same IPManager for both
    if ipmiNetwork is None and pxeNetwork is not None:
        ipmiNetwork = pxeNetwork
    elif ipmiNetwork is not None and pxeNetwork is None:
        pxeNetwork = ipmiNetwork
    elif ipmiNetwork is None and pxeNetwork is None:
        raise UserException('No IPMI or PXE client network found')

    # Modify IP addresses for each node
    dhcp_lease_time = cfg.get_globals_dhcp_lease_time()
    for index, hostname in enumerate(inv.yield_nodes_hostname()):
        # IPMI reservations are written directly to the dnsmasq template
        ipmi_ipaddr = inv.get_nodes_ipmi_ipaddr(0, index)
        ipmi_mac = inv.get_nodes_ipmi_mac(0, index)
        ipmi_new_ipaddr = ipmiNetwork.get_next_ip()
        util.remove_line(DNSMASQ_TEMPLATE, "^dhcp-host=" + ipmi_mac + ".*")
        util.append_line(
            DNSMASQ_TEMPLATE, 'dhcp-host=%s,%s-bmc,%s,%s\n' %
            (ipmi_mac, hostname, ipmi_new_ipaddr, dhcp_lease_time))
        _adjust_dhcp_pool(ipmiNetwork.network,
                          ipmiNetwork.get_next_ip(reserve=False),
                          dhcp_lease_time)

        # PXE reservations are handled by Cobbler
        pxe_ipaddr = inv.get_nodes_pxe_ipaddr(0, index)
        pxe_mac = inv.get_nodes_pxe_mac(0, index)
        pxe_new_ipaddr = pxeNetwork.get_next_ip()
        log.info('Modifying Inventory PXE IP - Node: %s MAC: %s '
                 'Original IP: %s New IP: %s' %
                 (hostname, pxe_mac, pxe_ipaddr, pxe_new_ipaddr))
        inv.set_nodes_pxe_ipaddr(0, index, pxe_new_ipaddr)
        _adjust_dhcp_pool(pxeNetwork.network,
                          pxeNetwork.get_next_ip(reserve=False),
                          dhcp_lease_time)

        # Run Cobbler sync to process DNSMASQ template
        cobbler_server = xmlrpc.client.Server("http://127.0.0.1/cobbler_api")
        token = cobbler_server.login(COBBLER_USER, COBBLER_PASS)
        cobbler_server.sync(token)
        log.debug("Running Cobbler sync")

        # Save info to verify connection come back up
        ipmi_userid = inv.get_nodes_ipmi_userid(index)
        ipmi_password = inv.get_nodes_ipmi_password(index)
        bmc_type = inv.get_nodes_bmc_type(index)
        # No need to reset and check if the IP does not change
        if ipmi_new_ipaddr != ipmi_ipaddr:
            nodes_list.append({
                'hostname': hostname,
                'index': index,
                'ipmi_userid': ipmi_userid,
                'ipmi_password': ipmi_password,
                'ipmi_new_ipaddr': ipmi_new_ipaddr,
                'ipmi_ipaddr': ipmi_ipaddr,
                'ipmi_mac': ipmi_mac,
                'bmc_type': bmc_type
            })

    # Issue MC cold reset to force refresh of IPMI interfaces
    for node in nodes_list:
        ipmi_userid = node['ipmi_userid']
        ipmi_password = node['ipmi_password']
        ipmi_ipaddr = node['ipmi_ipaddr']
        bmc_type = node['bmc_type']
        bmc = _bmc.Bmc(ipmi_ipaddr, ipmi_userid, ipmi_password, bmc_type)
        if bmc.is_connected():
            log.debug(f'Issuing BMC Cold Reset - Node: {node["hostname"]} '
                      f'- IP: {ipmi_ipaddr}')
            if not bmc.bmc_reset('cold'):
                log.error(
                    f'Failed attempting BMC reset on {node["ipmi_ipaddr"]}')
            bmc.logout()

    log.info('Pausing 1 minute for BMCs to begin reset')
    sleep(60)

    # Check connections for set amount of time
    end_time = time() + WAIT_TIME
    while time() < end_time and len(nodes_list) > 0:
        print(f'\rTimeout count down: {int(end_time - time())} ', end='')
        sys.stdout.flush()
        success_list = []
        sleep(2)
        for list_index, node in enumerate(nodes_list):
            hostname = node['hostname']
            index = node['index']
            ipmi_userid = node['ipmi_userid']
            ipmi_password = node['ipmi_password']
            ipmi_new_ipaddr = node['ipmi_new_ipaddr']
            ipmi_ipaddr = node['ipmi_ipaddr']
            ipmi_mac = node['ipmi_mac']
            bmc_type = node['bmc_type']

            # Attempt to connect to new IPMI IP address. The messages below
            # report the address actually contacted (the new one).
            bmc = _bmc.Bmc(ipmi_new_ipaddr, ipmi_userid, ipmi_password,
                           bmc_type)
            if bmc.is_connected():
                if bmc.chassis_power('status') in ('on', 'off'):
                    log.debug(f'BMC connection success - Node: {hostname} '
                              f'IP: {ipmi_new_ipaddr}')
                else:
                    log.debug(f'BMC communication failed - Node: {hostname} '
                              f'IP: {ipmi_new_ipaddr}')
                    continue
                log.info(
                    f'Modifying Inventory IPMI IP - Node: {hostname} MAC: '
                    f'{ipmi_mac} Original IP: {ipmi_ipaddr} New IP: '
                    f'{ipmi_new_ipaddr}')
                inv.set_nodes_ipmi_ipaddr(0, index, ipmi_new_ipaddr)
                success_list.append(list_index)
            else:
                log.debug(f'BMC connection failed - Node: {hostname} '
                          f'IP: {ipmi_new_ipaddr}')
                continue

        # Remove nodes that connected successfully; highest index first so
        # the remaining indices stay valid while deleting.
        for remove_index in sorted(success_list, reverse=True):
            del nodes_list[remove_index]

    # Report each node that never answered, using that node's own data.
    for node in nodes_list:
        log.error('Unable to connect to BMC at new IPMI IP address- Node: %s '
                  'MAC: %s Original IP: %s New IP: %s' %
                  (node['hostname'], node['ipmi_mac'], node['ipmi_ipaddr'],
                   node['ipmi_new_ipaddr']))

    if len(nodes_list) > 0:
        raise UserException('%d BMC(s) not responding after IP modification' %
                            len(nodes_list))