def create_initial_message(real_owner, cloud, cloud_info, ticket, cc):
    """Announce a freshly activated cloud assignment by e-mail and/or IRC.

    Which channels are used is controlled by the ``email_notify`` and
    ``irc_notify`` configuration flags; both, one or neither may fire.

    Args:
        real_owner: Recipient passed to ``Postman`` for the e-mail.
        cloud: Cloud name, used in the subject, the template and the IRC link.
        cloud_info: Summary text rendered in the template and IRC message.
        ticket: Ticket identifier; used in the subject and to build the
            password (``<infra_location>@<ticket>``) given to the template.
        cc: Iterable of user names; each one is qualified with the
            configured ``domain`` and added to the configured ``report_cc``
            list.
    """
    # Configuration is read up front, regardless of which channel is enabled.
    bot_host = conf["ircbot_ipaddr"]
    bot_port = conf["ircbot_port"]
    bot_channel = conf["ircbot_channel"]
    location = conf["infra_location"]

    carbon_copies = conf["report_cc"].split(",")
    carbon_copies.extend("%s@%s" % (user, conf["domain"]) for user in cc)

    if conf["email_notify"]:
        template_path = os.path.join(TEMPLATES_PATH, "initial_message")
        with open(template_path) as _file:
            template = Template(_file.read())
        body = template.render(
            cloud_info=cloud_info,
            wp_wiki=conf["wp_wiki"],
            cloud=cloud,
            quads_url=conf["quads_url"],
            real_owner=real_owner,
            password=f"{location}@{ticket}",
            foreman_url=conf["foreman_url"],
        )
        subject = "New QUADS Assignment Allocated - %s %s" % (cloud, ticket)
        Postman(subject, real_owner, carbon_copies, body).send_email()

    if conf["irc_notify"]:
        try:
            with Netcat(bot_host, bot_port) as nc:
                announcement = (
                    "%s QUADS: %s is now active, choo choo! - http://%s/assignments/#%s"
                    % (bot_channel, cloud_info, conf["wp_wiki"], cloud)
                )
                nc.write(announcement.encode("utf-8"))
        except (TypeError, BrokenPipeError) as ex:
            # IRC notification is best effort: log and carry on.
            logger.debug(ex)
            logger.error("Beep boop netcat can't communicate with your IRC.")
async def post_system_test(self):
    """Check Foreman and out-of-band credentials for the cloud's hosts.

    Steps, in order:

    1. Verify that the cloud's Foreman account
       (``<cloud name>`` / ``<infra_location>@<ticket>``) can log in.
    2. If any currently scheduled host is still marked for build in
       Foreman, try to push it back into provisioning (boot to Foreman
       or PXE, then reboot), record it in ``self.report`` and fail.
    3. Otherwise validate the Badfish credentials of every host in
       ``self.hosts``.

    Returns:
        bool: ``True`` only if Foreman accepted the credentials, no host
        was pending build and every Badfish credential check succeeded.
    """
    password = f"{Config['infra_location']}@{self.cloud.ticket}"
    foreman = Foreman(
        Config["foreman_api_url"],
        self.cloud.name,
        password,
        loop=self.loop,
    )

    if not await foreman.verify_credentials():
        logger.error("Unable to query Foreman for cloud: %s" % self.cloud.name)
        logger.error("Verify Foreman password is correct: %s" % password)
        self.report += "Unable to query Foreman for cloud: %s\n" % self.cloud.name
        self.report += "Verify Foreman password is correct: %s\n" % password
        return False

    build_hosts = await foreman.get_build_hosts()
    schedules = Schedule.current_schedule(cloud=self.cloud)
    pending = []
    if schedules:
        # Scheduled hosts that Foreman still reports as "build".
        pending = [
            schedule.host.name
            for schedule in schedules
            if schedule.host and schedule.host.name in build_hosts
        ]

    if pending:
        logger.info(
            "The following hosts are marked for build and will now be rebooted:"
        )
        self.report += "The following hosts are marked for build:\n"
        interfaces_file = os.path.join(
            os.path.dirname(__file__),
            "../../conf/idrac_interfaces.yml",
        )

        for host in pending:
            logger.info(host)
            try:
                healthy = await Netcat(host).health_check()
            except OSError:
                healthy = False
            if not healthy:
                # Unreachable hosts are left alone and not added to the report.
                logger.warning(
                    "Host %s didn't pass the health check. "
                    "Potential provisioning in process. SKIPPING." % host
                )
                continue

            badfish = None
            try:
                badfish = await badfish_factory(
                    "mgmt-" + host,
                    str(Config["ipmi_username"]),
                    str(Config["ipmi_password"]),
                )
                if is_supported(host):
                    await badfish.boot_to_type("foreman", interfaces_file)
                else:
                    await badfish.set_next_boot_pxe()
                await badfish.reboot_server()
            except BadfishException as exc:
                logger.debug(exc)
                if not badfish:
                    logger.error(
                        f"Could not initiate Badfish instance for: {host}"
                    )
                else:
                    # The instance exists, so the boot change failed:
                    # reboot anyway.
                    logger.warning(
                        f"There was something wrong trying to boot from Foreman interface for: {host}"
                    )
                    await badfish.reboot_server()

            self.report += "%s\n" % host
        return False

    credentials_ok = True
    for host in self.hosts:
        try:
            badfish = await badfish_factory(
                "mgmt-" + host.name,
                str(Config["ipmi_cloud_username"]),
                password,
            )
            await badfish.validate_credentials()
        except BadfishException:
            logger.info(f"Could not verify badfish credentials for: {host.name}")
            credentials_ok = False
    return credentials_ok
async def post_network_test(self):
    """Check that every host answers on its name and on each test network.

    The check runs in three stages:

    1. Each host in ``self.hosts`` must pass a ``Netcat`` health check.
    2. From the host with the most interfaces, ``fping`` every host by name
       over SSH.
    3. For each interface in ``Config.INTERFACES``, derive one address per
       configured network by replacing the first two octets of each host's
       resolved IP, then ``fping`` those addresses.

    The SSH session is closed on every exit path once it has been opened.

    Returns:
        bool: ``True`` if all stages succeed, ``False`` otherwise.

    Raises:
        IndexError: if ``self.hosts`` is empty.
    """
    test_host = self.hosts[0]
    hosts_down = []
    for host in self.hosts:
        try:
            nc = Netcat(host.name)
            healthy = await nc.health_check()
        except OSError:
            healthy = False
        if not healthy:
            hosts_down.append(host.name)
        # Run the ping sweep from the host that has the most interfaces.
        if len(host.interfaces) > len(test_host.interfaces):
            test_host = host

    if hosts_down:
        logger.error(
            "The following hosts appear to be down or with no ssh connection:"
        )
        for hostname in hosts_down:
            logger.error(hostname)
        return False

    try:
        ssh_helper = SSHHelper(test_host.name)
    except (SSHException, NoValidConnectionsError, socket.timeout) as ex:
        logger.debug(ex)
        logger.error(
            "Could not establish connection with host: %s." % test_host.name
        )
        self.report = (
            self.report
            + "Could not establish connection with host: %s.\n" % test_host.name
        )
        return False

    # The session is open from here on; ``finally`` guarantees it is closed
    # on the failure returns as well as on success.
    try:
        host_list = " ".join(host.name for host in self.hosts)
        result, output = ssh_helper.run_cmd(
            f"fping -t {Config.FPING_TIMEOUT} -B 1 -u {host_list}"
        )
        if not result:
            return False

        for i, interface in enumerate(Config.INTERFACES.keys()):
            new_ips = []
            host_ips = [
                {"ip": socket.gethostbyname(host.name), "host": host}
                for host in self.hosts
                if interface in [_interface.name for _interface in host.interfaces]
            ]
            for entry in host_ips:
                _host_obj = entry["host"]
                _interfaces = Config.INTERFACES[interface]
                last_nic = i == len(_host_obj.interfaces) - 1
                # The last NIC is not tested when the cloud has a VLAN.
                if last_nic and self.cloud.vlan:
                    continue
                for value in _interfaces:
                    # Keep the host part, swap in the test network's
                    # first two octets.
                    ip_apart = entry["ip"].split(".")
                    octets = value.split(".")
                    ip_apart[0] = octets[0]
                    ip_apart[1] = octets[1]
                    new_ips.append(".".join(ip_apart))

            if not new_ips:
                continue

            all_ips = " ".join(new_ips)
            result, output = ssh_helper.run_cmd(
                f"fping -t {Config.FPING_TIMEOUT} -B 1 -u {all_ips}"
            )
            if not result:
                pattern = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})")
                unresponsive = set()
                for error in output:
                    fields = error.split()
                    # Blank lines or lines without an IP are ignored rather
                    # than crashing the report.
                    match = pattern.search(fields[-1]) if fields else None
                    if match:
                        unresponsive.add(match[0])
                logger.warning("The following IPs are not responsive:")
                for ip in unresponsive:
                    logger.warning(ip)
                return False

        return True
    finally:
        ssh_helper.disconnect()
async def move_and_rebuild(host, new_cloud, semaphore, rebuild=False, loop=None):
    """Move ``host`` into ``new_cloud`` and optionally reprovision it.

    The cloud IPMI user's password and privilege level are always reset.
    When ``rebuild`` is true and the target is not the host's default cloud,
    the host is additionally:

    * switched to the ``director`` boot order (supported hosts only) and
      rebooted if the order changed,
    * marked for build in Foreman with the default OS, partition table and
      medium, and assigned to the cloud's Foreman user,
    * health-checked up to ``RETRIES`` times,
    * set to boot from Foreman/PXE and reset.

    Finally the current schedule's build timestamps and the host record are
    updated.

    Args:
        host: Host name.
        new_cloud: Name of the destination cloud.
        semaphore: Semaphore handed to the ``Foreman`` client.
        rebuild: Whether to reprovision the host.
        loop: Event loop handed to the ``Foreman`` client.

    Returns:
        bool: ``True`` on success, ``False`` if the host is untouchable or a
        Badfish, Foreman or health-check step failed.
    """
    build_start = datetime.now()
    logger.debug("Moving and rebuilding host: %s" % host)

    untouchable_hosts = conf["untouchable_hosts"]
    logger.debug("Untouchable hosts: %s" % untouchable_hosts)
    _host_obj = Host.objects(name=host).first()

    if host in untouchable_hosts:
        logger.error("No way...")
        return False

    _new_cloud_obj = Cloud.objects(name=new_cloud).first()

    # Clouds with a ticket get a ticket-derived password; otherwise fall
    # back to the configured IPMI password.
    ipmi_new_pass = (
        f"{conf['infra_location']}@{_new_cloud_obj.ticket}"
        if _new_cloud_obj.ticket
        else conf["ipmi_password"]
    )
    ipmi_set_pass = [
        "user",
        "set",
        "password",
        str(conf["ipmi_cloud_username_id"]),
        ipmi_new_pass,
    ]

    # IPMI calls use their own semaphore, separate from the Foreman one.
    new_semaphore = asyncio.Semaphore(20)
    await execute_ipmi(host, arguments=ipmi_set_pass, semaphore=new_semaphore)

    ipmi_set_operator = [
        "user",
        "priv",
        str(conf["ipmi_cloud_username_id"]),
        "0x4",
    ]
    await execute_ipmi(host, arguments=ipmi_set_operator, semaphore=new_semaphore)

    if rebuild and _new_cloud_obj.name != _host_obj.default_cloud.name:
        if "pdu_management" in conf and conf["pdu_management"]:
            # TODO: pdu management
            pass

        interfaces_file = os.path.join(
            os.path.dirname(__file__), "../../conf/idrac_interfaces.yml"
        )

        if is_supported(host):
            try:
                badfish = await badfish_factory(
                    "mgmt-%s" % host,
                    conf["ipmi_username"],
                    conf["ipmi_password"],
                    propagate=True,
                )
            except BadfishException:
                logger.error(
                    f"Could not initialize Badfish. Verify ipmi credentials for mgmt-{host}."
                )
                return False

            try:
                # Await the coroutine directly. Scheduling it with
                # run_coroutine_threadsafe() returned an always-truthy
                # Future, so the reboot could race the boot-order change
                # and BadfishException never reached this handler.
                changed_boot_order = await badfish.change_boot(
                    "director", interfaces_file
                )
                if changed_boot_order:
                    await badfish.reboot_server(graceful=False)
            except BadfishException:
                logger.error(
                    f"Could not set boot order via Badfish for mgmt-{host}.")
                return False

        params = [
            {
                "name": "operatingsystems",
                "value": conf["foreman_default_os"],
                "identifier": "title",
            },
            {
                "name": "ptables",
                "value": conf["foreman_default_ptable"]
            },
            {
                "name": "media",
                "value": conf["foreman_default_medium"]
            },
        ]

        foreman = Foreman(
            conf["foreman_api_url"],
            conf["foreman_username"],
            conf["foreman_password"],
            semaphore=semaphore,
            loop=loop,
        )

        foreman_results = []

        set_result = await foreman.set_host_parameter(host, "overcloud", "true")
        foreman_results.append(set_result)

        put_result = await foreman.put_parameter(host, "build", 1)
        foreman_results.append(put_result)

        put_param_result = await foreman.put_parameters_by_name(host, params)
        foreman_results.append(put_param_result)

        owner_id = await foreman.get_user_id(new_cloud)
        host_id = await foreman.get_host_id(host)
        put_result = await foreman.put_element("hosts", host_id, "owner_id", owner_id)
        foreman_results.append(put_result)

        for result in foreman_results:
            if isinstance(result, Exception) or not result:
                logger.error(
                    "There was something wrong setting Foreman host parameters."
                )
                return False

        healthy = False
        for _ in range(RETRIES):
            # NOTE(review): health_check() is called synchronously, as in
            # the original; confirm it is not a coroutine in this version.
            nc = Netcat(_host_obj.name)
            try:
                if nc.health_check():
                    healthy = True
                    break
            finally:
                # Close the connection even if the check raises.
                nc.close()

        if not healthy:
            logger.error("Failed to recover host after changing boot order.")
            return False

        if is_supported(host):
            try:
                await badfish.boot_to_type("foreman", interfaces_file)
                await badfish.reboot_server(graceful=False)
            except BadfishException:
                logger.error(f"Error setting PXE boot via Badfish on {host}.")
                await badfish.reboot_server(graceful=False)
                return False
        else:
            try:
                ipmi_pxe_persistent = [
                    "chassis",
                    "bootdev",
                    "pxe",
                    "options",
                    "=",
                    "persistent",
                ]
                await execute_ipmi(host, arguments=ipmi_pxe_persistent, semaphore=new_semaphore)
                await ipmi_reset(host, new_semaphore)
            except Exception as ex:
                # Best effort: an IPMI failure is logged but does not abort
                # the move.
                logger.debug(ex)
                logger.error(
                    f"There was something wrong setting PXE flag or resetting IPMI on {host}."
                )

    schedule = Schedule.current_schedule(cloud=_new_cloud_obj, host=_host_obj).first()
    if schedule:
        schedule.update(build_start=build_start, build_end=datetime.now())
        schedule.save()

    logger.debug("Updating host: %s" % host)
    _host_obj.update(cloud=_new_cloud_obj, build=False, last_build=datetime.now(), validated=False)
    return True
def post_network_test(self):
    """Check that every host answers on its name and on each test network.

    Each host must first pass a ``Netcat`` health check. Then, over SSH
    from the host with the most interfaces, ``fping -u`` is run against all
    host names and, per interface in ``INTERFACES``, against addresses
    derived by replacing the first two octets of each host's resolved IP
    with those of the configured networks. A ``run_cmd`` result that is not
    a list is treated as failure.

    The SSH session and every Netcat connection are closed on all exit
    paths.

    Returns:
        bool: ``True`` if all checks pass, ``False`` otherwise.

    Raises:
        IndexError: if ``self.hosts`` is empty.
    """
    test_host = self.hosts[0]
    hosts_down = []
    for host in self.hosts:
        nc = Netcat(host.name)
        try:
            if not nc.health_check():
                hosts_down.append(host.name)
        finally:
            # Close the connection even if the check raises.
            nc.close()
        # Run the ping sweep from the host that has the most interfaces.
        if len(host.interfaces) > len(test_host.interfaces):
            test_host = host

    if hosts_down:
        logger.error("The following hosts appear to be down or with no ssh connection:")
        for hostname in hosts_down:
            logger.error(hostname)
        return False

    try:
        ssh_helper = SSHHelper(test_host.name)
    except (SSHException, NoValidConnectionsError, socket.timeout) as ex:
        logger.debug(ex)
        logger.error(
            "Could not establish connection with host: %s." % test_host.name
        )
        self.report = (
            self.report
            + "Could not establish connection with host: %s.\n" % test_host.name
        )
        return False

    # ``finally`` closes the session on the failure returns too; previously
    # only the success path disconnected.
    try:
        host_list = " ".join(host.name for host in self.hosts)
        if not isinstance(ssh_helper.run_cmd("fping -u %s" % host_list), list):
            return False

        for i, interface in enumerate(INTERFACES.keys()):
            new_ips = []
            host_ips = [
                {"ip": socket.gethostbyname(host.name), "host": host}
                for host in self.hosts
                if interface in [_interface.name for _interface in host.interfaces]
            ]
            for entry in host_ips:
                _host_obj = entry["host"]
                _interfaces = INTERFACES[interface]
                last_nic = i == len(_host_obj.interfaces) - 1
                # With a VLAN, only the first network is tested on the
                # last NIC.
                if last_nic and self.cloud.vlan:
                    _interfaces = _interfaces[:1]
                for value in _interfaces:
                    # Keep the host part, swap in the test network's
                    # first two octets.
                    ip_apart = entry["ip"].split(".")
                    octets = value.split(".")
                    ip_apart[0] = octets[0]
                    ip_apart[1] = octets[1]
                    new_ips.append(".".join(ip_apart))

            if new_ips:
                ping_result = ssh_helper.run_cmd("fping -u %s" % " ".join(new_ips))
                if not isinstance(ping_result, list):
                    return False

        return True
    finally:
        ssh_helper.disconnect()