def kill_rabbit(self, node1, node2=None):
    logger.warning(Color.yellow("Remediation: Hard restarting RabbitMQ"))
    # Kill every RabbitMQ process on the node, then start the service fresh
    restart_cmd = ";".join(["for i in `ps aux | grep [r]abbitmq | "
                            "awk '{print $2}'`",
                            "do kill -9 $i",
                            "done",
                            "service rabbitmq-server start"])
    node1.run_cmd(restart_cmd)
    if node2:
        node2.run_cmd(restart_cmd)
def restore_node(self, node):
    """Restore a node."""
    logger.info('Powering on node...')
    rebootable = False
    # In case the node is not in a bootable state, retry until power-on succeeds
    while not rebootable:
        try:
            node.power_on()
            rebootable = True
        except:
            logger.warning(Color.yellow("Unable to boot: {0}".format(
                node.name)))
            sleep(5)
def verify(self, builds, progress, node_up, node_down=None):
    """Verify state persistence after a failover."""
    logger.info("Verifying cluster integrity...")

    progress.set_stages("Progress", 14)
    progress.update("Progress", 0)

    # Verify that node_up is indeed up (yes, it's necessary)
    while not self.is_online(node_up.ipaddress):
        sleep(1)

    # Check whether the RS Cloud libvirt issue has been resolved
    computes_reporting = False
    while not computes_reporting:
        logger.debug("Checking if compute nodes are checked in")
        progress.update("Progress")
        libvirt = node_up.run_cmd(";".join(["source openrc",
                                            ("nova service-list | "
                                             "grep 'nova-compute' "
                                             "| awk '{print $10}'")]))['return']
        if "down" in libvirt:
            logger.warning(Color.yellow("Compute nodes are unchecked"))
            continue
        elif "up" in libvirt:
            logger.debug(Color.green("Compute nodes are checked in."))
            computes_reporting = True
    progress.update("Progress", 1)

    # Check RPCS services (haproxy, keepalived, rpcdaemon)
    services = ['haproxy', 'keepalived', 'rpcdaemon']
    for service in services:
        self.wait_service(service, node_up)
        progress.update("Progress", 1)
    if node_down:
        for service in services:
            self.wait_service(service, node_down)
            progress.update("Progress", 1)
    else:
        progress.update("Progress", 3)

    # Check that the VIPs moved over to node_up
    logger.debug("Checking for vips on {0}".format(node_up.name))
    exec_vips = node_up.run_cmd("ip netns exec vips ip a")['return']
    progress.update("Progress", 1)
    exec_vips_down = " "
    if node_down:
        logger.debug("Checking for vips on {0}".format(node_down.name))
        exec_vips_down = node_down.run_cmd("ip netns exec vips ip a")['return']
    progress.update("Progress", 1)

    vips = self.deployment.override_attrs['vips']['config'].keys()
    progress.update("Progress", 1)
    for vip in vips:
        logger.debug("VIP: {0}".format(vip))
        logger.debug("Verifying VIP namespace.")
        # Wait while the vip is absent from both controllers
        while (vip not in exec_vips) and (vip not in exec_vips_down):
            sleep(1)
            exec_vips = node_up.run_cmd("ip netns exec vips "
                                        "ip a")['return']
            if node_down:
                exec_vips_down = node_down.run_cmd("ip netns exec vips "
                                                   "ip a")['return']
        # Verify that the vip does not reside on both controllers
        if (vip in exec_vips) and (vip in exec_vips_down):
            assert vip not in exec_vips, ("{0} vip found on both "
                                          "controllers").format(vip)
        # Check for the vip on the node_up controller
        elif vip in exec_vips:
            logger.debug("{0} vip found on {1}...".format(vip, node_up.name))
        # Check for the vip on the node_down controller
        else:
            logger.debug("{0} vip found on {1}...".format(vip,
                                                          node_down.name))
    progress.update("Progress", 1)

    # IP NETNS NEEDS TO CONTAIN NEUTRON NET-LIST?
    # ip_netns_value = node_up.run_cmd("ip netns")['return'].rstrip()

    # Check that networks were rescheduled
    for build in builds:
        logger.debug("Checking DHCP on {0}".format(build.name))
        self.wait_dhcp_agent_alive(build.network_id, progress)
    progress.update("Progress", 1)

    # Check connectivity to builds
    logger.info("Checking connectivity to builds...")
    for build in builds:
        logger.debug("Skipping connectivity test: {0}".format(build.name))
        # while not self.is_online(build.ip_info['floating_ip_address']):
        #     logger.debug("Build {0} with IP {1} IS NOT responding...".format(
        #         build.name, build.ip_info['floating_ip_address']))
        #     progress.update("Progress")
        # logger.debug("Build {0} with IP {1} IS responding...".format(
        #     build.name, build.ip_info['floating_ip_address']))
    progress.update("Progress")

    # Check MySQL replication isn't broken and Controller2 is master.  #CAM

    # Check RabbitMQ
    self.test_rabbit_status(progress, node_up, node_down)
    progress.update("Progress", 1)

    # Check that all the configured OpenStack services are functional.
    # Run Tempest based on the features enabled.  #SELECTIVE TEMPEST RUN

    # Verify that the compute nodes are able to report
    nova_status = "down"
    while "down" in nova_status:
        logger.debug("Checking if nova is up on compute")
        progress.update("Progress")
        nova_status = node_up.run_cmd(
            ";".join(["source openrc",
                      "nova service-list | grep compute | "
                      "awk '{print $10}'"]))['return'].rstrip()
        if "down" in nova_status:
            logger.warning(Color.yellow(
                "At least one compute node isn't properly reporting"))
        else:
            logger.debug("All compute nodes are properly reporting")
    progress.update("Progress", 1)
def destroy(self, nova, neutron, progress, node1, node2):
    """Clean up build state from OpenStack."""
    # float    neutron floatingip-delete [floatingip-id]
    # instance nova delete [instance-id]
    # iface    neutron router-interface-delete [router-id] [subnet-id]
    # router   neutron router-delete [router-id]
    # subnet   neutron subnet-delete [subnet-id]
    # network  neutron net-delete [network-id]
    logger.info('Cleaning up instance and network clutter...')

    logger.debug('Deleting floating IP')
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            neutron.delete_floatingip(self.ip_info['id'])
            deleted = True
        except:
            deleted = False

    logger.debug('Deleting server {0}'.format(self.name))
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            # Run server deletion command
            logger.debug("Attempting server deletion")
            progress.update("Progress")
            nova.servers.delete(self.server)
        except:
            deleted = False

        max_tries = 60
        current_try = 1

        # As long as nova.servers.get() still finds the server,
        # it has not yet been deleted...
        deleted = True
        really_deleted = False
        while not really_deleted:
            try:
                nova.servers.get(self.server)
                if current_try > max_tries:
                    # Force kill the RabbitMQ server and start it back up
                    logger.error(Color.red("Server deletion is hung"))
                    progress.update("Progress")
                    self.kill_rabbit(node1, node2)
                    deleted = False
                    break
                logger.warning(Color.yellow("Deleting server: {0}".format(
                    current_try)))
                progress.update("Progress")
                sleep(1)
                current_try += 1
            except:
                logger.debug(Color.green("Server deleted."))
                really_deleted = True
                deleted = True

    logger.debug('Deleting router interface')
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            neutron.remove_interface_router(self.router_id,
                                            {"subnet_id": self.subnet_id})
            deleted = True
        except:
            deleted = False

    logger.debug("Deleting router")
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            neutron.delete_router(self.router_id)
            deleted = True
        except:
            deleted = False

    logger.debug("Deleting subnet")
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            neutron.delete_subnet(self.subnet_id)
            deleted = True
        except:
            deleted = False

    logger.debug("Deleting network")
    progress.update("Progress")
    deleted = False
    while not deleted:
        try:
            neutron.delete_network(self.network_id)
            deleted = True
        except:
            deleted = False