def do_stress_migration(vms, srcuri, desturi, stress_type,
                        migration_type, params, thread_timeout=60):
    """
    Migrate vms with stress.

    :param vms: migrated vms.
    """
    fail_info = utils_test.load_stress(stress_type, vms, params)
    if len(fail_info):
        logging.warning("Add stress for migration failed:%s", fail_info)

    migtest = utlv.MigrationTest()
    migtest.do_migration(vms, srcuri, desturi, migration_type, options=None,
                         thread_timeout=thread_timeout)

    # vms will be shutdown, so no need to do this cleanup
    # And migrated vms may be not login if the network is local lan
    if stress_type == "stress_on_host":
        utils_test.unload_stress(stress_type, vms)

    if not migtest.RET_MIGRATION:
        raise error.TestFail()
def do_stress_migration(vms, srcuri, desturi, stress_type,
                        migration_type, params, thread_timeout=60):
    """
    Migrate vms with stress.

    :param vms: migrated vms.
    :param srcuri: connect uri for source machine
    :param desturi: connect uri for destination machine
    :param stress_type: type of stress test in VM
    :param migration_type: type of migration to be performed
    :param params: Test dict params
    :param thread_timeout: default timeout for migration thread
    :raise: exceptions.TestFail if migration fails
    """
    fail_info = utils_test.load_stress(stress_type, vms, params)

    migtest = utlv.MigrationTest()
    options = ''
    if migration_type == "compressed":
        options = "--compressed"
        migration_type = "orderly"
        shared_dir = os.path.dirname(data_dir.get_data_dir())
        src_file = os.path.join(shared_dir, "scripts", "duplicate_pages.py")
        dest_dir = "/tmp"
        for vm in vms:
            session = vm.wait_for_login()
            vm.copy_files_to(src_file, dest_dir)
            status = session.cmd_status("cd /tmp;python duplicate_pages.py")
            if status:
                fail_info.append("Set duplicated pages for vm failed.")

    if len(fail_info):
        logging.warning("Add stress for migration failed:%s", fail_info)

    logging.debug("Starting migration...")
    migrate_options = ("--live --unsafe %s --timeout %s"
                       % (options, params.get("virsh_migrate_timeout", 60)))
    migtest.do_migration(vms, srcuri, desturi, migration_type,
                         options=migrate_options,
                         thread_timeout=thread_timeout)

    # vms will be shutdown, so no need to do this cleanup
    # And migrated vms may be not login if the network is local lan
    if stress_type == "stress_on_host":
        utils_test.unload_stress(stress_type, vms)

    if not migtest.RET_MIGRATION:
        raise exceptions.TestFail()
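# A minimal usage sketch (not part of the original test): it assumes the
# caller has already prepared the VM objects, both connect URIs and the
# params dict. The URIs, stress type and timeout below are placeholders,
# not values taken from the original test configuration.
do_stress_migration(vms, "qemu:///system",
                    "qemu+ssh://dest.example.com/system",
                    stress_type="stress_in_vms",
                    migration_type="compressed",
                    params=params, thread_timeout=900)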
def run(test, params, env):
    """
    :param test: kvm test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    guest_stress = params.get("guest_stress", "no") == "yes"
    host_stress = params.get("host_stress", "no") == "yes"
    stress_events = params.get("stress_events", "reboot")
    vms = env.get_all_vms()
    vms_uptime_init = {}
    if "reboot" not in stress_events:
        for vm in vms:
            vms_uptime_init[vm.name] = vm.uptime()
    stress_event = utils_stress.VMStressEvents(params, env)
    if guest_stress:
        try:
            utils_test.load_stress("stress_in_vms", params=params, vms=vms)
        except Exception as err:
            test.fail("Error running stress in vms: %s" % err)
    if host_stress:
        if params.get("host_stress_args", ""):
            params["stress_args"] = params.get("host_stress_args")
        try:
            utils_test.load_stress("stress_on_host", params=params)
        except Exception as err:
            test.fail("Error running stress in host: %s" % err)
    try:
        stress_event.run_threads()
    finally:
        stress_event.wait_for_threads()
        if guest_stress:
            utils_test.unload_stress("stress_in_vms", params=params, vms=vms)
        if host_stress:
            utils_test.unload_stress("stress_on_host", params=params)
        if "reboot" not in stress_events:
            fail = False
            for vm in vms:
                if vm.uptime() < vms_uptime_init[vm.name]:
                    logging.error("Unexpected reboot of VM %s during the test",
                                  vm.name)
                    fail = True
            if fail:
                test.fail("Unexpected VM reboot detected")
def do_stress_migration(vms, srcuri, desturi, stress_type,
                        migration_type, params, thread_timeout=60):
    """
    Migrate vms with stress.

    :param vms: migrated vms.
    """
    fail_info = utils_test.load_stress(stress_type, vms, params)
    if len(fail_info):
        logging.warning("Add stress for migration failed:%s", fail_info)

    migtest = utlv.MigrationTest()
    migtest.do_migration(vms, srcuri, desturi, migration_type, thread_timeout)

    utils_test.unload_stress(stress_type, vms)
    if not migtest.RET_MIGRATION:
        raise error.TestFail()
def run(test, params, env):
    """
    Test different hmi injections with guest

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """

    def set_condn(action, recover=False):
        """
        Set/reset guest state/action

        :param action: Guest state change/action
        :param recover: whether to recover given state default: False
        """
        if not recover:
            if action == "pin_vcpu":
                for i in range(cur_vcpu):
                    virsh.vcpupin(vm_name, i, hmi_cpu, "--live",
                                  ignore_status=False, debug=True)
                virsh.emulatorpin(vm_name, hmi_cpu, "live",
                                  ignore_status=False, debug=True)
            elif action == "filetrans":
                utils_test.run_file_transfer(test, params, env)
            elif action == "save":
                save_file = os.path.join(data_dir.get_tmp_dir(),
                                         vm_name + ".save")
                result = virsh.save(vm_name, save_file, ignore_status=True,
                                    debug=True)
                utils_test.libvirt.check_exit_status(result)
                time.sleep(10)
                if os.path.exists(save_file):
                    result = virsh.restore(save_file, ignore_status=True,
                                           debug=True)
                    utils_test.libvirt.check_exit_status(result)
                    os.remove(save_file)
            elif action == "suspend":
                result = virsh.suspend(vm_name, ignore_status=True, debug=True)
                utils_test.libvirt.check_exit_status(result)
                time.sleep(10)
                result = virsh.resume(vm_name, ignore_status=True, debug=True)
                utils_test.libvirt.check_exit_status(result)
        return

    host_version = params.get("host_version")
    guest_version = params.get("guest_version", "")
    max_vcpu = int(params.get("ppchmi_vcpu_max", '1'))
    cur_vcpu = int(params.get("ppchmi_vcpu_cur", "1"))
    cores = int(params.get("ppchmi_cores", '1'))
    sockets = int(params.get("ppchmi_sockets", '1'))
    threads = int(params.get("ppchmi_threads", '1'))
    status_error = "yes" == params.get("status_error", "no")
    condition = params.get("condn", "")
    inject_code = params.get("inject_code", "")
    scom_base = params.get("scom_base", "")
    hmi_name = params.get("hmi_name", "")
    hmi_iterations = int(params.get("hmi_iterations", 1))

    if host_version not in cpu.get_cpu_arch():
        test.cancel("Unsupported Host cpu version")

    vm_name = params.get("main_vm")
    vm = env.get_vm(vm_name)

    sm = SoftwareManager()
    if not sm.check_installed("opal-utils") and not sm.install("opal-utils"):
        test.cancel("opal-utils package install failed")

    cpus_list = cpu.cpu_online_list()
    cpu_idle_state = cpu.get_cpuidle_state()
    cpu.set_cpuidle_state()
    # Lets use second available host cpu
    hmi_cpu = cpus_list[1]
    pir = int(
        open('/sys/devices/system/cpu/cpu%s/pir' % hmi_cpu).read().strip(), 16)
    if host_version == 'power9':
        coreid = (((pir) >> 2) & 0x3f)
        nodeid = (((pir) >> 8) & 0x7f) & 0xf
        hmi_scom_addr = hex(((coreid & 0x1f + 0x20) << 24) |
                            int(scom_base, 16))
    if host_version == 'power8':
        coreid = (((pir) >> 3) & 0xf)
        nodeid = (((pir) >> 7) & 0x3f)
        hmi_scom_addr = hex(((coreid & 0xf) << 24) | int(scom_base, 16))
    hmi_cmd = "putscom -c %s %s %s" % (nodeid, hmi_scom_addr, inject_code)

    vmxml = libvirt_xml.VMXML.new_from_inactive_dumpxml(vm_name)
    org_xml = vmxml.copy()
    # Destroy the vm
    vm.destroy()
    try:
        session = None
        bgt = None
        libvirt_xml.VMXML.set_vm_vcpus(vm_name, max_vcpu, cur_vcpu,
                                       sockets=sockets, cores=cores,
                                       threads=threads, add_topology=True)
        if guest_version:
            libvirt_xml.VMXML.set_cpu_mode(vm_name, model=guest_version)
        vm.start()
        # Lets clear host and guest dmesg
        process.system("dmesg -C", verbose=False)
        session = vm.wait_for_login()
        session.cmd("dmesg -C")

        # Set condn
        if "vcpupin" in condition:
            set_condn("pin_vcpu")
        if "stress" in condition:
            utils_test.load_stress("stress_in_vms", params=params, vms=[vm])
        if "save" in condition:
            set_condn("save")
        if "suspend" in condition:
            set_condn("suspend")

        # hmi inject
        logging.debug("Injecting %s HMI on cpu %s", hmi_name, hmi_cpu)
        logging.debug("HMI Command: %s", hmi_cmd)
        process.run(hmi_cmd)

        # Check host and guest dmesg
        host_dmesg = process.run("dmesg -c", verbose=False).stdout_text
        guest_dmesg = session.cmd_output("dmesg")
        if "Unrecovered" in host_dmesg:
            test.fail("Unrecovered host hmi\n%s" % host_dmesg)
        else:
            logging.debug("Host dmesg: %s", host_dmesg)
            logging.debug("Guest dmesg: %s", guest_dmesg)
        if "save" in condition:
            set_condn("save")
        if "suspend" in condition:
            set_condn("suspend")
    finally:
        if "stress" in condition:
            utils_test.unload_stress("stress_in_vms", params=params, vms=[vm])
        if session:
            session.close()
        org_xml.sync()
        cpu.set_cpuidle_state(setstate=cpu_idle_state)
def set_condition(vm_name, condn, reset=False, guestbt=None):
    """
    Set domain to given state or reset it.
    """
    bt = None
    if not reset:
        if condn == "avocado_test":
            testlist = utils_test.get_avocadotestlist(params)
            bt = utils_test.run_avocado_bg(vm, params, test, testlist)
            if not bt:
                test.cancel("guest stress failed to start")
            # Allow stress to start
            time.sleep(condn_sleep_sec)
            return bt
        elif condn == "stress":
            utils_test.load_stress("stress_in_vms", params=params, vms=[vm])
        elif condn in ["save", "managedsave"]:
            # No action
            pass
        elif condn == "suspend":
            result = virsh.suspend(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "hotplug":
            result = virsh.setvcpus(vm_name, max_vcpu, "--live",
                                    ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
            exp_vcpu = {'max_config': max_vcpu,
                        'max_live': max_vcpu,
                        'cur_config': current_vcpu,
                        'cur_live': max_vcpu,
                        'guest_live': max_vcpu}
            result = cpu.check_vcpu_value(vm, exp_vcpu, option="--live")
        elif condn == "host_smt":
            if cpuutil.get_cpu_vendor_name() == 'power9':
                result = process.run("ppc64_cpu --smt=4", shell=True)
            else:
                test.cancel("Host SMT changes not allowed during guest live")
        else:
            logging.debug("No operation for the domain")
    else:
        if condn == "save":
            save_file = os.path.join(data_dir.get_tmp_dir(),
                                     vm_name + ".save")
            result = virsh.save(vm_name, save_file, ignore_status=True,
                                debug=True)
            libvirt.check_exit_status(result)
            time.sleep(condn_sleep_sec)
            if os.path.exists(save_file):
                result = virsh.restore(save_file, ignore_status=True,
                                       debug=True)
                libvirt.check_exit_status(result)
                os.remove(save_file)
            else:
                test.error("No save file for domain restore")
        elif condn == "managedsave":
            result = virsh.managedsave(vm_name, ignore_status=True,
                                       debug=True)
            libvirt.check_exit_status(result)
            time.sleep(condn_sleep_sec)
            result = virsh.start(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "suspend":
            result = virsh.resume(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "avocado_test":
            guestbt.join()
        elif condn == "stress":
            utils_test.unload_stress("stress_in_vms", params=params, vms=[vm])
        elif condn == "hotplug":
            result = virsh.setvcpus(vm_name, current_vcpu, "--live",
                                    ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
            exp_vcpu = {'max_config': max_vcpu,
                        'max_live': current_vcpu,
                        'cur_config': current_vcpu,
                        'cur_live': current_vcpu,
                        'guest_live': current_vcpu}
            result = cpu.check_vcpu_value(vm, exp_vcpu, option="--live")
        elif condn == "host_smt":
            result = process.run("ppc64_cpu --smt=2", shell=True)
            # Change back the host smt
            result = process.run("ppc64_cpu --smt=4", shell=True)
            # Work around due to known cgroup issue after cpu hot(un)plug
            # sequence
            root_cpuset_path = utils_cgroup.get_cgroup_mountpoint("cpuset")
            machine_cpuset_paths = []
            if os.path.isdir(
                    os.path.join(root_cpuset_path, "machine.slice")):
                machine_cpuset_paths.append(
                    os.path.join(root_cpuset_path, "machine.slice"))
            if os.path.isdir(os.path.join(root_cpuset_path, "machine")):
                machine_cpuset_paths.append(
                    os.path.join(root_cpuset_path, "machine"))
            if not machine_cpuset_paths:
                logging.warning("cgroup cpuset might not recover properly "
                                "for guests after host smt changes, "
                                "restore it manually")
            root_cpuset_cpus = os.path.join(root_cpuset_path, "cpuset.cpus")
            for path in machine_cpuset_paths:
                machine_cpuset_cpus = os.path.join(path, "cpuset.cpus")
                # check if file content differs
                cmd = "diff %s %s" % (root_cpuset_cpus,
                                      machine_cpuset_cpus)
                if process.system(cmd, verbose=True, ignore_status=True):
                    cmd = "cp %s %s" % (root_cpuset_cpus,
                                        machine_cpuset_cpus)
                    process.system(cmd, verbose=True)
        else:
            logging.debug("No need recover the domain")
    return bt
def set_condition(vm_name, condn, reset=False, guestbt=None):
    """
    Set domain to given state or reset it.
    """
    bt = None
    if not reset:
        if condn == "avocadotest":
            bt = utils_test.run_avocado_bg(vm, params, test)
            if not bt:
                test.cancel("guest stress failed to start")
            # Allow stress to start
            time.sleep(condn_sleep_sec)
            return bt
        elif condn == "stress":
            utils_test.load_stress("stress_in_vms", params=params, vms=[vm])
        elif condn in ["save", "managedsave"]:
            # No action
            pass
        elif condn == "suspend":
            result = virsh.suspend(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "hotplug":
            result = virsh.setvcpus(vm_name, max_vcpu, "--live",
                                    ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
            exp_vcpu = {'max_config': max_vcpu,
                        'max_live': max_vcpu,
                        'cur_config': current_vcpu,
                        'cur_live': max_vcpu,
                        'guest_live': max_vcpu}
            result = utils_hotplug.check_vcpu_value(vm, exp_vcpu,
                                                    option="--live")
        elif condn == "host_smt":
            if cpu.get_cpu_arch() == 'power9':
                result = process.run("ppc64_cpu --smt=4", shell=True)
            else:
                test.cancel("Host SMT changes not allowed during guest live")
        else:
            logging.debug("No operation for the domain")
    else:
        if condn == "save":
            save_file = os.path.join(data_dir.get_tmp_dir(),
                                     vm_name + ".save")
            result = virsh.save(vm_name, save_file, ignore_status=True,
                                debug=True)
            libvirt.check_exit_status(result)
            time.sleep(condn_sleep_sec)
            if os.path.exists(save_file):
                result = virsh.restore(save_file, ignore_status=True,
                                       debug=True)
                libvirt.check_exit_status(result)
                os.remove(save_file)
            else:
                test.error("No save file for domain restore")
        elif condn == "managedsave":
            result = virsh.managedsave(vm_name, ignore_status=True,
                                       debug=True)
            libvirt.check_exit_status(result)
            time.sleep(condn_sleep_sec)
            result = virsh.start(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "suspend":
            result = virsh.resume(vm_name, ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
        elif condn == "avocadotest":
            guestbt.join(ignore_status=True)
        elif condn == "stress":
            utils_test.unload_stress("stress_in_vms", params=params, vms=[vm])
        elif condn == "hotplug":
            result = virsh.setvcpus(vm_name, current_vcpu, "--live",
                                    ignore_status=True, debug=True)
            libvirt.check_exit_status(result)
            exp_vcpu = {'max_config': max_vcpu,
                        'max_live': current_vcpu,
                        'cur_config': current_vcpu,
                        'cur_live': current_vcpu,
                        'guest_live': current_vcpu}
            result = utils_hotplug.check_vcpu_value(vm, exp_vcpu,
                                                    option="--live")
        elif condn == "host_smt":
            result = process.run("ppc64_cpu --smt=2", shell=True)
            # Change back the host smt
            result = process.run("ppc64_cpu --smt=4", shell=True)
            # Work around due to known cgroup issue after cpu hot(un)plug
            # sequence
            root_cpuset_path = utils_cgroup.get_cgroup_mountpoint("cpuset")
            machine_cpuset_paths = []
            if os.path.isdir(os.path.join(root_cpuset_path, "machine.slice")):
                machine_cpuset_paths.append(os.path.join(root_cpuset_path,
                                                         "machine.slice"))
            if os.path.isdir(os.path.join(root_cpuset_path, "machine")):
                machine_cpuset_paths.append(os.path.join(root_cpuset_path,
                                                         "machine"))
            if not machine_cpuset_paths:
                logging.warning("cgroup cpuset might not recover properly "
                                "for guests after host smt changes, "
                                "restore it manually")
            root_cpuset_cpus = os.path.join(root_cpuset_path, "cpuset.cpus")
            for path in machine_cpuset_paths:
                machine_cpuset_cpus = os.path.join(path, "cpuset.cpus")
                # check if file content differs
                cmd = "diff %s %s" % (root_cpuset_cpus,
                                      machine_cpuset_cpus)
                if process.system(cmd, verbose=True, ignore_status=True):
                    cmd = "cp %s %s" % (root_cpuset_cpus,
                                        machine_cpuset_cpus)
                    process.system(cmd, verbose=True)
        else:
            logging.debug("No need recover the domain")
    return bt
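# Hypothetical calling pattern (not from the original tests): helpers like
# set_condition() are typically driven from run() by applying the condition
# before the operation under test and resetting it afterwards. "stress" is
# just an example condition; the placeholder in the try block stands in for
# the actual vcpu or migration operation being exercised.
bt = set_condition(vm_name, "stress")
try:
    pass  # perform the operation under test here
finally:
    set_condition(vm_name, "stress", reset=True, guestbt=bt)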
def run(test, params, env):
    """
    Test: vcpu hotplug.

    The command can change the number of virtual CPUs for VM.
    1.Prepare test environment,destroy or suspend a VM.
    2.Perform virsh setvcpus operation.
    3.Recover test environment.
    4.Confirm the test result.
    """
    vm_name = params.get("main_vm")
    min_count = int(params.get("setvcpus_min_count", "1"))
    max_count = int(params.get("setvcpus_max_count", "2"))
    test_times = int(params.get("setvcpus_test_times", "1"))
    stress_type = params.get("stress_type", "")
    stress_param = params.get("stress_param", "")
    add_by_virsh = ("yes" == params.get("add_by_virsh"))
    del_by_virsh = ("yes" == params.get("del_by_virsh"))
    hotplug_timeout = int(params.get("hotplug_timeout", 30))
    test_set_max = max_count * 2

    # Save original configuration
    orig_config_xml = libvirt_xml.VMXML.new_from_inactive_dumpxml(vm_name)

    # Set min/max of vcpu
    libvirt_xml.VMXML.set_vm_vcpus(vm_name, test_set_max, min_count,
                                   topology_correction=True)

    # prepare VM instance
    vm = libvirt_vm.VM(vm_name, params, test.bindir, env.get("address_cache"))

    # prepare guest-agent service
    vm.prepare_guest_agent()

    # Increase the workload
    load_vms = []
    if stress_type in ['cpu', 'memory', 'io']:
        params["stress_args"] = stress_param
        load_vms.append(vm)
        if stress_type in ['cpu', 'memory']:
            utils_test.load_stress("stress_in_vms", params, vms=load_vms)
        else:
            utils_test.load_stress("iozone_in_vms", params, vms=load_vms)

    session = vm.wait_for_login()
    try:
        # Clear dmesg before set vcpu
        session.cmd("dmesg -c")
        for i in range(test_times):
            # 1. Add vcpu
            add_result = libvirt.hotplug_domain_vcpu(vm, max_count,
                                                     add_by_virsh)
            add_status = add_result.exit_status

            # 1.1 check add status
            if add_status:
                if add_result.stderr.count("support"):
                    test.cancel("vcpu hotplug not supported, "
                                "no need to test any more:\n %s"
                                % add_result.stderr.strip())
                test.fail("Test failed for:\n %s"
                          % add_result.stderr.strip())
            if not utils_misc.wait_for(
                    lambda: utils_misc.check_if_vm_vcpu_match(max_count, vm),
                    hotplug_timeout, text="wait for vcpu online"):
                test.fail("vcpu hotplug failed")

            if 'ppc' not in platform.machine():
                # 1.2 check dmesg
                domain_add_dmesg = session.cmd_output("dmesg -c")
                dmesg1 = "CPU%d has been hot-added" % (max_count - 1)
                dmesg2 = "CPU %d got hotplugged" % (max_count - 1)
                if (not domain_add_dmesg.count(dmesg1) and
                        not domain_add_dmesg.count(dmesg2)):
                    test.fail("Cannot find hotplug info in dmesg: %s"
                              % domain_add_dmesg)

                # 1.3 check cpu related file
                online_cmd = "cat /sys/devices/system/cpu/cpu%d/online" \
                             % (max_count - 1)
                st, ot = session.cmd_status_output(online_cmd)
                if st:
                    test.fail("Cannot find CPU%d after hotplug"
                              % (max_count - 1))

                # 1.4 check online
                if not ot.strip().count("1"):
                    test.fail("CPU%d is not online after hotplug: %s"
                              % ((max_count - 1), ot))

                # 1.5 check online interrupts info
                inter_on_output = session.cmd_output("cat /proc/interrupts")
                if not inter_on_output.count("CPU%d" % (int(max_count) - 1)):
                    test.fail("CPU%d can not be found in "
                              "/proc/interrupts when it's online:%s"
                              % ((int(max_count) - 1), inter_on_output))

                # 1.6 offline vcpu
                off_st = session.cmd_status(
                    "echo 0 > /sys/devices/system/cpu/cpu%d/online"
                    % (max_count - 1))
                if off_st:
                    test.fail("Set cpu%d offline failed!" % (max_count - 1))

                # 1.7 check offline interrupts info
                inter_off_output = session.cmd_output("cat /proc/interrupts")
                if inter_off_output.count("CPU%d" % (int(max_count) - 1)):
                    test.fail("CPU%d can be found in /proc/interrupts"
                              " when it's offline" % (int(max_count) - 1))

                # 1.8 online vcpu
                on_st = session.cmd_status(
                    "echo 1 > /sys/devices/system/cpu/cpu%d/online"
                    % (max_count - 1))
                if on_st:
                    test.fail("Set cpu%d online failed!" % (max_count - 1))

            # 2. Del vcpu
            del_result = libvirt.hotplug_domain_vcpu(vm, min_count,
                                                     del_by_virsh,
                                                     hotplug=False)
            del_status = del_result.exit_status
            if del_status:
                logging.info("del_result: %s" % del_result.stderr.strip())
                # A qemu older than 1.5 or an unplug for 1.6 will result in
                # the following failure.
                # TODO: when CPU-hotplug feature becomes stable and strong,
                # remove these codes used to handle kinds of exceptions
                if re.search("The command cpu-del has not been found",
                             del_result.stderr):
                    test.cancel("vcpu hotunplug not supported")
                if re.search("cannot change vcpu count", del_result.stderr):
                    test.cancel("unhotplug failed")
                if re.search("got wrong number of vCPU pids from QEMU monitor",
                             del_result.stderr):
                    test.cancel("unhotplug failed")
                # process all tips that contains keyword 'support'
                # for example, "unsupported"/"hasn't been support" and so on
                if re.search("support", del_result.stderr):
                    test.cancel("vcpu hotunplug not supported")

                # besides above, regard it failed
                test.fail("Test fail for:\n %s" % del_result.stderr.strip())
            if not utils_misc.wait_for(
                    lambda: utils_misc.check_if_vm_vcpu_match(min_count, vm),
                    hotplug_timeout, text="wait for vcpu offline"):
                test.fail("vcpu hotunplug failed")
            if 'ppc' not in platform.machine():
                domain_del_dmesg = session.cmd_output("dmesg -c")
                if not domain_del_dmesg.count("CPU %d is now offline"
                                              % (max_count - 1)):
                    test.fail("Cannot find hot-unplug info in dmesg: %s"
                              % domain_del_dmesg)
    except exceptions.TestCancel:
        # So far, QEMU doesn't support unplug vcpu,
        # unplug operation will encounter kind of errors.
        pass
    finally:
        utils_test.unload_stress("stress_in_vms", params, load_vms)
        if session:
            session.close()
        # Cleanup
        orig_config_xml.sync()
def run(test, params, env):
    """
    Test: vcpu hotplug.

    The command can change the number of virtual CPUs for VM.
    1.Prepare test environment,destroy or suspend a VM.
    2.Perform virsh setvcpus operation.
    3.Recover test environment.
    4.Confirm the test result.
    """
    vm_name = params.get("main_vm")
    min_count = int(params.get("setvcpus_min_count", "1"))
    max_count = int(params.get("setvcpus_max_count", "2"))
    test_times = int(params.get("setvcpus_test_times", "1"))
    stress_type = params.get("stress_type", "")
    stress_param = params.get("stress_param", "")
    add_by_virsh = ("yes" == params.get("add_by_virsh"))
    del_by_virsh = ("yes" == params.get("del_by_virsh"))
    hotplug_timeout = int(params.get("hotplug_timeout", 30))
    test_set_max = max_count * 2

    # Save original configuration
    orig_config_xml = libvirt_xml.VMXML.new_from_inactive_dumpxml(vm_name)

    # Set min/max of vcpu
    libvirt_xml.VMXML.set_vm_vcpus(vm_name, test_set_max, min_count,
                                   topology_correction=True)

    # prepare VM instance
    vm = libvirt_vm.VM(vm_name, params, test.bindir, env.get("address_cache"))

    # prepare guest-agent service
    vm.prepare_guest_agent()

    # Increase the workload
    load_vms = []
    if stress_type in ['cpu', 'memory', 'io']:
        params["stress_args"] = stress_param
        load_vms.append(vm)
        if stress_type in ['cpu', 'memory']:
            utils_test.load_stress("stress_in_vms", params, vms=load_vms)
        else:
            utils_test.load_stress("iozone_in_vms", params, vms=load_vms)

    session = vm.wait_for_login()
    try:
        # Clear dmesg before set vcpu
        session.cmd("dmesg -c")
        for i in range(test_times):
            # 1. Add vcpu
            add_result = cpu.hotplug_domain_vcpu(vm, max_count, add_by_virsh)
            add_status = add_result.exit_status

            # 1.1 check add status
            if add_status:
                if add_result.stderr.count("support"):
                    test.cancel("vcpu hotplug not supported, "
                                "no need to test any more:\n %s"
                                % add_result.stderr.strip())
                test.fail("Test failed for:\n %s"
                          % add_result.stderr.strip())
            if not utils_misc.wait_for(
                    lambda: cpu.check_if_vm_vcpu_match(max_count, vm),
                    hotplug_timeout, text="wait for vcpu online"):
                test.fail("vcpu hotplug failed")

            if 'ppc' not in platform.machine():
                # 1.2 check dmesg
                domain_add_dmesg = session.cmd_output("dmesg -c")
                dmesg1 = "CPU%d has been hot-added" % (max_count - 1)
                dmesg2 = "CPU %d got hotplugged" % (max_count - 1)
                if (not domain_add_dmesg.count(dmesg1) and
                        not domain_add_dmesg.count(dmesg2)):
                    test.fail("Cannot find hotplug info in dmesg: %s"
                              % domain_add_dmesg)

                # 1.3 check cpu related file
                online_cmd = "cat /sys/devices/system/cpu/cpu%d/online" \
                             % (max_count - 1)
                st, ot = session.cmd_status_output(online_cmd)
                if st:
                    test.fail("Cannot find CPU%d after hotplug"
                              % (max_count - 1))

                # 1.4 check online
                if not ot.strip().count("1"):
                    test.fail("CPU%d is not online after hotplug: %s"
                              % ((max_count - 1), ot))

                # 1.5 check online interrupts info
                inter_on_output = session.cmd_output("cat /proc/interrupts")
                if not inter_on_output.count("CPU%d" % (int(max_count) - 1)):
                    test.fail("CPU%d can not be found in "
                              "/proc/interrupts when it's online:%s"
                              % ((int(max_count) - 1), inter_on_output))

                # 1.6 offline vcpu
                off_st = session.cmd_status(
                    "echo 0 > /sys/devices/system/cpu/cpu%d/online"
                    % (max_count - 1))
                if off_st:
                    test.fail("Set cpu%d offline failed!" % (max_count - 1))

                # 1.7 check offline interrupts info
                inter_off_output = session.cmd_output("cat /proc/interrupts")
                if inter_off_output.count("CPU%d" % (int(max_count) - 1)):
                    test.fail("CPU%d can be found in /proc/interrupts"
                              " when it's offline" % (int(max_count) - 1))

                # 1.8 online vcpu
                on_st = session.cmd_status(
                    "echo 1 > /sys/devices/system/cpu/cpu%d/online"
                    % (max_count - 1))
                if on_st:
                    test.fail("Set cpu%d online failed!" % (max_count - 1))

            # 2. Del vcpu
            del_result = cpu.hotplug_domain_vcpu(vm, min_count, del_by_virsh,
                                                 hotplug=False)
            del_status = del_result.exit_status
            if del_status:
                logging.info("del_result: %s" % del_result.stderr.strip())
                # A qemu older than 1.5 or an unplug for 1.6 will result in
                # the following failure.
                # TODO: when CPU-hotplug feature becomes stable and strong,
                # remove these codes used to handle kinds of exceptions
                if re.search("The command cpu-del has not been found",
                             del_result.stderr):
                    test.cancel("vcpu hotunplug not supported")
                if re.search("cannot change vcpu count", del_result.stderr):
                    test.cancel("unhotplug failed")
                if re.search("got wrong number of vCPU pids from QEMU monitor",
                             del_result.stderr):
                    test.cancel("unhotplug failed")
                # process all tips that contains keyword 'support'
                # for example, "unsupported"/"hasn't been support" and so on
                if re.search("support", del_result.stderr):
                    test.cancel("vcpu hotunplug not supported")

                # besides above, regard it failed
                test.fail("Test fail for:\n %s" % del_result.stderr.strip())
            if not utils_misc.wait_for(
                    lambda: cpu.check_if_vm_vcpu_match(min_count, vm),
                    hotplug_timeout, text="wait for vcpu offline"):
                test.fail("vcpu hotunplug failed")
            if 'ppc' not in platform.machine():
                domain_del_dmesg = session.cmd_output("dmesg -c")
                if not domain_del_dmesg.count("CPU %d is now offline"
                                              % (max_count - 1)):
                    test.fail("Cannot find hot-unplug info in dmesg: %s"
                              % domain_del_dmesg)
    except exceptions.TestCancel:
        # So far, QEMU doesn't support unplug vcpu,
        # unplug operation will encounter kind of errors.
        pass
    finally:
        utils_test.unload_stress("stress_in_vms", params, load_vms)
        if session:
            session.close()
        # Cleanup
        orig_config_xml.sync()
def run(test, params, env):
    """
    Test migration under stress.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    stress_type = params.get("migration_stress_type")
    require_stress_tool = "stress" in stress_tool
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("stress_args")
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        vm_bytes = (mem_total - vm_reserved) / 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    if "vm-bytes" in stress_args:
        params["stress_args"] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if require_stress_tool and stress_type == "stress_on_host":
        utils_test.load_stress("stress_on_host", params)

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()
            # configure stress in VM
            if require_stress_tool and stress_type == "stress_in_vms":
                utils_test.load_stress("stress_in_vms", params, vms)

        do_stress_migration(vms, src_uri, dest_uri, migration_type, test,
                            params, thread_timeout)
    finally:
        logging.debug("Cleanup vms...")
        params["connect_uri"] = src_uri
        for vm in vms:
            utils_test.libvirt.MigrationTest().cleanup_dest_vm(vm, None,
                                                               dest_uri)
            # Try to start vms in source once vms in destination are
            # cleaned up
            if not vm.is_alive():
                vm.start()
                vm.wait_for_login()
        utils_test.unload_stress(stress_type, params, vms)
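# Worked example (not from the original test) of the vm_bytes arithmetic above,
# assuming the usual KiB units reported by /proc/meminfo; the numbers below are
# purely illustrative and do not come from any real host.
mem_total_example = 16 * 1024 * 1024        # 16 GiB of host memory in KiB
vm_reserved_example = 2 * 2 * 1024 * 1024   # two guests with 2 GiB each
half = (mem_total_example - vm_reserved_example) / 2         # 6291456 KiB: stress half of the free memory
shortage = mem_total_example - vm_reserved_example + 524288  # 13107200 KiB: oversubscribe by 512 MiB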