def output_check(nodeinfo_output):
    """
    Compare the fields reported by ``virsh nodeinfo`` with host values.

    CPU model, number of CPUs, CPU frequency, CPU socket(s), Core(s) per
    socket and Memory size are extracted with ``_check_nodeinfo`` and
    compared with values read from the host.

    :param nodeinfo_output: Output of ``virsh nodeinfo``; handed to
                            ``_check_nodeinfo`` to extract each field.
    :raise error.TestFail: If a field does not agree with the host value.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cpus_os = utils.count_cpus()
    if int(cpus_nodeinfo) != cpus_os:
        raise error.TestFail(
            "Virsh nodeinfo output didn't match number of "
            "CPU(s)")

    # Check CPU frequency
    cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                             'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
           "awk '{print $4}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    # Single-argument print() behaves the same on Python 2 and 3.
    print(cpu_frequency_os)
    # Plain equality instead of re.match(): re.match() only anchors the
    # start of the string, so "24" would wrongly be accepted for "2400"
    # and an empty nodeinfo field would match anything.
    if cpu_frequency_nodeinfo != cpu_frequency_os:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "frequency")

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
    # nodeinfo reports sockets per NUMA cell; "//" keeps the integer
    # result the Python 2 "/" produced, independent of the interpreter.
    cpu_sockets_os = int(
        cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
    if cpu_sockets_os != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s)")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Core(s) per socket', 4)
    cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    # Equality, not a prefix match: "1" must not be accepted for "16".
    if cores_per_socket_nodeinfo != cores_per_socket_os:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
def output_check(nodeinfo_output):
    """
    Verify each ``virsh nodeinfo`` field against the host operating system.

    The fields checked are CPU model, CPU(s), CPU frequency, CPU
    socket(s), Core(s) per socket and Memory size; each one is pulled out
    of the output by ``_check_nodeinfo``.

    :param nodeinfo_output: Output of ``virsh nodeinfo``; handed to
                            ``_check_nodeinfo`` to extract each field.
    :raise error.TestFail: On the first field that disagrees with the host.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cpus_os = utils.count_cpus()
    if int(cpus_nodeinfo) != cpus_os:
        raise error.TestFail("Virsh nodeinfo output didn't match number of "
                             "CPU(s)")

    # Check CPU frequency
    cpu_frequency_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
           "awk '{print $4}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    # Single-argument print() is valid on both Python 2 and Python 3.
    print(cpu_frequency_os)
    # re.match() is a prefix match (and an empty pattern matches
    # everything), so compare the two strings for equality instead.
    if cpu_frequency_nodeinfo != cpu_frequency_os:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "frequency")

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
    # Sockets are reported per NUMA cell; use explicit floor division so
    # the result stays an integer regardless of the interpreter version.
    cpu_sockets_os = int(
        cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
    if cpu_sockets_os != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s)")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'Core(s) per socket', 4)
    cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    # Equality, not a prefix match: "1" must not be accepted for "16".
    if cores_per_socket_nodeinfo != cores_per_socket_os:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
def output_check(nodeinfo_output):
    """
    Compare the fields reported by ``virsh nodeinfo`` with host values.

    CPU model, CPU(s), CPU socket(s), Core(s) per socket and Memory size
    must agree with the host. CPU frequency is only compared loosely and
    a mismatch is logged rather than treated as a failure.

    :param nodeinfo_output: Output of ``virsh nodeinfo``; handed to
                            ``_check_nodeinfo`` to extract each field.
    :raise error.TestFail: If a strictly checked field does not match.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail("Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cpus_os = utils.count_cpus()
    if int(cpus_nodeinfo) != cpus_os:
        raise error.TestFail("Virsh nodeinfo output didn't match number of "
                             "CPU(s)")

    # Check CPU frequency
    cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                             "CPU frequency", 3)
    cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
           "awk '{print $4}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                  cpu_frequency_nodeinfo, cpu_frequency_os)
    #
    # Matching CPU frequency is not an exact science with modern
    # processors and operating systems: the execution speed can vary with
    # the current workload to save energy and keep cool, and the two
    # values are sampled at different points in time.
    # So take the absolute difference and check that it is within 20
    # percent of each value. This check never fails the test; it only
    # prints a debug message and moves on. For the same reason a value
    # that cannot be parsed (e.g. no 'cpu MHz' line on this host) or that
    # is zero is logged instead of aborting with ValueError or
    # ZeroDivisionError.
    try:
        freq_nodeinfo = int(cpu_frequency_nodeinfo)
        freq_os = int(cpu_frequency_os)
    except ValueError:
        logging.debug("Unable to parse CPU frequency, skipping the "
                      "frequency comparison")
    else:
        diffval = abs(freq_nodeinfo - freq_os)
        if (not freq_nodeinfo or not freq_os or
                float(diffval) / float(freq_nodeinfo) > 0.20 or
                float(diffval) / float(freq_os) > 0.20):
            logging.debug("Virsh nodeinfo output didn't match CPU "
                          "frequency within 20 percent")

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(_check_nodeinfo(nodeinfo_output,
                                               "CPU socket(s)", 3))
    cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, "NUMA cell(s)", 3)
    # Sockets are reported per NUMA cell; use explicit floor division so
    # the result stays an integer regardless of the interpreter version.
    cpu_sockets_os = int(cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
    if cpu_sockets_os != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s)")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                "Core(s) per socket", 4)
    cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    # Equality instead of re.match(): re.match() is a prefix match, so
    # "1" would be accepted for "16" and an empty field for anything.
    if cores_per_socket_nodeinfo != cores_per_socket_os:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket")

    # Check Memory size
    memory_size_nodeinfo = int(_check_nodeinfo(nodeinfo_output,
                                               "Memory size", 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
def run(test, params, env):
    """
    Test migration under stress.

    The part of the test visible here validates the parameters, works out
    how much memory the stress tool should use and prepares the stress
    tool on the host.

    :param test: Test object; ``cancel`` and ``error`` are called on it.
    :param params: Dictionary with the test parameters.
    :param env: Test environment; all of its VMs are used.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(params.get("migrate_dest_host",
                                                  "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)
    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    remote_stress = params.get("migration_stress_remote", "no") == "yes"
    host_stress = params.get("migration_stress_host", "no") == "yes"
    vms_stress = params.get("migration_stress_vms", "no") == "yes"
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("%s_args" % stress_tool)
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))
    ubuntu_dep = ['build-essential', 'git']
    hstress = rstress = None
    vstress = {}

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division keeps the byte count an integer on Python 3 too.
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    # stress_args is None when no "<stress_tool>_args" parameter is set;
    # guard so the membership test does not raise TypeError.
    if stress_args and "vm-bytes" in stress_args:
        params["%s_args" % stress_tool] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if host_stress:
        # remove package manager installed tool to avoid conflict
        if not utils_package.package_remove(stress_tool):
            # The original call had a %s placeholder but no argument.
            logging.error("Existing %s is not removed", stress_tool)
        if "stress-ng" in stress_tool and 'Ubuntu' in utils_misc.get_distro():
            params['stress-ng_dependency_packages_list'] = ubuntu_dep
        try:
            hstress = utils_test.HostStress(stress_tool, params)
            hstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)
def run(test, params, env):
    """
    Test the command virsh nodememstats

    (1) Call the virsh nodememstats command
    (2) Get the output
    (3) Check the against /proc/meminfo output
    (4) Call the virsh nodememstats command with an unexpected option
    (5) Call the virsh nodememstats command with libvirtd service stop

    :param test: Test object; ``fail`` is called on it.
    :param params: Dictionary with the test parameters ("itr",
                   "status_error", "delta", "libvirtd",
                   "virsh_nodememstats_options").
    :param env: Test environment (not used by this test).
    """
    # Initialize the variables
    actual = {}
    deltas = []
    name_stats = ['total', 'free', 'buffers', 'cached']
    itr = int(params.get("itr"))
    # Read once, up front: the value is loop-invariant and is needed after
    # the loop even when itr is 0.
    status_error = params.get("status_error")

    def virsh_check_nodememtats(actual_stats, expected_stats, delta):
        """
        Check the nodememstats output value with /proc/meminfo value

        :param actual_stats: Values read from /proc/meminfo, in MB.
        :param expected_stats: Values parsed from virsh nodememstats, in MB.
        :param delta: Largest tolerated deviation for every statistic
                      except 'total', which must match exactly.
        :return: Dictionary with the absolute deviation per statistic.
        """
        delta_stats = {}
        for name in name_stats:
            delta_stats[name] = abs(actual_stats[name] - expected_stats[name])
            if 'total' in name:
                if not delta_stats[name] == 0:
                    test.fail("Command 'virsh nodememstats' not"
                              " succeeded as the value for %s is "
                              "deviated by %d\nThe total memory "
                              "value is deviating-check"
                              % (name, delta_stats[name]))
            else:
                if delta_stats[name] > delta:
                    test.fail("Command 'virsh nodememstats' not "
                              "succeeded as the value for %s"
                              " is deviated by %d"
                              % (name, delta_stats[name]))
        return delta_stats

    # Prepare libvirtd service. Bind the name unconditionally so the later
    # comparisons cannot raise NameError when the parameter is not set.
    libvirtd = None
    if "libvirtd" in params:
        libvirtd = params.get("libvirtd")
        if libvirtd == "off":
            utils_libvirtd.libvirtd_stop()

    # From the beginning of a line, group 1 is one or more
    # word-characters, followed by zero or more whitespace characters and
    # a ':', then one or more whitespace characters, followed by group 2,
    # which is one or more digit characters, then one whitespace character
    # followed by a unit such as 'kB' or 'KiB', e.g as below
    # total  :              3809340 kB
    # total  :              3809340 KiB
    regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)\s\w+")

    try:
        # Get the option for the test case
        option = params.get("virsh_nodememstats_options")
        if option == "max":
            cell_dict = utils_test.libvirt.get_all_cells()
            option = len(list(cell_dict.keys()))

        # Run test case for 10 iterations
        # (default can be changed in subtests.cfg file)
        # and print the final statistics
        for _ in range(itr):
            output = virsh.nodememstats(option)

            # Get the status of the virsh command executed
            status = output.exit_status

            if status_error == "yes":
                if status == 0:
                    if libvirtd == "off":
                        test.fail("Command 'virsh nodememstats' "
                                  "succeeded with libvirtd service"
                                  " stopped, incorrect")
                    else:
                        test.fail("Command 'virsh nodememstats %s' "
                                  "succeeded (incorrect command)" % option)
            elif status_error == "no":
                if status == 0:
                    if option:
                        return
                    # Normalise the value to MBs
                    expected = {}
                    for line in output.stdout.split('\n'):
                        match_obj = regex_obj.search(line)
                        # Due to the extra space in the list
                        if match_obj is not None:
                            name = match_obj.group(1)
                            value = match_obj.group(2)
                            expected[name] = int(value) // 1024

                    # Get the actual value from /proc/meminfo and
                    # normalise to MBs
                    actual['total'] = int(utils_memory.memtotal()) // 1024
                    actual['free'] = int(utils_memory.freememtotal()) // 1024
                    actual['buffers'] = int(
                        utils_memory.read_from_meminfo('Buffers')) // 1024
                    actual['cached'] = int(
                        utils_memory.read_from_meminfo('Cached')) // 1024

                    # Currently the delta value is kept at 200 MB this can
                    # be tuned based on the accuracy
                    # Check subtests.cfg for more details
                    delta = int(params.get("delta"))
                    deltas.append(
                        virsh_check_nodememtats(actual, expected, delta))
                else:
                    test.fail("Command virsh nodememstats %s not "
                              "succeeded:\n%s" % (option, status))
    finally:
        # Recover libvirtd service start on every exit path, including
        # the early return and test failures.
        if libvirtd == "off":
            utils_libvirtd.libvirtd_start()

    # Print the deviated values for all iterations
    if status_error == "no":
        logging.debug("The following is the deviations from "
                      "the actual(/proc/meminfo) and expected"
                      " value(output of virsh nodememstats)")
        for i, delta_stats in enumerate(deltas):
            logging.debug("iteration %d:", i)
            for name in name_stats:
                logging.debug("%19s : %d", name, delta_stats[name])
def output_check(nodeinfo_output):
    """
    Compare ``virsh nodeinfo`` with the host and ``virsh capabilities``.

    CPU model, CPU(s), CPU socket(s), Core(s) per socket, Thread(s) per
    core and Memory size are checked; CPU frequency is only compared
    loosely and a mismatch is logged, not failed.

    Failures are reported through ``test.fail``; ``test`` is not a
    parameter and has to be provided by the surrounding scope.

    :param nodeinfo_output: Output of ``virsh nodeinfo``; handed to
                            ``_check_nodeinfo`` to extract each field.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_arch = platform.machine()
    if not re.match(cpu_model_nodeinfo, cpu_arch):
        test.fail("Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs, nodeinfo CPUs represent online threads in the
    # system, check all online cpus in sysfs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    # Use stdout_text like the other commands in this function: the
    # values are compared with the text returned by _check_nodeinfo.
    cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
    cpus_online = process.run(cmd, ignore_status=True,
                              shell=True).stdout_text.strip()
    cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
    cpus_total = process.run(cmd, ignore_status=True,
                             shell=True).stdout_text.strip()
    # Without an 'online' file for cpu0 the counts above miss it, so add
    # it back.
    if not os.path.exists('/sys/devices/system/cpu/cpu0/online'):
        cpus_online = str(int(cpus_online) + 1)
        cpus_total = str(int(cpus_total) + 1)

    logging.debug("host online cpus are %s", cpus_online)
    logging.debug("host total cpus are %s", cpus_total)

    if cpus_nodeinfo != cpus_online:
        if 'ppc' in cpu_arch:
            if cpus_nodeinfo != cpus_total:
                test.fail("Virsh nodeinfo output of CPU(s) on"
                          " ppc did not match all threads in "
                          "the system")
        else:
            test.fail("Virsh nodeinfo output didn't match "
                      "number of CPU(s)")

    # Check CPU frequency, frequency is under clock for ppc
    cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                             'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock|BogoMIPS' | "
           "head -n1 | awk -F: '{print $2}' | awk -F. '{print $1}'")
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    cpu_frequency_os = cmd_result.stdout_text.strip()
    logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                  cpu_frequency_nodeinfo, cpu_frequency_os)
    #
    # Matching CPU Frequency is not an exact science in todays modern
    # processors and OS's. CPU's can have their execution speed varied
    # based on current workload in order to save energy and keep cool.
    # Thus since we're getting the values at disparate points in time,
    # we cannot necessarily do a pure comparison.
    # So, let's get the absolute value of the difference and ensure
    # that it's within 20 percent of each value to give us enough of
    # a "fudge" factor to declare "close enough". Don't return a failure
    # just print a debug message and move on.
    diffval = abs(int(cpu_frequency_nodeinfo) - int(cpu_frequency_os))
    if (float(diffval) / float(cpu_frequency_nodeinfo) > 0.20 or
            float(diffval) / float(cpu_frequency_os) > 0.20):
        logging.debug("Virsh nodeinfo output didn't match CPU "
                      "frequency within 20 percent")

    # Get CPU topology from virsh capabilities xml
    cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
    logging.debug("Cpu topology in virsh capabilities output: %s",
                  cpu_topology)

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
    # total sockets in the system, so get total sockets in one node and
    # check with it
    node_info = utils_misc.NumaInfo()
    node_online_list = node_info.get_online_nodes()
    cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
    cmd += "/cpu*/topology/physical_package_id | uniq |wc -l"
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    total_sockets_in_node = int(cmd_result.stdout_text.strip())
    if total_sockets_in_node != cpu_sockets_nodeinfo:
        test.fail("Virsh nodeinfo output didn't match CPU "
                  "socket(s) of host OS")
    if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
        test.fail("Virsh nodeinfo output didn't match CPU "
                  "socket(s) of virsh capabilities output")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Core(s) per socket', 4)
    cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    cores_per_socket_os = cmd_result.stdout_text.strip()
    spec_numa = False
    if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
        # for spec NUMA arch, the output of nodeinfo is in a spec format
        cpus_os = utils_misc.get_cpu_info().get("CPU(s)")
        numa_cells_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                              'NUMA cell(s)', 3)
        if (re.match(cores_per_socket_nodeinfo, cpus_os) and
                re.match(numa_cells_nodeinfo, "1")):
            spec_numa = True
        else:
            test.fail("Virsh nodeinfo output didn't match "
                      "CPU(s) or Core(s) per socket of host OS")
    if cores_per_socket_nodeinfo != cpu_topology['cores']:
        test.fail("Virsh nodeinfo output didn't match Core(s) "
                  "per socket of virsh capabilities output")

    # Check Thread(s) per core
    threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Thread(s) per core', 4)
    # The message literals below gained the separating spaces that were
    # missing between the concatenated pieces.
    if not spec_numa:
        if threads_per_core_nodeinfo != cpu_topology['threads']:
            test.fail("Virsh nodeinfo output didn't match "
                      "Thread(s) per core of virsh "
                      "capabilities output")
    else:
        if threads_per_core_nodeinfo != "1":
            test.fail("Virsh nodeinfo output didn't match "
                      "Thread(s) per core of virsh "
                      "capabilities output")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = 0
    if libvirt_version.version_compare(2, 0, 0):
        # Sum MemTotal over the online NUMA nodes
        for i in node_online_list:
            node_memory = node_info.read_from_node_meminfo(i, 'MemTotal')
            memory_size_os += int(node_memory)
    else:
        memory_size_os = utils_memory.memtotal()
    logging.debug('The host total memory from nodes is %s', memory_size_os)

    if memory_size_nodeinfo != memory_size_os:
        test.fail("Virsh nodeinfo output didn't match "
                  "Memory size")
def run_stress_kernel_compile(tests, params, env):
    """
    Boot VMs and run kernel compile inside VM parallel.

    1) Boot up VMs:
       The total vmem of the VMs is $overcommit times the host's mem,
       shared equally between the guests.
    2) Launch kernel compile inside every guest.

    :param tests: QEMU test object.
    :param params: Dictionary with the test parameters.
    :param env: Dictionary with test environment.
    :raise error.TestNAError: If the per-guest memory would be below 256.
    :raise error.TestFail: If a guest cannot be logged into, the kernel
                           download fails or a guest stops answering ping.
    """
    # Local import: only the polling loop below needs it.
    import time

    def kernelcompile(session, vm_name):
        """Download the kernel source in one guest and run the test cmd."""
        vm = env.get_vm(vm_name)
        ip = vm.get_address()
        path = params.get("download_url")
        logging.info("kernel path = %s" % path)
        get_kernel_cmd = "wget %s" % path
        try:
            status, output = session.cmd_status_output(get_kernel_cmd,
                                                       timeout=240)
            if status != 0:
                logging.error(output)
                raise error.TestFail("Fail to download the kernel"
                                     " in %s" % vm_name)
            else:
                logging.info("Completed download the kernel src"
                             " in %s" % vm_name)
            test_cmd = params.get("test_cmd")
            status, output = session.cmd_status_output(test_cmd, timeout=1200)
            if status != 0:
                logging.error(output)
        finally:
            # Whatever happened above, the guest must still answer ping.
            status, _ = utils_test.ping(ip, count=10, timeout=30)
            if status != 0:
                raise error.TestFail("vm no response, pls check serial log")

    over_c = float(params.get("overcommit", 1.5))
    guest_number = int(params.get("guest_number", "1"))

    if guest_number < 1:
        logging.warning("At least boot up one guest for this test,"
                        " set up guest number to 1")
        guest_number = 1

    for tag in range(1, guest_number):
        params["vms"] += " stress_guest_%s" % tag

    mem_host = utils_memory.memtotal() / 1024
    vmem = int(mem_host * over_c / guest_number)

    if vmem < 256:
        raise error.TestNAError("The memory size set for guest is too small."
                                " Please try less than %s guests"
                                " in this host." % guest_number)
    params["mem"] = vmem
    params["start_vm"] = "yes"
    login_timeout = int(params.get("login_timeout", 360))

    env_process.preprocess(tests, params, env)

    sessions_info = []
    for vm_name in params["vms"].split():
        vm = env.get_vm(vm_name)
        vm.verify_alive()
        session = vm.wait_for_login(timeout=login_timeout)
        if not session:
            raise error.TestFail("Could not log into guest %s" % vm_name)

        sessions_info.append([session, vm_name])

    # run kernel compile in vms
    # Bound before the try so the cleanup below can always iterate it.
    bg_threads = []
    try:
        logging.info("run kernel compile in vms")
        for session, vm_name in sessions_info:
            bg_thread = utils_test.BackgroundTest(kernelcompile,
                                                  (session, vm_name))
            bg_thread.start()
            bg_threads.append(bg_thread)

        # Poll once per second instead of spinning: a tight loop would
        # burn a host CPU for the whole duration of the compile.
        while any(bg_thread.is_alive() for bg_thread in bg_threads):
            time.sleep(1)
    finally:
        try:
            for bg_thread in bg_threads:
                if bg_thread:
                    bg_thread.join()
        finally:
            for session_info in sessions_info:
                session_info[0].close()
def run(test, params, env):
    """
    Check KSM can be started automaticly when ksmtuned threshold is reached

    1. Get the memory of your host and the KSM_THRES_COEF
    2. Boot a guest with memory less than KSM_THRES_COEF threshold
    3. Get the memory used in host of process qemu-kvm
    4. Get the free memory in host
    5. If both the free memory size is not smaller than the threshold and
       guest used memory + threshold is not bigger than total memory in
       host. Check the ksm status in host. Ksm should not start in the
       host
    6. Repeat step 2~5 under it broke the rule in step 5

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    def check_ksm(mem, threshold_reached=False):
        """
        Boot a guest, load its memory and check the KSM status on the host.

        :param mem: Boot guest with given memory, in KB
        :param threshold_reached: ksmtuned threshold is reached or not
        """
        def heavyload_install():
            """Install heavyload from the winutils volume if it is absent."""
            if session.cmd_status(test_install_cmd) != 0:
                logging.warning("Could not find installed heavyload in guest, "
                                "will install it via winutils.iso ")
                winutil_drive = utils_misc.get_winutils_vol(session)
                if not winutil_drive:
                    test.cancel("WIN_UTILS CDROM not found.")
                install_cmd = params["install_cmd"] % winutil_drive
                session.cmd(install_cmd)

        def check_qemu_used_mem(qemu_pid, mem):
            """Return True once QEMU uses at least mem_thres of ``mem``."""
            qemu_used_page = process.getoutput(get_qemu_used_mem % qemu_pid,
                                               shell=True)
            qemu_used_mem = float(qemu_used_page) * pagesize
            if qemu_used_mem < mem * mem_thres:
                return False
            return True

        params['mem'] = mem // 1024
        params['start_vm'] = 'yes'
        vm_name = params['main_vm']
        env_process.preprocess_vm(test, params, env, vm_name)
        vm = env.get_vm(vm_name)
        session = vm.wait_for_login()
        qemu_pid = vm.get_pid()
        if params["os_type"] == "linux":
            params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes %sM' %
                                     (int(params['mem']) // 2))
            stress_test = VMStress(vm, "stress", params)
            stress_test.load_stress_tool()
        else:
            install_path = params["install_path"]
            test_install_cmd = 'dir "%s" | findstr /I heavyload' % install_path
            heavyload_install()
            heavyload_bin = r'"%s\heavyload.exe" ' % install_path
            heavyload_options = ["/MEMORY 100", "/START"]
            start_cmd = heavyload_bin + " ".join(heavyload_options)
            stress_tool = BackgroundTest(
                session.cmd, (start_cmd, stress_timeout, stress_timeout))
            stress_tool.start()
            if not utils_misc.wait_for(stress_tool.is_alive, stress_timeout):
                test.error("Failed to start heavyload process")
        if not utils_misc.wait_for(lambda: check_qemu_used_mem(qemu_pid, mem),
                                   stress_timeout, 10, 10):
            test.error("QEMU used memory doesn't reach %s of guest mem %sM in "
                       "%ss" % (mem_thres, mem // 1024, stress_timeout))
        time.sleep(30)
        free_mem_host = utils_memory.freememtotal()
        ksm_status = process.getoutput(params['cmd_check_ksm_status'])
        vm.destroy()
        logging.info(
            "The ksm threshold is %sM, QEMU used memory is %sM, "
            "and the total free memory on host is %sM",
            ksm_thres // 1024, mem // 1024, free_mem_host // 1024)
        if threshold_reached:
            if free_mem_host > ksm_thres:
                test.error("Host memory is not consumed as much as expected")
            if ksm_status == '0':
                test.fail("KSM should be running")
        else:
            if free_mem_host < ksm_thres:
                test.error("Host memory is consumed too much more than "
                           "expected")
            if ksm_status != '0':
                test.fail("KSM should not be running")

    total_mem_host = utils_memory.memtotal()
    utils_memory.drop_caches()
    free_mem_host = utils_memory.freememtotal()
    ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True)
    ksm_thres = int(total_mem_host *
                    (int(re.findall('\\d+', ksm_thres)[0]) / 100))
    guest_mem = (free_mem_host - ksm_thres) // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    status_ksm_service = process.system(params['cmd_status_ksmtuned'],
                                        ignore_status=True)
    if status_ksm_service != 0:
        process.run(params['cmd_start_ksmtuned'])
    # The names below are read by check_ksm() and its helpers.
    stress_timeout = params.get("stress_timeout", 1800)
    mem_thres = float(params.get("mem_thres", 0.95))
    get_qemu_used_mem = params['cmd_get_qemu_used_mem']
    pagesize = utils_memory.getpagesize()
    check_ksm(guest_mem)

    ksm_config_file = params['ksm_config_file']
    backup_file = ksm_config_file + '.backup'
    copyfile(ksm_config_file, backup_file)
    threshold = params.get_numeric('ksm_threshold')
    # The config edit and the restart are inside the try so the original
    # file is restored even when one of them fails.
    try:
        with open(ksm_config_file, "a+") as f:
            f.write('%s=%s' % (params['ksm_thres_conf'], threshold))
        process.run(params['cmd_restart_ksmtuned'])
        # Truncate to int like the first threshold above; otherwise the
        # guest memory derived from it (and params['mem']) becomes a float.
        ksm_thres = int(total_mem_host * (threshold / 100))
        guest_mem = total_mem_host - ksm_thres // 2
        if arch.ARCH in ('ppc64', 'ppc64le'):
            guest_mem = guest_mem - guest_mem % (256 * 1024)
        check_ksm(guest_mem, threshold_reached=True)
    finally:
        copyfile(backup_file, ksm_config_file)
        os.remove(backup_file)
        # Leave ksmtuned the way it was found: stop it if it was started
        # for this test, otherwise restart it on the restored config.
        if status_ksm_service != 0:
            process.run(params['cmd_stop_ksmtuned'])
        else:
            process.run(params['cmd_restart_ksmtuned'])
if os.path.exists(e_rh): utils.run("echo 'never' > %s" % e_rh) new_ksm = True else: try: utils.run("modprobe ksm") utils.run("ksmctl start 5000 100") except error.CmdError, details: raise error.TestFail("Failed to load KSM: %s" % details) # host_reserve: mem reserve kept for the host system to run host_reserve = int(params.get("ksm_host_reserve", -1)) if (host_reserve == -1): # default host_reserve = MemAvailable + one_minimal_guest(128MB) # later we add 64MB per additional guest host_reserve = ((utils_memory.memtotal() - utils_memory.read_from_meminfo("MemFree")) / 1024 + 128) # using default reserve _host_reserve = True else: _host_reserve = False # guest_reserve: mem reserve kept to avoid guest OS to kill processes guest_reserve = int(params.get("ksm_guest_reserve", -1)) if (guest_reserve == -1): # default guest_reserve = minimal_system_mem(256MB) # later we add tmpfs overhead guest_reserve = 256 # using default reserve _guest_reserve = True
def run(test, params, env):
    """
    KVM multi test:
    1) Log into guests
    2) Check all the nics available or not
    3) Ping among guest nic and host
       3.1) Ping with different packet size
       3.2) Flood ping test
       3.3) Final ping test
    4) Transfer files among guest nics and host
       4.1) Create file by dd command in guest
       4.2) Transfer file between nics
       4.3) Compare original file and transferred file
    5) ping among different nics
       5.1) Ping with different packet size
       5.2) Flood ping test
       5.3) Final ping test
    6) Transfer files among different nics
       6.1) Create file by dd command in guest
       6.2) Transfer file between nics
       6.3) Compare original file and transferred file
    7) Repeat step 3 - 6 on every nic.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def ping(session, nic, dst_ip, strict_check, flood_minutes):
        """
        Ping dst_ip with several packet sizes, flood ping, then ping again.

        :param strict_check: "yes" to require a zero packet loss ratio;
                             any other value only checks the exit status.
        """
        # Compare with "yes" once: the value is a "yes"/"no" string, so a
        # plain truth test would always be true.
        strict = strict_check == "yes"
        d_packet_size = [1, 4, 48, 512, 1440, 1500, 1505, 4054, 4055, 4096,
                         4192, 8878, 9000, 32767, 65507]
        packet_size = params.get("packet_size", "").split() or d_packet_size
        for size in packet_size:
            error.context("Ping with packet size %s" % size, logging.info)
            status, output = utils_test.ping(dst_ip, 10, interface=nic,
                                             packetsize=size,
                                             timeout=30, session=session)
            if strict:
                ratio = utils_test.get_loss_ratio(output)
                if ratio != 0:
                    raise error.TestFail("Loss ratio is %s for packet size"
                                         " %s" % (ratio, size))
            else:
                if status != 0:
                    raise error.TestFail("Ping returns non-zero value %s" %
                                         output)

        error.context("Flood ping test", logging.info)
        utils_test.ping(dst_ip, None, interface=nic, flood=True,
                        output_func=None, timeout=flood_minutes * 60,
                        session=session)
        error.context("Final ping test", logging.info)
        counts = params.get("ping_counts", 100)
        status, output = utils_test.ping(dst_ip, counts, interface=nic,
                                         timeout=float(counts) * 1.5,
                                         session=session)
        if strict:
            ratio = utils_test.get_loss_ratio(output)
            if ratio != 0:
                raise error.TestFail("Packet loss ratio is %s after flood"
                                     % ratio)
        else:
            if status != 0:
                raise error.TestFail("Ping returns non-zero value %s" %
                                     output)

    def file_transfer(session, src, dst):
        """Copy a file src -> dst -> src and compare the md5sums."""
        username = params.get("username", "")
        password = params.get("password", "")
        src_path = "/tmp/1"
        dst_path = "/tmp/2"
        port = int(params["file_transfer_port"])

        cmd = "dd if=/dev/urandom of=%s bs=100M count=1" % src_path
        cmd = params.get("file_create_cmd", cmd)

        error.context("Create file by dd command, cmd: %s" % cmd, logging.info)
        session.cmd(cmd)

        transfer_timeout = int(params.get("transfer_timeout"))
        log_filename = "scp-from-%s-to-%s.log" % (src, dst)
        error.context("Transfer file from %s to %s" % (src, dst), logging.info)
        remote.scp_between_remotes(src, dst, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        src_path = dst_path
        dst_path = "/tmp/3"
        log_filename = "scp-from-%s-to-%s.log" % (dst, src)
        error.context("Transfer file from %s to %s" % (dst, src), logging.info)
        remote.scp_between_remotes(dst, src, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        error.context("Compare original file and transferred file",
                      logging.info)

        cmd1 = "md5sum /tmp/1"
        cmd2 = "md5sum /tmp/3"
        md5sum1 = session.cmd(cmd1).split()[0]
        md5sum2 = session.cmd(cmd2).split()[0]
        if md5sum1 != md5sum2:
            raise error.TestError("File changed after transfer")

    nic_interface_list = []
    check_irqbalance_cmd = params.get("check_irqbalance_cmd")
    stop_irqbalance_cmd = params.get("stop_irqbalance_cmd")
    start_irqbalance_cmd = params.get("start_irqbalance_cmd")
    status_irqbalance = params.get("status_irqbalance")
    vms = params["vms"].split()
    host_mem = utils_memory.memtotal() / (1024 * 1024)
    host_cpu_count = len(utils_misc.get_cpu_processors())
    vhost_count = 0
    if params.get("vhost"):
        vhost_count = 1
    if host_cpu_count < (1 + vhost_count) * len(vms):
        raise error.TestError("The host don't have enough cpus to start guest."
                              " cpus: %d, minimum of vcpus and vhost: %d" %
                              (host_cpu_count, (1 + vhost_count) * len(vms)))
    # Share host memory and cpus equally between the guests
    params['mem'] = host_mem / len(vms) * 1024
    params['smp'] = host_cpu_count / len(vms) - vhost_count
    if params['smp'] % 2 != 0:
        params['vcpu_sockets'] = 1
    params["start_vm"] = "yes"
    for vm_name in vms:
        env_process.preprocess_vm(test, params, env, vm_name)
    timeout = float(params.get("login_timeout", 360))
    strict_check = params.get("strick_check", "no")
    host_ip = utils_net.get_ip_address_by_interface(params.get("netdst"))
    host_ip = params.get("srchost", host_ip)
    flood_minutes = float(params["flood_minutes"])
    error.context("Check irqbalance service status", logging.info)
    o = process.system_output(check_irqbalance_cmd, ignore_status=True)
    check_stop_irqbalance = False
    if re.findall(status_irqbalance, o):
        logging.debug("stop irqbalance")
        process.run(stop_irqbalance_cmd)
        check_stop_irqbalance = True
        o = process.system_output(check_irqbalance_cmd, ignore_status=True)
        if re.findall(status_irqbalance, o):
            raise error.TestError("Can not stop irqbalance")

    # From here on irqbalance may be stopped; the finally below restarts
    # it even when one of the checks fails.
    try:
        thread_list = []
        for vm_name in vms:
            guest_ifname = ""
            guest_ip = ""
            vm = env.get_vm(vm_name)
            session = vm.wait_for_login(timeout=timeout)
            thread_list.extend(vm.vcpu_threads)
            thread_list.extend(vm.vhost_threads)
            error.context("Check all the nics available or not", logging.info)
            for index, nic in enumerate(vm.virtnet):
                guest_ifname = utils_net.get_linux_ifname(session, nic.mac)
                guest_ip = vm.get_address(index)
                if not (guest_ifname and guest_ip):
                    # "+=" so the first half of the message is kept
                    err_log = "vms %s get ip or ifname failed." % vm_name
                    err_log += " ifname: %s, ip: %s." % (guest_ifname,
                                                         guest_ip)
                    raise error.TestFail(err_log)
                nic_interface = [guest_ifname, guest_ip, session]
                nic_interface_list.append(nic_interface)

        error.context("Pin vcpus and vhosts to host cpus", logging.info)
        host_numa_nodes = utils_misc.NumaInfo()
        vthread_num = 0
        for numa_node_id in host_numa_nodes.nodes:
            numa_node = host_numa_nodes.nodes[numa_node_id]
            for _ in range(len(numa_node.cpus)):
                if vthread_num >= len(thread_list):
                    break
                vcpu_tid = thread_list[vthread_num]
                logging.debug("pin vcpu/vhost thread(%s) to cpu(%s)" %
                              (vcpu_tid, numa_node.pin_cpu(vcpu_tid)))
                vthread_num += 1

        # ping and file transfer test
        for src_ip_index, src_ip_info in enumerate(nic_interface_list):
            error.context("Ping test from guest to host", logging.info)
            ping(src_ip_info[2], src_ip_info[0], host_ip, strict_check,
                 flood_minutes)
            error.context("File transfer test between guest and host",
                          logging.info)
            file_transfer(src_ip_info[2], src_ip_info[1], host_ip)
            for dst_ip in nic_interface_list[src_ip_index:]:
                if src_ip_info[1] == dst_ip[1]:
                    continue
                txt = "Ping test between %s and %s" % (src_ip_info[1],
                                                       dst_ip[1])
                error.context(txt, logging.info)
                ping(src_ip_info[2], src_ip_info[0], dst_ip[1], strict_check,
                     flood_minutes)
                txt = "File transfer test between %s " % src_ip_info[1]
                txt += "and %s" % dst_ip[1]
                error.context(txt, logging.info)
                file_transfer(src_ip_info[2], src_ip_info[1], dst_ip[1])
    finally:
        if check_stop_irqbalance:
            process.run(start_irqbalance_cmd)
def output_check(nodeinfo_output):
    """
    Cross-check ``virsh nodeinfo`` output against the host OS and against
    the CPU topology found in the virsh capabilities XML.

    :param nodeinfo_output: nodeinfo text; it is only passed on to
                            ``_check_nodeinfo`` to extract single fields.
    :raise error.TestFail: if CPU model, number of CPUs, sockets, cores per
                           socket, threads per core or memory size differ.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cpus_os = utils.count_cpus()
    if int(cpus_nodeinfo) != cpus_os:
        raise error.TestFail("Virsh nodeinfo output didn't match number of "
                             "CPU(s)")

    # Check CPU frequency
    cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                             'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
           "awk '{print $4}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                  cpu_frequency_nodeinfo, cpu_frequency_os)
    #
    # Matching CPU frequency is not an exact science on modern processors
    # and operating systems: the execution speed can vary with the current
    # workload in order to save energy and keep cool, and the two values
    # are sampled at different points in time. So only check that the
    # absolute difference is within 20 percent of each value, and never
    # fail on it - just print a debug message and move on.
    #
    try:
        nodeinfo_mhz = int(cpu_frequency_nodeinfo)
        os_mhz = int(cpu_frequency_os)
        diffval = abs(nodeinfo_mhz - os_mhz)
        if (float(diffval) / float(nodeinfo_mhz) > 0.20 or
                float(diffval) / float(os_mhz) > 0.20):
            logging.debug("Virsh nodeinfo output didn't match CPU "
                          "frequency within 20 percent")
    except (TypeError, ValueError, ZeroDivisionError):
        # An empty/non-numeric or zero reading (e.g. no 'cpu MHz' line in
        # /proc/cpuinfo) must not abort a check that is informational only.
        logging.debug("CPU frequency values are not comparable: "
                      "nodeinfo=%r os=%r",
                      cpu_frequency_nodeinfo, cpu_frequency_os)

    # Get CPU topology from virsh capabilities xml
    cpu_topolopy = capability_xml.CapabilityXML()['cpu_topolopy']
    logging.debug("Cpu topolopy in virsh capabilities output: %s",
                  cpu_topolopy)

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
    # Floor division keeps the per-cell socket count an integer on both
    # Python 2 and Python 3.
    cpu_sockets_os = int(
        cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
    if cpu_sockets_os != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s) of host OS")
    if cpu_sockets_nodeinfo != int(cpu_topolopy['sockets']):
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s) of virsh capabilities output")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Core(s) per socket', 4)
    cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    # Exact comparison: a regex prefix match would accept nodeinfo "1"
    # against a host value of "16".
    if cores_per_socket_nodeinfo != cores_per_socket_os:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket of host OS")
    if cores_per_socket_nodeinfo != cpu_topolopy['cores']:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket of virsh capabilities output")

    # Check Thread(s) per core
    threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Thread(s) per core', 4)
    if threads_per_core_nodeinfo != cpu_topolopy['threads']:
        raise error.TestFail(
            "Virsh nodeinfo output didn't match Thread(s) "
            "per core of virsh capabilities output")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
def run(test, params, env):
    """
    Check KSM can be started automatically when ksmtuned threshold is reached

    1. Get the memory of your host and the KSM_THRES_COEF
    2. Boot a guest with memory less than KSM_THRES_COEF threshold
    3. Get the memory used in host of process qemu-kvm
    4. Get the free memory in host
    5. If both the free memory size is not smaller than the threshold and
       guest used memory + threshold is not bigger than total memory in
       host. Check the ksm status in host. Ksm should not start in the host
    6. Repeat step 2~5 under it broke the rule in step 5

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    def check_ksm(mem, stress=False):
        """
        Boot the main VM, sample memory usage and verify the KSM state.

        Reads ``ksm_thres`` from the enclosing scope at call time.

        :param mem: Boot guest with given memory, in KB
        :param stress: Load stress or not
        """
        params['mem'] = mem // 1024
        params['start_vm'] = 'yes'
        vm_name = params['main_vm']
        env_process.preprocess_vm(test, params, env, vm_name)
        vm = env.get_vm(vm_name)
        vm.wait_for_login()
        if stress:
            params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes %sM' %
                                     (int(params['mem']) // 2))
            stress_test = VMStress(vm, "stress", params)
            stress_test.load_stress_tool()
            time.sleep(30)
        qemu_pid = vm.get_pid()
        qemu_used_page = utils_misc.normalize_data_size(process.getoutput(
            params['cmd_get_qemu_used_mem'] % qemu_pid, shell=True) + 'K', 'B')
        pagesize = utils_memory.getpagesize()
        qemu_used_mem = int(float(qemu_used_page)) * pagesize
        free_mem_host = utils_memory.freememtotal()
        ksm_status = process.getoutput(params['cmd_check_ksm_status'])
        vm.destroy()
        logging.info('The ksm threshold is %s, the memory allocated by qemu is'
                     ' %s, and the total free memory on host is %s.'
                     % (ksm_thres, qemu_used_mem, free_mem_host))
        if free_mem_host >= ksm_thres:
            if ksm_status != '0':
                test.fail('Ksm should not start.')
            if stress:
                test.error('The host resource is not consumed as expected.')
        elif ksm_status == '0':
            test.fail('Ksm should start but it does not.')

    total_mem_host = utils_memory.memtotal()
    utils_memory.drop_caches()
    free_mem_host = utils_memory.freememtotal()
    ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True)
    ksm_thres = int(total_mem_host *
                    (int(re.findall('\\d+', ksm_thres)[0]) / 100))
    guest_mem = (free_mem_host - ksm_thres) // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    status_ksm_service = process.system(
        params['cmd_status_ksmtuned'], ignore_status=True)
    if status_ksm_service != 0:
        process.run(params['cmd_start_ksmtuned'])
    check_ksm(guest_mem)

    ksm_config_file = params['ksm_config_file']
    backup_file = ksm_config_file + '.backup'
    copyfile(ksm_config_file, backup_file)
    # From here on the configuration may be modified, so everything runs
    # under try/finally: a failure while writing the new threshold or
    # restarting ksmtuned must still restore the original file.
    try:
        threshold = params.get_numeric('ksm_threshold')
        with open(ksm_config_file, "a+") as f:
            f.write('%s=%s' % (params['ksm_thres_conf'], threshold))
        process.run(params['cmd_restart_ksmtuned'])
        ksm_thres = total_mem_host * (threshold / 100)
        guest_mem = total_mem_host - ksm_thres // 2
        if arch.ARCH in ('ppc64', 'ppc64le'):
            guest_mem = guest_mem - guest_mem % (256 * 1024)
        check_ksm(guest_mem, stress=True)
    finally:
        copyfile(backup_file, ksm_config_file)
        os.remove(backup_file)
        # Put ksmtuned back into the state it was found in.
        if status_ksm_service != 0:
            process.run(params['cmd_stop_ksmtuned'])
        else:
            process.run(params['cmd_restart_ksmtuned'])
def run(test, params, env):
    """
    Test migration under stress.

    :param test: test object; used to cancel the test and handed to
                 ``do_stress_migration``.
    :param params: dictionary-like test parameters; ``load_vms``,
                   ``stress_args`` and ``connect_uri`` are written back.
    :param env: test environment providing the VM objects.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    stress_type = params.get("migration_stress_type")
    require_stress_tool = "stress" in stress_tool
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("stress_args")
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division so an integer amount is substituted into the
        # stress arguments on Python 3 as well.
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    # stress_args is optional; a membership test on None would raise.
    if stress_args and "vm-bytes" in stress_args:
        params["stress_args"] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if require_stress_tool and stress_type == "stress_on_host":
        utils_test.load_stress("stress_on_host", params)

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()

        # configure stress in VM
        if require_stress_tool and stress_type == "stress_in_vms":
            utils_test.load_stress("stress_in_vms", params, vms)

        do_stress_migration(vms, src_uri, dest_uri, migration_type, test,
                            params, thread_timeout)
    finally:
        logging.debug("Cleanup vms...")
        params["connect_uri"] = src_uri
        for vm in vms:
            utils_test.libvirt.MigrationTest().cleanup_dest_vm(
                vm, None, dest_uri)
            # Try to start vms in source once vms in destination are
            # cleaned up
            if not vm.is_alive():
                vm.start()
                vm.wait_for_login()
        utils_test.unload_stress(stress_type, params, vms)
def output_check(nodeinfo_output):
    """
    Cross-check ``virsh nodeinfo`` output against sysfs/procfs/lscpu on the
    host and against the CPU topology found in the virsh capabilities XML.

    :param nodeinfo_output: nodeinfo text; it is only passed on to
                            ``_check_nodeinfo`` to extract single fields.
    :raise error.TestFail: if CPU model, number of CPUs, sockets, cores per
                           socket, threads per core or memory size differ.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs, nodeinfo CPUs represent online threads in the
    # system, check all online cpus in sysfs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
    cpus_online = utils.run(cmd, ignore_status=True)
    cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
    cpus_total = utils.run(cmd, ignore_status=True)
    if cpus_nodeinfo != cpus_online.stdout.strip():
        if 'power' in cpu_util.get_cpu_arch():
            # On power the value is compared with all threads instead of
            # the online ones only.
            if cpus_nodeinfo != cpus_total.stdout.strip():
                raise error.TestFail("Virsh nodeinfo output of CPU(s) on"
                                     " ppc did not match all threads in "
                                     "the system")
        else:
            raise error.TestFail("Virsh nodeinfo output didn't match "
                                 "number of CPU(s)")

    # Check CPU frequency, frequency is under clock for ppc
    cpu_frequency_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock' | head -n1 | "
           "awk -F: '{print $2}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                  cpu_frequency_nodeinfo, cpu_frequency_os)
    #
    # Matching CPU frequency is not an exact science on modern processors
    # and operating systems: the execution speed can vary with the current
    # workload in order to save energy and keep cool, and the two values
    # are sampled at different points in time. So only check that the
    # absolute difference is within 20 percent of each value, and never
    # fail on it - just print a debug message and move on.
    #
    try:
        nodeinfo_mhz = int(cpu_frequency_nodeinfo)
        os_mhz = int(cpu_frequency_os)
        diffval = abs(nodeinfo_mhz - os_mhz)
        if (float(diffval) / float(nodeinfo_mhz) > 0.20 or
                float(diffval) / float(os_mhz) > 0.20):
            logging.debug("Virsh nodeinfo output didn't match CPU "
                          "frequency within 20 percent")
    except (TypeError, ValueError, ZeroDivisionError):
        # An empty/non-numeric or zero reading must not abort a check that
        # is informational only.
        logging.debug("CPU frequency values are not comparable: "
                      "nodeinfo=%r os=%r",
                      cpu_frequency_nodeinfo, cpu_frequency_os)

    # Get CPU topology from virsh capabilities xml
    cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
    logging.debug("Cpu topology in virsh capabilities output: %s",
                  cpu_topology)

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
    # total sockets in the system, so get total sockets in one node and
    # check with it
    node_info = utils_misc.NumaInfo()
    node_online_list = node_info.get_online_nodes()
    cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
    cmd += "/cpu*/topology/physical_package_id | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    total_sockets_in_node = int(cmd_result.stdout.strip())
    if total_sockets_in_node != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s) of host OS")
    if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s) of virsh capabilities output")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'Core(s) per socket', 4)
    cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    # Exact comparison: a regex prefix match would accept nodeinfo "1"
    # against a host value of "16".
    if cores_per_socket_nodeinfo != cores_per_socket_os:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket of host OS")
    if cores_per_socket_nodeinfo != cpu_topology['cores']:
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket of virsh capabilities output")

    # Check Thread(s) per core
    threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Thread(s) per core', 4)
    if threads_per_core_nodeinfo != cpu_topology['threads']:
        raise error.TestFail("Virsh nodeinfo output didn't match Thread(s) "
                             "per core of virsh capabilities output")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
def run(test, params, env):
    """
    Test migration under stress.

    :param test: test object; ``test.bindir`` is used to build VM objects.
    :param params: dictionary-like test parameters; NFS/SSH related keys,
                   ``load_vms`` and ``stress_args`` are written back.
    :param env: test environment (address cache, object cleanup).
    :raise exceptions.TestSkipError: on too few VMs or placeholder URIs.
    """
    vm_names = params.get("migration_vms").split()
    if len(vm_names) < 2:
        raise exceptions.TestSkipError("Provide enough vms for migration")

    src_uri = libvirt_vm.complete_uri(
        params.get("migrate_source_host", "EXAMPLE"))
    if src_uri.count('///') or src_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The src_uri '%s' is invalid"
                                       % src_uri)

    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The dest_uri '%s' is invalid"
                                       % dest_uri)

    # Params for NFS and SSH setup
    params["server_ip"] = params.get("migrate_dest_host")
    params["server_user"] = "******"
    params["server_pwd"] = params.get("migrate_dest_pwd")
    params["client_ip"] = params.get("migrate_source_host")
    params["client_user"] = "******"
    params["client_pwd"] = params.get("migrate_source_pwd")
    params["nfs_client_ip"] = params.get("migrate_dest_host")
    params["nfs_server_ip"] = params.get("migrate_source_host")

    # Configure NFS client on remote host
    nfs_client = nfs.NFSClient(params)
    nfs_client.setup()

    # Everything after the NFS setup runs under try/finally so that the
    # NFS client is cleaned up even when preparation fails.
    try:
        # Migrated vms' instance
        vms = []
        for vm_name in vm_names:
            vms.append(
                libvirt_vm.VM(vm_name, params, test.bindir,
                              env.get("address_cache")))

        load_vm_names = params.get("load_vms").split()
        # vms for load
        load_vms = []
        for vm_name in load_vm_names:
            load_vms.append(
                libvirt_vm.VM(vm_name, params, test.bindir,
                              env.get("address_cache")))
        params['load_vms'] = load_vms

        cpu = int(params.get("smp", 1))
        memory = int(params.get("mem")) * 1024
        stress_type = params.get("migration_stress_type")
        vm_bytes = params.get("stress_vm_bytes")
        stress_args = params.get("stress_args")
        migration_type = params.get("migration_type")
        start_migration_vms = "yes" == params.get("start_migration_vms",
                                                  "yes")
        thread_timeout = int(params.get("thread_timeout", 120))
        remote_host = params.get("migrate_dest_host")
        username = params.get("migrate_dest_user", "root")
        password = params.get("migrate_dest_pwd")
        prompt = params.get("shell_prompt", r"[\#\$]")

        # Set vm_bytes for start_cmd
        mem_total = utils_memory.memtotal()
        vm_reserved = len(vms) * memory
        if vm_bytes == "half":
            # Floor division so an integer amount is substituted into the
            # stress arguments on Python 3 as well.
            vm_bytes = (mem_total - vm_reserved) // 2
        elif vm_bytes == "shortage":
            vm_bytes = mem_total - vm_reserved + 524288
        # stress_args is optional; formatting None would raise.
        if vm_bytes is not None and stress_args:
            params["stress_args"] = stress_args % vm_bytes

        for vm in vms:
            # Keep vm dead for edit
            if vm.is_alive():
                vm.destroy()
            set_cpu_memory(vm.name, cpu, memory)

        vm_ipaddr = {}
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()
                vm_ipaddr[vm.name] = vm.get_address()
                # TODO: recover vm if start failed?

        # Config ssh autologin for remote host
        ssh_key.setup_ssh_key(remote_host, username, password, port=22)

        do_stress_migration(vms, src_uri, dest_uri, stress_type,
                            migration_type, params, thread_timeout)

        # Check network of vms on destination
        if start_migration_vms and migration_type != "cross":
            for vm in vms:
                utils_test.check_dest_vm_network(vm, vm_ipaddr[vm.name],
                                                 remote_host,
                                                 username, password, prompt)
    finally:
        logging.debug("Cleanup vms...")
        for vm_name in vm_names:
            vm = libvirt_vm.VM(vm_name, params, test.bindir,
                               env.get("address_cache"))
            utlv.MigrationTest().cleanup_dest_vm(vm, None, dest_uri)
            if vm.is_alive():
                vm.destroy(gracefully=False)

        if nfs_client:
            logging.info("Cleanup NFS client environment...")
            nfs_client.cleanup()
        env.clean_objects()
if not (mb_enable and not tlbfs_enable): logging.debug("starting analyzing the hugepage usage...") pid = vms[-1].get_pid() started_free = utils_memory.get_num_huge_pages_free() # Get the thp usage from /proc/pid/smaps started_anon = utils_memory.get_num_anon_huge_pages(pid) static_used = non_started_free - started_free hugepage_used = static_used * page_size if test_type == "contrast": # get qemu-kvm memory consumption by top cmd = "top -b -n 1|awk '$1 == %s {print $10}'" % pid rate = utils.run(cmd, ignore_status=False, verbose=True).stdout.strip() qemu_kvm_used = (utils_memory.memtotal() * float(rate)) / 100 logging.debug("rate: %s, used-by-qemu-kvm: %f, used-by-vm: %d", rate, qemu_kvm_used, hugepage_used) if abs(qemu_kvm_used - hugepage_used) > hugepage_used * (err_range - 1): raise error.TestFail("Error for hugepage usage") if test_type == "stress": if non_started_free <= started_free: logging.debug("hugepage usage:%d -> %d", non_started_free, started_free) raise error.TestFail("Error for hugepage usage with stress") if mb_enable is not True: if static_used > 0: raise error.TestFail("VM use static hugepage without" " memoryBacking element") if thp_enable is not True and started_anon > 0: raise error.TestFail("VM use transparent hugepage, while"
def run(test, params, env):
    """
    Check static and transparent hugepage usage of libvirt guests.

    Test steps:
    1) Get the params from params.
    2) Check the environment.
    3) Start the VM and check whether the VM has been started successfully.
    4) Compare the hugepage memory size to the guest memory that was set.
    5) Check the hugepage memory usage.
    6) Clean up.

    :param test: test object, used to cancel/fail the test and to locate
                 the control file directory (``test.virtdir``).
    :param params: dictionary-like test parameters; ``main_vm`` and
                   ``test_control_file`` are written for the unixbench case.
    :param env: test environment providing the VM objects.
    """
    test_type = params.get("test_type", 'normal')
    tlbfs_enable = 'yes' == params.get("hugetlbfs_enable", 'no')
    shp_num = int(params.get("static_hugepage_num", 1024))
    thp_enable = 'yes' == params.get("trans_hugepage_enable", 'no')
    mb_enable = 'yes' == params.get("mb_enable", 'yes')
    delay = int(params.get("delay_time", 10))

    # Skip cases early
    vm_names = []
    if test_type == "contrast":
        # The contrast case works on the first two configured VMs.
        vm_names = params.get("vms").split()[:2]
        if len(vm_names) < 2:
            test.cancel("This test requires two VMs")
        # confirm no VM running
        allvms = virsh.dom_list('--name').stdout.strip()
        if allvms != '':
            test.cancel("one or more VMs are alive")
        err_range = float(params.get("mem_error_range", 1.25))
    else:
        vm_names.append(params.get("main_vm"))
        if test_type == "stress":
            target_path = params.get("target_path", "/tmp/test.out")
        elif test_type == "unixbench":
            unixbench_control_file = params.get("unixbench_controle_file",
                                                "unixbench5.control")

    # Back up the original settings; they are restored in the finally
    # clause at the end of this function.
    shp_orig_num = utils_memory.get_num_huge_pages()
    thp_orig_status = utils_memory.get_transparent_hugepage()
    page_size = utils_memory.get_huge_page_size()

    # mount/umount hugetlbfs
    tlbfs_status = utils_misc.is_mounted("hugetlbfs", "/dev/hugepages",
                                         "hugetlbfs")
    if tlbfs_enable is True:
        if tlbfs_status is not True:
            utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
    else:
        if tlbfs_status is True:
            utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs")

    # set static hugepage
    utils_memory.set_num_huge_pages(shp_num)

    # enable/disable transparent hugepage
    if thp_enable:
        utils_memory.set_transparent_hugepage('always')
    else:
        utils_memory.set_transparent_hugepage('never')

    # set/del memoryBacking tag
    for vm_name in vm_names:
        if mb_enable:
            vm_xml.VMXML.set_memoryBacking_tag(vm_name)
        else:
            vm_xml.VMXML.del_memoryBacking_tag(vm_name)

    utils_libvirtd.libvirtd_restart()
    # Free static hugepages before any VM is started; the later
    # measurement is subtracted from this baseline.
    non_started_free = utils_memory.get_num_huge_pages_free()

    vms = []
    sessions = []
    try:
        for vm_name in vm_names:
            # try to start vm and login
            try:
                vm = env.get_vm(vm_name)
                vm.start()
            except VMError as e:
                if mb_enable and not tlbfs_enable:
                    # if hugetlbfs is not mounted,
                    # VM start with memoryBacking tag will fail
                    logging.debug(e)
                else:
                    error_msg = "Test failed in positive case. error: %s\n" % e
                    test.fail(error_msg)
            # Stop processing further VMs once one is not running.
            if vm.is_alive() is not True:
                break
            vms.append(vm)

            # try to login and run some program
            try:
                session = vm.wait_for_login()
            except (LoginError, ShellError) as e:
                error_msg = "Test failed in positive case.\n error: %s\n" % e
                test.fail(error_msg)
            sessions.append(session)

            if test_type == "stress":
                # prepare file for increasing stress
                stress_path = prepare_c_file()
                remote.scp_to_remote(vm.get_address(), 22,
                                     'root', params.get('password'),
                                     stress_path, "/tmp/")
                # Try to install gcc on guest first
                utils_package.package_install(["gcc"], session, 360)
                # increasing workload
                session.cmd("gcc %s -o %s" % (stress_path, target_path))
                session.cmd("%s &" % target_path)

            if test_type == "unixbench":
                params["main_vm"] = vm_name
                params["test_control_file"] = unixbench_control_file

                control_path = os.path.join(test.virtdir, "control",
                                            unixbench_control_file)
                # unixbench test need 'patch' and 'perl' commands installed
                utils_package.package_install(["patch", "perl"], session, 360)
                command = utils_test.run_autotest(vm, session, control_path,
                                                  None, None, params,
                                                  copy_only=True)
                session.cmd("%s &" % command, ignore_all_errors=True)
                # wait for autotest running on vm
                time.sleep(delay)

                def _is_unixbench_running():
                    # True when the grep pipeline exits with status 0.
                    cmd = "ps -ef | grep perl | grep Run"
                    return not session.cmd_status(cmd)
                if not utils_misc.wait_for(_is_unixbench_running, timeout=240):
                    test.cancel("Failed to run unixbench in guest,"
                                " please make sure some necessary"
                                " packages are installed in guest,"
                                " such as gcc, tar, bzip2")
                logging.debug("Unixbench test is running in VM")

            if test_type == "contrast":
                # wait for vm finish starting completely
                time.sleep(delay)

        # The usage analysis is skipped for the one combination in which
        # the VM start is expected to fail (memoryBacking without tlbfs).
        if not (mb_enable and not tlbfs_enable):
            logging.debug("starting analyzing the hugepage usage...")
            pid = vms[-1].get_pid()
            started_free = utils_memory.get_num_huge_pages_free()
            # Get the thp usage from /proc/pid/smaps
            started_anon = utils_memory.get_num_anon_huge_pages(pid)
            static_used = non_started_free - started_free
            hugepage_used = static_used * page_size

            if test_type == "contrast":
                # get qemu-kvm memory consumption by top
                cmd = "top -b -n 1|awk '$1 == %s {print $10}'" % pid
                rate = process.run(cmd, ignore_status=False, verbose=True,
                                   shell=True).stdout_text.strip()
                qemu_kvm_used = (utils_memory.memtotal() * float(rate)) / 100
                logging.debug("rate: %s, used-by-qemu-kvm: %f, used-by-vm: %d",
                              rate, qemu_kvm_used, hugepage_used)
                if abs(qemu_kvm_used - hugepage_used) > hugepage_used * (err_range - 1):
                    test.fail("Error for hugepage usage")

            if test_type == "stress":
                # The number of free static hugepages must have dropped.
                if non_started_free <= started_free:
                    logging.debug("hugepage usage:%d -> %d", non_started_free,
                                  started_free)
                    test.fail("Error for hugepage usage with stress")

            if mb_enable is not True:
                if static_used > 0:
                    test.fail("VM use static hugepage without"
                              " memoryBacking element")
                if thp_enable is not True and started_anon > 0:
                    test.fail("VM use transparent hugepage, while"
                              " it's disabled")
            else:
                # NOTE(review): mb_enable is True here, so the enclosing
                # guard implies tlbfs_enable is True as well and the first
                # branch below looks unreachable - confirm before removing.
                if tlbfs_enable is not True:
                    if static_used > 0:
                        test.fail("VM use static hugepage without tlbfs"
                                  " mounted")
                    if thp_enable and started_anon <= 0:
                        test.fail("VM doesn't use transparent"
                                  " hugepage")
                else:
                    if shp_num > 0:
                        if static_used <= 0:
                            test.fail("VM doesn't use static"
                                      " hugepage")
                    else:
                        if static_used > 0:
                            test.fail("VM use static hugepage,"
                                      " while it's set to zero")
                    if thp_enable is not True:
                        if started_anon > 0:
                            test.fail("VM use transparent hugepage,"
                                      " while it's disabled")
                    else:
                        if shp_num == 0 and started_anon <= 0:
                            test.fail("VM doesn't use transparent"
                                      " hugepage, while static"
                                      " hugepage is disabled")
    finally:
        # close the guest sessions
        for session in sessions:
            session.close()

        for vm in vms:
            if vm.is_alive():
                vm.destroy()

        # Apply the opposite memoryBacking operation to the one done
        # during setup.
        for vm_name in vm_names:
            if mb_enable:
                vm_xml.VMXML.del_memoryBacking_tag(vm_name)
            else:
                vm_xml.VMXML.set_memoryBacking_tag(vm_name)

        utils_libvirtd.libvirtd_restart()

        # Undo the hugetlbfs mount/umount only if setup changed it.
        if tlbfs_enable is True:
            if tlbfs_status is not True:
                utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
        else:
            if tlbfs_status is True:
                utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
        utils_memory.set_num_huge_pages(shp_orig_num)
        utils_memory.set_transparent_hugepage(thp_orig_status)
def run(test, params, env):
    """
    Network stress with multi nics test with netperf.

    1) Start multi vm(s) guest.
    2) Select multi vm(s) or host to setup netperf server/client.
    3) Execute netperf stress on multi nics.
    4) Ping test after netperf testing, check whether nics still work.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    login_timeout = float(params.get("login_timeout", 360))
    netperf_server = params.get("netperf_server").split()
    netperf_client = params.get("netperf_client")
    guest_username = params.get("username", "")
    guest_password = params.get("password", "")
    shell_client = params.get("shell_client")
    shell_port = params.get("shell_port")
    os_type = params.get("os_type")
    shell_prompt = params.get("shell_prompt", r"^root@.*[\#\$]\s*$|#")
    disable_firewall = params.get("disable_firewall", "")
    linesep = params.get("shell_linesep",
                         "\n").encode().decode('unicode_escape')
    status_test_command = params.get("status_test_command", "echo $?")
    ping_count = int(params.get("ping_count", 10))
    compile_option_client = params.get("compile_option_client", "")
    compile_option_server = params.get("compile_option_server", "")

    vms = params.get("vms")
    server_infos = []
    client_infos = []
    server_ips = []
    client_ips = []
    # Every serial session opened on a server guest, so that all of them
    # (not only the last one) can be closed during cleanup.
    server_ctls = []
    client_ctl = None

    if os_type == "windows":
        host_mem = utils_memory.memtotal() // (1024 * 1024)
        vm_mem = host_mem / (len(vms.split()) + 1) * 1024
        if vm_mem < params.get_numeric("min_mem"):
            test.cancel("Host total memory is insufficient for this test case,"
                        "each VM's memory can not meet guest OS's requirement")
        params["mem"] = vm_mem
    params["start_vm"] = "yes"

    env_process.preprocess(test, params, env)
    for server in netperf_server:
        s_info = {}
        if server in vms:
            server_vm = env.get_vm(server)
            server_vm.verify_alive()
            server_ctl = server_vm.wait_for_serial_login(timeout=login_timeout)
            server_ctls.append(server_ctl)
            error_context.context("Stop fireware on netperf server guest.",
                                  logging.info)
            server_ctl.cmd(disable_firewall, ignore_all_errors=True)
            server_ip = server_vm.get_address()
            server_ips.append(server_ip)
            s_info["ip"] = server_ip
            s_info["os_type"] = params.get("os_type_%s" % server, os_type)
            s_info["username"] = params.get("username_%s" % server,
                                            guest_username)
            s_info["password"] = params.get("password_%s" % server,
                                            guest_password)
            s_info["shell_client"] = params.get("shell_client_%s" % server,
                                                shell_client)
            s_info["shell_port"] = params.get("shell_port_%s" % server,
                                              shell_port)
            s_info["shell_prompt"] = params.get("shell_prompt_%s" % server,
                                                shell_prompt)
            s_info["linesep"] = params.get("linesep_%s" % server,
                                           linesep)
            s_info["status_test_command"] = params.get(
                "status_test_command_%s" % server, status_test_command)
        else:
            err = "Only support setup netperf server in guest."
            test.error(err)
        server_infos.append(s_info)

    client = netperf_client.strip()
    c_info = {}
    if client in vms:
        client_vm = env.get_vm(client)
        client_vm.verify_alive()
        client_ctl = client_vm.wait_for_serial_login(timeout=login_timeout)
        if params.get("dhcp_cmd"):
            status, output = client_ctl.cmd_status_output(params["dhcp_cmd"],
                                                          timeout=600)
            if status:
                logging.warn("Failed to execute dhcp-command, output:\n %s",
                             output)
        error_context.context("Stop fireware on netperf client guest.",
                              logging.info)
        client_ctl.cmd(disable_firewall, ignore_all_errors=True)

        client_ip = client_vm.get_address()
        client_ips.append(client_ip)
        params_client_nic = params.object_params(client)
        nics_count = len(params_client_nic.get("nics", "").split())
        if nics_count > 1:
            # Bring up the remaining nics and record their addresses.
            for i in range(nics_count)[1:]:
                client_vm.wait_for_login(nic_index=i, timeout=login_timeout)
                client_ips.append(client_vm.get_address(index=i))

        c_info["ip"] = client_ip
        c_info["os_type"] = params.get("os_type_%s" % client, os_type)
        c_info["username"] = params.get("username_%s" % client,
                                        guest_username)
        c_info["password"] = params.get("password_%s" % client,
                                        guest_password)
        c_info["shell_client"] = params.get("shell_client_%s" % client,
                                            shell_client)
        c_info["shell_port"] = params.get("shell_port_%s" % client,
                                          shell_port)
        c_info["shell_prompt"] = params.get("shell_prompt_%s" % client,
                                            shell_prompt)
        c_info["linesep"] = params.get("linesep_%s" % client,
                                       linesep)
        c_info["status_test_command"] = params.get(
            "status_test_command_%s" % client, status_test_command)
    else:
        err = "Only support setup netperf client in guest."
        test.error(err)
    client_infos.append(c_info)

    if params.get("os_type") == "linux":
        error_context.context("Config static route in netperf server guest.",
                              logging.info)
        nics_list = utils_net.get_linux_ifname(client_ctl)
        for ip in server_ips:
            # Spread the server addresses over the client nics round-robin.
            index = server_ips.index(ip) % len(nics_list)
            client_ctl.cmd("route add -host %s %s" % (ip, nics_list[index]))

    netperf_link = params.get("netperf_link")
    netperf_link = os.path.join(data_dir.get_deps_dir("netperf"), netperf_link)
    pkg_md5sum = params.get("pkg_md5sum")
    netperf_server_link = params.get("netperf_server_link_win", netperf_link)
    netperf_server_link = os.path.join(data_dir.get_deps_dir("netperf"),
                                       netperf_server_link)
    server_md5sum = params.get("server_md5sum")
    netperf_client_link = params.get("netperf_client_link_win", netperf_link)
    netperf_client_link = os.path.join(data_dir.get_deps_dir("netperf"),
                                       netperf_client_link)
    client_md5sum = params.get("client_md5sum")

    server_path_linux = params.get("server_path", "/var/tmp")
    client_path_linux = params.get("client_path", "/var/tmp")
    server_path_win = params.get("server_path_win", "c:\\")
    client_path_win = params.get("client_path_win", "c:\\")

    netperf_clients = []
    netperf_servers = []
    error_context.context("Setup netperf guest.", logging.info)
    for c_info in client_infos:
        # The checksum is selected per endpoint so that a Windows value
        # never carries over to a later non-Windows endpoint.
        if c_info["os_type"] == "windows":
            netperf_link_c = netperf_client_link
            client_path = client_path_win
            md5sum = client_md5sum
        else:
            netperf_link_c = netperf_link
            client_path = client_path_linux
            md5sum = pkg_md5sum
        n_client = utils_netperf.NetperfClient(
            c_info["ip"], client_path, md5sum, netperf_link_c,
            client=c_info["shell_client"], port=c_info["shell_port"],
            username=c_info["username"], password=c_info["password"],
            prompt=c_info["shell_prompt"], linesep=c_info["linesep"],
            status_test_command=c_info["status_test_command"],
            compile_option=compile_option_client)
        netperf_clients.append(n_client)
    error_context.context("Setup netperf server.", logging.info)
    for s_info in server_infos:
        if s_info["os_type"] == "windows":
            netperf_link_s = netperf_server_link
            server_path = server_path_win
            md5sum = server_md5sum
        else:
            netperf_link_s = netperf_link
            server_path = server_path_linux
            md5sum = pkg_md5sum
        n_server = utils_netperf.NetperfServer(
            s_info["ip"], server_path, md5sum, netperf_link_s,
            client=s_info["shell_client"], port=s_info["shell_port"],
            username=s_info["username"], password=s_info["password"],
            prompt=s_info["shell_prompt"], linesep=s_info["linesep"],
            status_test_command=s_info["status_test_command"],
            compile_option=compile_option_server)
        netperf_servers.append(n_server)

    try:
        error_context.context("Start netperf server.", logging.info)
        for n_server in netperf_servers:
            n_server.start()
        test_duration = int(params.get("netperf_test_duration", 60))
        test_protocols = params.get("test_protocols", "TCP_STREAM")
        netperf_sessions = params.get("netperf_sessions", "1")
        p_sizes = params.get("package_sizes")
        netperf_cmd_prefix = params.get("netperf_cmd_prefix", "")
        error_context.context("Start netperf clients.", logging.info)
        for protocol in test_protocols.split():
            error_context.context("Testing %s protocol" % protocol,
                                  logging.info)
            sessions_test = netperf_sessions.split()
            sizes_test = p_sizes.split()
            for size in sizes_test:
                for sess in sessions_test:
                    test_option = params.get("test_option", "")
                    test_option += " -t %s -l %s " % (protocol, test_duration)
                    test_option += " -- -m %s" % size
                    launch_netperf_client(test, server_ips, netperf_clients,
                                          test_option, test_duration, sess,
                                          netperf_cmd_prefix, params)
        error_context.context("Ping test after netperf testing.",
                              logging.info)
        for s_ip in server_ips:
            status, output = utils_test.ping(s_ip, ping_count,
                                             timeout=float(ping_count) * 1.5)
            if status != 0:
                test.fail("Ping returns non-zero value %s" % output)

            package_lost = utils_test.get_loss_ratio(output)
            if package_lost != 0:
                # Report the address that was pinged, not the name left
                # over from the server setup loop.
                test.fail("%s packeage lost when ping server ip %s " %
                          (package_lost, s_ip))
    finally:
        for n_server in netperf_servers:
            n_server.stop()
            n_server.cleanup(True)
        for n_client in netperf_clients:
            n_client.stop()
            n_client.cleanup(True)
        for server_ctl in server_ctls:
            if server_ctl:
                server_ctl.close()
        if client_ctl:
            client_ctl.close()
def run(test, params, env):
    """
    Test migration under stress.

    The stress tool named by the ``stress_tool`` parameter can be loaded on
    the local host, on the remote host and/or inside every guest, depending
    on the ``migration_stress_host``, ``migration_stress_remote`` and
    ``migration_stress_vms`` parameters. The guests are then migrated via
    ``do_stress_migration`` and, whatever the outcome, cleaned up on the
    destination and restarted on the source.

    :param test: test object (provides cancel/error reporting)
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    # A local ("///") URI or the untouched placeholder is not a usable target
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    remote_stress = params.get("migration_stress_remote", "no") == "yes"
    host_stress = params.get("migration_stress_host", "no") == "yes"
    vms_stress = params.get("migration_stress_vms", "no") == "yes"
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("%s_args" % stress_tool)
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))
    ubuntu_dep = ['build-essential', 'git']
    hstress = rstress = None
    vstress = {}

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division keeps the value integral on Python 3 as well, so it
        # formats cleanly into the stress command line
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    # stress_args is None when no "<tool>_args" parameter is configured
    if stress_args and "vm-bytes" in stress_args:
        params["%s_args" % stress_tool] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if host_stress:
        # remove package manager installed tool to avoid conflict
        if not utils_package.package_remove(stress_tool):
            logging.error("Existing %s is not removed", stress_tool)
        if "stress-ng" in stress_tool and 'Ubuntu' in utils_misc.get_distro():
            params['stress-ng_dependency_packages_list'] = ubuntu_dep
        try:
            hstress = utils_test.HostStress(stress_tool, params)
            hstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)

    if remote_stress:
        server_ip = params['remote_ip']
        server_pwd = params['remote_pwd']
        server_user = params.get('remote_user', 'root')
        remote_session = remote.wait_for_login('ssh', server_ip, '22',
                                               server_user, server_pwd,
                                               r"[\#\$]\s*$")
        try:
            # remove package manager installed tool to avoid conflict
            if not utils_package.package_remove(stress_tool,
                                                session=remote_session):
                logging.error("Existing %s is not removed", stress_tool)
            if ("stress-ng" in stress_tool and
                    'Ubuntu' in utils_misc.get_distro(session=remote_session)):
                params['stress-ng_dependency_packages_list'] = ubuntu_dep

            rstress = utils_test.HostStress(stress_tool, params,
                                            remote_server=True)
            rstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)
        finally:
            # Close the session on every exit path, not only on StressError
            remote_session.close()

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        if start_migration_vms:
            for vm in vms:
                vm.start()
                session = vm.wait_for_login()
                try:
                    # remove package manager installed tool to avoid conflict
                    if not utils_package.package_remove(stress_tool,
                                                        session=session):
                        logging.error("Existing %s is not removed",
                                      stress_tool)
                    # configure stress in VM
                    if vms_stress:
                        if ("stress-ng" in stress_tool and
                                'Ubuntu' in utils_misc.get_distro(
                                    session=session)):
                            params['stress-ng_dependency_packages_list'] = (
                                ubuntu_dep)
                        try:
                            vstress[vm.name] = utils_test.VMStress(
                                vm, stress_tool, params)
                            vstress[vm.name].load_stress_tool()
                        except utils_test.StressError as info:
                            test.error(info)
                finally:
                    session.close()

        do_stress_migration(vms, src_uri, dest_uri, migration_type, test,
                            params, thread_timeout)
    finally:
        logging.debug("Cleanup vms...")
        for vm in vms:
            utils_test.libvirt.MigrationTest().cleanup_dest_vm(
                vm, None, dest_uri)
            # Try to start vms in source once vms in destination are
            # cleaned up
            if not vm.is_alive():
                vm.start()
                vm.wait_for_login()
            # Only guests that actually got a stress object need unloading
            guest_stress = vstress.get(vm.name)
            if guest_stress:
                guest_stress.unload_stress()

        if rstress:
            rstress.unload_stress()

        if hstress:
            hstress.unload_stress()
def run(test, params, env):
    """
    KVM multi test:
    1) Log into guests
    2) Check all the nics available or not
    3) Ping among guest nic and host
       3.1) Ping with different packet size
       3.2) Flood ping test
       3.3) Final ping test
    4) Transfer files among guest nics and host
       4.1) Create file by dd command in guest
       4.2) Transfer file between nics
       4.3) Compare original file and transferred file
    5) ping among different nics
       5.1) Ping with different packet size
       5.2) Flood ping test
       5.3) Final ping test
    6) Transfer files among different nics
       6.1) Create file by dd command in guest
       6.2) Transfer file between nics
       6.3) Compare original file and transferred file
    7) Repeat step 3 - 6 on every nic.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def ping(session, nic, dst_ip, strick_check, flood_minutes):
        """
        Run the packet-size, flood and final ping tests from one guest nic.

        :param session: guest session the pings are issued from
        :param nic: guest interface name used as ping source
        :param dst_ip: address to ping
        :param strick_check: "yes" to fail on any packet loss, otherwise
                             only a non-zero ping status fails the test
        :param flood_minutes: duration of the flood ping in minutes
        """
        strict = strick_check == "yes"
        d_packet_size = [1, 4, 48, 512, 1440, 1500, 1505, 4054, 4055, 4096,
                         4192, 8878, 9000, 32767, 65507]
        packet_size = params.get("packet_size", "").split() or d_packet_size
        for size in packet_size:
            error_context.context("Ping with packet size %s" % size,
                                  logging.info)
            status, output = utils_test.ping(dst_ip, 10, interface=nic,
                                             packetsize=size,
                                             timeout=30, session=session)
            # Compare against "yes": the raw flag is a non-empty string and
            # therefore always truthy, even when it is "no"
            if strict:
                ratio = utils_test.get_loss_ratio(output)
                if ratio != 0:
                    test.fail("Loss ratio is %s for packet size"
                              " %s" % (ratio, size))
            else:
                if status != 0:
                    test.fail("Ping returns non-zero value %s" % output)

        error_context.context("Flood ping test", logging.info)
        utils_test.ping(dst_ip, None, interface=nic, flood=True,
                        output_func=None, timeout=flood_minutes * 60,
                        session=session)
        error_context.context("Final ping test", logging.info)
        counts = params.get("ping_counts", 100)
        status, output = utils_test.ping(dst_ip, counts, interface=nic,
                                         timeout=float(counts) * 1.5,
                                         session=session)
        if strict:
            ratio = utils_test.get_loss_ratio(output)
            if ratio != 0:
                test.fail("Packet loss ratio is %s after flood" % ratio)
        else:
            if status != 0:
                test.fail("Ping returns non-zero value %s" % output)

    def file_transfer(session, src, dst):
        """
        Copy a generated file src -> dst -> src and compare the md5 sums.

        :param session: guest session used to create and checksum the file
        :param src: address the file is first copied from
        :param dst: address the file is copied to and back from
        """
        username = params.get("username", "")
        password = params.get("password", "")
        src_path = "/tmp/1"
        dst_path = "/tmp/2"
        port = int(params["file_transfer_port"])

        cmd = "dd if=/dev/urandom of=%s bs=100M count=1" % src_path
        cmd = params.get("file_create_cmd", cmd)

        error_context.context("Create file by dd command, cmd: %s" % cmd,
                              logging.info)
        session.cmd(cmd)

        transfer_timeout = int(params.get("transfer_timeout"))
        log_filename = "scp-from-%s-to-%s.log" % (src, dst)
        error_context.context("Transfer file from %s to %s" % (src, dst),
                              logging.info)
        remote.scp_between_remotes(src, dst, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        # Second leg: copy the received file back under a third name
        src_path = dst_path
        dst_path = "/tmp/3"
        log_filename = "scp-from-%s-to-%s.log" % (dst, src)
        error_context.context("Transfer file from %s to %s" % (dst, src),
                              logging.info)
        remote.scp_between_remotes(dst, src, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        error_context.context("Compare original file and transferred file",
                              logging.info)

        cmd1 = "md5sum /tmp/1"
        cmd2 = "md5sum /tmp/3"
        md5sum1 = session.cmd(cmd1).split()[0]
        md5sum2 = session.cmd(cmd2).split()[0]
        if md5sum1 != md5sum2:
            test.error("File changed after transfer")

    nic_interface_list = []
    check_irqbalance_cmd = params.get("check_irqbalance_cmd")
    stop_irqbalance_cmd = params.get("stop_irqbalance_cmd")
    start_irqbalance_cmd = params.get("start_irqbalance_cmd")
    status_irqbalance = params.get("status_irqbalance")
    vms = params["vms"].split()
    # Floor division keeps the derived mem/smp values integral on Python 3
    host_mem = utils_memory.memtotal() // (1024 * 1024)
    host_cpu_count = len(utils_misc.get_cpu_processors())
    vhost_count = 0
    if params.get("vhost"):
        vhost_count = 1
    if host_cpu_count < (1 + vhost_count) * len(vms):
        test.error("The host don't have enough cpus to start guest "
                   "pcus: %d, minimum of vcpus and vhost: %d" %
                   (host_cpu_count, (1 + vhost_count) * len(vms)))
    # Split host memory and cpus evenly between the guests
    params['mem'] = host_mem // len(vms) * 1024
    params['smp'] = host_cpu_count // len(vms) - vhost_count
    if params['smp'] % 2 != 0:
        params['vcpu_sockets'] = 1
    params["start_vm"] = "yes"
    for vm_name in vms:
        env_process.preprocess_vm(test, params, env, vm_name)
    timeout = float(params.get("login_timeout", 360))
    strict_check = params.get("strick_check", "no")
    host_ip = utils_net.get_ip_address_by_interface(params.get("netdst"))
    host_ip = params.get("srchost", host_ip)
    flood_minutes = float(params["flood_minutes"])
    error_context.context("Check irqbalance service status", logging.info)
    o = process.system_output(check_irqbalance_cmd, ignore_status=True,
                              shell=True)
    check_stop_irqbalance = False
    if re.findall(status_irqbalance, o):
        logging.debug("stop irqbalance")
        process.run(stop_irqbalance_cmd, shell=True)
        check_stop_irqbalance = True
        o = process.system_output(check_irqbalance_cmd, ignore_status=True,
                                  shell=True)
        if re.findall(status_irqbalance, o):
            test.error("Can not stop irqbalance")

    try:
        thread_list = []
        nic_interface = []
        for vm_name in vms:
            guest_ifname = ""
            guest_ip = ""
            vm = env.get_vm(vm_name)
            session = vm.wait_for_login(timeout=timeout)
            thread_list.extend(vm.vcpu_threads)
            thread_list.extend(vm.vhost_threads)
            error_context.context("Check all the nics available or not",
                                  logging.info)
            for index, nic in enumerate(vm.virtnet):
                guest_ifname = utils_net.get_linux_ifname(session, nic.mac)
                guest_ip = vm.get_address(index)
                if not (guest_ifname and guest_ip):
                    err_log = "vms %s get ip or ifname failed." % vm_name
                    # Append, so the failing VM name is kept in the message
                    err_log += " ifname: %s, ip: %s." % (guest_ifname,
                                                         guest_ip)
                    test.fail(err_log)
                # Each entry: [interface name, ip address, guest session]
                nic_interface = [guest_ifname, guest_ip, session]
                nic_interface_list.append(nic_interface)

        error_context.context("Pin vcpus and vhosts to host cpus",
                              logging.info)
        host_numa_nodes = utils_misc.NumaInfo()
        vthread_num = 0
        for numa_node_id in host_numa_nodes.nodes:
            numa_node = host_numa_nodes.nodes[numa_node_id]
            for _ in range(len(numa_node.cpus)):
                if vthread_num >= len(thread_list):
                    break
                vcpu_tid = thread_list[vthread_num]
                # pin_cpu() performs the pinning; its result is only logged
                logging.debug("pin vcpu/vhost thread(%s) to cpu(%s)" %
                              (vcpu_tid, numa_node.pin_cpu(vcpu_tid)))
                vthread_num += 1

        nic_interface_list_len = len(nic_interface_list)
        # ping and file transfer test
        for src_ip_index in range(nic_interface_list_len):
            error_context.context("Ping test from guest to host",
                                  logging.info)
            src_ip_info = nic_interface_list[src_ip_index]
            ping(src_ip_info[2], src_ip_info[0], host_ip, strict_check,
                 flood_minutes)
            error_context.context("File transfer test between guest and host",
                                  logging.info)
            file_transfer(src_ip_info[2], src_ip_info[1], host_ip)
            for dst_ip in nic_interface_list[src_ip_index:]:
                if src_ip_info[1] == dst_ip[1]:
                    continue
                txt = "Ping test between %s and %s" % (src_ip_info[1],
                                                       dst_ip[1])
                error_context.context(txt, logging.info)
                ping(src_ip_info[2], src_ip_info[0], dst_ip[1], strict_check,
                     flood_minutes)
                txt = "File transfer test between %s " % src_ip_info[1]
                txt += "and %s" % dst_ip[1]
                error_context.context(txt, logging.info)
                file_transfer(src_ip_info[2], src_ip_info[1], dst_ip[1])
    finally:
        # Restore the host service even when the test fails midway
        if check_stop_irqbalance:
            process.run(start_irqbalance_cmd, shell=True)
def output_check(nodeinfo_output):
    """
    Compare the fields of ``virsh nodeinfo`` output with host values.

    Checks CPU model, CPU count, CPU frequency (logged only), sockets,
    cores per socket, threads per core and memory size. Any hard mismatch
    is reported through ``test.fail``.

    :param nodeinfo_output: text of the virsh nodeinfo output, handed to
                            ``_check_nodeinfo`` for field extraction
    """
    # NOTE(review): the numeric argument of _check_nodeinfo is presumably
    # the position of the value within the matched line - confirm against
    # the helper, which is defined outside this block.

    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_arch = platform.machine()
    if not re.match(cpu_model_nodeinfo, cpu_arch):
        test.fail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs, nodeinfo CPUs represent online threads in the
    # system, check all online cpus in sysfs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    # Use stdout_text (as the rest of this function does) so the values
    # are text and can be compared with the nodeinfo string
    cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
    cpus_online = process.run(cmd, ignore_status=True,
                              shell=True).stdout_text.strip()
    cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
    cpus_total = process.run(cmd, ignore_status=True,
                             shell=True).stdout_text.strip()
    # cpu0 without an "online" file is not counted by the commands above
    if not os.path.exists('/sys/devices/system/cpu/cpu0/online'):
        cpus_online = str(int(cpus_online) + 1)
        cpus_total = str(int(cpus_total) + 1)

    logging.debug("host online cpus are %s", cpus_online)
    logging.debug("host total cpus are %s", cpus_total)

    if cpus_nodeinfo != cpus_online:
        if 'ppc' in cpu_arch:
            if cpus_nodeinfo != cpus_total:
                test.fail("Virsh nodeinfo output of CPU(s) on"
                          " ppc did not match all threads in "
                          "the system")
        else:
            test.fail("Virsh nodeinfo output didn't match "
                      "number of CPU(s)")

    # Check CPU frequency, frequency is under clock for ppc
    cpu_frequency_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'CPU frequency', 3)
    cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock|BogoMIPS' | "
           "head -n1 | awk -F: '{print $2}' | awk -F. '{print $1}'")
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    cpu_frequency_os = cmd_result.stdout_text.strip()
    logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                  cpu_frequency_nodeinfo, cpu_frequency_os)
    #
    # Matching CPU Frequency is not an exact science in todays modern
    # processors and OS's. CPU's can have their execution speed varied
    # based on current workload in order to save energy and keep cool.
    # Thus since we're getting the values at disparate points in time,
    # we cannot necessarily do a pure comparison.
    # So, let's get the absolute value of the difference and ensure
    # that it's within 20 percent of each value to give us enough of
    # a "fudge" factor to declare "close enough". Don't return a failure
    # just print a debug message and move on.
    diffval = abs(int(cpu_frequency_nodeinfo) - int(cpu_frequency_os))
    if (float(diffval) / float(cpu_frequency_nodeinfo) > 0.20 or
            float(diffval) / float(cpu_frequency_os) > 0.20):
        logging.debug("Virsh nodeinfo output didn't match CPU "
                      "frequency within 20 percent")

    # Get CPU topology from virsh capabilities xml
    cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
    logging.debug("Cpu topology in virsh capabilities output: %s",
                  cpu_topology)

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
    # total sockets in the system, so get total sockets in one node and
    # check with it
    node_info = utils_misc.NumaInfo()
    node_online_list = node_info.get_online_nodes()
    cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
    # sort -u: plain uniq only collapses adjacent duplicates and would
    # over-count sockets when package ids are interleaved
    cmd += "/cpu*/topology/physical_package_id | sort -u | wc -l"
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    total_sockets_in_node = int(cmd_result.stdout_text.strip())
    if total_sockets_in_node != cpu_sockets_nodeinfo:
        test.fail("Virsh nodeinfo output didn't match CPU "
                  "socket(s) of host OS")
    if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
        test.fail("Virsh nodeinfo output didn't match CPU "
                  "socket(s) of virsh capabilities output")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(
        nodeinfo_output, 'Core(s) per socket', 4)
    cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
    cmd_result = process.run(cmd, ignore_status=True, shell=True)
    cores_per_socket_os = cmd_result.stdout_text.strip()
    spec_numa = False
    if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
        # for spec NUMA arch, the output of nodeinfo is in a spec format
        cpus_os = utils_misc.get_cpu_info().get("CPU(s)")
        numa_cells_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'NUMA cell(s)', 3)
        if (re.match(cores_per_socket_nodeinfo, cpus_os) and
                re.match(numa_cells_nodeinfo, "1")):
            spec_numa = True
        else:
            test.fail("Virsh nodeinfo output didn't match "
                      "CPU(s) or Core(s) per socket of host OS")

    if cores_per_socket_nodeinfo != cpu_topology['cores']:
        test.fail("Virsh nodeinfo output didn't match Core(s) "
                  "per socket of virsh capabilities output")

    # Check Thread(s) per core
    threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Thread(s) per core', 4)
    if not spec_numa:
        if threads_per_core_nodeinfo != cpu_topology['threads']:
            test.fail("Virsh nodeinfo output didn't match "
                      "Thread(s) per core of virsh "
                      "capabilities output")
    else:
        # In the special NUMA format a single thread per core is expected
        if threads_per_core_nodeinfo != "1":
            test.fail("Virsh nodeinfo output didn't match "
                      "Thread(s) per core of virsh "
                      "capabilities output")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = 0
    if libvirt_version.version_compare(2, 0, 0):
        # Sum MemTotal over all online NUMA nodes
        for i in node_online_list:
            node_memory = node_info.read_from_node_meminfo(i, 'MemTotal')
            memory_size_os += int(node_memory)
    else:
        memory_size_os = utils_memory.memtotal()
    logging.debug('The host total memory from nodes is %s', memory_size_os)

    if memory_size_nodeinfo != memory_size_os:
        test.fail("Virsh nodeinfo output didn't match "
                  "Memory size")
def run(test, params, env):
    """
    Tests KSM (Kernel Shared Memory) capability by allocating and filling
    KVM guests memory using various values. KVM sets the memory as
    MADV_MERGEABLE so all VM's memory can be merged. The workers in
    guest writes to tmpfs filesystem thus allocations are not limited
    by process max memory, only by VM's memory. Two test modes are supported -
    serial and parallel.

    Serial mode - uses multiple VMs, allocates memory per guest and always
                  verifies the correct number of shared memory.
                  0) Prints out the setup and initialize guest(s)
                  1) Fills guest with the same number (S1)
                  2) Random fill on the first guest
                  3) Random fill of the remaining VMs one by one until the
                     memory is completely filled (KVM stops machines which
                     asks for additional memory until there is available
                     memory) (S2, shouldn't finish)
                  4) Destroy all VMs but the last one
                  5) Checks the last VMs memory for corruption
    Parallel mode - uses one VM with multiple allocator workers. Executes
                   scenarios in parallel to put more stress on the KVM.
                   0) Prints out the setup and initialize guest(s)
                   1) Fills memory with the same number (S1)
                   2) Fills memory with random numbers (S2)
                   3) Verifies all pages
                   4) Fills memory with the same number (S1)
                   5) Changes the last 96B (S3)

    Scenarios:
    S1) Fill all vms with the same value (all pages should be merged into 1)
    S2) Random fill (all pages should be splitted)
    S3) Fill last 96B (change only last 96B of each page; some pages will be
                      merged; there was a bug with data corruption)
    Every worker has unique random key so we are able to verify the filled
    values.

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.

    :param cfg: ksm_swap - use swap?
    :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only)
    :param cfg: ksm_parallel_ratio - number of workers (parallel mode only)
    :param cfg: ksm_host_reserve - override memory reserve on host in MB
    :param cfg: ksm_guest_reserve - override memory reserve on guests in MB
    :param cfg: ksm_mode - test mode {serial, parallel}
    :param cfg: ksm_perf_ratio - performance ratio, increase it when your
                                 machine is too slow
    """
    def _start_allocator(vm, session, timeout):
        """
        Execute ksm_overcommit_guest.py on guest, wait until it's initialized.

        :param vm: VM object.
        :param session: Remote session to a VM object.
        :param timeout: Timeout that will be used to verify if
                ksm_overcommit_guest.py started properly.
        """
        logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name)
        session.sendline("python /tmp/ksm_overcommit_guest.py")
        try:
            session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" %
                     (vm.name, str(details)))
            test.fail(e_msg)

    def _execute_allocator(command, vm, session, timeout):
        """
        Execute a given command on ksm_overcommit_guest.py main loop,
        indicating the vm the command was executed on.

        :param command: Command that will be executed.
        :param vm: VM object.
        :param session: Remote session to VM object.
        :param timeout: Timeout used to verify expected output.

        :return: Tuple (match index, data)
        """
        logging.debug("Executing '%s' on ksm_overcommit_guest.py loop, "
                      "vm: %s, timeout: %s", command, vm.name, timeout)
        session.sendline(command)
        try:
            (match, data) = session.read_until_last_line_matches(
                ["PASS:", "FAIL:"], timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Failed to execute command '%s' on "
                     "ksm_overcommit_guest.py, vm '%s': %s" %
                     (command, vm.name, str(details)))
            test.fail(e_msg)
        return (match, data)

    def get_ksmstat():
        """
        Return sharing memory by ksm in MB

        :return: memory in MB
        """
        with open('/sys/kernel/mm/ksm/pages_sharing') as fpages:
            ksm_pages = int(fpages.read())
        # The conversion uses a fixed 4096-byte page size
        return ((ksm_pages * 4096) / 1e6)

    def _associate_vm_pid(name):
        """
        Replace the pidfile path stored in params['pid_<name>'] with the PID
        read from that file; fail the test when it cannot be read.

        :param name: VM name used to build the params key.
        """
        try:
            with open(params.get('pid_' + name), 'r') as pid_file:
                params['pid_' + name] = int(pid_file.readline())
        except Exception:
            test.fail("Could not get PID of %s" % (name))

    def initialize_guests():
        """
        Initialize guests (fill their memories with specified patterns).
        """
        logging.info("Phase 1: filling guest memory pages")
        for vm, session in zip(lvms, lsessions):
            logging.debug("Turning off swap on vm %s", vm.name)
            session.cmd("swapoff -a", timeout=300)

            # Start the allocator
            _start_allocator(vm, session, 60 * perf_ratio)

        # Execute allocator on guests
        for i in range(0, vmsc):
            vm = lvms[i]

            cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * 2 * perf_ratio)

            # Let ksm_overcommit_guest.py do its job
            # (until shared mem reaches expected value)
            shm = 0
            j = 0
            logging.debug("Target shared meminfo for guest %s: %s", vm.name,
                          ksm_size)
            # With the sysfs interface the counter is host-wide, so the
            # target grows with every guest filled so far
            while ((new_ksm and (shm < (ksm_size * (i + 1)))) or
                   (not new_ksm and (shm < (ksm_size)))):
                if j > 64:
                    logging.debug(utils_test.get_memory_info(lvms))
                    test.error("SHM didn't merge the memory until "
                               "the DL on guest: %s" % vm.name)
                pause = ksm_size / 200 * perf_ratio
                logging.debug("Waiting %ds before proceeding...", pause)
                time.sleep(pause)
                if (new_ksm):
                    shm = get_ksmstat()
                else:
                    shm = vm.get_shared_meminfo()
                logging.debug("Shared meminfo for guest %s after "
                              "iteration %s: %s", vm.name, j, shm)
                j += 1

        # Keep some reserve
        pause = ksm_size / 200 * perf_ratio
        logging.debug("Waiting %ds before proceeding...", pause)
        time.sleep(pause)

        logging.debug(utils_test.get_memory_info(lvms))
        logging.info("Phase 1: PASS")

    def separate_first_guest():
        """
        Separate memory of the first guest by generating special random series
        """
        logging.info("Phase 2: Split the pages on the first guest")

        cmd = "mem.static_random_fill()"
        data = _execute_allocator(cmd, lvms[0], lsessions[0],
                                  fill_base_timeout * 2 * perf_ratio)[1]

        r_msg = data.splitlines()[-1]
        logging.debug("Return message of static_random_fill: %s", r_msg)
        # Fifth word of the reply is used as the elapsed time in ms
        out = int(r_msg.split()[4])
        logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size,
                      out, (ksm_size * 1000 / out))
        logging.debug(utils_test.get_memory_info(lvms))
        logging.debug("Phase 2: PASS")

    def split_guest():
        """
        Sequential split of pages on guests up to memory limit
        """
        logging.info("Phase 3a: Sequential split of pages on guests up to "
                     "memory limit")
        last_vm = 0
        session = None
        vm = None
        for i in range(1, vmsc):
            # Check VMs
            for j in range(0, vmsc):
                # is_alive must be called; the bare bound method is always
                # truthy and would hide a dead guest
                if not lvms[j].is_alive():
                    e_msg = ("VM %d died while executing static_random_fill on"
                             " VM %d in allocator loop" % (j, i))
                    test.fail(e_msg)
            vm = lvms[i]
            session = lsessions[i]
            cmd = "mem.static_random_fill()"
            logging.debug("Executing %s on ksm_overcommit_guest.py loop, "
                          "vm: %s", cmd, vm.name)
            session.sendline(cmd)

            out = ""
            try:
                logging.debug("Watching host mem while filling vm %s memory",
                              vm.name)
                while (not out.startswith("PASS") and
                       not out.startswith("FAIL")):
                    if not vm.is_alive():
                        e_msg = ("VM %d died while executing "
                                 "static_random_fill on allocator loop" % i)
                        test.fail(e_msg)
                    free_mem = int(utils_memory.read_from_meminfo("MemFree"))
                    if (ksm_swap):
                        free_mem = (free_mem +
                                    int(utils_memory.read_from_meminfo(
                                        "SwapFree")))
                    logging.debug("Free memory on host: %d", free_mem)

                    # We need to keep some memory for python to run.
                    if (free_mem < 64000) or (ksm_swap and
                                              free_mem < (450000 * perf_ratio)):
                        vm.pause()
                        for j in range(0, i):
                            lvms[j].destroy(gracefully=False)
                        time.sleep(20)
                        vm.resume()
                        logging.debug("Only %s free memory, killing %d guests",
                                      free_mem, (i - 1))
                        last_vm = i
                    out = session.read_nonblocking(0.1, 1)
                    time.sleep(2)
            except OSError:
                logging.debug("Only %s host free memory, killing %d guests",
                              free_mem, (i - 1))
                logging.debug("Stopping %s", vm.name)
                vm.pause()
                for j in range(0, i):
                    logging.debug("Destroying %s", lvms[j].name)
                    lvms[j].destroy(gracefully=False)
                time.sleep(20)
                vm.resume()
                last_vm = i

            if last_vm != 0:
                break
            logging.debug("Memory filled for guest %s", vm.name)

        logging.info("Phase 3a: PASS")

        logging.info("Phase 3b: Verify memory of the max stressed VM")
        for i in range(last_vm + 1, vmsc):
            lsessions[i].close()
            if i == (vmsc - 1):
                logging.debug(utils_test.get_memory_info([lvms[i]]))
            logging.debug("Destroying guest %s", lvms[i].name)
            lvms[i].destroy(gracefully=False)

        # Verify last machine with randomly generated memory
        cmd = "mem.static_random_verify()"
        _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm],
                           (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([lvms[last_vm]]))

        lsessions[last_vm].cmd_output("die()", 20)
        lvms[last_vm].destroy(gracefully=False)
        logging.info("Phase 3b: PASS")

    def split_parallel():
        """
        Parallel page spliting
        """
        logging.info("Phase 1: parallel page spliting")
        # We have to wait until allocator is finished (it waits 5 seconds to
        # clean the socket

        session = lsessions[0]
        vm = lvms[0]
        # One extra login per additional allocator worker
        for i in range(1, max_alloc):
            lsessions.append(vm.wait_for_login(timeout=360))

        session.cmd("swapoff -a", timeout=300)

        for i in range(0, max_alloc):
            # Start the allocator
            _start_allocator(vm, lsessions[i], 60 * perf_ratio)

        logging.info("Phase 1: PASS")

        logging.info("Phase 2a: Simultaneous merging")
        logging.debug("Memory used by allocator on guests = %dMB",
                      (ksm_size / max_alloc))

        for i in range(0, max_alloc):
            cmd = "mem = MemFill(%d, %s, %s)" % ((ksm_size / max_alloc),
                                                 skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            cmd = "mem.value_fill(%d)" % (skeys[0])
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * perf_ratio)

        # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3)
        shm = 0
        i = 0
        logging.debug("Target shared memory size: %s", ksm_size)
        while (shm < ksm_size):
            if i > 64:
                logging.debug(utils_test.get_memory_info(lvms))
                test.error("SHM didn't merge the memory until DL")
            pause = ksm_size / 200 * perf_ratio
            logging.debug("Waiting %ds before proceed...", pause)
            time.sleep(pause)
            if (new_ksm):
                shm = get_ksmstat()
            else:
                shm = vm.get_shared_meminfo()
            logging.debug("Shared meminfo after attempt %s: %s", i, shm)
            i += 1

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2a: PASS")

        logging.info("Phase 2b: Simultaneous spliting")
        # Actual splitting
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill()"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            logging.debug(data)
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          (ksm_size / max_alloc), out,
                          (ksm_size * 1000 / out / max_alloc))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2b: PASS")

        logging.info("Phase 2c: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify()"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2c: PASS")

        logging.info("Phase 2d: Simultaneous merging")
        # Actual splitting
        for i in range(0, max_alloc):
            cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * 2 * perf_ratio)
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2d: PASS")

        logging.info("Phase 2e: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.value_check(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2e: PASS")

        logging.info("Phase 2f: Simultaneous spliting last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill(96)"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          ksm_size / max_alloc, out,
                          (ksm_size * 1000 / out / max_alloc))

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2f: PASS")

        logging.info("Phase 2g: Simultaneous verification last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify(96)"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2g: PASS")

        logging.debug("Cleaning up...")
        for i in range(0, max_alloc):
            lsessions[i].cmd_output("die()", 20)
        session.close()
        vm.destroy(gracefully=False)

    # Main test code
    logging.info("Starting phase 0: Initialization")
    if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0:
        logging.info("Killing ksmtuned...")
        process.run("killall ksmtuned")
    new_ksm = False
    if (os.path.exists("/sys/kernel/mm/ksm/run")):
        # sysfs KSM interface: configure the scanner and switch it on
        process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs", shell=True)
        process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan", shell=True)
        process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True)

        e_up = "/sys/kernel/mm/transparent_hugepage/enabled"
        e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled"
        if os.path.exists(e_up):
            process.run("echo 'never' > %s" % e_up, shell=True)
        if os.path.exists(e_rh):
            process.run("echo 'never' > %s" % e_rh, shell=True)
        new_ksm = True
    else:
        try:
            process.run("modprobe ksm")
            process.run("ksmctl start 5000 100")
        except process.CmdError as details:
            test.fail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        try:
            available = utils_memory.read_from_meminfo("MemAvailable")
        except process.CmdError:    # ancient kernels
            utils_memory.drop_caches()
            available = utils_memory.read_from_meminfo("MemFree")
        # default host_reserve = UsedMem + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
    else:
        _guest_reserve = False

    max_vms = int(params.get("max_vms", 2))
    overcommit = float(params.get("ksm_overcommit_ratio", 2.0))
    max_alloc = int(params.get("ksm_parallel_ratio", 1))

    # vmsc: count of all used VMs
    vmsc = int(overcommit) + 1
    vmsc = max(vmsc, max_vms)

    if (params['ksm_mode'] == "serial"):
        max_alloc = vmsc
        if _host_reserve:
            # First round of additional guest reserves
            host_reserve += vmsc * 64
            # From here on _host_reserve holds the VM count already
            # accounted for in host_reserve
            _host_reserve = vmsc

    host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve)

    ksm_swap = False
    if params.get("ksm_swap") == "yes":
        ksm_swap = True

    # Performance ratio
    perf_ratio = params.get("ksm_perf_ratio")
    if perf_ratio:
        perf_ratio = float(perf_ratio)
    else:
        perf_ratio = 1

    if (params['ksm_mode'] == "parallel"):
        vmsc = 1
        overcommit = 1
        mem = host_mem
        # 32bit system adjustment
        if "64" not in params.get("vm_arch_name"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if (host_mem > 3100):
                mem = 3100

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                mem = 3100 - 64

    else:
        # mem: Memory of the guest systems. Maximum must be less than
        # host's physical ram
        mem = int(overcommit * host_mem / vmsc)

        # 32bit system adjustment
        if not params['image_name'].endswith("64"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if mem - guest_reserve - 1 > 3100:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 + guest_reserve)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 - 64.0)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

    # 0.055 represents OS + TMPFS additional reserve per guest ram MB
    if _guest_reserve:
        guest_reserve += math.ceil(mem * 0.055)

    swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024

    logging.debug("Overcommit = %f", overcommit)
    logging.debug("True overcommit = %f ", (float(vmsc * mem) /
                                            float(host_mem)))
    logging.debug("Host memory = %dM", host_mem)
    logging.debug("Guest memory = %dM", mem)
    logging.debug("Using swap = %s", ksm_swap)
    logging.debug("Swap = %dM", swap)
    logging.debug("max_vms = %d", max_vms)
    logging.debug("Count of all used VMs = %d", vmsc)
    logging.debug("Performance_ratio = %f", perf_ratio)

    # Generate unique keys for random series
    skeys = []
    dkeys = []
    for i in range(0, max(vmsc, max_alloc)):
        key = random.randrange(0, 255)
        while key in skeys:
            key = random.randrange(0, 255)
        skeys.append(key)

        key = random.randrange(0, 999)
        while key in dkeys:
            key = random.randrange(0, 999)
        dkeys.append(key)

    logging.debug("skeys: %s", skeys)
    logging.debug("dkeys: %s", dkeys)

    lvms = []
    lsessions = []

    # As we don't know the number and memory amount of VMs in advance,
    # we need to specify and create them here
    vm_name = params["main_vm"]
    params['mem'] = mem
    params['vms'] = vm_name
    # Associate pidfile name
    params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                 'pid')
    if not params.get('extra_params'):
        params['extra_params'] = ' '
    params['extra_params_' + vm_name] = params.get('extra_params')
    params['extra_params_' + vm_name] += (" -pidfile %s" %
                                          (params.get('pid_' + vm_name)))
    params['extra_params'] = params.get('extra_params_' + vm_name)

    # ksm_size: amount of memory used by allocator
    ksm_size = mem - guest_reserve
    logging.debug("Memory used by allocator on guests = %dM", ksm_size)
    fill_base_timeout = ksm_size / 10

    # Creating the first guest
    env_process.preprocess_vm(test, params, env, vm_name)
    lvms.append(env.get_vm(vm_name))
    if not lvms[0]:
        test.error("VM object not found in environment")
    if not lvms[0].is_alive():
        test.error("VM seems to be dead; Test requires a living VM")

    logging.debug("Booting first guest %s", lvms[0].name)

    lsessions.append(lvms[0].wait_for_login(timeout=360))
    # Associate vm PID
    _associate_vm_pid(vm_name)

    # Creating other guest systems
    for i in range(1, vmsc):
        vm_name = "vm" + str(i + 1)
        params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                     'pid')
        params['extra_params_' + vm_name] = params.get('extra_params')
        params['extra_params_' + vm_name] += (" -pidfile %s" %
                                              (params.get('pid_' + vm_name)))
        params['extra_params'] = params.get('extra_params_' + vm_name)

        # Last VM is later used to run more allocators simultaneously
        lvms.append(lvms[0].clone(vm_name, params))
        env.register_vm(vm_name, lvms[i])
        params['vms'] += " " + vm_name

        logging.debug("Booting guest %s", lvms[i].name)
        lvms[i].create()
        if not lvms[i].is_alive():
            test.error("VM %s seems to be dead; Test requires a "
                       "living VM" % lvms[i].name)

        lsessions.append(lvms[i].wait_for_login(timeout=360))
        _associate_vm_pid(vm_name)

    # Let guests rest a little bit :-)
    pause = vmsc * 2 * perf_ratio
    logging.debug("Waiting %ds before proceed", pause)
    time.sleep(pause)
    logging.debug(utils_test.get_memory_info(lvms))

    # Copy ksm_overcommit_guest.py into guests
    vksmd_src = os.path.join(data_dir.get_shared_dir(),
                             "scripts", "ksm_overcommit_guest.py")
    dst_dir = "/tmp"
    for vm in lvms:
        vm.copy_files_to(vksmd_src, dst_dir)
    logging.info("Phase 0: PASS")

    if params['ksm_mode'] == "parallel":
        logging.info("Starting KSM test parallel mode")
        split_parallel()
        logging.info("KSM test parallel mode: PASS")
    elif params['ksm_mode'] == "serial":
        logging.info("Starting KSM test serial mode")
        initialize_guests()
        separate_first_guest()
        split_guest()
        logging.info("KSM test serial mode: PASS")
def run(test, params, env):
    """
    Tests KSM (Kernel Shared Memory) capability by allocating and filling
    KVM guests memory using various values. KVM sets the memory as
    MADV_MERGEABLE so all VM's memory can be merged. The workers in
    guest write to tmpfs filesystem thus allocations are not limited
    by process max memory, only by VM's memory. Two test modes are supported -
    serial and parallel.

    Serial mode - uses multiple VMs, allocates memory per guest and always
                  verifies the correct number of shared memory.
    0) Prints out the setup and initialize guest(s)
    1) Fills guest with the same number (S1)
    2) Random fill on the first guest
    3) Random fill of the remaining VMs one by one until the memory is
       completely filled (KVM stops machines which asks for additional
       memory until there is available memory) (S2, shouldn't finish)
    4) Destroy all VMs but the last one
    5) Checks the last VMs memory for corruption

    Parallel mode - uses one VM with multiple allocator workers. Executes
                    scenarios in parallel to put more stress on the KVM.
    0) Prints out the setup and initialize guest(s)
    1) Fills memory with the same number (S1)
    2) Fills memory with random numbers (S2)
    3) Verifies all pages
    4) Fills memory with the same number (S1)
    5) Changes the last 96B (S3)

    Scenarios:
    S1) Fill all vms with the same value (all pages should be merged into 1)
    S2) Random fill (all pages should be split)
    S3) Fill last 96B (change only last 96B of each page; some pages will be
        merged; there was a bug with data corruption)

    Every worker has unique random key so we are able to verify the filled
    values.

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.

    :param cfg: ksm_swap - use swap?
    :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only)
    :param cfg: ksm_parallel_ratio - number of workers (parallel mode only)
    :param cfg: ksm_host_reserve - override memory reserve on host in MB
    :param cfg: ksm_guest_reserve - override memory reserve on guests in MB
    :param cfg: ksm_mode - test mode {serial, parallel}
    :param cfg: ksm_perf_ratio - performance ratio, increase it when your
                                 machine is too slow
    """
    def _start_allocator(vm, session, timeout):
        """
        Execute ksm_overcommit_guest.py on guest, wait until it's initialized.

        :param vm: VM object.
        :param session: Remote session to a VM object.
        :param timeout: Timeout that will be used to verify if
                ksm_overcommit_guest.py started properly.
        """
        logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name)
        session.sendline("python /tmp/ksm_overcommit_guest.py")
        try:
            session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" %
                     (vm.name, str(details)))
            test.fail(e_msg)

    def _execute_allocator(command, vm, session, timeout):
        """
        Execute a given command on ksm_overcommit_guest.py main loop,
        indicating the vm the command was executed on.

        :param command: Command that will be executed.
        :param vm: VM object.
        :param session: Remote session to VM object.
        :param timeout: Timeout used to verify expected output.

        :return: Tuple (match index, data)
        """
        logging.debug("Executing '%s' on ksm_overcommit_guest.py loop, "
                      "vm: %s, timeout: %s", command, vm.name, timeout)
        session.sendline(command)
        try:
            (match, data) = session.read_until_last_line_matches(
                ["PASS:", "FAIL:"],
                timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Failed to execute command '%s' on "
                     "ksm_overcommit_guest.py, vm '%s': %s" %
                     (command, vm.name, str(details)))
            test.fail(e_msg)
        return (match, data)

    def _record_vm_pid(name):
        """
        Replace params['pid_<name>'] (the pidfile path) with the PID stored
        in that file; fail the test when it can't be read or parsed.

        :param name: Name of the VM whose pidfile should be read.
        """
        try:
            # Context manager guarantees the pidfile handle is closed
            with open(params.get('pid_' + name), 'r') as pidfile:
                params['pid_' + name] = int(pidfile.readline())
        except Exception:
            test.fail("Could not get PID of %s" % (name))

    def get_ksmstat():
        """
        Return sharing memory by ksm in MB

        :return: memory in MB
        """
        with open('/sys/kernel/mm/ksm/pages_sharing') as fpages:
            ksm_pages = int(fpages.read())
        # 4096 is the page size in bytes the original computation uses
        return ((ksm_pages * 4096) / 1e6)

    def initialize_guests():
        """
        Initialize guests (fill their memories with specified patterns).
        """
        logging.info("Phase 1: filling guest memory pages")
        for vm, session in zip(lvms, lsessions):
            logging.debug("Turning off swap on vm %s", vm.name)
            session.cmd("swapoff -a", timeout=300)

            # Start the allocator
            _start_allocator(vm, session, 60 * perf_ratio)

        # Execute allocator on guests
        for i in range(0, vmsc):
            vm = lvms[i]

            cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            # Every guest is filled with the SAME value so pages can merge
            cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * 2 * perf_ratio)

            # Let ksm_overcommit_guest.py do its job
            # (until shared mem reaches expected value)
            shm = 0
            j = 0
            logging.debug("Target shared meminfo for guest %s: %s", vm.name,
                          ksm_size)
            # With new_ksm the host-wide counter is read, so the target
            # grows with every filled guest; otherwise it is per guest.
            while ((new_ksm and (shm < (ksm_size * (i + 1)))) or
                   (not new_ksm and (shm < (ksm_size)))):
                if j > 64:
                    logging.debug(utils_test.get_memory_info(lvms))
                    test.error("SHM didn't merge the memory until "
                               "the DL on guest: %s" % vm.name)
                pause = ksm_size / 200 * perf_ratio
                logging.debug("Waiting %ds before proceeding...", pause)
                time.sleep(pause)
                if (new_ksm):
                    shm = get_ksmstat()
                else:
                    shm = vm.get_shared_meminfo()
                logging.debug("Shared meminfo for guest %s after "
                              "iteration %s: %s", vm.name, j, shm)
                j += 1

        # Keep some reserve
        pause = ksm_size / 200 * perf_ratio
        logging.debug("Waiting %ds before proceeding...", pause)
        time.sleep(pause)

        logging.debug(utils_test.get_memory_info(lvms))
        logging.info("Phase 1: PASS")

    def separate_first_guest():
        """
        Separate memory of the first guest by generating special random series
        """
        logging.info("Phase 2: Split the pages on the first guest")

        cmd = "mem.static_random_fill()"
        data = _execute_allocator(cmd, lvms[0], lsessions[0],
                                  fill_base_timeout * 2 * perf_ratio)[1]

        r_msg = data.splitlines()[-1]
        logging.debug("Return message of static_random_fill: %s", r_msg)
        # Fifth whitespace-separated field of the reply is used as the
        # elapsed time in ms for the performance log below
        out = int(r_msg.split()[4])
        logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size,
                      out, (ksm_size * 1000 / out))
        logging.debug(utils_test.get_memory_info(lvms))
        logging.debug("Phase 2: PASS")

    def split_guest():
        """
        Sequential split of pages on guests up to memory limit
        """
        logging.info("Phase 3a: Sequential split of pages on guests up to "
                     "memory limit")
        last_vm = 0
        session = None
        vm = None
        # Pre-bound so the OSError handler below can always log it
        free_mem = None
        for i in range(1, vmsc):
            # Check VMs
            for j in range(0, vmsc):
                # is_alive must be CALLED; the bare bound method is always
                # truthy, which silently disabled this check.
                if not lvms[j].is_alive():
                    e_msg = ("VM %d died while executing static_random_fill on"
                             " VM %d in allocator loop" % (j, i))
                    test.fail(e_msg)
            vm = lvms[i]
            session = lsessions[i]
            cmd = "mem.static_random_fill()"
            logging.debug("Executing %s on ksm_overcommit_guest.py loop, "
                          "vm: %s", cmd, vm.name)
            session.sendline(cmd)

            out = ""
            try:
                logging.debug("Watching host mem while filling vm %s memory",
                              vm.name)
                while (not out.startswith("PASS") and
                       not out.startswith("FAIL")):
                    if not vm.is_alive():
                        e_msg = ("VM %d died while executing "
                                 "static_random_fill on allocator loop" % i)
                        test.fail(e_msg)
                    free_mem = int(utils_memory.read_from_meminfo("MemFree"))
                    if (ksm_swap):
                        free_mem = (free_mem +
                                    int(utils_memory.read_from_meminfo(
                                        "SwapFree")))
                    logging.debug("Free memory on host: %d", free_mem)

                    # We need to keep some memory for python to run.
                    if ((free_mem < 64000) or
                            (ksm_swap and free_mem < (450000 * perf_ratio))):
                        vm.pause()
                        for j in range(0, i):
                            lvms[j].destroy(gracefully=False)
                        time.sleep(20)
                        vm.resume()
                        logging.debug("Only %s free memory, killing %d guests",
                                      free_mem, (i - 1))
                        last_vm = i
                    out = session.read_nonblocking(0.1, 1)
                    time.sleep(2)
            except OSError:
                logging.debug("Only %s host free memory, killing %d guests",
                              free_mem, (i - 1))
                logging.debug("Stopping %s", vm.name)
                vm.pause()
                for j in range(0, i):
                    logging.debug("Destroying %s", lvms[j].name)
                    lvms[j].destroy(gracefully=False)
                time.sleep(20)
                vm.resume()
                last_vm = i

            # Once earlier guests had to be destroyed, stop filling more
            if last_vm != 0:
                break
            logging.debug("Memory filled for guest %s", vm.name)

        logging.info("Phase 3a: PASS")

        logging.info("Phase 3b: Verify memory of the max stressed VM")
        for i in range(last_vm + 1, vmsc):
            lsessions[i].close()
            if i == (vmsc - 1):
                logging.debug(utils_test.get_memory_info([lvms[i]]))
            logging.debug("Destroying guest %s", lvms[i].name)
            lvms[i].destroy(gracefully=False)

        # Verify last machine with randomly generated memory
        cmd = "mem.static_random_verify()"
        _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm],
                           (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([lvms[last_vm]]))

        lsessions[last_vm].cmd_output("die()", 20)
        lvms[last_vm].destroy(gracefully=False)
        logging.info("Phase 3b: PASS")

    def split_parallel():
        """
        Parallel page spliting
        """
        logging.info("Phase 1: parallel page spliting")
        # We have to wait until allocator is finished (it waits 5 seconds to
        # clean the socket

        session = lsessions[0]
        vm = lvms[0]
        # One extra login session per additional allocator worker
        for i in range(1, max_alloc):
            lsessions.append(vm.wait_for_login(timeout=360))

        session.cmd("swapoff -a", timeout=300)

        for i in range(0, max_alloc):
            # Start the allocator
            _start_allocator(vm, lsessions[i], 60 * perf_ratio)

        logging.info("Phase 1: PASS")

        logging.info("Phase 2a: Simultaneous merging")
        logging.debug("Memory used by allocator on guests = %dMB",
                      (ksm_size / max_alloc))

        for i in range(0, max_alloc):
            cmd = "mem = MemFill(%d, %s, %s)" % ((ksm_size / max_alloc),
                                                 skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            cmd = "mem.value_fill(%d)" % (skeys[0])
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * perf_ratio)

        # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3)
        shm = 0
        i = 0
        logging.debug("Target shared memory size: %s", ksm_size)
        while (shm < ksm_size):
            if i > 64:
                logging.debug(utils_test.get_memory_info(lvms))
                test.error("SHM didn't merge the memory until DL")
            pause = ksm_size / 200 * perf_ratio
            logging.debug("Waiting %ds before proceed...", pause)
            time.sleep(pause)
            if (new_ksm):
                shm = get_ksmstat()
            else:
                shm = vm.get_shared_meminfo()
            logging.debug("Shared meminfo after attempt %s: %s", i, shm)
            i += 1

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2a: PASS")

        logging.info("Phase 2b: Simultaneous spliting")
        # Actual splitting
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill()"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            logging.debug(data)
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          (ksm_size / max_alloc), out,
                          (ksm_size * 1000 / out / max_alloc))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2b: PASS")

        logging.info("Phase 2c: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify()"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2c: PASS")

        logging.info("Phase 2d: Simultaneous merging")
        # Fill every worker with the same value again so pages re-merge
        for i in range(0, max_alloc):
            cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * 2 * perf_ratio)
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2d: PASS")

        logging.info("Phase 2e: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.value_check(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2e: PASS")

        logging.info("Phase 2f: Simultaneous spliting last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill(96)"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          ksm_size / max_alloc, out,
                          (ksm_size * 1000 / out / max_alloc))

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2f: PASS")

        logging.info("Phase 2g: Simultaneous verification last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify(96)"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2g: PASS")

        logging.debug("Cleaning up...")
        for i in range(0, max_alloc):
            lsessions[i].cmd_output("die()", 20)
        session.close()
        vm.destroy(gracefully=False)

    # Main test code
    logging.info("Starting phase 0: Initialization")
    # ksmtuned would retune KSM behind the test's back, so stop it
    if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0:
        logging.info("Killing ksmtuned...")
        process.run("killall ksmtuned")
    new_ksm = False
    if (os.path.exists("/sys/kernel/mm/ksm/run")):
        process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs",
                    shell=True)
        process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan",
                    shell=True)
        process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True)

        # Disable transparent hugepages (upstream and RH-specific paths)
        e_up = "/sys/kernel/mm/transparent_hugepage/enabled"
        e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled"
        if os.path.exists(e_up):
            process.run("echo 'never' > %s" % e_up, shell=True)
        if os.path.exists(e_rh):
            process.run("echo 'never' > %s" % e_rh, shell=True)
        new_ksm = True
    else:
        try:
            process.run("modprobe ksm")
            process.run("ksmctl start 5000 100")
        except process.CmdError as details:
            test.fail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        try:
            available = utils_memory.read_from_meminfo("MemAvailable")
        except process.CmdError:    # ancient kernels
            utils_memory.drop_caches()
            available = utils_memory.read_from_meminfo("MemFree")
        # default host_reserve = UsedMem + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
    else:
        _guest_reserve = False

    max_vms = int(params.get("max_vms", 2))
    overcommit = float(params.get("ksm_overcommit_ratio", 2.0))
    max_alloc = int(params.get("ksm_parallel_ratio", 1))

    # vmsc: count of all used VMs
    vmsc = int(overcommit) + 1
    vmsc = max(vmsc, max_vms)

    if (params['ksm_mode'] == "serial"):
        max_alloc = vmsc
        if _host_reserve:
            # First round of additional guest reserves
            host_reserve += vmsc * 64
            # From here on _host_reserve holds the VM count already
            # accounted for in host_reserve (still truthy).
            _host_reserve = vmsc

    host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve)

    ksm_swap = False
    if params.get("ksm_swap") == "yes":
        ksm_swap = True

    # Performance ratio
    perf_ratio = params.get("ksm_perf_ratio")
    if perf_ratio:
        perf_ratio = float(perf_ratio)
    else:
        perf_ratio = 1

    if (params['ksm_mode'] == "parallel"):
        vmsc = 1
        overcommit = 1
        mem = host_mem
        # 32bit system adjustment
        if "64" not in params.get("vm_arch_name"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if (host_mem > 3100):
                mem = 3100

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                mem = 3100 - 64

    else:
        # mem: Memory of the guest systems. Maximum must be less than
        # host's physical ram
        mem = int(overcommit * host_mem / vmsc)

        # 32bit system adjustment
        if not params['image_name'].endswith("64"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if mem - guest_reserve - 1 > 3100:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 + guest_reserve)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 - 64.0)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

    # 0.055 represents OS + TMPFS additional reserve per guest ram MB
    if _guest_reserve:
        guest_reserve += math.ceil(mem * 0.055)

    swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024

    logging.debug("Overcommit = %f", overcommit)
    logging.debug("True overcommit = %f ", (float(vmsc * mem) /
                                            float(host_mem)))
    logging.debug("Host memory = %dM", host_mem)
    logging.debug("Guest memory = %dM", mem)
    logging.debug("Using swap = %s", ksm_swap)
    logging.debug("Swap = %dM", swap)
    logging.debug("max_vms = %d", max_vms)
    logging.debug("Count of all used VMs = %d", vmsc)
    logging.debug("Performance_ratio = %f", perf_ratio)

    # Generate unique keys for random series
    skeys = []
    dkeys = []
    for i in range(0, max(vmsc, max_alloc)):
        key = random.randrange(0, 255)
        while key in skeys:
            key = random.randrange(0, 255)
        skeys.append(key)

        key = random.randrange(0, 999)
        while key in dkeys:
            key = random.randrange(0, 999)
        dkeys.append(key)

    logging.debug("skeys: %s", skeys)
    logging.debug("dkeys: %s", dkeys)

    lvms = []
    lsessions = []

    # As we don't know the number and memory amount of VMs in advance,
    # we need to specify and create them here
    vm_name = params["main_vm"]
    params['mem'] = mem
    params['vms'] = vm_name
    # Associate pidfile name
    params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                 'pid')
    if not params.get('extra_params'):
        params['extra_params'] = ' '
    params['extra_params_' + vm_name] = params.get('extra_params')
    params['extra_params_' + vm_name] += (" -pidfile %s" %
                                          (params.get('pid_' + vm_name)))
    params['extra_params'] = params.get('extra_params_' + vm_name)

    # ksm_size: amount of memory used by allocator
    ksm_size = mem - guest_reserve
    logging.debug("Memory used by allocator on guests = %dM", ksm_size)
    fill_base_timeout = ksm_size / 10

    # Creating the first guest
    env_process.preprocess_vm(test, params, env, vm_name)
    lvms.append(env.get_vm(vm_name))
    if not lvms[0]:
        test.error("VM object not found in environment")
    if not lvms[0].is_alive():
        test.error("VM seems to be dead; Test requires a living VM")

    logging.debug("Booting first guest %s", lvms[0].name)

    lsessions.append(lvms[0].wait_for_login(timeout=360))
    # Associate vm PID
    _record_vm_pid(vm_name)

    # Creating other guest systems
    for i in range(1, vmsc):
        vm_name = "vm" + str(i + 1)
        params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                     'pid')
        params['extra_params_' + vm_name] = params.get('extra_params')
        params['extra_params_' + vm_name] += (" -pidfile %s" %
                                              (params.get('pid_' + vm_name)))
        params['extra_params'] = params.get('extra_params_' + vm_name)

        # Last VM is later used to run more allocators simultaneously
        lvms.append(lvms[0].clone(vm_name, params))
        env.register_vm(vm_name, lvms[i])
        params['vms'] += " " + vm_name

        logging.debug("Booting guest %s", lvms[i].name)
        lvms[i].create()
        if not lvms[i].is_alive():
            test.error("VM %s seems to be dead; Test requires a "
                       "living VM" % lvms[i].name)

        lsessions.append(lvms[i].wait_for_login(timeout=360))
        _record_vm_pid(vm_name)

    # Let guests rest a little bit :-)
    pause = vmsc * 2 * perf_ratio
    logging.debug("Waiting %ds before proceed", pause)
    time.sleep(pause)
    logging.debug(utils_test.get_memory_info(lvms))

    # Copy ksm_overcommit_guest.py into guests
    vksmd_src = os.path.join(data_dir.get_shared_dir(),
                             "scripts", "ksm_overcommit_guest.py")
    dst_dir = "/tmp"
    for vm in lvms:
        vm.copy_files_to(vksmd_src, dst_dir)
    logging.info("Phase 0: PASS")

    if params['ksm_mode'] == "parallel":
        logging.info("Starting KSM test parallel mode")
        split_parallel()
        logging.info("KSM test parallel mode: PASS")
    elif params['ksm_mode'] == "serial":
        logging.info("Starting KSM test serial mode")
        initialize_guests()
        separate_first_guest()
        split_guest()
        logging.info("KSM test serial mode: PASS")
def run_virsh_nodememstats(test, params, env):
    """
    Test the command virsh nodememstats

    (1) Call the virsh nodememstats command
    (2) Get the output
    (3) Check the output against /proc/meminfo
    (4) Call the virsh nodememstats command with an unexpected option
    (5) Call the virsh nodememstats command with libvirtd service stop

    :param test: test object (unused here).
    :param params: Dictionary with test parameters; reads "itr",
                   "libvirtd", "virsh_nodememstats_options",
                   "status_error" and "delta".
    :param env: Dictionary with the test environment (unused here).
    :raise error.TestFail: when the command result or the reported
                           values don't match the expectation.
    """

    # Initialize the variables
    actual = {}
    deltas = []
    name_stats = ['total', 'free', 'buffers', 'cached']
    itr = int(params.get("itr"))

    def virsh_check_nodememtats(actual_stats, expected_stats, delta):
        """
        Check the nodememstats output value with /proc/meminfo value

        :param actual_stats: values read from /proc/meminfo (MB).
        :param expected_stats: values parsed from virsh output (MB).
        :param delta: maximum tolerated deviation for every stat
                      except 'total', which must match exactly.
        :return: dict of absolute deviations keyed by stat name.
        """
        delta_stats = {}
        for name in name_stats:
            delta_stats[name] = abs(actual_stats[name] - expected_stats[name])
            if 'total' in name:
                if not delta_stats[name] == 0:
                    raise error.TestFail("Command 'virsh nodememstats' not"
                                         " succeeded as the value for %s is "
                                         "deviated by %d\nThe total memory "
                                         "value is deviating-check"
                                         % (name, delta_stats[name]))
            else:
                if delta_stats[name] > delta:
                    raise error.TestFail("Command 'virsh nodememstats' not "
                                         "succeeded as the value for %s"
                                         " is deviated by %d"
                                         % (name, delta_stats[name]))
        return delta_stats

    # Prepare libvirtd service
    # Always bind 'libvirtd' (None when the param is absent) because it is
    # consulted unconditionally further down.
    libvirtd = params.get("libvirtd")
    if libvirtd == "off":
        utils_libvirtd.libvirtd_stop()

    # Get the option for the test case
    option = params.get("virsh_nodememstats_options")

    # Get status_error option for the test case; read once up front so it
    # is defined even when no iteration runs.
    status_error = params.get("status_error")

    # From the beginning of a line, group 1 is one or
    # more word-characters, followed by zero or more
    # whitespace characters and a ':', then one or
    # more whitespace characters, followed by group 2,
    # which is one or more digit characters,
    # then one whitespace character followed by
    # a literal 'kB' or 'KiB' sequence, e.g as below
    # total  :              3809340 kB
    # total  :              3809340 KiB
    regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)\s\w+")

    try:
        # Run test case for the configured number of iterations
        # and print the final statistics
        for i in range(itr):
            output = virsh.nodememstats(option)

            # Get the status of the virsh command executed
            status = output.exit_status

            if status_error == "yes":
                if status == 0:
                    if libvirtd == "off":
                        raise error.TestFail("Command 'virsh nodememstats' "
                                             "succeeded with libvirtd service"
                                             " stopped, incorrect")
                    else:
                        raise error.TestFail("Command 'virsh nodememstats %s' "
                                             "succeeded (incorrect command)"
                                             % option)

            elif status_error == "no":
                if status == 0:
                    # Normalise the value to MBs
                    expected = {}
                    for line in output.stdout.split('\n'):
                        match_obj = regex_obj.search(line)
                        # Due to the extra space in the list
                        if match_obj is not None:
                            name = match_obj.group(1)
                            value = match_obj.group(2)
                            expected[name] = int(value) / 1024

                    # Get the actual value from /proc/meminfo and
                    # normalise to MBs
                    actual['total'] = int(utils_memory.memtotal()) / 1024
                    actual['free'] = int(utils_memory.freememtotal()) / 1024
                    actual['buffers'] = int(
                        utils_memory.read_from_meminfo('Buffers')) / 1024
                    actual['cached'] = int(
                        utils_memory.read_from_meminfo('Cached')) / 1024

                    # The tolerated delta comes from the test configuration
                    # and can be tuned based on the accuracy
                    delta = int(params.get("delta"))
                    deltas.append(
                        virsh_check_nodememtats(actual, expected, delta))
                else:
                    raise error.TestFail("Command virsh nodememstats %s not "
                                         "succeeded:\n%s" % (option, status))
    finally:
        # Recover libvirtd service start, also when a check above failed
        if libvirtd == "off":
            utils_libvirtd.libvirtd_start()

    # Print the deviated values for all iterations
    if status_error == "no":
        logging.debug("The following is the deviations from "
                      "the actual(/proc/meminfo) and expected"
                      " value(output of virsh nodememstats)")
        for i, delta_stats in enumerate(deltas):
            logging.debug("iteration %d:", i)
            for name in name_stats:
                logging.debug("%19s : %d", name, delta_stats[name])
def run(test, params, env):
    """
    Test migration under stress.

    Prepares NFS on the remote host, configures cpu/memory of the VMs
    listed in "migration_vms", runs the stress migration and finally
    cleans up the VMs, the NFS client and the env objects.

    :param test: test object (its bindir is passed to the VM objects).
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    :raise exceptions.TestSkipError: when fewer than two migration VMs
            are configured or the source/destination URI is invalid.
    """
    # Default to "" so a missing parameter leads to the skip below
    # instead of an AttributeError on None.
    vm_names = params.get("migration_vms", "").split()
    if len(vm_names) < 2:
        raise exceptions.TestSkipError("Provide enough vms for migration")

    src_uri = libvirt_vm.complete_uri(params.get("migrate_source_host",
                                                 "EXAMPLE"))
    if src_uri.count('///') or src_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The src_uri '%s' is invalid" % src_uri)

    dest_uri = libvirt_vm.complete_uri(params.get("migrate_dest_host",
                                                  "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The dest_uri '%s' is invalid" %
                                       dest_uri)

    # Params for NFS and SSH setup
    # NOTE(review): the two *_user values look masked in this copy of the
    # file; confirm the intended account name.
    params["server_ip"] = params.get("migrate_dest_host")
    params["server_user"] = "******"
    params["server_pwd"] = params.get("migrate_dest_pwd")
    params["client_ip"] = params.get("migrate_source_host")
    params["client_user"] = "******"
    params["client_pwd"] = params.get("migrate_source_pwd")
    params["nfs_client_ip"] = params.get("migrate_dest_host")
    params["nfs_server_ip"] = params.get("migrate_source_host")

    # Configure NFS client on remote host
    nfs_client = nfs.NFSClient(params)
    nfs_client.setup()

    # Migrated vms' instance
    vms = [libvirt_vm.VM(vm_name, params, test.bindir,
                         env.get("address_cache"))
           for vm_name in vm_names]

    # vms for load; an absent "load_vms" simply means no load VMs
    load_vm_names = params.get("load_vms", "").split()
    load_vms = [libvirt_vm.VM(vm_name, params, test.bindir,
                              env.get("address_cache"))
                for vm_name in load_vm_names]
    params['load_vms'] = load_vms

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_type = params.get("migration_stress_type")
    vm_bytes = params.get("stress_vm_bytes")
    stress_args = params.get("stress_args")
    migration_type = params.get("migration_type")
    start_migration_vms = "yes" == params.get("start_migration_vms", "yes")
    thread_timeout = int(params.get("thread_timeout", 120))
    remote_host = params.get("migrate_dest_host")
    username = params.get("migrate_dest_user", "root")
    password = params.get("migrate_dest_pwd")
    prompt = params.get("shell_prompt", r"[\#\$]")

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division keeps the value integral on Python 3 as well
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    if vm_bytes is not None:
        params["stress_args"] = stress_args % vm_bytes

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        vm_ipaddr = {}
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()
                vm_ipaddr[vm.name] = vm.get_address()
                # TODO: recover vm if start failed?
        # Config ssh autologin for remote host
        ssh_key.setup_ssh_key(remote_host, username, password, port=22)

        do_stress_migration(vms, src_uri, dest_uri, stress_type,
                            migration_type, params, thread_timeout)
        # Check network of vms on destination
        if start_migration_vms and migration_type != "cross":
            for vm in vms:
                utils_test.check_dest_vm_network(vm, vm_ipaddr[vm.name],
                                                 remote_host,
                                                 username, password, prompt)
    finally:
        logging.debug("Cleanup vms...")
        for vm_name in vm_names:
            vm = libvirt_vm.VM(vm_name, params, test.bindir,
                               env.get("address_cache"))
            utlv.MigrationTest().cleanup_dest_vm(vm, None, dest_uri)
            if vm.is_alive():
                vm.destroy(gracefully=False)

        if nfs_client:
            logging.info("Cleanup NFS client environment...")
            nfs_client.cleanup()
        env.clean_objects()