def get_hugepage_pod_file():
    """
    Fixture used to return the hugepage deployment file

        - Select the first hypervisor, or the standby controller on an
          AIO-duplex system
        - Check whether 2M hugepages are configured; otherwise check for 1G
          hugepages; otherwise lock the host, configure 2 x 1G hugepages and
          unlock it
        - Call the modify_yaml function to update the yaml file with the
          selected hugepage size and count
        - SCP the modified file to the active controller to deploy the
          hugepages pod
        - Delete the hugepages pod from the host after the test

    """
    if system_helper.is_aio_duplex():
        hostname = system_helper.get_standby_controller_name()
    else:
        hostname = system_helper.get_hypervisors()[0]
    LOG.fixture_step("Checking hugepage values on {}".format(hostname))
    proc_id = 0
    out = host_helper.get_host_memories(
        hostname, ('app_hp_avail_2M', 'app_hp_avail_1G'), proc_id)
    if out[proc_id][0] > 0:
        hugepage_val = "{}Mi".format(out[proc_id][0])
        hugepage_str = "hugepages-2Mi"
    elif out[proc_id][1] > 0:
        hugepage_val = "{}Gi".format(out[proc_id][1])
        hugepage_str = "hugepages-1Gi"
    else:
        hugepage_val = "{}Gi".format(2)
        cmd = "{} -1G {}".format(proc_id, 2)
        hugepage_str = "hugepages-1Gi"
        HostsToRecover.add(hostname)
        host_helper.lock_host(hostname)
        LOG.fixture_step("Configuring hugepage values {} on {}".format(
            hugepage_val, hostname))
        cli.system('host-memory-modify {} {}'.format(hostname, cmd),
                   ssh_client=None,
                   auth_info=Tenant.get('admin_platform'))
        host_helper.unlock_host(hostname)
    LOG.fixture_step("{} {} pod will be configured on {} proc id {}".format(
        hugepage_str, hugepage_val, hostname, proc_id))
    file_dir, file_name = modify_yaml("utils/test_files/",
                                      "hugepages_pod.yaml",
                                      hugepage_str, hugepage_val)
    source_path = "{}/{}".format(file_dir, file_name)
    home_dir = HostLinuxUser.get_home()
    common.scp_from_localhost_to_active_controller(source_path,
                                                   dest_path=home_dir)

    yield file_name

    LOG.fixture_step("Delete hugepages pod")
    kube_helper.delete_resources(resource_names="hugepages-pod")

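# Illustrative sketch only: modify_yaml() is not shown in this module. Based
# on how the fixture above uses it, it is assumed to patch the hugepage
# request/limit fields of hugepages_pod.yaml and return (file_dir, file_name)
# of the modified copy. The pod structure and field names below are
# assumptions, not the actual helper implementation.
import os

import yaml


def modify_yaml_sketch(file_dir, file_name, hugepage_str, hugepage_val):
    """Patch <hugepage_str>: <hugepage_val> into the pod's resource specs."""
    with open(os.path.join(file_dir, file_name)) as f:
        pod = yaml.safe_load(f)
    # Set the hugepage resource on every container in the pod spec
    for container in pod['spec']['containers']:
        resources = container.setdefault('resources', {})
        resources.setdefault('limits', {})[hugepage_str] = hugepage_val
        resources.setdefault('requests', {})[hugepage_str] = hugepage_val
    new_name = "modified_{}".format(file_name)
    with open(os.path.join(file_dir, new_name), 'w') as f:
        yaml.safe_dump(pod, f)
    return file_dir, new_name
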
def get_hosts_to_configure(candidates):
    hosts_selected = [None, None]
    hosts_to_configure = [None, None]
    expt_4k, expt_1g = VM_MEM_GIB * 1048576 / 4, VM_MEM_GIB
    headers = list(MEMPAGE_HEADERS) + ['mem_avail(MiB)']
    final_candidates = list(candidates)
    for host in candidates:
        host_mems = host_helper.get_host_memories(host, headers=headers)
        if 1 not in host_mems:
            LOG.info("{} has only 1 processor".format(host))
            continue

        proc0_mems, proc1_mems = host_mems[0], host_mems[1]
        p0_4k, p1_4k, p0_1g, p1_1g = \
            proc0_mems[0], proc1_mems[0], proc0_mems[2], proc1_mems[2]

        if p0_4k < expt_4k and p0_1g < expt_1g:
            if not hosts_selected[1] and p1_4k >= expt_4k and \
                    p1_1g < expt_1g:
                hosts_selected[1] = host
            elif not hosts_selected[0] and p1_4k < expt_4k and \
                    p1_1g >= expt_1g:
                hosts_selected[0] = host

        if host not in hosts_selected:
            p1_avail, p0_avail = proc1_mems[-1], proc0_mems[-1]
            if p1_avail / 1024 * 0.1 >= VM_MEM_GIB or \
                    p0_avail / 1024 * 0.1 >= VM_MEM_GIB:
                final_candidates.remove(host)

        if None not in hosts_selected:
            LOG.info(
                "1G and 4k hosts already configured and selected: {}".format(
                    hosts_selected))
            break
    else:
        for i in range(len(hosts_selected)):
            if hosts_selected[i] is None:
                possible_hosts = list(
                    set(final_candidates) - set(hosts_selected))
                if not possible_hosts:
                    skip('No host suitable to be reconfigured')
                hosts_selected[i] = \
                    hosts_to_configure[i] = possible_hosts[0]

    LOG.info("Hosts selected: {}; To be configured: {}".format(
        hosts_selected, hosts_to_configure))
    return hosts_selected, hosts_to_configure

def check_mempage_change(vm, host, prev_host_mems, mempage_size=None,
                         mem_gib=VM_MEM_GIB, numa_node=None, timeout=360):
    expt_mempage_indics = _get_expt_indices(mempage_size)
    if numa_node is None:
        numa_node = vm_helper.get_vm_numa_nodes_via_ps(vm_id=vm, host=host)[0]

    prev_host_mems = prev_host_mems[numa_node]

    end_time = time.time() + timeout + 30
    while time.time() < end_time:
        current_host_mems = host_helper.get_host_memories(
            host, headers=MEMPAGE_HEADERS)[numa_node]

        if 0 in expt_mempage_indics:
            if current_host_mems[1:] == prev_host_mems[1:] and \
                    abs(prev_host_mems[0] - current_host_mems[0]) <= \
                    mem_gib * 512 * 1024 / 4:
                return

        for i in expt_mempage_indics:
            if i == 0:
                continue
            expt_pagediff = mem_gib if i == 2 else mem_gib * 1024 / 2
            if prev_host_mems[i] - expt_pagediff == current_host_mems[i]:
                LOG.info(
                    "{} {} memory page reduced by {}GiB as expected".format(
                        host, MEMPAGE_HEADERS[i], mem_gib))
                return
            LOG.info("{} {} memory pages - Previous: {}, current: {}".format(
                host, MEMPAGE_HEADERS[i], prev_host_mems[i],
                current_host_mems[i]))

        time.sleep(30)

    assert 0, "{} available vm {} memory page count did not change to " \
              "expected within {}s".format(host, mempage_size, timeout)

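# Illustrative numbers for the deltas checked above, assuming a 2 GiB VM
# (mem_gib=2): the 1G-page pool is expected to shrink by exactly 2 pages,
# the 2M-page pool by 2 * 1024 / 2 = 1024 pages, while the 4K-page pool only
# needs to stay within 2 * 512 * 1024 / 4 = 262144 pages of its previous
# value (a tolerance rather than an exact delta).
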
def check_host_cpu_and_memory(host, expt_shared_cpu, expt_1g_page):
    """
    Check host cpu and memory configs via sysinv cli
    Args:
        host:
        expt_shared_cpu (dict): {<proc>: <shared_core_count>, ...}
        expt_1g_page (dict): {<proc>: <page_count>, ...}

    Returns:

    """
    LOG.info("Check {} shared core config: {}".format(host, expt_shared_cpu))
    shared_cores_ = host_helper.get_host_cpu_cores_for_function(
        hostname=host, func='shared')
    for proc in expt_shared_cpu:
        assert len(shared_cores_[proc]) == expt_shared_cpu[proc], \
            "Actual shared cpu count is different than expected"

    LOG.info("Check {} 1g page config: {}".format(host, expt_1g_page))
    mempages_1g = host_helper.get_host_memories(
        host, headers=('app_hp_total_1G',))
    for proc in expt_1g_page:
        assert mempages_1g[proc][0] == expt_1g_page[proc], \
            "Actual 1g page count is different than expected"

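# Hypothetical usage of check_host_cpu_and_memory(): verify that a host has
# one shared core and four 1G pages on each processor. The host name and
# counts below are made-up example values, not taken from any lab config.
# check_host_cpu_and_memory('compute-1',
#                           expt_shared_cpu={0: 1, 1: 1},
#                           expt_1g_page={0: 4, 1: 4})
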
def is_host_mem_sufficient(host, mempage_size=None, mem_gib=1):
    host_mems_per_proc = host_helper.get_host_memories(
        host, headers=MEMPAGE_HEADERS)
    mempage_size = 'small' if not mempage_size else mempage_size
    expt_mempage_indices = _get_expt_indices(mempage_size)

    for proc, mems_for_proc in host_mems_per_proc.items():
        pages_4k, pages_2m, pages_1g = mems_for_proc
        mems_for_proc = (int(pages_4k * 4 / 1048576),
                         int(pages_2m * 2 / 1024),
                         int(pages_1g))
        for index in expt_mempage_indices:
            avail_g_for_memsize = mems_for_proc[index]
            if avail_g_for_memsize >= mem_gib:
                LOG.info("{} has sufficient {} mempages to launch {}G "
                         "vm".format(host, mempage_size, mem_gib))
                return True, host_mems_per_proc

    LOG.info("{} does not have sufficient {} mempages to launch {}G "
             "vm".format(host, mempage_size, mem_gib))
    return False, host_mems_per_proc

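# Worked example of the page-count to GiB conversion used in
# is_host_mem_sufficient() (illustrative counts; page sizes are 4 KiB,
# 2 MiB and 1 GiB respectively):
#     pages_4k, pages_2m, pages_1g = 524288, 1024, 2
#     gib_per_pool = (int(pages_4k * 4 / 1048576),   # 524288 * 4 KiB = 2 GiB
#                     int(pages_2m * 2 / 1024),      # 1024 * 2 MiB   = 2 GiB
#                     int(pages_1g))                 # 2 * 1 GiB      = 2 GiB
#     assert gib_per_pool == (2, 2, 2)
# so a 2 GiB VM can be satisfied from any one of these example pools.
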
def check_meminfo_via_sysinv_nova_cli(host, headers):
    end_time = time.time() + 306
    err = None
    while time.time() < end_time:
        sysinv_mems = host_helper.get_host_memories(host, headers,
                                                    rtn_dict=False)
        proc_vars = []
        for proc in range(len(sysinv_mems)):
            proc_mems = sysinv_mems[proc]
            vs_size, vs_page, vm_4k, vm_2m, vm_1g, vm_avail_2m, \
                vm_avail_1g = proc_mems
            proc_vars.append((vs_size, vs_page, vm_4k, vm_2m, vm_1g,
                              vm_avail_2m, vm_avail_1g))
        else:
            LOG.info(
                "{} mempage info in system host-memory-list is in-sync with "
                "nova hypervisor-show".format(host))
            return proc_vars

    assert 0, err

def _wait_for_all_app_hp_avail(host, timeout=360):
    headers = ['app_hp_total_2M', 'app_hp_avail_2M',
               'app_hp_total_1G', 'app_hp_avail_1G']

    end_time = time.time() + timeout + 20
    while time.time() < end_time:
        mems = host_helper.get_host_memories(host, headers=headers)
        for proc, mems_for_proc in mems.items():
            total_2m, avail_2m, total_1g, avail_1g = mems_for_proc
            if total_2m != avail_2m or total_1g != avail_1g:
                break
        else:
            LOG.info('All app huge pages are available on {}'.format(host))
            return

        time.sleep(20)

    assert 0, 'app_hp_total is not the same as app_hp_avail within ' \
              '{}s'.format(timeout)

def ensure_sufficient_4k_pages(request):
    """
    Check whether any processor on the selected compute nodes has enough 4k
    pages, and reconfigure a processor on each hypervisor if not.
    Returns: storage backing and the hypervisors used

    """
    # A processor already has sufficient 4k pages if it has more than
    # 1024 * 1024 / 4 = 262144 of them, i.e. more than 1GiB of 4k memory.
    storage_backing = request.param
    hypervisors = host_helper.get_hosts_in_storage_backing(
        storage_backing=storage_backing)
    if len(hypervisors) < 2:
        skip("Less than two hypervisors with {} instance backing".format(
            storage_backing))

    hypervisors = hypervisors[:2]
    LOG.fixture_step(
        "Configure {} with sufficient 4k pages".format(hypervisors))

    for host in hypervisors:
        LOG.fixture_step(
            "Modify 4k page memory to 2-4GiB for {}".format(host))
        num_4k_pages = host_helper.get_host_memories(host, ('app_total_4K',))
        for proc, pages_4k in num_4k_pages.items():
            if pages_4k[0] > 1024 * 1024 / 4:
                break
        else:
            proc_to_set = 1 if len(num_4k_pages) > 1 else 0
            HostsToRecover.add(host, scope='module')
            host_helper.lock_host(host, swact=True)
            host_helper.modify_host_memory(host, proc=proc_to_set,
                                           gib_4k_range=(2, 4))
            host_helper.unlock_host(host, check_hypervisor_up=True,
                                    check_webservice_up=True)

    return storage_backing, hypervisors

def get_hosts_to_configure(candidates):
    hosts_selected = [None, None]
    hosts_to_configure = [None, None]
    # Thresholds: max_4k and expt_p1_4k are 4k-page counts equivalent to
    # 1.5GiB and 2.5GiB; max_1g and expt_p1_1g are 1G-page counts.
    max_4k, expt_p1_4k, max_1g, expt_p1_1g = \
        1.5 * 1048576 / 4, 2.5 * 1048576 / 4, 1, 2
    for host in candidates:
        host_mems = host_helper.get_host_memories(host,
                                                  headers=MEMPAGE_HEADERS)
        if 1 not in host_mems:
            LOG.info("{} has only 1 processor".format(host))
            continue

        proc0_mems, proc1_mems = host_mems[0], host_mems[1]
        p0_4k, p1_4k, p0_1g, p1_1g = \
            proc0_mems[0], proc1_mems[0], proc0_mems[2], proc1_mems[2]

        if p0_4k <= max_4k and p0_1g <= max_1g:
            if not hosts_selected[1] and p1_4k >= expt_p1_4k and \
                    p1_1g <= max_1g:
                hosts_selected[1] = host
            elif not hosts_selected[0] and p1_4k <= max_4k and \
                    p1_1g >= expt_p1_1g:
                hosts_selected[0] = host

        if None not in hosts_selected:
            LOG.info("1G and 4k hosts already configured and selected: "
                     "{}".format(hosts_selected))
            break
    else:
        for i in range(len(hosts_selected)):
            if hosts_selected[i] is None:
                hosts_selected[i] = hosts_to_configure[i] = \
                    list(set(candidates) - set(hosts_selected))[0]

    LOG.info("Hosts selected: {}; To be configured: "
             "{}".format(hosts_selected, hosts_to_configure))
    return hosts_selected, hosts_to_configure

def check_mempage_change(vm, host, prev_host_mems, mempage_size=None,
                         mem_gib=1, numa_node=None):
    expt_mempage_indics = _get_expt_indices(mempage_size)
    if numa_node is None:
        numa_node = vm_helper.get_vm_numa_nodes_via_ps(vm_id=vm, host=host)[0]

    prev_host_mems = prev_host_mems[numa_node]
    current_host_mems = host_helper.get_host_memories(
        host, headers=MEMPAGE_HEADERS)[numa_node]

    if 0 in expt_mempage_indics:
        if current_host_mems[1:] == prev_host_mems[1:] and \
                abs(prev_host_mems[0] - current_host_mems[0]) <= \
                mem_gib * 512 * 1024 / 4:
            return

    for i in expt_mempage_indics:
        if i == 0:
            continue
        expt_pagecount = 1 if i == 2 else 1024
        if prev_host_mems[i] - expt_pagecount == current_host_mems[i]:
            LOG.info("{} {} memory page reduced by {}GiB as "
                     "expected".format(host, MEMPAGE_HEADERS[i], mem_gib))
            return
        LOG.info("{} {} memory pages - Previous: {}, current: "
                 "{}".format(host, MEMPAGE_HEADERS[i], prev_host_mems[i],
                             current_host_mems[i]))

    assert 0, "{} available vm {} memory page count did not change as " \
              "expected".format(host, mempage_size)

def ovs_dpdk_1_core():
    LOG.fixture_step("Verify the ovs-dpdk vswitch is configured with just "
                     "1 core")
    vswitch_type = "ovs-dpdk"
    cpu_function = "vswitch"
    proc = "0"
    host_list = host_helper.get_hypervisors()
    for host in host_list:
        with host_helper.ssh_to_host(host) as node_ssh:
            cmd = "cat /proc/meminfo | grep Hugepagesize | awk '{print $2}'"
            hp = int(
                node_ssh.exec_cmd(cmd=cmd, fail_ok=False,
                                  get_exit_code=False)[1])

        mem = host_helper.get_host_memories(
            host=host,
            headers=("app_hp_avail_2M", "app_hp_avail_1G",
                     "mem_avail(MiB)", "vs_hp_total"))
        if hp == 1048576:
            if int(mem[proc][3]) < 2 or mem[proc][1] < 10:
                HostsToRecover.add(hostnames=host, scope="module")
                host_helper.lock_host(host=host)
                if int(mem[proc][3]) < 2:
                    args = ' -f vswitch -1G {} {} {}'.format(2, host, proc)
                    cli.system('host-memory-modify', args)
                    host_helper.modify_host_cpu(
                        host=host, cpu_function=cpu_function,
                        **{"p{}".format(proc): 1})
                    # TODO maybe find a better option than sleep since we
                    #  can't wait for applying
                    # container_helper.wait_for_apps_status(
                    #     apps='stx-openstack', status=AppStatus.APPLYING)
                    time.sleep(60)
                    container_helper.wait_for_apps_status(
                        apps='stx-openstack', status=AppStatus.APPLIED,
                        check_interval=30)
                if mem[proc][1] < 10:
                    args = ' -1G {} {} {}'.format(10, host, proc)
                    cli.system('host-memory-modify', args)
                host_helper.unlock_host(host=host)

        if hp == 2048:
            if int(mem[proc][3]) < 512 or mem[proc][0] < 2500:
                host_helper.lock_host(host=host)
                if int(mem[proc][3]) < 512:
                    system_helper.modify_system(
                        **{"vswitch_type": vswitch_type})
                    vswitch_args = ' -f vswitch -2M {} {} {}'.format(
                        512, host, proc)
                    cli.system('host-memory-modify', vswitch_args)
                    host_helper.modify_host_cpu(
                        host=host, cpu_function=cpu_function,
                        **{"p{}".format(proc): 1})
                    # TODO maybe find a better option than sleep since we
                    #  can't wait for applying
                    # container_helper.wait_for_apps_status(
                    #     apps='stx-openstack', status=AppStatus.APPLIED)
                    time.sleep(60)
                    container_helper.wait_for_apps_status(
                        apps='stx-openstack', status=AppStatus.APPLIED,
                        check_interval=30)
                if mem[proc][0] < 2500:
                    args = ' -2M {} {} {}'.format(2500, host, proc)
                    cli.system('host-memory-modify', args)
                host_helper.unlock_host(host=host)

        test_table = host_helper.get_host_cpu_list_table(host=host)
        curr_assigned_function_list = table_parser.get_values(
            test_table, "assigned_function")
        assert "vSwitch" in curr_assigned_function_list

def test_schedule_vm_mempage_config(self, flavor_2g, mem_page_size):
    """
    Test memory used by vm is taken from the expected memory pool and the
    vm was scheduled on the correct host/processor

    Args:
        flavor_2g (tuple): flavor id of a flavor with ram set to 2G, hosts,
            storage_backing
        mem_page_size (str): mem page size setting in flavor

    Setup:
        - Create host aggregate
        - Add two hypervisors to the host aggregate
        - Host-0 configuration:
            - Processor-0:
                - Insufficient 1g pages to boot vm that requires 2g
                - Insufficient 4k pages to boot vm that requires 2g
            - Processor-1:
                - Sufficient 1g pages to boot vm that requires 2g
                - Insufficient 4k pages to boot vm that requires 2g
        - Host-1 configuration:
            - Processor-0:
                - Insufficient 1g pages to boot vm that requires 2g
                - Insufficient 4k pages to boot vm that requires 2g
            - Processor-1:
                - Insufficient 1g pages to boot vm that requires 2g
                - Sufficient 4k pages to boot vm that requires 2g
        - Configure a compute to have 4 1G hugepages (module)
        - Create a flavor with 2G RAM (module)
        - Create a volume with default values (module)

    Test Steps:
        - Set memory page size flavor spec to given value
        - Boot a vm with above flavor and a basic volume
        - Calculate the available/used memory change on the vm host
        - Verify the memory is taken from 1G hugepage memory pool
        - Verify the vm was booted on a supporting host

    Teardown:
        - Delete created vm
        - Delete created volume and flavor (module)
        - Re-configure the compute to have 0 hugepages (module)
        - Revert host mem pages back to original
    """
    skip_4k_for_ovs(mem_page_size)

    flavor_id, hosts_configured, storage_backing = flavor_2g
    LOG.tc_step("Set memory page size extra spec in flavor")
    nova_helper.set_flavor(
        flavor_id,
        **{FlavorSpec.CPU_POLICY: 'dedicated',
           FlavorSpec.MEM_PAGE_SIZE: mem_page_size})

    host_helper.wait_for_hypervisors_up(hosts_configured)
    prev_computes_mems = {}
    for host in hosts_configured:
        prev_computes_mems[host] = host_helper.get_host_memories(
            host=host, headers=MEMPAGE_HEADERS)

    LOG.tc_step(
        "Boot a vm with mem page size spec - {}".format(mem_page_size))

    host_1g, host_4k = hosts_configured
    code, vm_id, msg = vm_helper.boot_vm('mempool_configured', flavor_id,
                                         fail_ok=True, avail_zone='stxauto',
                                         cleanup='function')
    assert 0 == code, "VM is not successfully booted."

    instance_name, vm_host = vm_helper.get_vm_values(
        vm_id, fields=[":instance_name", ":host"], strict=False)
    vm_node = vm_helper.get_vm_numa_nodes_via_ps(
        vm_id=vm_id, instance_name=instance_name, host=vm_host)
    if mem_page_size == '1048576':
        assert host_1g == vm_host, \
            "VM is not created on the configured host " \
            "{}".format(hosts_configured[0])
        assert vm_node == [1], "VM (huge) did not boot on the correct " \
                               "processor"
    elif mem_page_size == 'small':
        assert host_4k == vm_host, "VM is not created on the configured " \
                                   "host {}".format(hosts_configured[1])
        assert vm_node == [1], "VM (small) did not boot on the correct " \
                               "processor"
    else:
        assert vm_host in hosts_configured

    LOG.tc_step("Calculate memory change on vm host - {}".format(vm_host))
    check_mempage_change(vm_id, vm_host,
                         prev_host_mems=prev_computes_mems[vm_host],
                         mempage_size=mem_page_size, mem_gib=2,
                         numa_node=vm_node[0])

    LOG.tc_step("Ensure vm is pingable from NatBox")
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

def check_alarms(self, add_1g_and_4k_pages):
    hosts, storage_backing = add_1g_and_4k_pages
    host_helper.get_hypervisor_info(hosts=hosts)
    for host in hosts:
        host_helper.get_host_memories(host, wait_for_update=False)