def upgrade_controller0(): """ Upgrades controller-0 Returns: """ # upgrade controller-0 LOG.tc_step("Upgrading controller-0......") controller0 = 'controller-0' LOG.info("Ensure controller-0 is provisioned before upgrade.....") ensure_host_provisioned(controller0) LOG.info("Host {} is provisioned for upgrade.....".format(controller0)) # open vlm console for controller-0 for boot through mgmt interface LOG.info("Opening a vlm console for controller-0 .....") install_helper.open_vlm_console_thread(controller0) LOG.info("Starting {} upgrade.....".format(controller0)) upgrade_host(controller0, lock=True) LOG.info("controller-0 is upgraded successfully.....") # unlock upgraded controller-0 LOG.tc_step("Unlocking controller-0 after upgrade......") host_helper.unlock_host(controller0, available_only=True) LOG.info("Host {} unlocked after upgrade......".format(controller0))
def test_lock_with_max_vms_simplex(self, simplex_only):
    """Boot the maximum supported number of vms on a simplex system, then
    lock/unlock the controller and verify the vms stop and recover.
    """
    max_vms = host_helper.get_max_vms_supported(host='controller-0')
    vm_helper.ensure_vms_quotas(vms_num=max_vms)

    LOG.tc_step(
        "Boot {} vms with various storage settings".format(max_vms))
    vms = vm_helper.boot_vms_various_types(cleanup='function',
                                           vms_num=max_vms)

    LOG.tc_step("Lock vm host on simplex system")
    HostsToRecover.add('controller-0')
    host_helper.lock_host('controller-0')

    LOG.tc_step("Ensure vms are in {} state after locked host come "
                "online".format(VMStatus.STOPPED))
    vm_helper.wait_for_vms_values(vms, value=VMStatus.STOPPED,
                                  fail_ok=False)

    LOG.tc_step("Unlock host on simplex system")
    host_helper.unlock_host(host='controller-0')

    LOG.tc_step("Ensure vms are Active and Pingable from NatBox")
    vm_helper.wait_for_vms_values(vms, value=VMStatus.ACTIVE,
                                  fail_ok=False, timeout=600)
    for vm_id in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id, timeout=VMTimeout.DHCP_RETRY)
def manual_upgrade_hosts(manual_nodes):
    """
    Upgrades hosts in manual_nodes list one by one.

    Args:
        manual_nodes (list): the list of nodes to be upgraded one at a time.

    Returns:
        None
    """
    if not manual_nodes:
        return

    LOG.info("Starting upgrade of the other system hosts: {}".format(manual_nodes))
    pending = list(manual_nodes)
    # controller-0 needs its dedicated upgrade path; handle it first
    if 'controller-0' in pending:
        upgrade_controller('controller-0')
        pending.remove('controller-0')

    for node in pending:
        LOG.tc_step("Starting {} upgrade.....".format(node))
        if "storage" in node:
            # wait for replication to be healthy
            storage_helper.wait_for_ceph_health_ok()

        upgrade_host(node, lock=True)
        LOG.info("{} is upgraded successfully.....".format(node))
        LOG.tc_step("Unlocking {} after upgrade......".format(node))
        host_helper.unlock_host(node, available_only=True)
        LOG.info("Host {} unlocked after upgrade......".format(node))
        LOG.info("Host {} upgrade complete.....".format(node))
def sys_lock_unlock_standby(number_of_times=1):
    """Lock and unlock the standby controller repeatedly, verifying that all
    vms stay in a good state and reachable from the NatBox each iteration.

    Args:
        number_of_times (int): number of lock/unlock iterations to perform.
    """
    # Longer timeout on AIO systems where vms may need a DHCP retry after a
    # controller lock; otherwise a plain vm-ping timeout suffices.
    timeout = VMTimeout.DHCP_RETRY if system_helper.is_aio_system(
    ) else VMTimeout.PING_VM
    for i in range(number_of_times):
        active, standby = system_helper.get_active_standby_controllers()
        LOG.tc_step("Doing iteration of {} of total iteration {}".format(
            i, number_of_times))
        # Fixed misleading log: this step locks the standby controller; it
        # does NOT issue 'sudo reboot -f'.
        LOG.tc_step("Lock standby controller {}".format(standby))
        host_helper.lock_host(host=standby)

        LOG.tc_step("Check vms status after locking standby")
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm,
                                                       timeout=timeout)

        host_helper.unlock_host(host=standby)

        # re-check vm health after the unlock as well
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)
def config_host_func(host, modify_func, revert_func=None, *args, **kwargs):
    """Lock *host*, run *modify_func* on it while locked, then unlock.

    If *revert_func* is given, a finalizer is registered (via the enclosing
    fixture's ``request``) that re-locks the host, runs *revert_func*, and
    unlocks again, so the modification is undone at end of scope.
    ``scope`` and ``request`` come from the enclosing fixture closure.

    Args:
        host (str): host to modify
        modify_func (callable): called as modify_func(host, *args, **kwargs)
        revert_func (callable|None): called as revert_func(host) at teardown
    """
    HostsToRecover.add(host, scope=scope)
    LOG.fixture_step("({}) Lock host: {}".format(scope, host))
    # swact=True: swact away first in case this host is the active controller
    host_helper.lock_host(host=host, swact=True)

    # add teardown before running modify (as long as host is locked
    # successfully) in case modify or unlock fails.
    if revert_func is not None:
        def revert_host():
            # Teardown helper: lock again, revert, and always unlock.
            LOG.fixture_step("({}) Lock host: {}".format(scope, host))
            host_helper.lock_host(host=host, swact=True)
            try:
                LOG.fixture_step("({}) Execute revert function: {}".format(
                    scope, revert_func))
                revert_func(host)
            finally:
                LOG.fixture_step("({}) Unlock host: {}".format(
                    scope, host))
                # Put it in finally block in case revert_func fails -
                # host will still be unlocked for other tests.
                host_helper.unlock_host(host=host)

        request.addfinalizer(revert_host)

    LOG.fixture_step("({}) Execute modify function: {}".format(
        scope, modify_func))
    modify_func(host, *args, **kwargs)

    LOG.fixture_step("({}) Unlock host: {}".format(scope, host))
    host_helper.unlock_host(host=host)
def teardown():
    # Clean up everything the test created: vms first, then flavors, and
    # finally unlock any hosts the test left locked.
    LOG.info("Delete all created vms and unlock target host(s)...")
    for vm in all_vms:
        vm_helper.delete_vms(vm)
    nova_helper.delete_flavors(all_new_flavors)
    for locked_host in self.hosts_locked:
        host_helper.unlock_host(locked_host, check_hypervisor_up=True)
def delete_patch():
    """Remove and delete the patch, re-install patch state on the host, and
    unlock it.

    ``patch_name``, ``con_ssh``, ``host`` and ``timeout`` come from the
    enclosing fixture closure.
    """
    LOG.fixture_step("Removing {} from patching controller".format(
        patch_name))
    con_ssh.exec_sudo_cmd('sw-patch remove {}'.format(patch_name))
    con_ssh.exec_sudo_cmd('sw-patch delete {}'.format(patch_name))

    LOG.fixture_step("Reinstalling {} to revert the patch".format(patch_name))
    # host-install re-applies the current patch level on the host
    con_ssh.exec_sudo_cmd('sw-patch host-install {}'.format(host),
                          expect_timeout=timeout.CLI_TIMEOUT)
    host_helper.unlock_host(host)
def test_lock_unlock_secure_boot_vm():
    """
    Test host lock/unlock with a secure-boot vm on it.

    Steps:
        - Create uefi images and boot volumes from them
        - Boot a vm from the volumes and verify secure boot inside it
        - Lock the vm's host, verify secure boot again (non-simplex)
        - Unlock the host, verify secure boot again (simplex)
    """
    guests_os = ['trusty_uefi', 'uefi_shell']
    # Fixed: loop variable previously shadowed this list ('disk_format'),
    # which worked only by accident of zip() binding order.
    disk_formats = ['qcow2', 'raw']
    image_ids = []
    volume_ids = []
    for guest_os, disk_format in zip(guests_os, disk_formats):
        image_ids.append(
            create_image_with_metadata(
                guest_os=guest_os,
                property_key=ImageMetadata.FIRMWARE_TYPE,
                values=['uefi'],
                disk_format=disk_format,
                container_format='bare'))
    # create a flavor
    flavor_id = nova_helper.create_flavor(vcpus=2, ram=1024, root_disk=5)[1]
    ResourceCleanup.add('flavor', flavor_id)
    # boot a vm using the above image
    for image_id in image_ids:
        volume_ids.append(
            cinder_helper.create_volume(source_id=image_id[0],
                                        size=5,
                                        cleanup='function')[1])

    # second volume boots first (bootindex 0)
    block_device_dic = [{
        'id': volume_ids[1],
        'source': 'volume',
        'bootindex': 0
    }, {
        'id': volume_ids[0],
        'source': 'volume',
        'bootindex': 1
    }]

    vm_id = vm_helper.boot_vm(name='sec-boot-vm',
                              source='block_device',
                              flavor=flavor_id,
                              block_device=block_device_dic,
                              cleanup='function',
                              guest_os=guests_os[0])[1]

    _check_secure_boot_on_vm(vm_id=vm_id)

    # Lock the compute node with the secure Vms
    compute_host = vm_helper.get_vm_host(vm_id=vm_id)
    host_helper.lock_host(compute_host, timeout=800)
    if not system_helper.is_aio_simplex():
        # on non-simplex the vm migrates away, so it stays reachable
        _check_secure_boot_on_vm(vm_id=vm_id)
    host_helper.unlock_host(compute_host, timeout=800)

    if system_helper.is_aio_simplex():
        # on simplex the vm only recovers after the unlock
        _check_secure_boot_on_vm(vm_id=vm_id)
def test_force_lock_with_non_mig_vms(add_host_to_zone):
    """
    Test force lock host with non-migrate-able vms on it

    Prerequisites:
        - Minimum of two up hypervisors
    Test Setups:
        - Add admin role to primary tenant
        - Create cgcsauto aggregate
        - Add host_under_test to cgcsauto aggregate
        - Create flavor for vms_to_test with storage_backing support by
          host_under_test
        - Create vms_to_test on host_under_test that can be live migrated
    Test Steps:
        - Force lock target host
        - Verify force lock returns 0
        - Verify VMs cannot find a host to boot and are in error state
        - Unlock locked target host
        - Verify VMs are active on host once it is up and available
        - Verify VMs can be pinged
    Test Teardown:
        - Remove admin role from primary tenant
        - Delete created vms
        - Remove host_under_test from cgcsauto aggregate
    """
    storage_backing, target_host = add_host_to_zone

    # Flavor matching the storage backing supported by the target host
    flavor = nova_helper.create_flavor(storage_backing=storage_backing)[1]

    LOG.tc_step("Boot VM on {}".format(target_host))
    vm = vm_helper.boot_vm(vm_host=target_host,
                           flavor=flavor,
                           avail_zone='cgcsauto',
                           cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm)

    LOG.tc_step("Force lock {}".format(target_host))
    HostsToRecover.add(target_host)
    lock_code, lock_output = host_helper.lock_host(target_host, force=True)
    assert lock_code == 0, "Failed to lock {}. Details: {}".format(
        target_host, lock_output)

    # with no eligible host to migrate to, the vm goes to ERROR
    vm_helper.wait_for_vm_values(vm, fail_ok=False, status='ERROR')

    host_helper.unlock_host(target_host)

    # vm recovers once the host is back up and available
    vm_helper.wait_for_vm_values(vm, timeout=300, fail_ok=False,
                                 status='ACTIVE')
    vm_helper.wait_for_vm_pingable_from_natbox(vm,
                                               timeout=VMTimeout.DHCP_RETRY)
def revert_host():
    """Teardown helper: lock the host, run the revert function, and always
    unlock afterwards.

    ``scope``, ``host`` and ``revert_func`` come from the enclosing closure.
    """
    LOG.fixture_step("({}) Lock host: {}".format(scope, host))
    # swact=True: swact away first in case this host is the active controller
    host_helper.lock_host(host=host, swact=True)
    try:
        LOG.fixture_step("({}) Execute revert function: {}".format(
            scope, revert_func))
        revert_func(host)
    finally:
        LOG.fixture_step("({}) Unlock host: {}".format(
            scope, host))
        # Put it in finally block in case revert_func fails - host will
        # still be unlocked for other tests.
        host_helper.unlock_host(host=host)
def get_hugepage_pod_file():
    """
    Fixture used to return the hugepage deployment file

    - Get the compute-0 if exist, else standby controller
    - Check 2M hugepages configured, elsif check 1G is configured
      else lock,configure 2G of 1G hugepages and unlock host
    - Call modify_yaml function to modify the yaml file with the values
    - Modified file scps to host to deploy hugepages pod
    - Deletes the hugepages pod from the host after the test
    """
    if system_helper.is_aio_duplex():
        hostname = system_helper.get_standby_controller_name()
    else:
        hostname = system_helper.get_hypervisors()[0]
    LOG.fixture_step("Checking hugepage values on {}".format(hostname))
    # only processor 0 is inspected/configured
    proc_id = 0
    out = host_helper.get_host_memories(hostname,
                                        ('app_hp_avail_2M',
                                         'app_hp_avail_1G'),
                                        proc_id)
    if out[proc_id][0] > 0:
        # 2M hugepages already available on processor 0
        hugepage_val = "{}Mi".format(out[proc_id][0])
        hugepage_str = "hugepages-2Mi"
    elif out[proc_id][1] > 0:
        # 1G hugepages already available on processor 0
        hugepage_val = "{}Gi".format(out[proc_id][1])
        hugepage_str = "hugepages-1Gi"
    else:
        # No hugepages configured: lock the host, configure 2 x 1G pages,
        # then unlock.
        hugepage_val = "{}Gi".format(2)
        cmd = "{} -1G {}".format(proc_id, 2)
        hugepage_str = "hugepages-1Gi"
        HostsToRecover.add(hostname)
        host_helper.lock_host(hostname)
        LOG.fixture_step("Configuring hugepage values {} on {}".format(
            hugepage_val, hostname))
        cli.system('host-memory-modify {} {}'.format(hostname, cmd),
                   ssh_client=None,
                   auth_info=Tenant.get('admin_platform'))
        host_helper.unlock_host(hostname)

    LOG.fixture_step("{} {} pod will be configured on {} proc id {}".format(
        hugepage_str, hugepage_val, hostname, proc_id))
    # render the hugepages pod yaml with the discovered/configured values
    file_dir, file_name = modify_yaml("utils/test_files/",
                                      "hugepages_pod.yaml",
                                      hugepage_str, hugepage_val)
    source_path = "{}/{}".format(file_dir, file_name)
    home_dir = HostLinuxUser.get_home()
    common.scp_from_localhost_to_active_controller(source_path,
                                                   dest_path=home_dir)
    yield file_name
    LOG.fixture_step("Delete hugepages pod")
    kube_helper.delete_resources(resource_names="hugepages-pod")
def clear_config_out_of_date_alarm():
    """Clear config-out-of-date alarms by lock/unlocking each controller
    that raises one (standby first, then active)."""
    active, standby = system_helper.get_active_standby_controllers()
    for controller in (standby, active):
        if not controller:
            continue
        alarm_raised = system_helper.wait_for_alarm(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE, timeout=5,
            entity_id=controller, fail_ok=True)[0]
        if not alarm_raised:
            continue
        # swact=True so the active controller is swacted away before lock
        host_helper.lock_host(controller, swact=True)
        time.sleep(60)
        host_helper.unlock_host(controller)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id=controller, fail_ok=False)
def install_non_active_node(node_name, lab):
    """
    Install the non-active controller node, usually it is controller-1, the
    second controller on a non-AIO SX system.

    Args:
        node_name: - the name of the host/node, usually 'controller-1'
        lab: - lab to test
    """
    boot_interfaces = lab['boot_device_dict']
    LOG.tc_step("Restoring {}".format(node_name))
    # power on and boot the node via its vlm console using the lab's boot
    # device mapping
    install_helper.open_vlm_console_thread(node_name,
                                           boot_interface=boot_interfaces,
                                           vlm_power_on=True)

    LOG.info(
        "Verifying {} is Locked, Disabled and Online ...".format(node_name))
    system_helper.wait_for_hosts_states(node_name,
                                        administrative=HostAdminState.LOCKED,
                                        operational=HostOperState.DISABLED,
                                        availability=HostAvailState.ONLINE)

    LOG.info("Unlocking {} ...".format(node_name))
    rc, output = host_helper.unlock_host(node_name, available_only=False)
    # rc 4 is tolerated: the host unlocked but came up degraded
    assert rc == 0 or rc == 4, "Host {} failed to unlock: rc = {}, msg: {}".format(
        node_name, rc, output)
    if rc == 4:
        LOG.warn('{} now is in degraded status'.format(node_name))
    LOG.info('{} is installed'.format(node_name))
def revert():
    """Teardown: restore shared-cpu and 1G-hugepage settings on every host
    the test modified.

    ``modified_hosts`` and ``shared_disabled_hosts`` come from the enclosing
    closure.
    """
    for host_to_revert in modified_hosts:
        # sanity: test is expected to have left 1 shared cpu per proc and
        # 4 x 1G pages on proc 0
        check_host_cpu_and_memory(host_to_revert,
                                  expt_shared_cpu={0: 1, 1: 1},
                                  expt_1g_page={0: 4})
        p0_shared = len(shared_disabled_hosts[host_to_revert][0])
        p1_shared = len(shared_disabled_hosts[host_to_revert][1])
        try:
            LOG.fixture_step("Revert {} shared cpu and memory setting".format(host_to_revert))
            host_helper.lock_host(host_to_revert)
            host_helper.modify_host_cpu(host_to_revert, 'shared',
                                        p0=p0_shared, p1=p1_shared)
            host_helper.modify_host_memory(host_to_revert, proc=0, gib_1g=0)
        finally:
            # unlock even if one of the modify calls above failed
            host_helper.unlock_host(host_to_revert)
            host_helper.wait_for_memory_update(host_to_revert)
            check_host_cpu_and_memory(
                host_to_revert,
                expt_shared_cpu={0: p0_shared, 1: p1_shared},
                expt_1g_page={0: 0})
def revert():
    """Teardown: restore the original MTU of every data interface the test
    modified (tracked in the global HOSTS_IF_MODIFY_ARGS)."""
    LOG.fixture_step('Restore the MTUs of the data IFs on hosts if modified')
    global HOSTS_IF_MODIFY_ARGS
    # iterate over a copy so entries can be removed from the global as each
    # interface is restored
    items_to_revert = HOSTS_IF_MODIFY_ARGS.copy()
    for item in items_to_revert:
        host, pre_mtu, mtu, max_mtu, interface, net_type = item
        host_helper.lock_host(host, swact=True)

        LOG.info('Restore DATA MTU of IF:{} on host:{} to:{}, current MTU:{}'.format(interface, host, pre_mtu, mtu))
        # lock_unlock=False: this loop handles the lock/unlock itself
        host_helper.modify_mtu_on_interface(host, interface, pre_mtu,
                                            network_type=net_type,
                                            lock_unlock=False)
        LOG.info('OK, Data MTUs of IF:{} on host:{} are restored, from: {} to:{}'.format(
            interface, host, mtu, pre_mtu))

        host_helper.unlock_host(host)
        HOSTS_IF_MODIFY_ARGS.remove(item)
    LOG.info('OK, all changed MTUs of DATA IFs are restored')
def test_lock_unlock_host(host_type):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select
          standby controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked
    """
    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        # on simplex the only controller is the target; otherwise use standby
        host = ('controller-0' if system_helper.is_aio_simplex()
                else system_helper.get_standby_controller_name())
        assert host, "No standby controller available"
    else:
        if host_type == 'compute' and system_helper.is_aio_system():
            skip("No compute host on AIO system")
        if host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        candidates = system_helper.get_hosts(
            personality=host_type,
            availability=HostAvailState.AVAILABLE,
            operational=HostOperState.ENABLED)
        assert candidates, "No good {} host on system".format(host_type)
        host = candidates[0]

    LOG.tc_step("Lock {} host - {} and ensure it is successfully "
                "locked".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    LOG.tc_step("Unlock {} host - {} and ensure it is successfully "
                "unlocked".format(host_type, host))
    host_helper.unlock_host(host)
def test_host_operations_with_custom_kubectl_app(deploy_delete_kubectl_app):
    """
    Test create, delete custom app via kubectl run cmd
    Args:
        deploy_delete_kubectl_app: fixture

    Setups:
        - Create kubectl app via kubectl run

    Test Steps:
        - If duplex: swact and verify pod still Running
        - Lock/unlock controller and verify pod still Running

    Teardown:
        - Delete kubectl deployment and service
        - Verify pod is removed

    """
    app_name, pod_name = deploy_delete_kubectl_app
    active, standby = system_helper.get_active_standby_controllers()

    if standby:
        LOG.tc_step("Swact active controller and verify {} test app is "
                    "running ".format(pod_name))
        host_helper.swact_host()
        kube_helper.wait_for_pods_status(pod_names=pod_name,
                                         namespace='default',
                                         fail_ok=False)

    LOG.tc_step("Lock/unlock {} and verify {} test app is "
                "running.".format(active, pod_name))
    HostsToRecover.add(active)
    host_helper.lock_host(active, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    host_helper.unlock_host(active)
    # re-query the pod by app name: the pod may have been rescheduled (and
    # renamed) during the lock/unlock
    pod_name = kube_helper.get_pods(field='NAME', namespace='default',
                                    name=app_name, strict=False)[0]
    kube_helper.wait_for_pods_status(pod_names=pod_name, namespace=None,
                                     fail_ok=False)
def test_lock_unlock_standby_controller(no_simplex):
    """
    Lock - Unlock standby controller
    """
    standby_controller_host = system_helper.get_standby_controller_name()
    LOG.info("Standby Controller Host: {}".format(standby_controller_host))
    assert standby_controller_host, "Standby controller not found"

    # Lock
    host_helper.lock_host(host=standby_controller_host, fail_ok=False)
    # stx-openstack is expected to return to 'applied' state after the lock
    container_helper.wait_for_apps_status(apps="stx-openstack",
                                          status=AppStatus.APPLIED,
                                          timeout=600,
                                          check_interval=60)
    # Unlock
    host_helper.unlock_host(host=standby_controller_host, fail_ok=False)
    host_helper.wait_for_hosts_ready(hosts=standby_controller_host)
def test_lock_unlock_active_controller():
    """
    Lock - Unlock an active controller.

    On a simplex system, locking the only controller is allowed; the
    subsequent unlock may be rejected with "Not patch current", in which
    case the patch is installed on the host and the unlock retried.
    On multi-node systems, locking the active controller must be rejected.
    """
    # Fixed local name typo: 'active_conroller_host'
    active_controller_host = system_helper.get_active_controller_name()
    LOG.info("Active Controller Host: {}".format(active_controller_host))

    if system_helper.is_aio_simplex():
        host_helper.lock_host(host=active_controller_host, fail_ok=False)
        rc, output = host_helper.unlock_host(host=active_controller_host,
                                             fail_ok=True)
        if rc == 1 and "Not patch current" in output:
            # Install pending patches on the actual host (previously
            # hard-coded to 'controller-0'), then retry the unlock.
            con_ssh = ControllerClient.get_active_controller()
            cmd = "sw-patch host-install {}".format(active_controller_host)
            con_ssh.exec_sudo_cmd(cmd=cmd)
            host_helper.unlock_host(host=active_controller_host,
                                    fail_ok=False)
    else:
        rc, output = host_helper.lock_host(host=active_controller_host,
                                           fail_ok=True)
        assert rc == 1
        assert "Can not lock an active controller" in output
def upgrade_controller(controller_host, con_ssh=None, fail_ok=False):
    """
    Upgrades either controller-0 or controller-1

    Args:
        controller_host (str): the controller host name
        con_ssh (SSHClient):
        fail_ok(bool):

    Returns:
        if fail_ok is true return error code and message

    """
    if controller_host not in ['controller-0', 'controller-1']:
        err_msg = "The specified host {} is not a controller host".format(controller_host)
        if fail_ok:
            return 1, err_msg
        raise exceptions.UpgradeError(err_msg)

    LOG.info("Upgrading Host {}".format(controller_host))
    if controller_host == 'controller-0':
        # controller-0 must be provisioned (lock/unlock cycled) before it
        # can be upgraded
        ensure_host_provisioned(controller_host, con_ssh=con_ssh)
        LOG.info("Host {} is provisioned for upgrade.....".format(controller_host))

    # lock=True: host is locked for the duration of the upgrade
    upgrade_host(controller_host, lock=True, con_ssh=con_ssh)
    LOG.info("Host {} is upgraded successfully......".format(controller_host))

    # unlock upgraded controller
    LOG.tc_step("Unlocking {} after upgrade......".format(controller_host))
    if controller_host == 'controller-1':
        # skip the hypervisor-up check for controller-1
        host_helper.unlock_host(controller_host, available_only=True,
                                check_hypervisor_up=False,
                                con_ssh=con_ssh, fail_ok=fail_ok)
    else:
        host_helper.unlock_host(controller_host, available_only=True,
                                con_ssh=con_ssh, fail_ok=fail_ok)
    LOG.info("Host {} unlocked after upgrade......".format(controller_host))
def test_lock_unlock_compute_hosts(no_simplex, no_duplex):
    """
    Lock - Unlock Compute Hosts
    """
    compute_hosts = system_helper.get_computes()
    LOG.info(" Compute nodes found: {}".format(len(compute_hosts)))

    for compute in compute_hosts:
        LOG.info("Compute Host: {}".format(compute))
        # Lock the host and wait for it and stx-openstack to settle
        host_helper.lock_host(host=compute, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=compute)
        container_helper.wait_for_apps_status(apps="stx-openstack",
                                              status=AppStatus.APPLIED,
                                              timeout=600,
                                              check_interval=60)
        # Unlock and wait for the ready state again
        host_helper.unlock_host(host=compute, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=compute)
def upgrade_host_lock_unlock(host, con_ssh=None):
    """
    Swact away from the host if it is the active controller, then lock and
    unlock the host before upgrade, swacting back afterwards if needed.

    Args:
        host (str): hostname or id in string format
        con_ssh (SSHClient):

    Returns:
        (return_code(int), msg(str)): (0, <msg>) when the host is locked and
        unlocked successfully; (1, <err msg>) when the lock, unlock or
        swact-back fails.
    """
    LOG.info("Checking if host {} is active ....".format(host))

    active_controller = system_helper.get_active_controller_name()
    swact_back = False
    if active_controller == host:
        # host is the active controller; swact away before locking it
        LOG.tc_step("Swact active controller and ensure active controller is changed")
        exit_code, output = host_helper.swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)

        active_controller = system_helper.get_active_controller_name()
        swact_back = True

    LOG.info("Host {}; doing lock/unlock to the host ....".format(host))
    rc, output = host_helper.lock_host(host, con_ssh=con_ssh)
    # rc -1 is tolerated here — presumably "already locked"; TODO confirm
    # against the lock_host contract
    if rc != 0 and rc != -1:
        err_msg = "Lock host {} rejected".format(host)
        LOG.warn(err_msg)
        return 1, err_msg

    rc, output = host_helper.unlock_host(host, available_only=True, con_ssh=con_ssh)
    if rc != 0:
        err_msg = "Unlock host {} failed: {}".format(host, output)
        return 1, err_msg

    if swact_back:
        time.sleep(60)

        if not system_helper.wait_for_host_values(
                host, timeout=360, fail_ok=True,
                operational=HostOperState.ENABLED,
                availability=HostAvailState.AVAILABLE):
            err_msg = " Swacting to standby is not possible because {} is not in available state " \
                      "within the specified timeout".format(host)
            assert False, err_msg
        LOG.tc_step("Swact active controller back and ensure active controller is changed")
        rc, output = host_helper.swact_host(hostname=active_controller)
        if rc != 0:
            err_msg = "Failed to swact back to host {}: {}".format(host, output)
            return 1, err_msg

        LOG.info("Swacted and {} has become active......".format(host))

    return 0, "Host {} is locked and unlocked successfully".format(host)
def ensure_sufficient_4k_pages(request): """ Check if there is enough 4k pages on any compute node on any processors is a bit hassle Returns: """ # check if any 4k pages greater than 600000 means more than 2G(~536871 4k pages) total. storage_backing = request.param hypervisors = host_helper.get_hosts_in_storage_backing( storage_backing=storage_backing) if len(hypervisors) < 2: skip("Less than two hypersvisors with {} instance backing".format( storage_backing)) hypervisors = hypervisors[:2] LOG.fixture_step( "Configure {} with sufficient 4k pages".format(hypervisors)) for host in hypervisors: LOG.fixture_step( "Modify 4k page numbers to 600000 for {}".format(host)) num_4k_pages = host_helper.get_host_memories(host, 'app_total_4K') for proc, pages_4k in num_4k_pages.items(): if pages_4k[0] > 1024 * 1024 / 4: break else: proc_to_set = 1 if len(num_4k_pages) > 1 else 0 HostsToRecover.add(host, scope='module') host_helper.lock_host(host, swact=True) host_helper.modify_host_memory(host, proc=proc_to_set, gib_4k_range=(2, 4)) host_helper.unlock_host(host, check_hypervisor_up=True, check_webservice_up=True) return storage_backing, hypervisors
def test_apply_storage_profile_negative(create_storage_profile, personality):
    """Verify host-apply-storprofile is rejected for a host whose disks or
    personality do not match the storage profile.

    Args:
        create_storage_profile: fixture providing the profile name and the
            disk count of the host the profile was created from
        personality (str): host personality to apply the profile against
    """
    if personality == 'controller':
        host_name = system_helper.get_standby_controller_name()
        assert host_name, "No standby controller available on system"
    else:
        host_name = host_helper.get_up_hypervisors()[0]

    # For storage systems, skip test if ceph isn't healthy
    if len(system_helper.get_storage_nodes()) > 0:
        ceph_healthy = storage_helper.is_ceph_healthy()
        if not ceph_healthy:
            skip('Skipping due to ceph not being healthy')

    profile_name = create_storage_profile['profile_name']
    origin_disk_num = create_storage_profile['disk_num']
    disks_num = len(storage_helper.get_host_disks(host_name, 'device_node'))

    # Removed unused 'expt_err' variable - it was superseded by the list
    # of acceptable error messages below and never read.
    expt_err_list = [
        "Please check if host's disks match profile criteria",
        "Failed to create storage function. Host personality must be 'storage'",
    ]
    if disks_num < origin_disk_num - 1:
        expt_err_list.append("profile has more disks than host does")

    positional_arg = host_name + ' ' + profile_name

    HostsToRecover.add(host_name)
    host_helper.lock_host(host_name, swact=True)
    exitcode, output = cli.system('host-apply-storprofile', positional_arg,
                                  fail_ok=True)
    host_helper.unlock_host(host_name)

    # command must fail with one of the expected rejection messages
    assert exitcode == 1 and any(expt in output for expt in expt_err_list)
def test_lock_unlock_storage_hosts(no_simplex, no_duplex):
    """
    Lock - Unlock Storage Hosts
    """
    if ProjVar.get_var('SYS_TYPE') != SysType.STORAGE:
        skip('Only applicable to Standard-external system')

    storage_hosts = system_helper.get_storage_nodes()
    LOG.info(" Storage nodes found: {}".format(len(storage_hosts)))

    for storage_node in storage_hosts:
        LOG.info("Storage Host: {}".format(storage_node))
        # Lock the node, then wait for it and stx-openstack to settle
        host_helper.lock_host(host=storage_node, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=storage_node)
        container_helper.wait_for_apps_status(apps="stx-openstack",
                                              status=AppStatus.APPLIED,
                                              timeout=600,
                                              check_interval=60)
        # Unlock and wait for the ready state again
        host_helper.unlock_host(host=storage_node, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=storage_node)
def test_admin_password(scenario, less_than_two_cons, _revert_admin_pw):
    """
    Test the admin password change

    Test Steps:
        - lock standby controller change password and unlock
        - change password and swact
        - check alarms
    """
    if 'swact' in scenario and less_than_two_cons:
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)

    host = system_helper.get_standby_controller_name()
    assert host, "No standby controller on system"

    if scenario == "lock_standby_change_pswd":
        # lock the standby
        LOG.tc_step("Attempting to lock {}".format(host))
        res, out = host_helper.lock_host(host=host)
        LOG.tc_step("Result of the lock was: {}".format(res))

    # change password
    prev_pswd = Tenant.get('admin')['password']
    post_pswd = '!{}9'.format(prev_pswd)

    LOG.tc_step('Changing admin password to {}'.format(post_pswd))
    code, output = keystone_helper.set_user('admin', password=post_pswd,
                                            auth_info=Tenant.get(
                                                'admin_platform'))

    # assert "Warning: 'admin' password changed. Please wait 5 minutes before Locking/Unlocking
    # the controllers" in output
    LOG.tc_step("Sleep for 180 seconds after admin password change")
    time.sleep(180)  # CGTS-6928

    LOG.tc_step("Check admin password is updated in keyring")
    assert post_pswd == security_helper.get_admin_password_in_keyring()

    if scenario == "change_pswd_swact":
        LOG.tc_step("Swact active controller")
        host_helper.swact_host()
    else:
        LOG.tc_step("Unlock host {}".format(host))
        res = host_helper.unlock_host(host)
        LOG.info("Unlock hosts result: {}".format(res))

    # re-check after the swact/unlock to ensure the password survived
    LOG.tc_step("Check admin password is updated in keyring")
    assert post_pswd == security_helper.get_admin_password_in_keyring()
def ensure_host_provisioned(host, con_ssh=None):
    """
    Ensure host is provisioned; if not, lock/unlock it to provision it,
    swacting away (and back) when the host is the active controller.

    Args:
        host (str): hostname or id in string format
        con_ssh (SSHClient):

    Returns:
        (return_code(int), msg(str)): (0, <msg>) when the host is (or
        becomes) provisioned.

    Raises:
        exceptions.HostError: if the lock, unlock, or provisioning fails.
    """
    LOG.info("Checking if host {} is already provisioned ....".format(host))
    # Fixed: pass the caller-supplied ssh client instead of hard-coded None
    if is_host_provisioned(host, con_ssh=con_ssh):
        # Fixed: message previously missed .format(host)
        return 0, "Host {} is provisioned".format(host)
    active_controller = system_helper.get_active_controller_name()
    conter_swact_back = False
    if active_controller == host:
        LOG.tc_step("Swact active controller and ensure active controller is changed")
        exit_code, output = swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)

        active_controller = system_helper.get_active_controller_name()
        conter_swact_back = True

    LOG.info("Host {} not provisioned ; doing lock/unlock to provision the host ....".format(host))
    rc, output = lock_host(host, con_ssh=con_ssh)
    # rc -1 is tolerated here — presumably "already locked"; TODO confirm
    # against the lock_host contract
    if rc != 0 and rc != -1:
        err_msg = "Lock host {} rejected".format(host)
        raise exceptions.HostError(err_msg)

    rc, output = unlock_host(host, available_only=True, con_ssh=con_ssh)
    if rc != 0:
        err_msg = "Unlock host {} failed: {}".format(host, output)
        raise exceptions.HostError(err_msg)

    if conter_swact_back:
        LOG.tc_step("Swact active controller back and ensure active controller is changed")
        exit_code, output = swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)

    LOG.info("Checking if host {} is provisioned after lock/unlock ....".format(host))
    if not is_host_provisioned(host, con_ssh=con_ssh):
        # Fixed: error message previously missed .format(host)
        raise exceptions.HostError("Failed to provision host {}".format(host))
    # Delay for the alarm to clear . Could be improved.
    time.sleep(120)
    return 0, "Host {} is provisioned after lock/unlock".format(host)
def _lock_unlock_controllers():
    """Lock/unlock controller host(s) to complete the admin password change.

    ``more_than_one_controllers`` comes from the enclosing closure.
    """
    LOG.fixture_step("Sleep for 300 seconds after admin password change")
    time.sleep(300)

    if more_than_one_controllers:
        active, standby = system_helper.get_active_standby_controllers()
        if not standby:
            LOG.warning("Standby controller unavailable. Skip lock unlock "
                        "controllers post admin password change.")
            return
        LOG.fixture_step(
            "(Session) Locking unlocking controllers to complete "
            "action")
        # cycle the standby first, then swact and cycle the original active
        host_helper.lock_host(standby)
        host_helper.unlock_host(standby)
        host_helper.lock_host(active, swact=True)
        host_helper.unlock_host(active)
    elif system_helper.is_aio_simplex():
        LOG.fixture_step(
            "(Session) Simplex lab - lock/unlock controller to complete "
            "action")
        host_helper.lock_host('controller-0', swact=False)
        host_helper.unlock_host('controller-0')
def ovs_dpdk_1_core():
    """
    Ensure the ovs-dpdk vswitch uses exactly 1 core on every hypervisor,
    configuring vswitch hugepages and cpus (with a lock/unlock cycle) on
    hosts that are not yet set up.
    """
    LOG.fixture_step("Review the ovs-dpdk vswitch be in just 1 core")
    vswitch_type = "ovs-dpdk"
    cpu_function = "vswitch"
    # only processor 0 is inspected/configured
    proc = "0"
    host_list = host_helper.get_hypervisors()
    for host in host_list:
        with host_helper.ssh_to_host(host) as node_ssh:
            # default hugepage size in kB (1048576 = 1G, 2048 = 2M)
            cmd = "cat /proc/meminfo | grep Hugepagesize | awk '{print $2}'"
            hp = int(
                node_ssh.exec_cmd(cmd=cmd, fail_ok=False,
                                  get_exit_code=False)[1])
            mem = host_helper.get_host_memories(
                host=host,
                headers=("app_hp_avail_2M", "app_hp_avail_1G",
                         "mem_avail(MiB)", "vs_hp_total"))
            if hp == 1048576:
                # 1G hugepage host: need >= 2 vswitch hugepages and
                # >= 10 x 1G app pages
                if int(mem[proc][3]) < 2 or mem[proc][1] < 10:
                    HostsToRecover.add(hostnames=host, scope="module")
                    host_helper.lock_host(host=host)
                    if int(mem[proc][3]) < 2:
                        args = ' -f vswitch -1G {} {} {}'.format(2, host,
                                                                 proc)
                        cli.system('host-memory-modify', args)
                        host_helper.modify_host_cpu(
                            host=host,
                            cpu_function=cpu_function,
                            **{"p{}".format(proc): 1})
                        # TODO maybe find a better option than sleep since
                        # we can't wait for applyying
                        # container_helper.wait_for_apps_status(
                        #     apps='stx-openstack',
                        #     status=AppStatus.APPLYING)
                        time.sleep(60)
                        container_helper.wait_for_apps_status(
                            apps='stx-openstack',
                            status=AppStatus.APPLIED,
                            check_interval=30)
                    if mem[proc][1] < 10:
                        args = ' -1G {} {} {}'.format(10, host, proc)
                        cli.system('host-memory-modify', args)
                    host_helper.unlock_host(host=host)
            if hp == 2048:
                # 2M hugepage host: need >= 512 vswitch hugepages and
                # >= 2500 MiB of 2M app pages
                if int(mem[proc][3]) < 512 or mem[proc][0] < 2500:
                    host_helper.lock_host(host=host)
                    if int(mem[proc][3]) < 512:
                        system_helper.modify_system(
                            **{"vswitch_type": vswitch_type})
                        vswitch_args = ' -f vswitch -2M {} {} {}'.format(
                            512, host, proc)
                        cli.system('host-memory-modify', vswitch_args)
                        host_helper.modify_host_cpu(
                            host=host,
                            cpu_function=cpu_function,
                            **{"p{}".format(proc): 1})
                        # TODO maybe find a better option than sleep since
                        # we can't wait for applyying
                        # container_helper.wait_for_apps_status(
                        #     apps='stx-openstack',
                        #     status=AppStatus.APPLIED)
                        time.sleep(60)
                        container_helper.wait_for_apps_status(
                            apps='stx-openstack',
                            status=AppStatus.APPLIED,
                            check_interval=30)
                    if mem[proc][0] < 2500:
                        args = ' -2M {} {} {}'.format(2500, host, proc)
                        cli.system('host-memory-modify', args)
                    host_helper.unlock_host(host=host)

            # verify a vSwitch cpu is now assigned on the host
            test_table = host_helper.get_host_cpu_list_table(host=host)
            curr_assigned_function_list = table_parser.get_values(
                test_table, "assigned_function")
            assert "vSwitch" in curr_assigned_function_list
def test_attempt_host_unlock_during_partition_creation():
    """
    This test attempts to unlock a host while a partition is being
    created.  It is expected to fail.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Lock host
    * Create a partition but don't wait for completion
    * Attempt to unlock the host that is hosting the partition that is
      created

    Teardown:
    * Delete created partitions

    DISABLED since unlock while creating is not blocked.
    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    # Filter out active controller
    active_controller = system_helper.get_active_controller_name()
    print("This is active controller: {}".format(active_controller))
    hosts.remove(active_controller)

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for uuid in free_disks:
            size_gib = float(free_disks[uuid])
            if size_gib < 2.0:
                LOG.info("Skip this disk due to insufficient space")
                continue

            LOG.tc_step("Lock {} and create a partition for disk {}".format(
                host, uuid))
            HostsToRecover.add(host)
            host_helper.lock_host(host)
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            # wait=False: partition creation is left in-progress on purpose
            rc, out = storage_helper.create_host_partition(host, uuid,
                                                           int(size_gib),
                                                           wait=False)
            # rebind 'uuid' to the uuid of the newly created partition
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            LOG.tc_step(
                "Attempt to unlock host and ensure it's rejected when partition is "
                "being created")
            rc_ = host_helper.unlock_host(host, fail_ok=True,
                                          check_first=False)[0]
            assert rc_ != 0, "Unlock attempt unexpectedly passed"

            LOG.tc_step("wait for partition to be created")
            storage_helper.wait_for_host_partition_status(
                host=host, uuid=uuid, timeout=CP_TIMEOUT)
            container_helper.wait_for_apps_status(
                apps='platform-integ-apps',
                status=AppStatus.APPLIED,
                check_interval=10)
            # Only test one disk on each host
            break
        # Do it on one host only
        break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")