Example #1
def _wait_for_config_apply(auth_info_, con_ssh_=None):
    controllers = system_helper.get_controllers(auth_info=auth_info_,
                                                con_ssh=con_ssh_)
    for controller in controllers:
        system_helper.wait_for_events(
            start=start_time,
            fail_ok=False,
            timeout=60,
            entity_instance_id='host=controller',
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            auth_info=auth_info_,
            con_ssh=con_ssh_,
            **{
                'Entity Instance ID': 'host={}'.format(controller),
                'State': 'set'
            })
        # Extend timeout for controller-1 config out-of-date clear to 5 min due to CGTS-8497
        system_helper.wait_for_events(
            start=start_time,
            fail_ok=False,
            timeout=300,
            entity_instance_id='host=controller',
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            auth_info=auth_info_,
            con_ssh=con_ssh_,
            **{
                'Entity Instance ID': 'host={}'.format(controller),
                'State': 'clear'
            })
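A minimal usage sketch for the helper above (hypothetical enclosing test; `common.get_date_in_format()` and `Tenant.get('admin_platform')` are borrowed from other examples in this collection). In the original suite the helper is nested inside a test so that `start_time` is visible through the enclosing scope:

def test_apply_platform_config_change():              # hypothetical enclosing test
    start_time = common.get_date_in_format()          # timestamp taken before the change

    def _wait_for_config_apply(auth_info_, con_ssh_=None):
        ...                                           # body as shown above

    # ... apply the configuration change here (e.g. install a certificate) ...
    _wait_for_config_apply(auth_info_=Tenant.get('admin_platform'))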
Example #2
def test_create_zero_sized_host_partition():
    """
    This test attempts to create a partition of size zero once on each host.
    This should be rejected.

    Test steps:
    * Create partition of size zero
    * Ensure the provisioning is rejected

    Teardown:
    * None
    """

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        for uuid in disks:
            LOG.tc_step(
                "Attempt to create zero sized partition on uuid {} on host {}".
                format(uuid, host))
            rc, out = storage_helper.create_host_partition(host,
                                                           uuid,
                                                           "0",
                                                           fail_ok=True)
            assert rc != 0, "Partition creation was expected to fail but instead succeeded"
            # Let's do this for one disk only on each host
            break
Example #3
def test_assign_rootfs_disk_to_pv():
    """
    This test attempts to create a PV with type Disk on the rootfs.  This is
    expected to fail.

    Assumptions:
    * None

    Test Steps:
    * Determine which disk is the rootfs
    * Attempt to create a PV on that disk using a PV type of Disk.

    Teardown:
    * None
    """

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    rootfs = storage_helper.get_hosts_rootfs(hosts)

    for host in rootfs:
        uuid = rootfs[host]
        # cmd = "host-pv-add -t disk {} cgts-vg {}".format(host, uuid[0])
        cmd = "host-pv-add {} cgts-vg {}".format(host, uuid[0])
        rc, out = cli.system(cmd, fail_ok=True)
        assert rc != 0, "Expected PV creation to fail but instead succeeded"
Example #4
def test_create_partition_using_non_existent_device_node():
    """
    This test attempts to create a partition using an invalid disk.  It is
    expected to fail.

    Arguments:
    * None

    Steps:
    * Attempt to create a partition on a valid host using an invalid device
      node, e.g. /dev/sdz

    Teardown:
    * None
    """

    # Safely hard-coded since we don't have enough physical slots for this to be
    # possible
    device_node = "/dev/sdz"
    size_gib = "1"

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        LOG.tc_step(
            "Creating partition on host {} with size {} using device node {}".
            format(host, size_gib, device_node))
        rc, out = storage_helper.create_host_partition(host,
                                                       device_node,
                                                       size_gib,
                                                       fail_ok=True)
        assert rc != 0, "Partition creation was expected to fail but instead succeeded"
Example #5
def test_cpe_services_and_functions():
    if system_helper.host_exists(host='compute-0'):
        skip("compute-0 exists - skip for non-CPE lab")

    LOG.tc_step("Check controller+compute subfunction via system host-show")
    controllers = system_helper.get_controllers()
    for controller in controllers:
        assert system_helper.is_aio_system(controller=controller), \
            "{} does not have controller+compute subfunction in system host-show".format(controller)

    LOG.tc_step("Check CPE system services via nova service-list")
    check_params = [
        "nova-scheduler",
        # "nova-cert",
        "nova-conductor",
        # "nova-consoleauth",  # removed in Train
        "nova-compute"
    ]

    binaries = nova_helper.get_compute_services(field='Binary')
    assert set(check_params) <= set(binaries), "Not all binaries from {} exist in 'nova service-list'".\
        format(check_params)

    LOG.tc_step("Check all nodes are ready in kubectl get nodes")
    kube_helper.wait_for_nodes_ready(timeout=3)
Example #6
def test_host_disk_wipe_rootfs():
    """
    This test attempts to run system host-disk-wipe on a node using the rootfs
    disk.  Command format is:

    system host-disk-wipe [--confirm] <hostname or id> <disk uuid>

    Note: host-disk-wipe is only applicable to controller and compute nodes. It
    cannot be used on the rootfs disk, nor on a disk that is used by a PV or
    has partitions used by a PV.

    Arguments:
    - None

    Test Steps:
    1.  Determine which is the rootfs disk
    2.  Attempt to wipe the disk
    3.  Expect it to fail for every node

    Assumptions:
    - None
    """
    computes = system_helper.get_hosts(personality="compute")
    storage = system_helper.get_hosts(personality="storage")
    hosts = system_helper.get_controllers() + computes + storage

    LOG.tc_step("Gather rootfs disks")
    rootfs = storage_helper.get_hosts_rootfs(hosts)

    for host in rootfs:
        uuid = rootfs[host]
        LOG.tc_step("Attempting to wipe {} from {}".format(uuid[0], host))
        cmd = 'host-disk-wipe --confirm {} {}'.format(host, uuid[0])
        rc, out = cli.system(cmd, fail_ok=True)
        assert rc != 0, "Expected wipe disk to fail but instead succeeded"
Example #7
def test_modify_second_last_partition():
    """
    This test attempts to modify a partition that is not the last.  It is
    expected to fail, since only the very last partition can be modified.

    Arguments:
    * None

    Test steps:
    * Create partition1
    * Create partition2
    * Attempt to modify partition1

    Teardown:
    * None
    """

    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        partitions_to_restore[host] = []
        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_size = "1"
            partition_chunks = size_gib / int(partition_size)
            if partition_chunks < 3:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue

            LOG.info("Creating first partition on {}".format(host))
            uuid = storage_helper.create_host_partition(
                host, disk_uuid, partition_size)[1]
            partitions_to_restore[host].append(uuid)

            LOG.info("Creating second partition on {}".format(host))
            uuid1 = storage_helper.create_host_partition(
                host, disk_uuid, partition_size)[1]
            partitions_to_restore[host].append(uuid1)

            LOG.tc_step(
                "Modifying partition {} from size {} to size {} from host {} on disk {}"
                .format(uuid, partition_size,
                        int(partition_size) + 1, host, disk_uuid))
            rc, out = storage_helper.modify_host_partition(
                host, uuid, int(partition_size) + 1, fail_ok=True)
            assert rc != 0, "Partition modification was expected to fail but instead was successful"
Example #8
def check_host(controller):
    """Return the active controller name, or a usable standby controller when 'standby' is requested (skip if none exists)."""
    host = system_helper.get_active_controller_name()
    if controller == 'standby':
        controllers = system_helper.get_controllers(availability=(HostAvailState.AVAILABLE, HostAvailState.DEGRADED,
                                                                  HostAvailState.ONLINE))
        controllers.remove(host)
        if not controllers:
            skip('Standby controller does not exist or not in good state')
        host = controllers[0]
    return host
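A short usage sketch (hypothetical parametrized test): check_host resolves 'active'/'standby' to a concrete hostname, skipping when no usable standby exists, so callers can act on the returned host directly:

def test_restart_controller(controller):    # hypothetical; controller is 'active' or 'standby'
    host = check_host(controller)
    HostsToRecover.add(host)
    host_helper.reboot_hosts(host)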
Example #9
def test_delete_host_partitions():
    """
    This test creates host partitions and the teardown deletes them.

    Arguments:
    * None

    Test Steps:
    * Create a partition on each host

    Teardown:
    * Delete the created partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            # Check that the partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #10
def test_enable_tpm(swact_first):
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step('Check if TPM is already configured')
    code, cert_id, cert_type = get_tpm_status(con_ssh)

    if code == 0:
        LOG.info('TPM already configured on the lab, cert_id:{}, cert_type:{}'.
                 format(cert_id, cert_type))

        LOG.tc_step('disable TPM first in order to test enabling TPM')
        code, output = remove_cert_from_tpm(con_ssh,
                                            fail_ok=False,
                                            check_first=False)
        assert 0 == code, 'failed to disable TPM'
        time.sleep(30)

        LOG.info('Waiting for the config out-of-date alarm to clear')
        system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)

    else:
        LOG.info('TPM is NOT configured on the lab')
        LOG.info('-code:{}, cert_id:{}, cert_type:{}'.format(
            code, cert_id, cert_type))

    if swact_first:
        LOG.tc_step('Swact the active controller as instructed')

        if len(system_helper.get_controllers()) < 2:
            LOG.info('Less than 2 controllers, skip swact')
        else:
            host_helper.swact_host(fail_ok=False)
            copy_config_from_local(
                con_ssh, local_conf_backup_dir,
                os.path.join(HostLinuxUser.get_home(), conf_backup_dir))

    LOG.tc_step('Install HTTPS Certificate into TPM')
    code, output = store_cert_into_tpm(
        con_ssh,
        check_first=False,
        fail_ok=False,
        pem_password=HostLinuxUser.get_password())
    assert 0 == code, 'Failed to install certificate into TPM, cert-file'

    LOG.info('OK, certificate is installed into TPM')

    LOG.info('Wait for the config out-of-date alarm to clear')
    system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)

    LOG.tc_step(
        'Verify the configuration changes for impacted components, expecting all changes to exist'
    )
    verify_configuration_changes(expected=True, connection=con_ssh)
Example #11
def test_create_many_small_host_partitions_on_a_single_host():
    """
    This test attempts to create multiple tiny partitions on a single host.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Create small partitions until the disk space is consumed
    * Repeat on all applicable hosts

    Teardown:
    * Delete created partitions

    """

    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        partitions_to_restore[host] = []
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue
        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            num_partitions = 2
            if size_gib <= num_partitions:
                LOG.info("Skipping disk {} due to insufficient space".format(
                    disk_uuid))
                continue
            partition_chunks = int(size_gib / num_partitions)
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            # partitions_to_restore[host] = []
            for i in range(0, num_partitions):
                uuid = storage_helper.create_host_partition(
                    host, disk_uuid, partition_chunks)[1]
                partitions_to_restore[host].append(uuid)
            # Only test one disk on each host
            break
        # Only test one host (otherwise takes too long)
        if usable_disks:
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #12
def wait_for_con_drbd_sync_complete():
    if len(
            system_helper.get_controllers(
                administrative=HostAdminState.UNLOCKED)) < 2:
        LOG.info(
            "Less than two unlocked controllers on system. Do not wait for drbd sync"
        )
        return False

    host = 'controller-1'
    LOG.fixture_step(
        "Waiting for controller-1 drbd sync alarm gone if present")
    end_time = time.time() + 1200
    while time.time() < end_time:
        drbd_alarms = system_helper.get_alarms(
            alarm_id=EventLogID.CON_DRBD_SYNC,
            reason_text='drbd-',
            entity_id=host,
            strict=False)

        if not drbd_alarms:
            LOG.info("{} drbd sync alarm is cleared".format(host))
            break
        time.sleep(10)

    else:
        assert False, "drbd sync alarm {} is not cleared within timeout".format(
            EventLogID.CON_DRBD_SYNC)

    LOG.fixture_step(
        "Wait for {} becomes available in system host-list".format(host))
    system_helper.wait_for_host_values(host,
                                       availability=HostAvailState.AVAILABLE,
                                       timeout=120,
                                       fail_ok=False,
                                       check_interval=10)

    LOG.fixture_step(
        "Wait for {} drbd-cinder in sm-dump to reach desired state".format(
            host))
    host_helper.wait_for_sm_dump_desired_states(host,
                                                'drbd-',
                                                strict=False,
                                                timeout=30,
                                                fail_ok=False)
    return True
Example #13
def wait_for_tmp_status(cert_id, ssh_client=None, expected_status=''):
    rc, actual_id, actual_mode, actual_states = get_cert_info(
        cert_id, con_ssh=ssh_client)
    LOG.info('actual_id={}, actual_mode={}, actual_states={}'.format(
        actual_id, actual_mode, actual_states))

    controllers = system_helper.get_controllers(con_ssh=ssh_client)
    if expected_status == 'tpm-config-applied':
        for h in controllers:
            if h not in actual_states[
                    'state'] or actual_states['state'][h] != expected_status:
                return 1, '{} is not in expected status: {}'.format(
                    h, expected_status)

        return 0, 'all controllers:{} are in expected status:{}'.format(
            controllers, expected_status)

    elif rc != 0:
        return 0, 'no detailed information as expected'

    return 1, 'did not get expected status, continue to wait'
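A minimal polling sketch around the helper above (hypothetical caller; cert_id and con_ssh are assumed to come from the surrounding test). The helper returns 0 once the expected status is reached and 1 while the caller should keep waiting:

end_time = time.time() + 300
while time.time() < end_time:
    rc, msg = wait_for_tmp_status(cert_id, ssh_client=con_ssh,
                                  expected_status='tpm-config-applied')
    if rc == 0:
        LOG.info(msg)
        break
    time.sleep(10)
else:
    assert False, 'tpm-config-applied was not reached within timeout'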
Example #14
def test_increase_controllerfs():
    """
    This test increases the size of the various controllerfs filesystems all at
    once.

    Arguments:
    - None

    Test Steps:
    - Query the filesystem for their current size
    - Increase the size of each filesystem at once

    Assumptions:
    - There is sufficient free space to allow for an increase, otherwise skip
      test.

    """
    drbdfs_val = {}
    LOG.tc_step("Determine the space available for each drbd filesystem")
    for fs in DRBDFS:
        drbdfs_val[fs] = storage_helper.get_controllerfs_values(fs)[0]
        LOG.info("Current value of {} is {}".format(fs, drbdfs_val[fs]))
        drbdfs_val[fs] = drbdfs_val[fs] + 1
        LOG.info("Will attempt to increase the value of {} to {}".format(
            fs, drbdfs_val[fs]))

    LOG.tc_step("Increase the size of all filesystems")
    storage_helper.modify_controllerfs(**drbdfs_val)
    # Need to wait until the change takes effect before checking the
    # filesystems
    hosts = system_helper.get_controllers()
    for host in hosts:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**drbdfs_val)
Example #15
def test_disable_tpm(swact_first):
    ssh_client = ControllerClient.get_active_controller()

    LOG.tc_step('Check if TPM is already configured')
    code, cert_id, cert_type = get_tpm_status(ssh_client)

    if code == 0:
        LOG.info('TPM is configured on the lab')

        if swact_first:
            LOG.tc_step('Swact the active controller as instructed')
            if len(system_helper.get_controllers()) < 2:
                LOG.info('Less than 2 controllers, skip swact')
            else:
                host_helper.swact_host(fail_ok=False)
                copy_config_from_local(
                    ssh_client, local_conf_backup_dir,
                    os.path.join(HostLinuxUser.get_home(), conf_backup_dir))

        LOG.tc_step('Disabling TPM')
        code, output = remove_cert_from_tpm(ssh_client,
                                            fail_ok=False,
                                            check_first=False)
        assert 0 == code, 'failed to disable TPM'

        LOG.info('Wait for the config out-of-date alarm to clear')
        system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)

        LOG.tc_step(
            'Verify the configuration changes for impacted components, expecting NONE of the changes to be present'
        )
        verify_configuration_changes(expected=False, connection=ssh_client)

    else:
        LOG.info('TPM is NOT configured on the lab, skip the test')
        skip('TPM is NOT configured on the lab, skip the test')
Example #16
def test_delete_heat_after_swact(template_name):
    """
    Test if a heat stack can be deleted after swact:

    Args:
        template_name (str): e.g., OS_Cinder_Volume.

    =====
    Prerequisites (skip test if not met):
        - at least two hypervisor hosts on the system

    Test Steps:
        - Create a heat stack with the given template
        - Verify heat stack is created successfully
        - Verify heat resources are created
        - Swact controllers
        - Delete Heat stack and verify resource deletion

    """
    if len(system_helper.get_controllers()) < 2:
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)

    # add test step
    verify_basic_template(template_name, delete_after_swact=True)
Example #17
def _test_status_firewall_reboot():
    """
    Test iptables status after reboot of controller

    Test Steps:
        - Stop iptables service
        - Confirm iptables service has stopped
        - Reboot the controller being tested
        - Confirm iptables service is online
        - Repeat for second controller
    """
    LOG.tc_step("Getting the controller(s)")
    controllers = system_helper.get_controllers()
    for controller in controllers:
        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step("Stopping iptables service")
            cmd = 'service iptables stop'
            con_ssh.exec_sudo_cmd(cmd)
            LOG.tc_step("checking iptables status")
            cmd = 'service iptables status'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            assert 'Active: inactive' in output or 'Active: failed' in output, \
                "iptables service did not stop running on host {}".format(controller)

        LOG.tc_step("Rebooting {}".format(controller))
        HostsToRecover.add(controller)
        host_helper.reboot_hosts(controller)

        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step(
                "Checking iptables status on host {} after reboot".format(
                    controller))
            cmd = 'service iptables status | grep --color=never Active'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            assert 'active' in output, "iptables service did not start after reboot on host {}".format(
                controller)
Example #18
def less_than_two_controllers(con_ssh=None,
                              auth_info=Tenant.get('admin_platform')):
    return len(
        system_helper.get_controllers(con_ssh=con_ssh,
                                      auth_info=auth_info)) < 2
Example #19
def less_than_two_cons(no_openstack):
    return len(system_helper.get_controllers()) < 2
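A usage sketch (hypothetical test) showing how such a predicate is typically consumed, mirroring the inline check in test_delete_heat_after_swact above:

def test_action_requiring_two_controllers():    # hypothetical
    if less_than_two_controllers():
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)
    host_helper.swact_host(fail_ok=False)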
Example #20
def test_increase_host_partition_size_beyond_avail_disk_space():
    """
    This test attempts to increase the size of an existing host partition
    beyond the available space on disk.  It is expected to fail.

    Assumptions:
    * Partitions are available in Ready state.

    Test steps:
    * Create partition
    * Modify the partition to consume more than the available disk space

    Teardown:
    * Delete created partitions

    """

    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            # Check that the partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            device_node = storage_helper.get_host_partition_values(
                host, uuid, "device_node")[0]
            device_node = device_node.rstrip(string.digits)
            if device_node.startswith("/dev/nvme"):
                device_node = device_node[:-1]
            size_gib += 1
            LOG.tc_step(
                "Modifying partition {} from size 1 to size {} from host {} on device node {}"
                .format(uuid, int(size_gib), host, device_node))
            rc, out = storage_helper.modify_host_partition(host,
                                                           uuid,
                                                           str(int(size_gib)),
                                                           fail_ok=True)
            assert rc != 0, "Expected partition modification to fail and instead it succeeded"
            LOG.info(out)
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #21
def test_resize_drbd_filesystem_while_resize_inprogress():
    """
    This test attempts to resize a drbd filesystem while an existing drbd
    resize is in progress.  This should be rejected.

    Arguments:
    - None

    Test steps:
    1.  Increase the size of the extension filesystem to allow the test to
    proceed.
    2.  Wait for alarms to clear and then check the underlying filesystem is
    updated.
    3.  Attempt to resize the database filesystem.  This should be successful.
    4.  Attempt to resize the database filesystem again immediately.  This
    should be rejected.

    Assumptions:
    - None

    """

    start_time = common.get_date_in_format()
    drbdfs_val = {}
    fs = "extension"
    LOG.tc_step(
        "Increase the {} size before proceeding with rest of test".format(fs))
    drbdfs_val[fs] = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, drbdfs_val[fs]))
    drbdfs_val[fs] = int(drbdfs_val[fs]) + 5
    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, drbdfs_val[fs]))
    LOG.tc_step("Increase the size of filesystems")
    storage_helper.modify_controllerfs(**drbdfs_val)

    hosts = system_helper.get_controllers()
    for host in hosts:
        system_helper.wait_for_events(
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            start=start_time,
            entity_instance_id="host={}".format(host),
            strict=False,
            **{'state': 'set'})

    for host in hosts:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**drbdfs_val)

    drbdfs_val = {}
    fs = "database"
    LOG.tc_step("Determine the current filesystem size")
    value = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, value))
    drbdfs_val[fs] = int(value) + 1
    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, drbdfs_val[fs]))

    LOG.tc_step("Increase the size of filesystems")
    storage_helper.modify_controllerfs(**drbdfs_val)

    LOG.tc_step("Attempt to increase the size of the filesystem again")
    drbdfs_val[fs] = int(drbdfs_val[fs]) + 1
    code = storage_helper.modify_controllerfs(fail_ok=True, **drbdfs_val)[0]
    assert 1 == code, "Filesystem modify succeeded while failure is expected: {}".format(
        drbdfs_val)

    # Appearance of sync alarm is delayed so wait for it to appear and then
    # clear
    if not system_helper.is_aio_simplex():
        system_helper.wait_for_alarm(alarm_id=EventLogID.CON_DRBD_SYNC,
                                     timeout=300)
        system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CON_DRBD_SYNC,
                                          timeout=300)
Example #22
def test_modify_drdb_swact_then_reboot():
    """
    This test modifies the size of the drbd based filesystems, does an
    immediate swact and then reboots the active controller.

    Arguments:
    - None

    Test Steps:
    - Determine how much free space we have available
    - Increase database
    - Increase extension
    - Initiate a controller swact
    - Initiate a controller reboot

    Assumptions:
    - None

    """

    drbdfs = DRBDFS
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step("Determine the available free space on the system")
    cmd = "vgdisplay -C --noheadings --nosuffix -o vg_free --units g cgts-vg"
    rc, out = con_ssh.exec_sudo_cmd(cmd)
    free_space = out.lstrip()
    LOG.info("Available free space on the system is: {}".format(free_space))
    if float(free_space) <= 10:
        skip("Not enough free space to complete test.")

    drbdfs_val = {}
    LOG.tc_step("Determine the space available for each drbd fs")
    for fs in drbdfs:
        table_ = table_parser.table(
            cli.system('controllerfs-show {}'.format(fs))[1])
        drbdfs_val[fs] = table_parser.get_value_two_col_table(table_, 'size')

    LOG.info("Current fs values are: {}".format(drbdfs_val))

    LOG.tc_step("Increase the size of the extension and database filesystem")
    partition_name = "database"
    partition_value = drbdfs_val[partition_name]
    backup_freespace = math.trunc(float(free_space) / 10)
    new_partition_value = backup_freespace + int(partition_value)
    cmd = "controllerfs-modify {}={}".format(partition_name,
                                             new_partition_value)
    cli.system(cmd)

    partition_name = "extension"
    partition_value = drbdfs_val[partition_name]
    cgcs_freespace = math.trunc(backup_freespace / 2)
    new_partition_value = cgcs_freespace + int(partition_value)
    cmd = "controllerfs-modify {}={}".format(partition_name,
                                             new_partition_value)
    cli.system(cmd)

    hosts = system_helper.get_controllers()
    for host in hosts:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)
    standby_cont = system_helper.get_standby_controller_name()
    system_helper.wait_for_host_values(standby_cont,
                                       availability=HostAvailState.AVAILABLE)
    host_helper.swact_host()

    act_cont = system_helper.get_active_controller_name()
    host_helper.reboot_hosts(act_cont)

    time.sleep(5)

    system_helper.wait_for_alarm_gone(
        alarm_id=EventLogID.HOST_RECOVERY_IN_PROGRESS,
        entity_id="host={}".format(act_cont),
        timeout=600)
Example #23
def test_decrease_host_partition_size():
    """
    This test attempts to decrease the size of an existing host partition.  It
    is expected to fail since decreasing the size of a partition is not
    supported.


    Test Steps:
    * Create a partition
    * Modify the partition to decrease its size

    Teardown:
    * Delete created partition

    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            # Check that the partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            device_node, size_gib = storage_helper.get_host_partition_values(
                host, uuid, ("device_node", "size_gib"))
            total_size = int(size_gib) - 1
            LOG.tc_step(
                "Modifying partition {} from size {} to size {} from host {} on device node {}"
                .format(uuid, int(size_gib), str(total_size), host,
                        device_node[:-1]))
            rc, out = storage_helper.modify_host_partition(host,
                                                           uuid,
                                                           str(total_size),
                                                           fail_ok=True)
            assert rc != 0, "Expected partition modification to fail and instead it succeeded"
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #24
def _test_increase_ceph_mon():
    """
    Increase the size of ceph-mon.  Only applicable to a storage system.

    Fails until CGTS-8216

    Test steps:
    1.  Determine the current size of ceph-mon
    2.  Attempt to modify ceph-mon to invalid values
    3.  Check if there is free space to increase ceph-mon
    4.  Attempt to increase ceph-mon
    5.  Wait for config out-of-date alarms to raise
    6.  Lock/unlock all affected nodes (controllers and storage)
    7.  Wait for alarms to clear
    8.  Check that ceph-mon has the correct updated value

    Enhancement:
    1.  Possibly check there is enough disk space for ceph-mon to increase.  Not sure if
    this is required since there always seems to be some space on the rootfs.

    """
    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    LOG.info("ceph_mon_gib is currently: {}".format(ceph_mon_gib))

    LOG.tc_step("Attempt to modify ceph-mon to invalid values")
    invalid_cmg = ['19', '41', 'fds']
    for value in invalid_cmg:
        host = "controller-0"
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(host, value),
                   fail_ok=True)

    if int(ceph_mon_gib) >= 30:
        skip("Insufficient disk space to execute test")

    ceph_mon_gib_avail = 40 - int(ceph_mon_gib)
    new_ceph_mon_gib = math.trunc(ceph_mon_gib_avail / 10) + int(ceph_mon_gib)

    LOG.tc_step("Increase ceph_mon_gib to {}".format(new_ceph_mon_gib))
    hosts = system_helper.get_controllers()
    for host in hosts:
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(
            host, new_ceph_mon_gib))
        # We only need to do this for one controller now and it applies to both
        break

    LOG.info("Wait for expected alarms to appear")
    storage_hosts = system_helper.get_storage_nodes()
    total_hosts = hosts + storage_hosts
    for host in total_hosts:
        system_helper.wait_for_alarm(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                     entity_id="host={}".format(host))

    LOG.tc_step("Lock/unlock all affected nodes")
    for host in storage_hosts:
        HostsToRecover.add(host)
        host_helper.lock_host(host)
        host_helper.unlock_host(host)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host))
        time.sleep(10)

    standby = system_helper.get_standby_controller_name()
    active = system_helper.get_active_controller_name()
    HostsToRecover.add(standby)
    host_helper.lock_host(standby)
    host_helper.unlock_host(standby)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(standby))
    time.sleep(10)
    host_helper.swact_host(active)
    HostsToRecover.add(active)
    host_helper.lock_host(active)
    host_helper.unlock_host(active)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(active))

    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    assert int(ceph_mon_gib) == new_ceph_mon_gib, "ceph_mon_gib was not updated to {}".format(new_ceph_mon_gib)
Example #25
def test_attempt_host_unlock_during_partition_creation():
    """
    This test attempts to unlock a host while a partition is being created.  It
    is expected to fail.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Lock host
    * Create a partition but don't wait for completion
    * Attempt to unlock the host that is hosting the partition that is created

    Teardown:
    * Delete created partitions

    DISABLED since unlock while creating is not blocked.

    """

    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    # Filter out active controller
    active_controller = system_helper.get_active_controller_name()
    LOG.info("This is active controller: {}".format(active_controller))
    hosts.remove(active_controller)

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for uuid in free_disks:
            size_gib = float(free_disks[uuid])
            if size_gib < 2.0:
                LOG.info("Skip this disk due to insufficient space")
                continue

            LOG.tc_step("Lock {} and create a partition for disk {}".format(
                host, uuid))
            HostsToRecover.add(host)
            host_helper.lock_host(host)
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           uuid,
                                                           int(size_gib),
                                                           wait=False)
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            LOG.tc_step(
                "Attempt to unlock host and ensure it's rejected when partition is "
                "being created")
            rc_ = host_helper.unlock_host(host,
                                          fail_ok=True,
                                          check_first=False)[0]
            assert rc_ != 0, "Unlock attempt unexpectedly passed"

            LOG.tc_step("wait for partition to be created")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)

            container_helper.wait_for_apps_status(apps='platform-integ-apps',
                                                  status=AppStatus.APPLIED,
                                                  check_interval=10)
            # Only test one disk on each host
            break
        # Do it on one host only
        break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #26
def _test_create_partition_and_associate_with_pv_cgts_vg():
    """
    This test attempts to create a partition and then associate it with a PV
    (physical volume), resulting in the partition being In-use.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query hosts to determine disk space
    * Create partition
    * Associate it with cgts-vg PV
    * Checks the partition is in-use state
    * Attempts to delete the partition that is in-use.  It should fail.
    * Attempt to assign the in-use partition to another PV.  It should fail.

    Teardown:
    * None

    DISABLING: This fails since the partition says 'adding on unlock'.  Should
    it be in-service?  Follow up with dev.
    """

    global partitions_to_restore
    partitions_to_restore = {}

    if not system_helper.is_aio_system():
        skip("This test requires an AIO system.")

    hosts = system_helper.get_controllers()

    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue
        for uuid in free_disks:
            size_gib = float(free_disks[uuid])
            if size_gib <= 1:
                LOG.tc_step("Skip this disk due to insufficient space")
                continue
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, uuid, "1")
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)
            LOG.tc_step("Associating partition {} with cgts-vg".format(uuid))
            # cmd = "host-pv-add -t partition {} cgts-vg {}".format(host, uuid)
            cmd = "host-pv-add {} cgts-vg {}".format(host, uuid)
            rc, out = cli.system(cmd)
            assert rc == 0, "Associating partition with PV failed"
            LOG.tc_step("Check that partition is In-use state")
            storage_helper.wait_for_host_partition_status(
                host=host,
                uuid=uuid,
                final_status=PartitionStatus.IN_USE,
                interim_status=PartitionStatus.READY,
                timeout=CP_TIMEOUT)
            LOG.tc_step("Attempt to delete In-Use partition")
            rc, out = storage_helper.delete_host_partition(host,
                                                           uuid,
                                                           fail_ok=True)
            assert rc != 0, "Partition deletion was expected to fail but instead passed"
            LOG.tc_step(
                "Attempt to associate the In-Use partition with another PV")
            # cmd = "host-pv-add -t partition {} nova-local {}".format(host, uuid)
            cmd = "host-pv-add {} nova-local {}".format(host, uuid)
            rc, out = cli.system(cmd, fail_ok=True)
            assert rc != 0, "Partition association succeeded but was expected to fail"
            # Only test one disk on each host
            break
        # Do it on one host only
        break
Example #27
def test_increase_extensionfs_with_alarm():
    """
    This test increases the size of the extension controllerfs filesystem while
    there is an alarm condition for the fs.

    Arguments:
    - None

    Test Steps:
    - Query the filesystem for their current size
    - Cause an alarm condition by filling the space on that fs
    - Verify controller-0 is degraded
    - Increase the size of extension filesystem.
    - Verify alarm is gone

    Assumptions:
    - There is sufficient free space to allow for an increase, otherwise skip
      test.
    """
    file_loc = "/opt/extension"
    cmd = "cd " + file_loc
    file_path = file_loc + "/" + "testFile"
    drbdfs_val = {}
    fs = "extension"

    active_controller = system_helper.get_active_controller_name()

    LOG.tc_step("Determine the space available for extension filesystem")
    drbdfs_val[fs] = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, drbdfs_val[fs]))

    # compute 91% of the current size
    LOG.info(
        "Will attempt to fill up ~91% of the space on fs {} (current size {})".
        format(fs, drbdfs_val[fs]))
    file_size = int((drbdfs_val[fs] * 0.91) * 1000)
    file_size = str(file_size) + "M"
    cmd1 = "fallocate -l {} testFile".format(file_size)
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd(cmd)
    con_ssh.exec_sudo_cmd(cmd1)
    if not con_ssh.file_exists(file_path=file_path):
        LOG.info("File {} is not created".format(file_path))
        return 0

    # fill_in_fs(size=file_size)
    LOG.tc_step(
        "Verifying that the alarm is created after filling the fs space in {}".
        format(fs))
    system_helper.wait_for_alarm(alarm_id="100.104",
                                 entity_id=active_controller,
                                 timeout=600,
                                 strict=False)

    # verify the controller is in degraded state
    LOG.tc_step(
        "Verifying controller is degraded after filling the fs space in {}".
        format(fs))
    system_helper.wait_for_host_values(active_controller,
                                       availability='degraded')

    drbdfs_val[fs] = drbdfs_val[fs] + 2

    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, drbdfs_val[fs]))

    LOG.tc_step("Increase the size of extension filesystem")
    storage_helper.modify_controllerfs(**drbdfs_val)

    # Need to wait until the change takes effect before checking the
    # filesystems
    hosts = system_helper.get_controllers()
    for host in hosts:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)
        LOG.tc_step(
            "Verifying that the alarm is cleared after increasing the fs space in {}"
            .format(fs))
        system_helper.wait_for_alarm_gone(alarm_id="100.104",
                                          entity_id="host={}".format(host),
                                          timeout=600,
                                          strict=False)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**drbdfs_val)

    # verify the controller is in available state
    LOG.tc_step(
        "Verifying that the controller is in available state after increasing the fs space in {}"
        .format(fs))
    system_helper.wait_for_host_values(active_controller,
                                       availability='available')
Example #28
def test_create_multiple_partitions_on_single_host():
    """
    This test attempts to create multiple partitions at once on a single host.
    While the first partition is being created, we will attempt to create a
    second partition.  The creation of the second partition should be rejected
    but the creation of the first partition should be successful.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Create a small partition but don't wait for creation
    * Immediately create a second small partition
    * Check that the second partition creation is rejected
    * Check the first partition was successfully created
    * Repeat on all applicable hosts

    Teardown:
    * Delete created partitions

    """

    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            usable_disks = True
            LOG.info("Creating first partition on {}".format(host))
            rc1, out1 = storage_helper.create_host_partition(host,
                                                             disk_uuid,
                                                             "1",
                                                             fail_ok=False,
                                                             wait=False)
            LOG.info("Creating second partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           disk_uuid,
                                                           "1",
                                                           fail_ok=True)
            assert rc != 0, "Partition creation was expected to fail but was instead successful"
            # Check that the first partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out1), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #29
def test_increase_host_partition_size():
    """
    Create a partition and then modify it to consume the entire disk

    Arguments:
    * None


    Test Steps:
    * Create a partition
    * Modify the partition so we consume all available space on the disk
    * Check that the disk available space goes to zero
    * Delete the partition
    * Check that the available space is freed

    Teardown:
    * Delete the partitions

    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host,
                                                           disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            # Check that the partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            device_node = storage_helper.get_host_partition_values(
                host, uuid, "device_node")[0]
            device_node = device_node.rstrip(string.digits)
            if device_node.startswith("/dev/nvme"):
                device_node = device_node[:-1]
            LOG.tc_step(
                "Modifying partition {} from size 1 to size {} from host {} on device node {}"
                .format(uuid,
                        int(size_gib) - 2, host, device_node))
            storage_helper.modify_host_partition(host, uuid,
                                                 str(int(size_gib) - 2))
            new_disk_available_gib = storage_helper.get_host_disk_values(
                host, device_node, "available_gib")[0]
            assert 0 <= int(float(new_disk_available_gib)) <= 3, \
                "Expected disk space to be consumed but instead we have {} available".format(new_disk_available_gib)
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
Example #30
def test_swact_100_times():
    """
    Skip Condition:
        - Less than two controllers on system

    Test Steps:
        - Boot a vm and ensure it's pingable
        - Start writing from pre-existed vm before swacting
        - Repeat following steps 100 times:
            - ensure system has standby controller
            - system host-swact
            - ensure all services are active in sudo sm-dump on new active controller
            - ensure pre-existed vm is still pingable from NatBox
            - ensure writing did not stop on pre-existed vm
            - ensure new vm can be launched in 2 minutes
            - ensure newly booted vm is pingable from NatBox
            - delete newly booted vm

    Teardown:
        - delete vms, volumes

    """
    if len(system_helper.get_controllers()) < 2:
        skip("Less than two controllers on system")

    if not system_helper.get_standby_controller_name():
        assert False, "No standby controller on system"

    LOG.tc_step("Boot a vm and ensure it's pingable")
    vm_base = vm_helper.boot_vm(name='pre_swact', cleanup='function')[1]

    LOG.tc_step("Start writing from pre-existed vm before swacting")
    end_event = Events("End write in base vm")
    base_vm_thread = vm_helper.write_in_vm(vm_base, end_event=end_event, expect_timeout=40, thread_timeout=60*100)

    try:
        for i in range(100):
            iter_str = "Swact iter{}/100 - ".format(i+1)

            LOG.tc_step("{}Ensure system has standby controller".format(iter_str))
            standby = system_helper.get_standby_controller_name()
            assert standby

            LOG.tc_step("{}Swact active controller and ensure active controller is changed".format(iter_str))
            host_helper.swact_host()

            LOG.tc_step("{}Check all services are up on active controller via sudo sm-dump".format(iter_str))
            host_helper.wait_for_sm_dump_desired_states(controller=standby, fail_ok=False)

            LOG.tc_step("{}Ensure pre-existed vm still pingable post swact".format(iter_str))
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_base, timeout=45)

            time.sleep(5)
            LOG.tc_step("{}Ensure writing from pre-existed vm resumes after swact".format(iter_str))
            assert base_vm_thread.res is True, "Writing in pre-existed vm stopped after {}".format(iter_str.lower())

            LOG.tc_step("{}Attempt to boot new vm after 2 minutes of post swact and ensure it's pingable".
                        format(iter_str))
            time.sleep(60)
            for j in range(3):
                code, vm_new, msg = vm_helper.boot_vm(name='post_swact', fail_ok=True, cleanup='function')

                if code == 0:
                    break

                LOG.warning("VM failed to boot - attempt{}".format(j+1))
                vm_helper.delete_vms(vms=vm_new)
                assert j < 2, "No vm can be booted 2+ minutes after swact"

                LOG.tc_step("{}VM{} failed to boot, wait for 30 seconds and retry".format(j+1, iter_str))
                time.sleep(30)

            vm_helper.wait_for_vm_pingable_from_natbox(vm_new)

            LOG.tc_step("{}Delete the vm created".format(iter_str))
            vm_helper.delete_vms(vms=vm_new)
    finally:
        LOG.tc_step("End the base_vm_thread")
        end_event.set()
        base_vm_thread.wait_for_thread_end(timeout=20)

    post_standby = system_helper.get_standby_controller_name()
    assert post_standby, "System does not have standby controller after last swact"