예제 #1
0
def get_cert_info(cert_id, con_ssh=None):
    LOG.info('check the status of the current certificate')
    cmd = 'certificate-show ' + cert_id
    output = cli.system(cmd, ssh_client=con_ssh, fail_ok=False)[1]
    if output:
        table = table_parser.table(output)
        if table:
            actual_id = table_parser.get_value_two_col_table(table, 'uuid')
            actual_type = table_parser.get_value_two_col_table(
                table, 'certtype')
            actual_details = table_parser.get_value_two_col_table(
                table, 'details')
            actual_states = ''
            if not actual_details:
                # CGTS-9529
                LOG.fatal('No details in output of certificate-show')
                LOG.fatal(
                    'Ignore it until the known issue CGTS-9529 fixed, output:'
                    + output)
                # assert False, 'No details in output of certificate-show'
            else:
                LOG.debug('details from output of certificate-show: {}'.format(
                    actual_details))
                actual_states = eval(actual_details)
                LOG.debug('states: {}'.format(actual_states))
                return 0, actual_id, actual_type, actual_states

            LOG.info('')
            return 1, actual_id, actual_type, actual_states
    else:
        LOG.info('no "details" in output')

    return 2, '', '', ''
예제 #2
0
def test_restore(restore_setup):
    controller1 = 'controller-1'
    controller0 = 'controller-0'

    lab = restore_setup["lab"]
    is_aio_lab = lab.get('system_type', 'Standard') == 'CPE'
    is_sx = is_aio_lab and (len(lab['controller_nodes']) < 2)

    tis_backup_files = restore_setup['tis_backup_files']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    controller_node = lab[controller0]
    con_ssh = ControllerClient.get_active_controller(name=lab['short_name'],
                                                     fail_ok=True)
    sys_prompt = Prompt.TIS_NODE_PROMPT_BASE.format('.*' +
                                                    lab['name'].split('_')[0])
    controller_prompt = '{}|{}'.format(sys_prompt, Prompt.CONTROLLER_0)
    controller_node.telnet_conn.set_prompt(controller_prompt)

    if not con_ssh:
        LOG.info("Establish ssh connection with {}".format(controller0))
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        controller_node.ssh_conn.deploy_ssh_key()
        con_ssh = controller_node.ssh_conn
        ControllerClient.set_active_controller(con_ssh)

    LOG.info("Restore system from backup....")
    system_backup_file = [
        file for file in tis_backup_files if "system.tgz" in file
    ].pop()
    images_backup_file = [
        file for file in tis_backup_files if "images.tgz" in file
    ].pop()

    LOG.tc_step("Restoring {}".format(controller0))

    LOG.info("System config restore from backup file {} ...".format(
        system_backup_file))

    if backup_src.lower() == 'usb':
        system_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            system_backup_file)
    else:
        system_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           system_backup_file)

    compute_configured = install_helper.restore_controller_system_config(
        system_backup=system_backup_path, is_aio=is_aio_lab)[2]

    # return

    LOG.info('re-connect to the active controller using ssh')
    con_ssh.close()
    controller_node.ssh_conn = install_helper.ssh_to_controller(
        controller_node.host_ip, initial_prompt=controller_prompt)
    LOG.info("Source Keystone user admin environment ...")
    LOG.info("set prompt to:{}, telnet_conn:{}".format(
        controller_prompt, controller_node.telnet_conn))

    controller_node.telnet_conn.exec_cmd("cd; source /etc/platform/openrc")
    con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
    controller_node.ssh_conn = con_ssh
    ControllerClient.set_active_controller(con_ssh)

    make_sure_all_hosts_locked(con_ssh)

    if backup_src.lower() == 'local':
        images_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           images_backup_file)
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, images_backup_file),
            HostLinuxUser.get_home())
    else:
        images_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            images_backup_file)

    LOG.info(
        "Images restore from backup file {} ...".format(images_backup_file))

    new_prompt = r'{}.*~.*\$ |controller\-0.*~.*\$ '.format(
        lab['name'].split('_')[0])
    LOG.info('set prompt to:{}'.format(new_prompt))
    con_ssh.set_prompt(new_prompt)

    install_helper.restore_controller_system_images(
        images_backup=images_backup_path,
        tel_net_session=controller_node.telnet_conn)
    # this is a workaround for CGTS-8190
    install_helper.update_auth_url(con_ssh)

    LOG.tc_step(
        "Verifying  restoring controller-0 is complete and is in available state ..."
    )
    LOG.debug('Wait for system ready in 60 seconds')
    time.sleep(60)

    timeout = HostTimeout.REBOOT + 60
    availability = HostAvailState.AVAILABLE
    is_available = system_helper.wait_for_hosts_states(
        controller0,
        availability=HostAvailState.AVAILABLE,
        fail_ok=True,
        timeout=timeout)
    if not is_available:
        LOG.warn(
            'After {} seconds, the first node:{} does NOT reach {}'.format(
                timeout, controller0, availability))
        LOG.info('Check if drbd is still synchronizing data')
        con_ssh.exec_sudo_cmd('drbd-overview')
        is_degraded = system_helper.wait_for_hosts_states(
            controller0,
            availability=HostAvailState.DEGRADED,
            fail_ok=True,
            timeout=300)
        if is_degraded:
            LOG.warn('Node: {} is degraded: {}'.format(
                controller0, HostAvailState.DEGRADED))
            con_ssh.exec_sudo_cmd('drbd-overview')
        else:
            LOG.fatal('Node:{} is NOT in Available nor Degraded status')
            # the customer doc does have wording regarding this situation, continue
            # assert False, 'Node:{} is NOT in Available nor Degraded status'

    # delete the system backup files from sysadmin home
    LOG.tc_step("Copying backup files to /opt/backups ... ")
    if backup_src.lower() == 'local':
        con_ssh.exec_cmd("rm -f {} {}".format(system_backup_path,
                                              images_backup_path))

        cmd_rm_known_host = r'sed -i "s/^[^#]\(.*\)"/#\1/g /etc/ssh/ssh_known_hosts; \sync'
        con_ssh.exec_sudo_cmd(cmd_rm_known_host)

        # transfer all backup files to /opt/backups from test server
        with con_ssh.login_as_root():
            con_ssh.scp_on_dest(source_user=TestFileServer.get_user(),
                                source_ip=TestFileServer.get_server(),
                                source_pswd=TestFileServer.get_password(),
                                source_path=backup_src_path + "/*",
                                dest_path=StxPath.BACKUPS + '/',
                                timeout=1200)

    else:
        # copy all backupfiles from USB to /opt/backups
        cmd = " cp  {}/* {}".format(BackupRestore.USB_BACKUP_PATH,
                                    StxPath.BACKUPS)
        con_ssh.exec_sudo_cmd(cmd, expect_timeout=600)

    LOG.tc_step("Checking if backup files are copied to /opt/backups ... ")
    assert int(con_ssh.exec_cmd("ls {} | wc -l".format(StxPath.BACKUPS))[1]) >= 2, \
        "Missing backup files in {}".format(StxPath.BACKUPS)

    if is_aio_lab:
        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756)')
        cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.format(
            HostLinuxUser.get_password())
        controller_node.telnet_conn.login()
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects=[' will reboot on completion'])

        LOG.info('- wait untill reboot completes, ')
        time.sleep(120)
        LOG.info('- confirm the active controller is actually back online')
        controller_node.telnet_conn.login()

        LOG.tc_step(
            "reconnecting to the active controller after restore-complete")
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)

        if not compute_configured:
            LOG.tc_step(
                'Latest 18.07 EAR1 or Old-load on AIO/CPE lab: config its '
                'compute functionalities')
            # install_helper.run_cpe_compute_config_complete(controller_node, controller0)

            # LOG.info('closing current ssh connection')
            # con_ssh.close()

            LOG.tc_step('Run restore-complete (CGTS-9756)')
            controller_node.telnet_conn.login()

            cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.\
                format(HostLinuxUser.get_password())
            controller_node.telnet_conn.exec_cmd(cmd,
                                                 extra_expects=' will reboot ')
            controller_node.telnet_conn.close()

            LOG.info(
                'Wait until "config_controller" reboot the active controller')
            time.sleep(180)

            controller_node.telnet_conn = install_helper.open_telnet_session(
                controller_node)
            controller_node.telnet_conn.login()
            time.sleep(120)

            con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
            controller_node.ssh_conn = con_ssh

            ControllerClient.set_active_controller(con_ssh)

            host_helper.wait_for_hosts_ready(controller0)

        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        if not is_sx:
            install_non_active_node(controller1, lab)

    elif len(lab['controller_nodes']) >= 2:
        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        install_non_active_node(controller1, lab)

        boot_interfaces = lab['boot_device_dict']

        hostnames = system_helper.get_hosts()
        storage_hosts = [host for host in hostnames if 'storage' in host]
        compute_hosts = [
            host for host in hostnames
            if 'storage' not in host and 'controller' not in host
        ]

        if len(storage_hosts) > 0:
            # con_ssh.exec_sudo_cmd('touch /etc/ceph/ceph.client.None.keyring')
            for storage_host in storage_hosts:
                LOG.tc_step("Restoring {}".format(storage_host))
                install_helper.open_vlm_console_thread(
                    storage_host,
                    boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info(
                    "Verifying {} is Locked, Diabled and Online ...".format(
                        storage_host))
                system_helper.wait_for_hosts_states(
                    storage_host,
                    administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)

                LOG.info("Unlocking {} ...".format(storage_host))
                rc, output = host_helper.unlock_host(storage_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, msg: {}".format(
                    storage_host, rc, output)

            LOG.info("Veryifying the Ceph cluster is healthy ...")
            storage_helper.wait_for_ceph_health_ok(timeout=600)

            LOG.info("Importing images ...")
            image_backup_files = install_helper.get_backup_files(
                IMAGE_BACKUP_FILE_PATTERN, StxPath.BACKUPS, con_ssh)
            LOG.info("Image backup found: {}".format(image_backup_files))
            imported = install_helper.import_image_from_backup(
                image_backup_files)
            LOG.info("Images successfully imported: {}".format(imported))

        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756), regular lab')
        controller_node.telnet_conn.login()
        cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.format(
            HostLinuxUser.get_password())
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects='controller-0 login:'******'rebuild ssh connection')
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
        controller_node.ssh_conn = con_ssh

        LOG.tc_step("Restoring Compute Nodes ...")
        if len(compute_hosts) > 0:
            for compute_host in compute_hosts:
                LOG.tc_step("Restoring {}".format(compute_host))
                install_helper.open_vlm_console_thread(
                    compute_host,
                    boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info(
                    "Verifying {} is Locked, Diabled and Online ...".format(
                        compute_host))
                system_helper.wait_for_hosts_states(
                    compute_host,
                    administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)
                LOG.info("Unlocking {} ...".format(compute_host))
                rc, output = host_helper.unlock_host(compute_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, msg: {}".format(
                    compute_host, rc, output)

        LOG.info("All nodes {} are restored ...".format(hostnames))
    else:
        LOG.warn('Only 1 controller, but not AIO lab!!??')

    LOG.tc_step("Delete backup files from {} ....".format(StxPath.BACKUPS))
    con_ssh.exec_sudo_cmd("rm -rf {}/*".format(StxPath.BACKUPS))

    LOG.tc_step('Perform post-restore testing/checking')
    post_restore_test(con_ssh)

    LOG.tc_step("Waiting until all alarms are cleared ....")
    timeout = 300
    healthy, alarms = system_helper.wait_for_all_alarms_gone(timeout=timeout,
                                                             fail_ok=True)
    if not healthy:
        LOG.warn('Alarms exist: {}, after waiting {} seconds'.format(
            alarms, timeout))
        rc, message = con_ssh.exec_sudo_cmd('drbd-overview')

        if rc != 0 or (r'[===>' not in message
                       and r'] sync\'ed: ' not in message):
            LOG.warn('Failed to get drbd-overview information')

        LOG.info('Wait for the system to be ready in {} seconds'.format(
            HostTimeout.REBOOT))
        system_helper.wait_for_all_alarms_gone(timeout=HostTimeout.REBOOT,
                                               fail_ok=False)

    LOG.tc_step("Verifying system health after restore ...")
    rc, failed = system_helper.get_system_health_query(con_ssh=con_ssh)
    assert rc == 0, "System health not OK: {}".format(failed)

    collect_logs()
예제 #3
0
def perform_vm_operation(vm_type,
                         vm_id,
                         op='live_migration',
                         extra_specs='vtpm'):
    LOG.info('Perform action:{} to the VM, extra specs:{}'.format(
        op, extra_specs))

    op_table = {
        'live_migration':
        lambda x, y: vm_helper.live_migrate_vm(y),
        'cold_migration':
        lambda x, y: vm_helper.cold_migrate_vm(y),
        'stop_start':
        lambda x, y: (vm_helper.stop_vms(y), vm_helper.start_vms(y)),
        'suspend_resume':
        lambda x, y: (vm_helper.suspend_vm(y), vm_helper.resume_vm(y)),
        'pause_unpause':
        lambda x, y: (vm_helper.pause_vm(y), vm_helper.unpause_vm(y)),
        'reboot_host':
        lambda x, y: reboot_hosting_node(x, y, force_reboot=False),
        'soft_reboot':
        lambda x, y: vm_helper.reboot_vm(y, hard=False),
        'hard_reboot':
        lambda x, y: vm_helper.reboot_vm(y, hard=True),
        'lock_unlock':
        lambda x, y: lock_unlock_hosting_node(x, y, force_lock=False),
        'evacuate':
        lambda x, y: reboot_hosting_node(x, y, force_reboot=True),
    }

    if op in op_table:
        LOG.info('Perform action: {}'.format(op))
        op_table[op](vm_type, vm_id)

        return True

    elif op == 'resize_to_autorc':
        if vm_type == 'autorc':
            LOG.info(
                'resize from AUTO-RECOVERY to another AUTO-RECOVER flavor')

        to_flavor_id = get_flavor_id(vm_type, 'autorc2')

        LOG.info('TODO: {}, m_type={}, to_flavor_id={}'.format(
            to_flavor_id, vm_type, to_flavor_id))

        vm_helper.resize_vm(vm_id, to_flavor_id)

    elif op == 'resize_to_non_autorc':
        LOG.info('perform {} on type:{}, id:{}'.format(op, vm_type, vm_id))
        if vm_type == 'non_autorc2':
            LOG.warn(
                'resize from AUTO-RECOVERY to another AUTO-RECOVER flavor')

        to_flavor_id = get_flavor_id(vm_type, 'non_autorc2')
        vm_helper.resize_vm(vm_id, to_flavor_id)

    elif op == 'resize_to_non_vtpm':
        LOG.info('perform {} on type:{}, id:{}'.format(op, vm_type, vm_id))

        to_flavor_id = get_flavor_id(vm_type, 'non_vtpm')

        vm_helper.resize_vm(vm_id, to_flavor_id)

    else:
        LOG.fatal('Unsupported action: {}'.format(op))
        return False