def ssh_to_stx(lab=None, set_client=False):
    if not lab:
        lab = ProjVar.get_var('LAB')

    user = HostLinuxUser.get_user()
    password = HostLinuxUser.get_password()
    if ProjVar.get_var('IPV6_OAM'):
        lab = convert_to_ipv6(lab)
        LOG.info("SSH to IPv6 system {} via tuxlab2".format(lab['short_name']))
        tuxlab2_ip = YOW_TUXLAB2['ip']
        tux_user = TestFileServer.get_user()
        tuxlab_prompt = r'{}@{}\:(.*)\$ '.format(tux_user, YOW_TUXLAB2['name'])
        tuxlab2_ssh = SSHClient(host=tuxlab2_ip, user=tux_user,
                                password=TestFileServer.get_password(),
                                initial_prompt=tuxlab_prompt)
        tuxlab2_ssh.connect(retry_timeout=300, retry_interval=30, timeout=60)
        con_ssh = SSHFromSSH(ssh_client=tuxlab2_ssh, host=lab['floating ip'],
                             user=user, password=password,
                             initial_prompt=Prompt.CONTROLLER_PROMPT)
    else:
        con_ssh = SSHClient(lab['floating ip'], user=HostLinuxUser.get_user(),
                            password=HostLinuxUser.get_password(),
                            initial_prompt=Prompt.CONTROLLER_PROMPT)

    con_ssh.connect(retry=True, retry_timeout=30, use_current=False)
    if set_client:
        ControllerClient.set_active_controller(con_ssh)

    return con_ssh
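
# Minimal usage sketch for ssh_to_stx(). It assumes ProjVar already holds a
# 'LAB' dict with a 'floating ip' entry, as the function expects; the command
# executed below is only illustrative.
# con_ssh = ssh_to_stx(set_client=True)   # also registers the active controller client
# rc, output = con_ssh.exec_cmd('source /etc/platform/openrc; system host-list')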
def install_clone_setup():
    lab = InstallVars.get_install_var('LAB')
    LOG.info("Lab info; {}".format(lab))
    install_cloned_info = {
        'usb_verified': False,
        'build_server': None,
        'hostnames': [k for k, v in lab.items() if isinstance(v, node.Node)],
        'system_mode': 'duplex' if len(lab['controller_nodes']) == 2 else 'simplex'
    }

    controller_node = lab['controller-0']
    controller_conn = None
    extra_controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0
    local_client = LocalHostClient(connect=True)
    if local_client.ping_server(controller_node.host_ip, fail_ok=True)[0] == 100:
        try:
            controller_conn = install_helper.ssh_to_controller(
                controller_node.host_ip,
                fail_ok=True,
                initial_prompt=extra_controller_prompt)
        except Exception:
            LOG.info("SSH connection to {} not yet available ..".format(
                controller_node.name))

    if controller_conn:
        LOG.info("Connection established with controller-0 ....")
        ControllerClient.set_active_controller(ssh_client=controller_conn)
        if verify_usb(controller_conn):
            install_cloned_info['usb_verified'] = True

    bld_server = get_build_server_info(
        InstallVars.get_install_var('BUILD_SERVER'))
    LOG.info("Connecting to Build Server {} ....".format(bld_server['name']))

    bld_server_attr = dict()
    bld_server_attr['name'] = bld_server['name']
    bld_server_attr['server_ip'] = bld_server['ip']
    bld_server_attr['prompt'] = r'{}@{}\:(.*)\$ '.format(
        TestFileServer.get_user(), bld_server['name'])

    bld_server_conn = install_helper.establish_ssh_connection(
        bld_server_attr['name'],
        user=TestFileServer.get_user(),
        password=TestFileServer.get_password(),
        initial_prompt=bld_server_attr['prompt'])

    bld_server_conn.exec_cmd("bash")
    bld_server_conn.set_prompt(bld_server_attr['prompt'])
    bld_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
    bld_server_attr['ssh_conn'] = bld_server_conn
    bld_server_obj = Server(**bld_server_attr)

    install_cloned_info['build_server'] = bld_server_obj

    return install_cloned_info
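
# Illustrative consumption of the dict returned by install_clone_setup().
# The key names come from the function above; the calling test is hypothetical.
# cloned = install_clone_setup()
# if not cloned['usb_verified']:
#     LOG.info("Cloned-image USB not verified on hosts {} ({})".format(
#         cloned['hostnames'], cloned['system_mode']))
# build_server = cloned['build_server']   # Server object wrapping the ssh connection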
def setup_tis_ssh(lab):
    con_ssh = ControllerClient.get_active_controller(fail_ok=True)

    if con_ssh is None:
        con_ssh = SSHClient(lab['floating ip'], HostLinuxUser.get_user(),
                            HostLinuxUser.get_password(), CONTROLLER_PROMPT)
        con_ssh.connect(retry=True, retry_timeout=30)
        ControllerClient.set_active_controller(con_ssh)

    return con_ssh
def ssh_to_stx(lab=None, set_client=False):
    if not lab:
        lab = ProjVar.get_var('LAB')

    con_ssh = SSHClient(lab['floating ip'], user=HostLinuxUser.get_user(),
                        password=HostLinuxUser.get_password(),
                        initial_prompt=Prompt.CONTROLLER_PROMPT)
    con_ssh.connect(retry=True, retry_timeout=30, use_current=False)
    if set_client:
        ControllerClient.set_active_controller(con_ssh)

    return con_ssh
def setup_vbox_tis_ssh(lab):
    if 'external_ip' in lab.keys():
        con_ssh = ControllerClient.get_active_controller(fail_ok=True)
        if con_ssh:
            con_ssh.disconnect()

        con_ssh = SSHClient(lab['external_ip'], HostLinuxUser.get_user(),
                            HostLinuxUser.get_password(), CONTROLLER_PROMPT,
                            port=lab['external_port'])
        con_ssh.connect(retry=True, retry_timeout=30)
        ControllerClient.set_active_controller(con_ssh)
    else:
        con_ssh = setup_tis_ssh(lab)

    return con_ssh
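
# Minimal illustrative call for the vbox path. Only the key names come from
# setup_vbox_tis_ssh() above; the address and port values are made-up placeholders.
# vbox_lab = {'external_ip': '10.10.10.10', 'external_port': 2222}
# con_ssh = setup_vbox_tis_ssh(vbox_lab)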
def run(self):
    """
    Do not call this method directly. Start threads from the start_thread
    functions instead.

    Returns:

    """
    LOG.info("Starting {}".format(self.name))
    # run the function
    try:
        MThread.running_threads.append(self)
        LOG.info("Connecting to lab fip in new thread...")
        lab = ProjVar.get_var('lab')

        from keywords import common
        con_ssh = common.ssh_to_stx(set_client=True)

        if ProjVar.get_var('IS_DC'):
            LOG.info("Connecting to subclouds fip in new thread...")
            ControllerClient.set_active_controller(con_ssh, 'RegionOne')
            con_ssh_dict = ControllerClient.get_active_controllers_map()
            for name in con_ssh_dict:
                if name in lab:
                    subcloud_fip = lab[name]['floating ip']
                    subcloud_ssh = SSHClient(subcloud_fip)
                    try:
                        subcloud_ssh.connect(use_current=False)
                        ControllerClient.set_active_controller(subcloud_ssh,
                                                               name=name)
                    except Exception:
                        if name == ProjVar.get_var('PRIMARY_SUBCLOUD'):
                            raise
                        LOG.warning('Cannot connect to {}'.format(name))

        LOG.info("Connecting to NatBox in new thread...")
        NATBoxClient.set_natbox_client()
        if ProjVar.get_var('REMOTE_CLI'):
            RemoteCLIClient.get_remote_cli_client()

        LOG.info("Execute function {}({}, {})".format(self.func.__name__,
                                                      self.args, self.kwargs))
        self._output = self.func(*self.args, **self.kwargs)
        LOG.info("{} returned: {}".format(self.func.__name__,
                                          self._output.__str__()))
        self._output_returned.set()
    except Exception:
        err = traceback.format_exc()
        # LOG.error("Error found in thread call {}".format(err))
        self._err = err
        raise
    finally:
        LOG.info("Terminating thread: {}".format(self.thread_id))
        if ProjVar.get_var('IS_DC'):
            ssh_clients = ControllerClient.get_active_controllers(
                current_thread_only=True)
            for con_ssh in ssh_clients:
                con_ssh.close()
        else:
            ControllerClient.get_active_controller().close()

        natbox_ssh = NATBoxClient.get_natbox_client()
        if natbox_ssh:
            natbox_ssh.close()

        if ProjVar.get_var('REMOTE_CLI'):
            RemoteCLIClient.get_remote_cli_client().close()

        LOG.debug("{} has finished".format(self.name))
        MThread.running_threads.remove(self)
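
# Hypothetical caller-side sketch. run() above is invoked by the thread itself;
# the docstring says callers should go through the start_thread helpers. The
# constructor/method names below are assumptions for illustration only and are
# not verified against this framework.
# thread = MThread(some_helper_func, 'arg1', kwarg1='value')
# thread.start_thread(timeout=1200)      # spawns the thread, which calls run()
# thread.wait_for_thread_end()
# result = thread.get_output()           # populated from self._output in run()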
def setup_module():
    global ssh_client
    ssh_client = SSHClient('128.224.150.141')
    ControllerClient.set_active_controller(ssh_client)
    ssh_client.connect()
    LOG.info("setup done")
def test_system():
    LOG.tc_func_start()
    cli.system('host-list')
    cli.system('host-show', 1)
    try:
        cli.system('host-list', auth_info=auth.Tenant.get('tenant1'))
        raise Exception("you should fail!")
    except CLIRejected:
        LOG.info("system cli rejected as expected without admin authentication")
    ProjVar.set_var(SOURCE_OPENRC=True)
    cli.system('host-list', auth_info=None)
    ProjVar.set_var(SOURCE_OPENRC=None)
    LOG.tc_func_end()


def test_auth_tenant():
    LOG.tc_func_start()
    cli.openstack('server list', auth_info=auth.Tenant.get('tenant1'))
    LOG.tc_func_end()


if __name__ == '__main__':
    ssh_client = SSHClient('128.224.150.142')
    ControllerClient.set_active_controller(ssh_client)
    ssh_client.connect()
    test_system()
    test_auth_tenant()
    test_nova()
def test_upgrade_restore(restore_setup):
    """
    Restore part of the simplex upgrade: restores the backup files with the
    n+1 load. test_upgrade_simplex_system.py creates the backup files on the
    n-1 load.

    Args:
        restore_setup: fixture that checks backup availability and the
            parameters for the restore

    Examples:
        To execute:
        py.test --lab=wcp_67 --backup-path=/sandbox/upgrade
        --backup-build-id='2018-03-16_11-04-06' --backup-builds-dir=TC_18.03_Host
        --skip_setup_feed tc_bnr/restore/test_upgrade_simplex_restore.py

    Steps:
        1. Set the feed in tuxlab
        2. Boot from tuxlab
        3. Log in and set password
        4. Move backup files to controllers
        5. Execute upgrade_controller_simplex <backup file>
        6. Restore volumes
        7. Restore images
        8. Restore computes
        9. Activate upgrade
        10. Complete upgrade
        11. Delete load

    Teardown:
        Unreserve VLM
    """
    # The restore_setup fixture prepares the restore environment and files.
    controller0 = 'controller-0'
    lab = restore_setup["lab"]
    tis_backup_files = restore_setup['tis_backup_files']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    controller_node = lab[controller0]
    con_ssh = ControllerClient.get_active_controller(name=lab['short_name'],
                                                     fail_ok=True)

    if not con_ssh:
        LOG.info("Establish ssh connection with {}".format(controller0))
        controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
            lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        controller_node.ssh_conn.deploy_ssh_key()
        con_ssh = controller_node.ssh_conn

    LOG.info("Restore system from backup....")
    system_backup_file = [file for file in tis_backup_files
                          if "system.tgz" in file].pop()
    images_backup_file = [file for file in tis_backup_files
                          if "images.tgz" in file].pop()

    LOG.tc_step("Restoring controller-0")
    LOG.info("System config restore from backup file {} ...".format(
        system_backup_file))

    if backup_src.lower() == 'usb':
        system_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            system_backup_file)
    else:
        system_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           system_backup_file)

    LOG.tc_step("Restoring the backup system files")
    install_helper.upgrade_controller_simplex(
        system_backup=system_backup_path,
        tel_net_session=controller_node.telnet_conn,
        fail_ok=True)

    LOG.info('Re-connect to the active controller using ssh')
    con_ssh.close()
    time.sleep(60)
    con_ssh = install_helper.ssh_to_controller(controller_node.host_ip,
                                               retry=True)
    controller_node.ssh_conn = con_ssh
    ControllerClient.set_active_controller(con_ssh)

    if backup_src.lower() == 'local':
        images_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           images_backup_file)
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, images_backup_file),
            HostLinuxUser.get_home())
    else:
        images_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            images_backup_file)

    LOG.tc_step("Images restore from backup file {} ...".format(
        images_backup_file))

    new_prompt = r'{}.*~.*\$ |controller\-0.*~.*\$ '.format(
        lab['name'].split('_')[0])
    LOG.info('Set prompt to: {}'.format(new_prompt))
    con_ssh.set_prompt(new_prompt)

    install_helper.restore_controller_system_images(
        images_backup=images_backup_path,
        tel_net_session=controller_node.telnet_conn,
        fail_ok=True)

    LOG.debug('Wait 60 seconds for the system to be ready')
    time.sleep(60)

    LOG.tc_step("Copying backup files to /opt/backups ... ")
    if backup_src.lower() == 'local':
        con_ssh.exec_cmd("rm -f {} {}".format(system_backup_path,
                                              images_backup_path))

        cmd_rm_known_host = r'sed -i "s/^[^#]\(.*\)"/#\1/g ' \
                            r'/etc/ssh/ssh_known_hosts; \sync'
        con_ssh.exec_sudo_cmd(cmd_rm_known_host)

        # transfer all backup files to /opt/backups from the test server
        with con_ssh.login_as_root():
            con_ssh.scp_on_dest(source_user=TestFileServer.get_user(),
                                source_ip=TestFileServer.get_server(),
                                source_pswd=TestFileServer.get_password(),
                                source_path=backup_src_path + "/*",
                                dest_path=StxPath.BACKUPS + '/',
                                timeout=1200)
    else:
        # copy all backup files from USB to /opt/backups
        cmd = "cp {}/* {}".format(BackupRestore.USB_BACKUP_PATH,
                                  StxPath.BACKUPS)
        con_ssh.exec_sudo_cmd(cmd, expect_timeout=600)

    LOG.tc_step("Checking if backup files are copied to /opt/backups ... ")
    assert int(con_ssh.exec_cmd("ls {} | wc -l".format(StxPath.BACKUPS))[1]) >= 2, \
        "Missing backup files in {}".format(StxPath.BACKUPS)

    LOG.tc_step("Restoring Cinder Volumes ...")
    restore_volumes()

    LOG.tc_step("Delete backup files from {} ....".format(StxPath.BACKUPS))
    con_ssh.exec_sudo_cmd("rm -rf {}/*".format(StxPath.BACKUPS))

    LOG.tc_step("Restoring compute")
    install_helper.restore_compute(tel_net_session=controller_node.telnet_conn)

    # Activate the upgrade
    LOG.tc_step("Activating upgrade....")
    upgrade_helper.activate_upgrade()
    # LOG.info("Upgrade activate complete.....")

    # Complete upgrade
    LOG.tc_step("Completing upgrade")
    upgrade_helper.complete_upgrade()
    LOG.info("Upgrade is complete......")

    LOG.info("Lab: {} upgraded successfully".format(lab['name']))

    # Delete the previous load
    LOG.tc_step("Deleting imported load... ")
    upgrade_helper.delete_imported_load()
def get_current_strategy_details(orchestration, conn_ssh=None):
    """
    Gets orchestration strategy details when successfully applied.

    Args:
        orchestration: 'patch' or 'upgrade'
        conn_ssh:

    Returns: dict of strategy values

    """
    if orchestration is None:
        raise ValueError("The orchestration type (choices are 'patch' or "
                         "'upgrade') must be specified")
    if orchestration != "patch" and orchestration != "upgrade":
        raise ValueError("Invalid orchestration type (choices are 'patch' or "
                         "'upgrade') specified")

    if orchestration == "patch":
        cmd = "patch-strategy show --details"
    else:
        cmd = "upgrade-strategy show --details"

    try:
        rc, output = cli.sw_manager(cmd, ssh_client=conn_ssh, fail_ok=True)
    except Exception:
        time.sleep(20)
        if not conn_ssh.is_connected():
            conn_ssh.connect(retry=True)
            ControllerClient.set_active_controller(ssh_client=conn_ssh)
        rc, output = cli.sw_manager(cmd, ssh_client=conn_ssh, fail_ok=True)

    rtn = {}
    if rc == 0 and output is not None and \
            ('strategy-uuid' in [tr.strip() for tr in output.split(':')]):
        lines = output.splitlines()
        build_phase_index = [i for i, word in enumerate(lines)
                             if "build-phase" in word]
        apply_phase_index = [i for i, word in enumerate(lines)
                             if "apply-phase" in word]
        strategy_lines = []
        build_phase_lines = []
        apply_phase_lines = []

        if len(build_phase_index) > 0:
            strategy_lines.extend(lines[1:build_phase_index[0]])
            if len(apply_phase_index) > 0:
                build_phase_lines.extend(
                    lines[build_phase_index[0]:apply_phase_index[0]])
                apply_phase_lines.extend(lines[apply_phase_index[0]:])
            else:
                build_phase_lines.extend(lines[build_phase_index[0]:])
        else:
            strategy_lines.extend(lines[1:])

        strategy_values = {}
        build_phase_values = {}
        apply_phase_values = {}

        if len(strategy_lines) > 0:
            for line in strategy_lines:
                pairs = line.split(':', 1)
                strategy_values[pairs[0].strip()] = pairs[1].strip()
            rtn['strategy'] = strategy_values

        if len(build_phase_lines) > 0:
            for line in build_phase_lines:
                pairs = line.split(':', 1)
                if pairs[0].strip() == "stages":
                    break
                build_phase_values[pairs[0].strip()] = pairs[1].strip()
            rtn['build'] = build_phase_values

        if len(apply_phase_lines) > 0:
            for line in apply_phase_lines:
                pairs = line.split(':', 1)
                if pairs[0].strip() == "stages":
                    break
                apply_phase_values[pairs[0].strip()] = pairs[1].strip()
            rtn['apply'] = apply_phase_values

    return rtn
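
# Illustrative usage sketch. The top-level keys ('strategy', 'build', 'apply')
# and 'strategy-uuid' come from the function above; the field names inside each
# sub-dict depend on the actual "sw-manager ... show --details" output and are
# assumptions here.
# details = get_current_strategy_details('patch')
# strategy_uuid = details.get('strategy', {}).get('strategy-uuid')
# apply_state = details.get('apply', {}).get('state')   # hypothetical field name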
def setup_tis_ssh():
    global con_ssh

    con_ssh = SSHClient(Labs.PV0['floating ip'], HostLinuxUser.get_user(),
                        HostLinuxUser.get_password(), CONTROLLER_PROMPT)
    con_ssh.connect()
    ControllerClient.set_active_controller(con_ssh)
def test_restore(restore_setup):
    controller1 = 'controller-1'
    controller0 = 'controller-0'

    lab = restore_setup["lab"]
    is_aio_lab = lab.get('system_type', 'Standard') == 'CPE'
    is_sx = is_aio_lab and (len(lab['controller_nodes']) < 2)

    tis_backup_files = restore_setup['tis_backup_files']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    controller_node = lab[controller0]
    con_ssh = ControllerClient.get_active_controller(name=lab['short_name'],
                                                     fail_ok=True)
    sys_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        '.*' + lab['name'].split('_')[0])
    controller_prompt = '{}|{}'.format(sys_prompt, Prompt.CONTROLLER_0)
    controller_node.telnet_conn.set_prompt(controller_prompt)

    if not con_ssh:
        LOG.info("Establish ssh connection with {}".format(controller0))
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip, initial_prompt=controller_prompt)
        controller_node.ssh_conn.deploy_ssh_key()
        con_ssh = controller_node.ssh_conn

    ControllerClient.set_active_controller(con_ssh)

    LOG.info("Restore system from backup....")
    system_backup_file = [file for file in tis_backup_files
                          if "system.tgz" in file].pop()
    images_backup_file = [file for file in tis_backup_files
                          if "images.tgz" in file].pop()

    LOG.tc_step("Restoring {}".format(controller0))
    LOG.info("System config restore from backup file {} ...".format(
        system_backup_file))

    if backup_src.lower() == 'usb':
        system_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            system_backup_file)
    else:
        system_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           system_backup_file)

    compute_configured = install_helper.restore_controller_system_config(
        system_backup=system_backup_path, is_aio=is_aio_lab)[2]

    # return
    LOG.info('Re-connect to the active controller using ssh')
    con_ssh.close()
    controller_node.ssh_conn = install_helper.ssh_to_controller(
        controller_node.host_ip, initial_prompt=controller_prompt)

    LOG.info("Source Keystone user admin environment ...")
    LOG.info("Set prompt to: {}, telnet_conn: {}".format(
        controller_prompt, controller_node.telnet_conn))
    controller_node.telnet_conn.exec_cmd("cd; source /etc/platform/openrc")
    con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
    controller_node.ssh_conn = con_ssh
    ControllerClient.set_active_controller(con_ssh)

    make_sure_all_hosts_locked(con_ssh)

    if backup_src.lower() == 'local':
        images_backup_path = "{}{}".format(HostLinuxUser.get_home(),
                                           images_backup_file)
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, images_backup_file),
            HostLinuxUser.get_home())
    else:
        images_backup_path = "{}/{}".format(BackupRestore.USB_BACKUP_PATH,
                                            images_backup_file)

    LOG.info("Images restore from backup file {} ...".format(
        images_backup_file))

    new_prompt = r'{}.*~.*\$ |controller\-0.*~.*\$ '.format(
        lab['name'].split('_')[0])
    LOG.info('Set prompt to: {}'.format(new_prompt))
    con_ssh.set_prompt(new_prompt)

    install_helper.restore_controller_system_images(
        images_backup=images_backup_path,
        tel_net_session=controller_node.telnet_conn)
    # this is a workaround for CGTS-8190
    install_helper.update_auth_url(con_ssh)

    LOG.tc_step("Verifying restoring controller-0 is complete and it is in "
                "available state ...")
    LOG.debug('Wait 60 seconds for the system to be ready')
    time.sleep(60)

    timeout = HostTimeout.REBOOT + 60
    availability = HostAvailState.AVAILABLE
    is_available = system_helper.wait_for_hosts_states(
        controller0,
        availability=HostAvailState.AVAILABLE,
        fail_ok=True,
        timeout=timeout)
    if not is_available:
        LOG.warn('After {} seconds, the first node: {} does NOT reach {}'.format(
            timeout, controller0, availability))
        LOG.info('Check if drbd is still synchronizing data')
        con_ssh.exec_sudo_cmd('drbd-overview')
        is_degraded = system_helper.wait_for_hosts_states(
            controller0,
            availability=HostAvailState.DEGRADED,
            fail_ok=True,
            timeout=300)
        if is_degraded:
            LOG.warn('Node: {} is degraded: {}'.format(
                controller0, HostAvailState.DEGRADED))
            con_ssh.exec_sudo_cmd('drbd-overview')
        else:
            LOG.fatal('Node: {} is NOT in Available nor Degraded status'.format(
                controller0))
            # the customer doc does have wording regarding this situation,
            # continue
            # assert False, 'Node:{} is NOT in Available nor Degraded status'

    # delete the system backup files from sysadmin home
    LOG.tc_step("Copying backup files to /opt/backups ... ")
    if backup_src.lower() == 'local':
        con_ssh.exec_cmd("rm -f {} {}".format(system_backup_path,
                                              images_backup_path))

        cmd_rm_known_host = r'sed -i "s/^[^#]\(.*\)"/#\1/g ' \
                            r'/etc/ssh/ssh_known_hosts; \sync'
        con_ssh.exec_sudo_cmd(cmd_rm_known_host)

        # transfer all backup files to /opt/backups from the test server
        with con_ssh.login_as_root():
            con_ssh.scp_on_dest(source_user=TestFileServer.get_user(),
                                source_ip=TestFileServer.get_server(),
                                source_pswd=TestFileServer.get_password(),
                                source_path=backup_src_path + "/*",
                                dest_path=StxPath.BACKUPS + '/',
                                timeout=1200)
    else:
        # copy all backup files from USB to /opt/backups
        cmd = "cp {}/* {}".format(BackupRestore.USB_BACKUP_PATH,
                                  StxPath.BACKUPS)
        con_ssh.exec_sudo_cmd(cmd, expect_timeout=600)

    LOG.tc_step("Checking if backup files are copied to /opt/backups ... ")
    assert int(con_ssh.exec_cmd("ls {} | wc -l".format(StxPath.BACKUPS))[1]) >= 2, \
        "Missing backup files in {}".format(StxPath.BACKUPS)

    if is_aio_lab:
        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756)')
        cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.format(
            HostLinuxUser.get_password())
        controller_node.telnet_conn.login()
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects=[' will reboot on completion'])
        LOG.info('- wait until reboot completes')
        time.sleep(120)
        LOG.info('- confirm the active controller is actually back online')
        controller_node.telnet_conn.login()

        LOG.tc_step("Reconnecting to the active controller after "
                    "restore-complete")
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)

        if not compute_configured:
            LOG.tc_step(
                'Latest 18.07 EAR1 or old load on AIO/CPE lab: configure its '
                'compute functionalities')
            # install_helper.run_cpe_compute_config_complete(
            #     controller_node, controller0)

            # LOG.info('closing current ssh connection')
            # con_ssh.close()

            LOG.tc_step('Run restore-complete (CGTS-9756)')
            controller_node.telnet_conn.login()

            cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.\
                format(HostLinuxUser.get_password())
            controller_node.telnet_conn.exec_cmd(cmd,
                                                 extra_expects=' will reboot ')
            controller_node.telnet_conn.close()

            LOG.info('Wait until "config_controller" reboots the active '
                     'controller')
            time.sleep(180)

            controller_node.telnet_conn = install_helper.open_telnet_session(
                controller_node)
            controller_node.telnet_conn.login()
            time.sleep(120)

            con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
            controller_node.ssh_conn = con_ssh
            ControllerClient.set_active_controller(con_ssh)

            host_helper.wait_for_hosts_ready(controller0)

        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        if not is_sx:
            install_non_active_node(controller1, lab)

    elif len(lab['controller_nodes']) >= 2:
        LOG.tc_step('Install the standby controller: {}'.format(controller1))
        install_non_active_node(controller1, lab)

        boot_interfaces = lab['boot_device_dict']

        hostnames = system_helper.get_hosts()
        storage_hosts = [host for host in hostnames if 'storage' in host]
        compute_hosts = [host for host in hostnames
                         if 'storage' not in host and 'controller' not in host]

        if len(storage_hosts) > 0:
            # con_ssh.exec_sudo_cmd(
            #     'touch /etc/ceph/ceph.client.None.keyring')
            for storage_host in storage_hosts:
                LOG.tc_step("Restoring {}".format(storage_host))
                install_helper.open_vlm_console_thread(
                    storage_host,
                    boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info("Verifying {} is Locked, Disabled and Online ...".format(
                    storage_host))
                system_helper.wait_for_hosts_states(
                    storage_host,
                    administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)

                LOG.info("Unlocking {} ...".format(storage_host))
                rc, output = host_helper.unlock_host(storage_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, msg: {}".format(
                    storage_host, rc, output)

            LOG.info("Verifying the Ceph cluster is healthy ...")
            storage_helper.wait_for_ceph_health_ok(timeout=600)

            LOG.info("Importing images ...")
            image_backup_files = install_helper.get_backup_files(
                IMAGE_BACKUP_FILE_PATTERN, StxPath.BACKUPS, con_ssh)
            LOG.info("Image backup found: {}".format(image_backup_files))
            imported = install_helper.import_image_from_backup(
                image_backup_files)
            LOG.info("Images successfully imported: {}".format(imported))

        LOG.tc_step("Restoring Cinder Volumes ...")
        restore_volumes()

        LOG.tc_step('Run restore-complete (CGTS-9756), regular lab')
        controller_node.telnet_conn.login()
        cmd = 'echo "{}" | sudo -S config_controller --restore-complete'.format(
            HostLinuxUser.get_password())
        controller_node.telnet_conn.exec_cmd(
            cmd, extra_expects='controller-0 login:')
        LOG.info('Rebuild ssh connection')
        con_ssh = install_helper.ssh_to_controller(controller_node.host_ip)
        controller_node.ssh_conn = con_ssh

        LOG.tc_step("Restoring Compute Nodes ...")
        if len(compute_hosts) > 0:
            for compute_host in compute_hosts:
                LOG.tc_step("Restoring {}".format(compute_host))
                install_helper.open_vlm_console_thread(
                    compute_host,
                    boot_interface=boot_interfaces,
                    vlm_power_on=True)

                LOG.info("Verifying {} is Locked, Disabled and Online ...".format(
                    compute_host))
                system_helper.wait_for_hosts_states(
                    compute_host,
                    administrative=HostAdminState.LOCKED,
                    operational=HostOperState.DISABLED,
                    availability=HostAvailState.ONLINE)

                LOG.info("Unlocking {} ...".format(compute_host))
                rc, output = host_helper.unlock_host(compute_host,
                                                     available_only=True)
                assert rc == 0, "Host {} failed to unlock: rc = {}, msg: {}".format(
                    compute_host, rc, output)

        LOG.info("All nodes {} are restored ...".format(hostnames))
    else:
        LOG.warn('Only 1 controller, but not an AIO lab!')

    LOG.tc_step("Delete backup files from {} ....".format(StxPath.BACKUPS))
    con_ssh.exec_sudo_cmd("rm -rf {}/*".format(StxPath.BACKUPS))

    LOG.tc_step('Perform post-restore testing/checking')
    post_restore_test(con_ssh)

    LOG.tc_step("Waiting until all alarms are cleared ....")
    timeout = 300
    healthy, alarms = system_helper.wait_for_all_alarms_gone(timeout=timeout,
                                                             fail_ok=True)
    if not healthy:
        LOG.warn('Alarms exist: {}, after waiting {} seconds'.format(
            alarms, timeout))
        rc, message = con_ssh.exec_sudo_cmd('drbd-overview')

        if rc != 0 or (r'[===>' not in message and
                       r'] sync\'ed: ' not in message):
            LOG.warn('Failed to get drbd-overview information')

        LOG.info('Wait for the system to be ready in {} seconds'.format(
            HostTimeout.REBOOT))
        system_helper.wait_for_all_alarms_gone(timeout=HostTimeout.REBOOT,
                                               fail_ok=False)

    LOG.tc_step("Verifying system health after restore ...")
    rc, failed = system_helper.get_system_health_query(con_ssh=con_ssh)
    assert rc == 0, "System health not OK: {}".format(failed)

    collect_logs()
def pre_restore_checkup():
    """
    Fixture to check the system states before doing a system restore, including:
        - collect logs
        - check if backup files exist on the backup media
        - check if the build-ids match each other
        - wipe disks

    Args:

    Return:
        backup files:
            - the backup files to restore with
    """
    lab = InstallVars.get_install_var('LAB')
    LOG.info("Lab info; {}".format(lab))
    backup_build_id = RestoreVars.get_restore_var("BACKUP_BUILD_ID")
    controller_node = lab['controller-0']
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())
    tis_backup_files = []
    extra_controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0
    controller_conn = install_helper.ssh_to_controller(
        controller_node.host_ip,
        initial_prompt=extra_controller_prompt,
        fail_ok=True)

    LOG.info('Collect logs before restore')
    if controller_conn:
        collect_logs(controller_conn)
        ControllerClient.set_active_controller(controller_conn)
    else:
        LOG.info('Cannot collect logs because no ssh connection to the lab')
        LOG.warn('Failed to collect logs because no ssh connection could be '
                 'established to controller-0 of lab: {}'.format(
                     controller_node.host_ip))

    LOG.info('backup_src={}, backup_src_path={}'.format(backup_src,
                                                        backup_src_path))
    if backup_src.lower() == 'usb':
        if controller_conn:
            LOG.info("Connection established with controller-0 ....")
            ControllerClient.set_active_controller(ssh_client=controller_conn)

            LOG.info("Checking if a USB flash drive with backup files is "
                     "plugged in... ")
            usb_device_name = install_helper.get_usb_device_name(
                con_ssh=controller_conn)
            assert usb_device_name, "No USB found "
            LOG.info("USB flash drive found, checking for backup files ... ")
            usb_part_info = install_helper.get_usb_device_partition_info(
                usb_device=usb_device_name, con_ssh=controller_conn)
            assert usb_part_info and len(usb_part_info) > 0, \
                "No USB or partition found"

            usb_part_name = "{}2".format(usb_device_name)
            assert usb_part_name in usb_part_info.keys(), \
                "No {} partition exists in USB".format(usb_part_name)

            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name, con_ssh=controller_conn)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=controller_conn), \
                    "Unable to mount USB partition {}".format(usb_part_name)

            tis_backup_files = install_helper.get_titanium_backup_filenames_usb(
                usb_device=usb_part_name, con_ssh=controller_conn)
            assert len(tis_backup_files) >= 2, \
                "Missing backup files: {}".format(tis_backup_files)

            # extract build id from the file name
            file_parts = tis_backup_files[0].split('_')
            file_backup_build_id = '_'.join([file_parts[3], file_parts[4]])

            assert re.match(TIS_BLD_DIR_REGEX, file_backup_build_id), \
                "Invalid build id format {} extracted from backup_file {}".format(
                    file_backup_build_id, tis_backup_files[0])

            if backup_build_id is not None:
                if backup_build_id != file_backup_build_id:
                    LOG.info("The build id extracted from the backup file differs "
                             "from the one specified; using the extracted build "
                             "id {} ....".format(file_backup_build_id))
                    backup_build_id = file_backup_build_id
            else:
                backup_build_id = file_backup_build_id

            RestoreVars.set_restore_var(backup_build_id=backup_build_id)
        else:
            LOG.info("SSH connection not available yet with controller-0; "
                     "USB will be checked after controller boot ....")
    else:
        test_server_attr = dict()
        test_server_attr['name'] = TestFileServer.get_hostname().split('.')[0]
        test_server_attr['server_ip'] = TestFileServer.get_server()
        test_server_attr['prompt'] = r'\[{}@{} {}\]\$ '.format(
            TestFileServer.get_user(), test_server_attr['name'],
            TestFileServer.get_user())

        test_server_conn = install_helper.establish_ssh_connection(
            test_server_attr['name'],
            user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=test_server_attr['prompt'])

        test_server_conn.set_prompt(test_server_attr['prompt'])
        test_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        test_server_attr['ssh_conn'] = test_server_conn
        test_server_obj = Server(**test_server_attr)
        RestoreVars.set_restore_var(backup_src_server=test_server_obj)

        # test if the backup path for the lab exists on the test server
        if os.path.basename(backup_src_path) != lab['short_name']:
            backup_src_path += '/{}'.format(lab['short_name'])
            RestoreVars.set_restore_var(backup_src_path=backup_src_path)

        assert not test_server_conn.exec_cmd(
            "test -e {}".format(backup_src_path))[0], \
            "Missing backup files from source {}: {}".format(
                test_server_attr['name'], backup_src_path)

        tis_backup_files = install_helper.get_backup_files(
            TITANIUM_BACKUP_FILE_PATTERN, backup_src_path, test_server_conn)
        assert len(tis_backup_files) >= 2, \
            "Missing backup files: {}".format(tis_backup_files)

        # extract build id from the file name
        file_parts = tis_backup_files[0].split('_')
        file_backup_build_id = '_'.join([file_parts[3], file_parts[4]])

        assert re.match(TIS_BLD_DIR_REGEX, file_backup_build_id), \
            "Invalid build id format {} extracted from backup_file {}".format(
                file_backup_build_id, tis_backup_files[0])

        if backup_build_id is not None:
            if backup_build_id != file_backup_build_id:
                LOG.info("The build id extracted from the backup file differs "
                         "from the one specified; using the extracted build "
                         "id {} ....".format(file_backup_build_id))
                backup_build_id = file_backup_build_id
        else:
            backup_build_id = file_backup_build_id

        RestoreVars.set_restore_var(backup_build_id=backup_build_id)

    if controller_conn:
        # Wipe disks in order to make controller-0 NOT boot from hard-disks
        # hosts = [k for k, v in lab.items() if isinstance(v, node.Node)]
        # install_helper.wipe_disk_hosts(hosts)
        if not RestoreVars.get_restore_var('skip_reinstall'):
            LOG.info('Try to do wipedisk_via_helper on controller-0')
            install_helper.wipedisk_via_helper(controller_conn)

    assert backup_build_id, "The build id of the system backup must be provided."

    return tis_backup_files
def restore_setup(pre_restore_checkup):
    """
    Fixture to do preparation before system restore.

    Args:
        pre_restore_checkup:
            - actions done prior to this

    Return:
        a dictionary containing information about the target system, output
        directory, build server and backup files.
    """
    LOG.debug('Restore with settings:\n{}'.format(
        RestoreVars.get_restore_vars()))
    lab = InstallVars.get_install_var('LAB')
    LOG.info("Lab info; {}".format(lab))
    hostnames = [k for k, v in lab.items() if isinstance(v, node.Node)]
    LOG.info("Lab hosts; {}".format(hostnames))

    backup_build_id = RestoreVars.get_restore_var("BACKUP_BUILD_ID")
    output_dir = ProjVar.get_var('LOG_DIR')
    controller_node = lab['controller-0']

    controller_prompt = ''
    extra_controller_prompt = Prompt.TIS_NODE_PROMPT_BASE.format(
        lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0

    if RestoreVars.get_restore_var('skip_reinstall'):
        LOG.info('Skip reinstall as instructed')
        LOG.info('Connect to controller-0 now')
        controller_node.ssh_conn = install_helper.ssh_to_controller(
            controller_node.host_ip,
            initial_prompt=extra_controller_prompt,
            fail_ok=True)
        bld_server_obj = None
    else:
        # bld_server = get_build_server_info(
        #     InstallVars.get_install_var('BUILD_SERVER'))
        bld_server = get_build_server_info(
            RestoreVars.get_restore_var('BUILD_SERVER'))
        LOG.info("Connecting to Build Server {} ....".format(
            bld_server['name']))

        bld_server_attr = dict()
        bld_server_attr['name'] = bld_server['name']
        bld_server_attr['server_ip'] = bld_server['ip']
        bld_server_attr['prompt'] = r'{}@{}\:(.*)\$ '.format(
            TestFileServer.get_user(), bld_server['name'])

        bld_server_conn = install_helper.establish_ssh_connection(
            bld_server_attr['name'],
            user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=bld_server_attr['prompt'])

        bld_server_conn.exec_cmd("bash")
        bld_server_conn.set_prompt(bld_server_attr['prompt'])
        bld_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        bld_server_attr['ssh_conn'] = bld_server_conn
        bld_server_obj = Server(**bld_server_attr)

        # If controller is accessible, check if a USB with backup files is
        # available
        load_path = os.path.join(
            BuildServerPath.DEFAULT_WORK_SPACE,
            RestoreVars.get_restore_var("BACKUP_BUILDS_DIR"),
            backup_build_id)
        InstallVars.set_install_var(tis_build_dir=load_path)

        # set up feed for controller
        LOG.fixture_step("Setting install feed in tuxlab for controller-0 ... ")
        if 'vbox' not in lab['name'] and not RestoreVars.get_restore_var(
                'skip_setup_feed'):
            assert install_helper.set_network_boot_feed(bld_server_conn,
                                                        load_path), \
                "Failed to set up feed for controller"

        if not RestoreVars.get_restore_var('skip_reinstall'):
            # power off hosts
            LOG.fixture_step("Powering off system hosts ... ")
            install_helper.power_off_host(hostnames)

            LOG.fixture_step("Booting controller-0 ... ")
            is_cpe = (lab.get('system_type', 'Standard') == 'CPE')
            low_latency = RestoreVars.get_restore_var('low_latency')

            os.environ['XTERM'] = 'xterm'
            install_helper.boot_controller(small_footprint=is_cpe,
                                           system_restore=True,
                                           low_latency=low_latency)

            # establish ssh connection with controller
            LOG.fixture_step("Establishing ssh connection with controller-0 "
                             "after install...")

            node_name_in_ini = r'{}.*\~\$ '.format(
                install_helper.get_lab_info(controller_node.barcode)['name'])
            controller_prompt = re.sub(r'([^\d])0*(\d+)', r'\1\2',
                                       node_name_in_ini)
            controller_prompt = controller_prompt + '|' + \
                Prompt.TIS_NODE_PROMPT_BASE.format(
                    lab['name'].split('_')[0]) + '|' + Prompt.CONTROLLER_0

            LOG.info('initial_prompt=' + controller_prompt)
            controller_node.ssh_conn = install_helper.ssh_to_controller(
                controller_node.host_ip, initial_prompt=controller_prompt)
            LOG.info('Deploy ssh key')
            controller_node.ssh_conn.deploy_ssh_key()

    ControllerClient.set_active_controller(ssh_client=controller_node.ssh_conn)
    con_ssh = controller_node.ssh_conn
    tis_backup_files = pre_restore_checkup
    backup_src = RestoreVars.get_restore_var('backup_src'.upper())
    backup_src_path = RestoreVars.get_restore_var('backup_src_path'.upper())

    if backup_src.lower() == 'local':
        LOG.fixture_step("Transferring system backup file to controller-0 "
                         "{} ... ".format(HostLinuxUser.get_home()))

        system_backup_file = [file for file in tis_backup_files
                              if "system.tgz" in file].pop()
        common.scp_from_test_server_to_active_controller(
            "{}/{}".format(backup_src_path, system_backup_file),
            HostLinuxUser.get_home())

        assert con_ssh.exec_cmd(
            "ls {}{}".format(HostLinuxUser.get_home(),
                             system_backup_file))[0] == 0, \
            "Missing backup file {} in dir {}".format(
                system_backup_file, HostLinuxUser.get_home())
    elif backup_src.lower() == 'usb':
        tis_backup_files = pre_restore_checkup
        usb_device_name = install_helper.get_usb_device_name(con_ssh=con_ssh)
        usb_part_name = "{}2".format(usb_device_name)
        assert usb_device_name, "No USB found "
        LOG.fixture_step("USB flash drive found, checking for backup files ... ")

        if len(tis_backup_files) == 0:
            LOG.fixture_step("Checking for backup files in USB ... ")
            usb_part_info = install_helper.get_usb_device_partition_info(
                usb_device=usb_device_name, con_ssh=con_ssh)
            assert usb_part_info and len(usb_part_info) > 0, \
                "No USB or partition found"
            assert usb_part_name in usb_part_info.keys(), \
                "No {} partition exists in USB".format(usb_part_name)

            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=con_ssh), \
                    "Unable to mount USB partition {}".format(usb_part_name)

            tis_backup_files = install_helper.get_titanium_backup_filenames_usb(
                usb_device=usb_part_name)
            assert len(tis_backup_files) >= 2, \
                "Missing backup files: {}".format(tis_backup_files)
        else:
            result, mount_point = install_helper.is_usb_mounted(
                usb_device=usb_part_name)
            if not result:
                assert install_helper.mount_usb(usb_device=usb_part_name,
                                                con_ssh=con_ssh), \
                    "Unable to mount USB partition {}".format(usb_part_name)

    _restore_setup = {'lab': lab,
                      'output_dir': output_dir,
                      'build_server': bld_server_obj,
                      'tis_backup_files': tis_backup_files}

    return _restore_setup