def install_non_active_node(node_name, lab):
    """
    Install the non-active controller node, usually it is controller-1, the second
    controller on a non-AIO SX system.

    Args:
        node_name: - the name of the host/node, usually 'controller-1'
        lab: - lab to test
    """
    boot_device_map = lab['boot_device_dict']

    LOG.tc_step("Restoring {}".format(node_name))
    # Power on via VLM and open a console thread to drive the boot.
    install_helper.open_vlm_console_thread(node_name,
                                           boot_interface=boot_device_map,
                                           vlm_power_on=True)

    LOG.info(
        "Verifying {} is Locked, Disabled and Online ...".format(node_name))
    system_helper.wait_for_hosts_states(node_name,
                                        administrative=HostAdminState.LOCKED,
                                        operational=HostOperState.DISABLED,
                                        availability=HostAvailState.ONLINE)

    LOG.info("Unlocking {} ...".format(node_name))
    rc, output = host_helper.unlock_host(node_name, available_only=False)

    # rc 4 means the host unlocked but came up degraded, which is tolerated here.
    assert rc in (0, 4), "Host {} failed to unlock: rc = {}, msg: {}".format(
        node_name, rc, output)

    if rc == 4:
        LOG.warn('{} now is in degraded status'.format(node_name))

    LOG.info('{} is installed'.format(node_name))
def check_url_access(url, headers=None, verify=True, fail_ok=False):
    """
    Check the access to a given url

    Args:
        url(str): url to check
        headers(None|dict): request headers of the http request
        verify(bool|str):
            True: secure request
            False: equivalent to --insecure in curl cmd
            str: applies to https system. CA-Certificate path. e.g.,
                verify=/path/to/cert
        fail_ok(bool):

    Returns(tuple):
        (status_code, response)
        - (1, <std_err>): An exception has occurred
        - (status_code, response): status code and response from requests call
    """
    LOG.info('curl -i {}...'.format(url))
    try:
        response = requests.get(url=url, headers=headers, verify=verify)
    except requests.exceptions.RequestException as e:
        # Request never completed; either report or propagate per fail_ok.
        if not fail_ok:
            raise e
        message = 'Exception trying to access {}: {}'.format(url, e)
        LOG.warn(message)
        return 1, message

    LOG.info('Status: [{}]'.format(response.status_code))
    LOG.debug(
        '\n{} {}\nHeaders: {}\nResponse code: {}\nResponse body: {}'.format(
            response.request.method, response.request.url,
            response.request.headers, response.status_code, response.text))

    # With fail_ok=False an HTTP error status raises; otherwise it is returned.
    if not fail_ok:
        response.raise_for_status()

    return response.status_code, response.text
def check_changes(expected_changes, actual_changes, expected=True):
    """
    Match diff-style output lines against an ordered list of expected regex rules.

    Args:
        expected_changes (list): regex patterns, matched in sorted order
        actual_changes (str): raw multi-line output; lines starting with
            '<' (expected=True) or '>' (expected=False) are candidates
        expected (bool): selects which diff prefix to look at

    Returns (str): empty string when all rules matched; otherwise the
        accumulated mismatch description.
    """
    expected_changes = sorted(expected_changes)
    actual_changes = sorted(actual_changes.splitlines())
    # Quick sanity check: the actual output must contain more lines than there
    # are rules (prefixed lines plus surrounding context).
    if len(actual_changes) <= len(expected_changes):
        error_message = 'Fail, actual changes do not match expected.\n\tactual:\n{}\nexpected:\n{}'.format(
            actual_changes, expected_changes)
        LOG.info(error_message)
        return error_message
    prefix = '<' if expected else '>'
    error_message = ''
    # Two-pointer walk: i over actual lines, j over expected rules.
    i, j = 0, 0
    while i < len(actual_changes) and j < len(expected_changes):
        line = actual_changes[i]
        if line.strip() and line.startswith(prefix):
            # Drop the two-character diff prefix before matching the rule.
            line = line[2:].strip()
            rule = expected_changes[j]
            if re.match(rule, line):
                i += 1
                j += 1
            else:
                # NOTE(review): on mismatch only i advances, so the same rule is
                # retried against later lines — confirm this is intended.
                error_message += 'Mismatch: expecting:{}, actual:{}\n'.format(
                    rule, line)
                LOG.warn(error_message)
                i += 1
        else:
            # Non-prefixed/blank lines are context; skip them.
            i += 1
    return error_message
def get_max_allowed_mtus(host='controller-0', network_type='oam', if_name='',
                         if_info=None):
    """
    Determine the maximum allowed MTU for a NIC of the given network type.

    Args:
        host (str): host to inspect
        network_type (str): substring used to select interfaces by NAME
            (interfaces are matched by name containing this string)
        if_name (str): explicit interface to use; auto-selected when empty
        if_info (dict|None): pre-fetched result of get_if_info(); fetched
            from the host when None

    Returns (tuple): (max_allowed_mtu, current_mtu, if_name)
    """
    if not if_info:
        if_info = get_if_info(host=host)
    if_names = [name for name in if_info if network_type in name]
    if not if_names:
        assert 0, 'Cannot find {} interface on host {}. Interface info: {}'.format(network_type, host, if_info)
    if not if_name:
        if len(if_names) > 1:
            LOG.warn('Multiple NICs found for network_type: "{}" on host:{}, {}'.format(
                network_type, host, if_names))
        # Fall back to the first matching NIC when none was specified.
        if_name = if_names[0]
        LOG.warn('Will chose the first NIC:{} found for network_type: "{}" on host:{}'.format(
            if_name, network_type, host))
    else:
        assert if_name in if_names, 'Specified if_name {} not exist for {}'.format(if_name, host)
    min_mtu = 0
    # A compound interface is capped by the smallest MTU of the NICs it uses.
    uses_ifs = if_info[if_name]['uses_ifs']
    if uses_ifs:
        min_mtu = min([if_info[nic]['mtu'] for nic in uses_ifs])
    # check for mtu type
    # if it's not vlan set not restriction till mtu 9216 CGTS-8184
    uses_ifs_type = if_info[if_name]['type']
    if uses_ifs_type != 'vlan':
        # Non-vlan interfaces are allowed up to 9216 regardless of the
        # underlying NICs' MTUs (see CGTS-8184).
        min_mtu = 9216
    return min_mtu, if_info[if_name]['mtu'], if_name
def is_process_running(pid, host, con_ssh=None, retries=3, interval=3):
    """
    Check if the process with the PID is existing

    Args:
        pid (int): process id
        host (str): host the process resides
        con_ssh: ssh connection/client to the host
        retries (int): times to re-try if no process found before return
            failure
        interval (int): time to wait before next re-try

    Returns:
        boolean - true if the process existing, false otherwise
        msg (str) - the details of the process or error messages
    """
    ps_cmd = 'ps -p {}'.format(pid)
    for attempt in range(retries):
        with host_helper.ssh_to_host(host, con_ssh=con_ssh) as target_ssh:
            rc, details = target_ssh.exec_cmd(ps_cmd, fail_ok=True)
            if rc == 0:
                # ps found the PID; details holds the process listing.
                return True, details
            LOG.warn('Process:{} DOES NOT exist, error:{}'.format(
                pid, details))
        time.sleep(interval)
    return False, ''
def get_impt_prof_by_name(self, name, impt_setting):
    """Return the imported storage-profile entry whose 'name' equals the
    given name; an empty dict when no such profile exists."""
    for profile in impt_setting.values():
        if 'name' in profile and profile['name'] == name:
            return profile
    LOG.warn('Failed to find imported storage-profile with name:{}'.format(
        name))
    return {}
def wait_for_backup_status(backup_id,
                           target_status='available',
                           timeout=1800,
                           wait_between_check=30,
                           fail_ok=False,
                           con_ssh=None):
    """
    Wait the specified cinder-backup to reach certain status.

    Args:
        backup_id: - id of the cinder-backup
        target_status: - the expected status to wait, by default it's 'available'
        timeout: - how long to wait if the cinder-backup does not reach
            expected status, 1800 seconds by default
        wait_between_check: - interval between checking the status, 30 seconds
            by default
        fail_ok: - if the test case should be failed if any error occurs,
            False by default
        con_ssh: - current ssh connection the lab

    Return:
        error-code: - 0 -- success
            - 1 -- failed
        error-msg: - message about the reason of failure
    """
    cmd = 'cinder backup-show ' + backup_id
    end_time = time.time() + timeout
    output = ''
    # Poll until the backup reaches the target status or the deadline passes.
    while time.time() < end_time:
        rc, output = con_ssh.exec_cmd(cmd)
        table_ = table_parser.table(output)
        status = table_parser.get_value_two_col_table(table_, 'status')
        if status.lower() == target_status.lower():
            break
        time.sleep(wait_between_check)
    else:
        # while/else: only reached on timeout (loop ended without break).
        msg = 'Backup:{} did not reach status:{} in {} seconds'.format(
            backup_id, target_status, timeout)
        LOG.warn(msg + 'output:' + output)
        # With fail_ok=False this assert aborts the test case.
        assert fail_ok, msg
        return 1, msg
    return 0, 'all cinder backup:{} reached status:{} after {} seconds'.format(
        backup_id, target_status, timeout)
def is_controller_swacted(prev_active,
                          prev_standby,
                          swact_start_timeout=MTCTimeout.KILL_PROCESS_SWACT_NOT_START,
                          swact_complete_timeout=MTCTimeout.KILL_PROCESS_SWACT_COMPLETE,
                          con_ssh=None):
    """
    Wait and check if the active-controller on the system was 'swacted' with
    give time period

    Args:
        prev_active: previous active controller
        prev_standby: previous standby controller
        swact_start_timeout: check within this time frame if the swacting
            started
        swact_complete_timeout: check if the swacting (if any) completed in
            this time period
        con_ssh: ssh connection/client to the current active-controller

    Returns (bool): True if a swact was detected, False otherwise
    """
    LOG.info('Check if the controllers started to swact within:{}, and completing swacting within:{}'.format(
        swact_start_timeout, swact_complete_timeout))

    code = -1
    host = prev_active
    for retry in range(1, 5):
        LOG.info('retry{:02d}: checking if swacting triggered, prev-active-controller={}'.format(retry, prev_active))
        code = 0
        try:
            code, msg = host_helper.wait_for_swact_complete(
                host, con_ssh=con_ssh, fail_ok=True,
                swact_start_timeout=swact_start_timeout,
                swact_complete_timeout=swact_complete_timeout)

            if 0 == code:
                # Bug fix: the second placeholder is labeled 'pre-standby' but
                # used to be formatted with prev_active; log prev_standby.
                LOG.info('OK, host-swacted, prev-active:{}, pre-standby:{}, code:{}, message:{}'.format(
                    prev_active, prev_standby, code, msg))
                return True

            # Not reported complete: double-check whether the roles flipped.
            active, standby = system_helper.get_active_standby_controllers()
            if active == prev_standby and standby == prev_active:
                LOG.info('swacted?! prev-active:{} prev-standby:{}, cur active:{}, cur standby:{}'.format(
                    prev_active, prev_standby, active, standby))
                return True
            break
        except Exception as e:
            # Queries may fail mid-swact; treat errors as "still in progress".
            LOG.warn('erred, indicating system is in unstable state, meaning probably swacting is in process. '
                     'previous active-controller:{}, previous standby-controller:{}\nerror message:{}'.
                     format(prev_active, prev_standby, e))
            if retry >= 4:
                LOG.error('Fail the test after retry {} times, system remains in unstable state, '
                          'meaning probably swacting is in process. previous active-controller:{}, '
                          'previous standby-controller:{}\nerror message:{}'.
                          format(retry, prev_active, prev_standby, e))
                raise
            time.sleep(10)

    return 0 == code
def rescue_vm(vm_type, vm_id):
    """
    Recover a VM by stopping and restarting it.

    For 'non_autorc' (no auto-recovery) VM types, first wait for the VM to
    reach ERROR status, which is the expected state before a manual rescue.

    Args:
        vm_type (str): VM flavor/type string; checked for 'non_autorc'
        vm_id (str): id of the VM to rescue
    """
    if 'non_autorc' in vm_type:
        vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ERROR,
                                     timeout=VMTimeout.AUTO_RECOVERY)
        LOG.warn('VM got into ERROR status as expected')
    # Bug fix: corrected typo 'rescure' in the log message.
    LOG.warn('Attempting to rescue the VM:{}'.format(vm_id))
    vm_helper.stop_vms(vm_id)
    vm_helper.start_vms(vm_id)
def get_cert_id(output):
    """Extract the certificate id (first capture group of the module-level
    cert_id_line pattern) from the given output; '' when not found."""
    pattern = re.compile(cert_id_line)
    for text_line in output.splitlines():
        matched = pattern.match(text_line)
        if matched and matched.groups():
            return matched.group(1)
    LOG.warn('No certificate id found from output:' + output)
    return ''
def get_suitable_hypervisors():
    """
    Get low latency hypervisors with HT-off

    TODO: following settings should checked, but most of them cannot be
        easily done automatically
    # Processor Configuration
    # Hyper-Threading = Disabled
    # Power & Performance
    # Policy = Performance
    # Workload = Balanced
    # P-States
    # SpeedStep = Enabled
    # Turbo Boost = Enabled
    # Energy Efficient Turbo = Disabled
    # C-States
    # CPU C-State = Disabled
    # Acoustic and Performance
    # Fan Profile = Performance:

    Returns: keys view of the module-level testable_hypervisors dict,
        populated as a side effect of this call.
    """
    global testable_hypervisors

    LOG.fixture_step(
        'Check if the lab meets conditions required by this test case')
    hypervisors = host_helper.get_hypervisors()

    for hypervisor in hypervisors:
        personality, subfunc = system_helper.get_host_values(
            hypervisor, ('personality', 'subfunctions'))
        personalities = subfunc + personality
        # Only low-latency hosts are candidates.
        if not personalities or 'lowlatency' not in personalities:
            continue

        cpu_info, num_threads, vm_cores, num_cores = get_cpu_info(hypervisor)
        # HT-off is detected via the reported CPU topology (threads == 1).
        if cpu_info and 'topology' in cpu_info and cpu_info['topology'][
                'threads'] == 1:
            if num_threads != 1:
                # Topology says HT-off but thread count disagrees; log only.
                LOG.warn(
                    'conflicting info: num_threads={}, while cpu_info.threads={}'
                    .format(num_threads, cpu_info['topology']['threads']))
            testable_hypervisors[hypervisor] = {
                'personalities': personalities,
                'cpu_info': cpu_info,
                'vm_cores': vm_cores,
                'num_cores': num_cores,
                # Flags used later to reserve a hypervisor for a given test.
                'for_host_test': False,
                'for_vm_test': False,
            }
        else:
            LOG.warning(
                'hypervisor:{} has HT-on, ignore it'.format(hypervisor))

    return testable_hypervisors.keys()
def upgrade_host_lock_unlock(host, con_ssh=None):
    """
    swact, if required, lock and unlock before upgrade.

    Args:
        host (str): hostname or id in string format
        con_ssh (SSHClient):

    Returns: (return_code(int), msg(str)) (0, "Host is host is
        locked/unlocked)
    """
    LOG.info("Checking if host {} is active ....".format(host))

    active_controller = system_helper.get_active_controller_name()
    swact_back = False
    if active_controller == host:
        # Cannot lock the active controller; swact away first.
        LOG.tc_step("Swact active controller and ensure active controller is changed")
        exit_code, output = host_helper.swact_host(hostname=active_controller)
        assert 0 == exit_code, "{} is not recognized as active controller".format(active_controller)

        active_controller = system_helper.get_active_controller_name()
        swact_back = True

    LOG.info("Host {}; doing lock/unlock to the host ....".format(host))
    rc, output = host_helper.lock_host(host, con_ssh=con_ssh)
    # rc -1 is tolerated (e.g. already locked); any other non-zero is a reject.
    if rc != 0 and rc != -1:
        err_msg = "Lock host {} rejected".format(host)
        LOG.warn(err_msg)
        return 1, err_msg

    rc, output = host_helper.unlock_host(host, available_only=True, con_ssh=con_ssh)
    if rc != 0:
        err_msg = "Unlock host {} failed: {}".format(host, output)
        return 1, err_msg

    if swact_back:
        time.sleep(60)
        # Wait for the host to be enabled/available before swacting back.
        if not system_helper.wait_for_host_values(host, timeout=360, fail_ok=True,
                                                  operational=HostOperState.ENABLED,
                                                  availability=HostAvailState.AVAILABLE):
            err_msg = " Swacting to standby is not possible because {} is not in available state " \
                      "within the specified timeout".format(host)
            assert False, err_msg
        LOG.tc_step("Swact active controller back and ensure active controller is changed")
        rc, output = host_helper.swact_host(hostname=active_controller)
        if rc != 0:
            err_msg = "Failed to swact back to host {}: {}".format(host, output)
            return 1, err_msg

        LOG.info("Swacted and {} has become active......".format(host))

    return 0, "Host {} is locked and unlocked successfully".format(host)
def abort_upgrade(con_ssh=None, timeout=60, fail_ok=False):
    """
    Aborts upgrade

    Args:
        con_ssh (SSHClient):
        timeout (int)
        fail_ok (bool):

    Returns (tuple):
        (0, dict/list)
        (1, <stderr>)   # cli returns stderr, applicable if fail_ok is true
    """
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()
    cmd = "source /etc/nova/openrc; system upgrade-abort"
    con_ssh.send(cmd)
    end_time = time.time() + timeout
    rc = 1
    # The CLI is interactive: answer the yes/no prompt, then type 'abort' at
    # the confirmation prompt, until the shell prompt comes back.
    while time.time() < end_time:
        index = con_ssh.expect([con_ssh.prompt, Prompt.YES_N_PROMPT], timeout=timeout)
        if index == 1:
            con_ssh.send('yes')
            index = con_ssh.expect([con_ssh.prompt, Prompt.CONFIRM_PROMPT], timeout=timeout)
            if index == 1:
                con_ssh.send('abort')
                index = con_ssh.expect([con_ssh.prompt, Prompt.CONFIRM_PROMPT], timeout=timeout)
        if index == 0:
            # Back at the shell prompt; collect the CLI's exit status.
            rc = con_ssh.exec_cmd("echo $?")[0]
            con_ssh.flush()
            break

    if rc != 0:
        err_msg = "CLI system upgrade-abort rejected"
        LOG.warning(err_msg)
        if fail_ok:
            return 1, err_msg
        else:
            raise exceptions.CLIRejected(err_msg)

    table_ = system_upgrade_show()[1]
    state = table_parser.get_value_two_col_table(table_, "state")
    if "aborting" in state:
        return 0, "Upgrade aborting"
    else:
        err_msg = "Upgrade abort failed"
        if fail_ok:
            LOG.warn(err_msg)
            return 1, err_msg
        else:
            raise exceptions.CLIRejected(err_msg)
def pb_migrate_test(backup_info, con_ssh, vm_ids=None):
    """
    Run migration test before doing system backup.

    Args:
        backup_info: - options for doing backup
        con_ssh: - current ssh connection
        vm_ids: - ids of candidate VMs; one is chosen at random

    Return:
        None (0 when skipped for lack of hypervisors)
    """
    hypervisors = host_helper.get_up_hypervisors(con_ssh=con_ssh)
    if len(hypervisors) < 2:
        # Migration needs at least two hosts to move a VM between.
        LOG.info(
            'Only {} hypervisors, it is not enough to test migration'.format(
                len(hypervisors)))
        LOG.info('Skip migration test')
        return 0
    else:
        LOG.debug('There {} hypervisors'.format(len(hypervisors)))

    LOG.info('Randomly choose some VMs and do migrate:')
    target = random.choice(vm_ids)
    LOG.info('-OK, test migration of VM:{}'.format(target))

    original_host = vm_helper.get_vm_host(target)
    LOG.info('Original host:{}'.format(original_host))

    vm_helper.live_migrate_vm(target)
    current_host = vm_helper.get_vm_host(target)
    # Bug fix: previously logged original_host here instead of the new host.
    LOG.info('After live-migration, host:{}'.format(current_host))

    if original_host == current_host:
        LOG.info('backup_info:{}'.format(backup_info))
        LOG.warn(
            'VM is still on its original host, live-migration failed? original host:{}'
            .format(original_host))

    # Continue with a cold migration from wherever the VM is now.
    original_host = current_host
    vm_helper.cold_migrate_vm(target)
    current_host = vm_helper.get_vm_host(target)
    # Bug fix: 'code-migration' typos corrected to 'cold-migration'.
    LOG.info('After cold-migration, host:{}'.format(current_host))
    if original_host == current_host:
        LOG.warn(
            'VM is still on its original host, cold-migration failed? original host:{}'
            .format(original_host))
def compare_storage_profile(self, xml_setting, impt_setting):
    """
    Compare storage profiles parsed from XML against the imported profiles.

    Args:
        xml_setting (list): profiles parsed from the XML file
        impt_setting (dict): imported profiles keyed by uuid

    Returns (int): 0 when every XML profile has a matching imported profile,
        1 on the first missing or mismatching profile.
    """
    for sprof in xml_setting:
        name = sprof['name']
        sprof_impt = self.get_impt_prof_by_name(name, impt_setting)
        if not sprof_impt:
            # Bug fix: previous message was garbled ('NONE profile imported EISTS').
            LOG.warn('No imported profile exists for profile from XML {}'.
                     format(name))
            return 1
        if not self.compare_single_storage_profile(sprof, sprof_impt):
            LOG.warn('profile imported does not match profile from XML')
            return 1
    return 0
def parse_storprofiles_from_xml(xml_file=None):
    """
    Parse storage-profile definitions from an XML file.

    Args:
        xml_file: path to the XML file; falsy value returns {}

    Returns: example result: {'storageProfile':[
        {'disk': [{'node': '/dev/sdb', 'volumeFunc': 'osd', 'size': '228936'},
        {'node': '/dev/sdc', 'volumeFunc': 'osd', 'size': '228936'}],
        'name': 'ceph_storage_profile' }],
        'localstorageProfile': {'lvg': [{'lvm_vg_name': 'nova-local',
        'concurrent_disk_operations': '2', 'instance_backing': 'image'}],
        'disk': [{'node': '/dev/sdb', 'size': '228936'}],
        'name': 'with_ceph_image_local_storage_backed'}
        }
    """
    if not xml_file:
        return {}

    expected_types = ['storageProfile', 'localstorageProfile']
    # Pre-seed each expected profile type with an empty list.
    storprofile = {profile_type: [] for profile_type in expected_types}

    root = ElementTree.parse(xml_file).getroot()
    for child in root:
        if child.tag not in expected_types:
            continue
        # Start from the element's own attributes, then group the child
        # elements' attribute dicts by tag name.
        values = child.attrib
        for grandchild in child:
            if grandchild.tag in values:
                values[grandchild.tag].append(grandchild.attrib)
            else:
                values[grandchild.tag] = [grandchild.attrib]

        if child.tag in storprofile:
            LOG.warn('{} already exists!'.format(child.tag))
            storprofile[child.tag].append(values)
        else:
            storprofile[child.tag] = [values]

    return storprofile
def collect_logs(con_ssh=None, fail_ok=True):
    """
    Collect logs on the system by calling collect_tis_logs, backup logs under
    /scratch before head if any, so that there are enough disk space.

    Args:
        con_ssh: - ccurrent ssh connection to the target
        fail_ok: - True: do not break the whole test case if there's any
            error during collecting logs
            False: abort the entire test case if there's any eorr. True by
            default.

    Return:
        None
    """
    log_tarball = r'/scratch/ALL_NODES*'
    log_dir = r'~/collected-logs'
    old_log_dir = r'~/collected-logs/old-files'

    try:
        if con_ssh is None:
            con_ssh = ControllerClient.get_active_controller()

        # Create archive directories for previously collected tarballs.
        prep_cmd = 'mkdir {}; mkdir {}'.format(log_dir, old_log_dir)
        code, output = con_ssh.exec_cmd(prep_cmd, fail_ok=fail_ok)
        if code != 0:
            LOG.warn('failed to execute cmd:{}, code:{}'.format(
                prep_cmd, code))
            # Cannot archive; delete old tarballs to free space instead.
            # NOTE(review): ambiguous in original formatting whether this rm
            # belongs to the failure branch — confirm intent.
            con_ssh.exec_sudo_cmd('rm -rf /scratch/ALL_NODES*',
                                  fail_ok=fail_ok)

        # Move any existing tarballs out of /scratch to make room.
        prep_cmd = 'mv -f {} {}'.format(log_tarball, old_log_dir)
        code, output = con_ssh.exec_sudo_cmd(prep_cmd, fail_ok=fail_ok)
        if code != 0:
            LOG.warn('failed to execute cmd:{}, code:{}'.format(
                prep_cmd, code))
            # Fall back to deleting the tarballs when the move failed.
            LOG.info('execute: rm -rf /scratch/ALL_NODES*')
            con_ssh.exec_sudo_cmd('rm -rf /scratch/ALL_NODES*',
                                  fail_ok=fail_ok)
            LOG.info('ok, removed /scratch/ALL_NODES*')
        else:
            LOG.info('ok, {} moved to {}'.format(log_tarball, old_log_dir))

        collect_tis_logs(con_ssh=con_ssh)
    except exceptions.ActiveControllerUnsetException:
        # Best-effort: no active controller known, silently skip collection.
        pass
def create_keys(ssh_con, hp='', den=True, st=True, kt=('f', 'p'),
                opr='private.st.key', opu='public.st.key'):
    """
    Run the 'create' key-generation command over ssh and return its output.

    Args:
        ssh_con: ssh connection to run the command on
        hp: parent handle option passed through as 'hp'
        den: 'den' flag; mutually exclusive with st
        st: 'st' flag; forced to False when den is also True
        kt: key types; NOTE(review): only honored when passed as a set,
            although the default is a tuple — confirm whether tuple/list
            should also be expanded
        opr: output file for the private key
        opu: output file for the public key

    Returns: second element of run_cmd()'s result (command output)
    """
    # 'den' and 'st' cannot both be set; 'den' wins.
    if den is True and st is True:
        LOG.warn('Cannot set both "den" and "st"')
        st = False

    cmd = 'create'
    options = {'hp': hp, 'st': st, 'opr': opr, 'opu': opu, 'den': den}
    # Expand key types as repeated '-st' arguments (sets only, see NOTE above).
    if isinstance(kt, set) and len(kt) > 0:
        cmd += ' -st ' + ' -st '.join(kt)

    return run_cmd(ssh_con, cmd, output_handle=False, **options)[1]
def check_transient_values(ssh_con, handles=None, expecting=True,
                           fail_ok=False):
    """
    Check whether values stored in volatile (transient) memory exist.

    Args:
        ssh_con: ssh connection to the target
        handles: handle or list/tuple of handles to check; None/empty means
            "check that nothing is stored"
        expecting (bool): whether the values are expected to be present
        fail_ok (bool): when False, a mismatch triggers an assert

    Returns (tuple): (result(bool), values, handles)
    """
    LOG.info(
        'Check if the values stored in volatile memory existing or not, expecting: {}'
        .format(expecting))
    if handles:
        if isinstance(handles, list) or isinstance(handles, tuple):
            to_check = [h for h in handles if h]
        else:
            to_check = [handles]
    else:
        LOG.info('check if any values in volatile memory')
        to_check = []

    rc, values = get_volatile_content(ssh_con, fail_ok=fail_ok)
    # NOTE(review): this compares for exact list equality with to_check, not
    # membership — confirm that ordering/duplication semantics are intended.
    if rc == 0 and values == to_check:
        if expecting is True:
            LOG.info('OK, found transient contents as expected')
            return True, values, handles
        else:
            LOG.warn('Not expecting but find values for handles:{}, vaules:{}'.
                     format(handles, values))
            if fail_ok:
                return False, values, handles
            assert False, 'Failed, expecting nothing in transient memory, but got {}'.format(
                values)
    else:
        if expecting is False:
            LOG.info('OK, as expected, no transient contents found')
            return True, values, handles
        else:
            LOG.warn(
                'Expecting but failed to find values for handles:{}'.format(
                    handles))
            if fail_ok:
                # NOTE(review): returns True even though the expectation was
                # not met — looks like it should be False; confirm.
                return True, values, handles
            else:
                assert False, 'Failed to find expected contents:{}'.format(
                    to_check)
def gen_linux_password(exclude_list=None, length=32):
    """
    Generate a random Linux password of the requested length containing
    lowercase, uppercase, digits and special characters.

    Args:
        exclude_list (list|str|None): passwords that must not be generated
        length (int): requested password length; must be at least
            MIN_LINUX_PASSWORD_LEN, otherwise None is returned

    Returns (str|None): the generated password, or None when length is too
        small.
    """
    if exclude_list is None:
        exclude_list = []
    if not isinstance(exclude_list, list):
        exclude_list = [exclude_list]

    if length < MIN_LINUX_PASSWORD_LEN:
        LOG.warn(
            'Length requested is too small, must longer than {}, requesting '
            '{}'.format(MIN_LINUX_PASSWORD_LEN, length))
        return None

    total = length
    left = 3
    vocabulary = [ascii_lowercase, ascii_uppercase, digits,
                  SPECIAL_CHARACTERS]
    password = ''
    # Retry until a password not in the exclude list is produced.
    # NOTE(review): total/left are not reset between retries, so repeated
    # iterations shrink the sampling budget and could make
    # random.randint(1, total - left) fail — confirm.
    while not password:
        raw_password = []
        # Draw a random number of characters from each character class so
        # every class is represented at least once.
        for chars in vocabulary:
            count = random.randint(1, total - left)
            raw_password += random.sample(chars, min(count, len(chars)))
            left -= 1
            total -= count

        password = ''.join(
            random.sample(raw_password, min(length, len(raw_password))))

        # Pad with random characters if the sample fell short of the length.
        missing_length = length - len(password)
        if missing_length > 0:
            all_chars = ''.join(vocabulary)
            password += ''.join(
                random.choice(all_chars) for _ in range(missing_length))

        if password in exclude_list:
            password = ''

    LOG.debug('generated valid password:{}'.format(password))
    return password
def __get_value(table_, field, row_index=None):
    """
    Args:
        table_: output table as dictionary parsed by tempest
        field: field of the item. such as id, name, gateway_ip, etc
        row_index: row_index for a multi-column table. This is not required
            for a two-column table. Following table is considered to have
            only one row, and the row_index for that row is 0.

    +--------------------------------------+------------+--------+--------------------------------------------------------------+
    | ID                                   | Name       | Status | Networks                                                     |
    +--------------------------------------+------------+--------+--------------------------------------------------------------+
    | 1ab2c401-7863-42ab-8d2b-c2b7e8fa3adb | wrl5-avp-0 | ACTIVE | internal-net0=10.10.1.2, 10.10.0.2;public-net0=192.168.101.3 |
    +--------------------------------------+------------+--------+--------------------------------------------------------------+

    Returns: return the value for a specific field (on a specific row if
        it's a multi-column table_)

    Raises:
        ValueError: field not found (two-column), or row_index missing
            (multi-column)
    """
    if __is_table_two_column(table_):
        # Two-column table: rows are (field, value) pairs; row_index ignored.
        if row_index is not None:
            LOG.warn(
                "Two-column table found, row_index {} will not be used to "
                "locate {} field".format(row_index, field))
        for row in table_['values']:
            if row[0] == field:
                return row[1]
        raise ValueError("Value for {} not found in table_".format(field))
    else:
        # table is a multi-column table
        if row_index is None:
            raise ValueError("row_index needs to be supplied!")
        # Resolve the column by header name, then index into the row.
        col_index = __get_column_index(table_, field)
        return_value = table_['values'][row_index][col_index]
        LOG.debug("return value for {} field is: {}".format(
            field, return_value))
        return return_value
def get_if_info(host):
    """
    Collect per-interface information for a host from 'system host-if-list'.

    Args:
        host (str): host to query

    Returns (dict): {if_name: {'mtu': int, 'uses_ifs': list,
        'used_by_ifs': list, 'type': str, 'network_type': list}}

    Raises:
        AssertionError: when the interface table cannot be read/parsed or
            yields no interfaces.
    """
    import ast

    if_info = {}
    try:
        if_table = host_helper.get_host_interfaces_table(host)
        index_name = if_table['headers'].index('name')
        index_type = if_table['headers'].index('type')
        index_uses_ifs = if_table['headers'].index('uses i/f')
        index_used_by_ifs = if_table['headers'].index('used by i/f')
        index_class = if_table['headers'].index('class')
        index_attributes = if_table['headers'].index('attributes')

        for value in if_table['values']:
            name = value[index_name]
            if_type = value[index_type]
            # Security/robustness fix: the CLI cells hold Python list literals;
            # parse them with ast.literal_eval instead of eval().
            uses_ifs = ast.literal_eval(value[index_uses_ifs])
            used_by_ifs = ast.literal_eval(value[index_used_by_ifs])
            if_class = value[index_class]
            network_types = [if_class]
            attributes = value[index_attributes].split(',')

            if name in if_info:
                LOG.warn('NIC {} already appeard! Duplicate of NIC:"{}"'.format(name, if_info[name]))
            else:
                if_info[name] = {
                    # First attribute is 'MTU=<n>'; extract the numeric part.
                    'mtu': int(re.split('MTU=', attributes[0])[1]),
                    'uses_ifs': uses_ifs,
                    'used_by_ifs': used_by_ifs,
                    'type': if_type,
                    'network_type': network_types
                }
    except Exception as e:
        # Merged the previously duplicated IndexError/Exception handlers,
        # which had identical bodies.
        LOG.error('Failed to get oam-interface name/type, error message:{}'.format(e))
        assert False, 'Failed to get oam-interface name/type, error message:{}'.format(e)

    assert if_info, 'Cannot get interface information'

    return if_info
def prepare_cert_file(con_ssh,
                      primary_cert_file=SecurityPath.DEFAULT_CERT_PATH,
                      alt_cert_file=SecurityPath.ALT_CERT_PATH):
    """
    Locate a certificate file on the target and make a timestamped working
    copy of it for the test.

    Lookup order: primary path, then alternative path, then fetching it from
    the build server (skipping the test if that also fails).

    Args:
        con_ssh: ssh connection to the active controller
        primary_cert_file (str): preferred certificate location
        alt_cert_file (str): fallback certificate location

    Returns (str): path of the copied certificate file to use in the test.
    """
    check_cmd = 'test -e {}'.format(primary_cert_file)
    return_code, result = con_ssh.exec_cmd(check_cmd, fail_ok=True)
    if return_code != 0:
        LOG.info('no certificate file found at:{}, code:{}, output:{}'.format(
            os.path.dirname(primary_cert_file), return_code, result))
        LOG.info('searching alternative location:{}'.format(alt_cert_file))
        check_cmd = 'test -e {}'.format(alt_cert_file)
        return_code, result = con_ssh.exec_cmd(check_cmd, fail_ok=True)
        if return_code != 0:
            message = 'no certificate file neither at specified location nor alternative path,' + \
                      'specified:{}, alternative path:{}, code:{}, output:{}'.format(
                          primary_cert_file, alt_cert_file, return_code, result)
            LOG.warn(message)
            # Last resort: pull a certificate from the build server.
            return_code, result = fetch_cert_file(con_ssh)
            if return_code != 0 or not result:
                skip(message)
            return result
        else:
            # Found at the alternative location; use that from here on.
            primary_cert_file = alt_cert_file

    # Work on a timestamped copy so the original certificate stays untouched.
    cert_file_to_use = os.path.join(os.path.dirname(primary_cert_file),
                                    '.bk-' + time.strftime('%Y%m%d-%H%M%S'))
    LOG.info('copy certificate file to ' + cert_file_to_use)
    return_code, result = con_ssh.exec_sudo_cmd('cp -L {} {}'.format(
        primary_cert_file, cert_file_to_use))
    assert return_code == 0, 'Failed to copy certificate file for testing'

    LOG.info('OK, found certificate file:{}'.format(primary_cert_file))
    return cert_file_to_use
def check_nv_values(ssh_con, handle, size=32, expecting=True, fail_ok=False):
    """
    Check whether non-volatile (NV) content exists for the given handle.

    Args:
        ssh_con: ssh connection to the target
        handle: NV handle to read
        size (int): number of bytes to read
        expecting (bool): whether the content is expected to exist
        fail_ok (bool): when False, a mismatch triggers an assert

    Returns (tuple): (result(bool), output(str), handle)
    """
    LOG.info('check if nv content exists')
    output = ''
    try:
        rc, output = run_cmd(ssh_con,
                             'nvread -ha {} -sz {}'.format(handle, size),
                             output_handle=False,
                             fail_ok=fail_ok)
    except Exception as e:
        # Treat a failed read as "content does not exist".
        LOG.info('Caught exception:{} when run cmd: "nvread -ha..."'.format(e))
        rc = 1

    if rc == 0:
        if expecting is True:
            # Bug fix: corrected typo 'epxected' in the log message.
            LOG.info('Found the non_volatile contents:' + output +
                     ' as expected')
            return True, output, handle
        else:
            LOG.info(
                'Not expecting but find non-volatile contents for:{}'.format(
                    handle))
            if fail_ok:
                return False, output, handle
            else:
                assert False, 'Not expecting but find non-volatile contents for:{}'.format(
                    handle)
    else:
        if expecting is False:
            LOG.info('OK, did not find the NV content, this is expected.')
            return True, output, handle
        else:
            LOG.warn('Did not find the NV content but expecting.')
            if fail_ok:
                return False, output, handle
            else:
                assert False, 'Expecting but failed to find non_volatile contents for handle:' + str(
                    handle)
def validate_current_strategy_phase(orchestration, expected_phase,
                                    conn_ssh=None):
    """
    Validates the current expected phase

    Args:
        orchestration:
        expected_phase:
        conn_ssh:

    Returns (bool): True if valid otherwise return False
    """
    # Reject phases that are not part of the known strategy phases.
    if not OrchestStrategyPhase.validate(phase=expected_phase):
        LOG.warn(
            "The specified orchestration strategy phase='{}' is not valid phase. Valid phases are: {}"
            .format(expected_phase, [
                OrchestStrategyPhase.BUILD, OrchestStrategyPhase.ABORT,
                OrchestStrategyPhase.APPLY
            ]))
        return False

    current_phase = get_current_strategy_phase(orchestration,
                                               conn_ssh=conn_ssh)
    if current_phase is None or current_phase != expected_phase:
        LOG.warn(
            "Current orchestration strategy phase='{}' does not match with expected phase='{}'"
            .format(current_phase, expected_phase))
        return False

    LOG.info(
        "Current orchestration strategy phase is {} as expected".format(
            current_phase))
    return True
def get_process_info(name, cmd='', pid_file='', host='', process_type='sm',
                     con_ssh=None, auth_info=Tenant.get('admin_platform')):
    """
    Get the information of the process with the specified name

    Args:
        name (str): name of the process
        cmd (str): path of the executable
        pid_file (str): path of the file containing the process id
        host (str): host on which the process resides
        process_type (str): type of service/process, must be one of 'sm',
            'pm', 'other'
        con_ssh: ssh connection/client to the active controller
        auth_info

    Returns:
        'sm' type: (pid, name, impact, status, pid_file) — pid is -1 when the
            process is not enabled-active
        'pmon' type: (pid, name)
        otherwise: (pid, name), or (-1, '') when not found
    """
    LOG.info('name:{} cmd={} pid_file={} host={} process_type={}'.format(
        name, cmd, pid_file, host, process_type))
    active_controller = system_helper.get_active_controller_name(
        con_ssh=con_ssh, auth_info=auth_info)
    if not host:
        # Default to the active controller when no host is given.
        host = active_controller

    if process_type == 'sm':
        LOG.debug('to get_process_info for SM process:{} on host:{}'.format(name, host))
        if host != active_controller:
            LOG.warn('Already swacted? host:{} is not the active controller now. Active controller is {}'.format(
                host, active_controller))
        pid, name, impact, status, pid_file = get_process_from_sm(
            name, con_ssh=con_ssh, pid_file=pid_file)
        if status != 'enabled-active':
            LOG.warn('SM process is in status:{}, not "enabled-active"'.format(status))
            if 'disabl' in status:
                # Likely querying the wrong controller mid-swact; retry once
                # after a short wait.
                LOG.warn('Wrong controller? Or controller already swacted, wait and try on the other controller')
                time.sleep(10)
                return get_process_from_sm(name, pid_file=pid_file)
            return -1, name, impact, status, pid_file
        else:
            return pid, name, impact, status, pid_file
    elif process_type == 'pmon':
        pid = get_pmon_process_id(pid_file, host, con_ssh=con_ssh)
        LOG.info('Found: PID={} for PMON process:{}'.format(pid, name))
        return pid, name
    else:
        LOG.info('Try to find the process:{} using "ps"'.format(name))
        pid = get_ancestor_process(name, host, cmd=cmd, con_ssh=con_ssh)[0]
        if -1 == pid:
            return -1, ''
        return pid, name
def check_volumes_spaces(con_ssh):
    """
    Check cinder LVM pool usage and clean up known LVM over-usage alarms.

    Args:
        con_ssh: ssh connection to the active controller
    """
    from keywords import cinder_helper

    LOG.info('Checking cinder volumes and space usage')
    usage_threshold = 0.70
    free_space, total_space, unit = cinder_helper.get_lvm_usage(con_ssh)
    # NOTE(review): triggers when free space drops below 70% of total —
    # confirm the intended threshold semantics.
    if total_space and free_space < usage_threshold * total_space:
        # Removed a redundant inner 'if total_space:' — this branch is only
        # reachable when total_space is truthy.
        LOG.info(
            'cinder LVM over-used: free:{}, total:{}, ration:{}%'.format(
                free_space, total_space, free_space / total_space * 100))

        LOG.info('Deleting known LVM alarms')
        expected_reason = r'Cinder LVM .* Usage threshold exceeded; threshold: (\d+(\.\d+)?)%, actual: (\d+(\.\d+)?)%'
        expected_entity = 'host=controller'
        value_titles = ('UUID', 'Alarm ID', 'Reason Text', 'Entity ID')
        lvm_pool_usage = system_helper.get_alarms(fields=value_titles,
                                                  con_ssh=con_ssh)
        if not lvm_pool_usage:
            LOG.warn('Cinder LVM pool is used up to 75%, but no alarm for it')
        else:
            # Removed the unreachable 'elif len(...) < 1' branch: this else
            # branch guarantees at least one alarm entry.
            if len(lvm_pool_usage) > 1:
                LOG.warn(
                    'More than one alarm existing for Cinder LVM over-usage')

            for lvm_alarm in lvm_pool_usage:
                alarm_uuid, alarm_id, reason_text, entity_id = lvm_alarm.split(
                    '::::')
                # Only delete alarms matching the known LVM over-usage pattern.
                if re.match(expected_reason, reason_text) and re.search(
                        expected_entity, entity_id):
                    LOG.info('Expected alarm:{}, reason:{}'.format(
                        alarm_uuid, reason_text))
                    LOG.info('Deleting it')
                    system_helper.delete_alarms(alarms=alarm_uuid)
def check_persistent_values(ssh_con, handle, expecting=True, fail_ok=False):
    """
    Check whether persistent memory still holds a value for the handle.

    Args:
        ssh_con: ssh connection to the target
        handle: persistent-memory handle to look for
        expecting (bool): whether the value is expected to be present
        fail_ok (bool): when False, a mismatch triggers an assert

    Returns (tuple): (result(bool), ..., ...)
        NOTE(review): the last two elements are (handle, output) in the first
        success branch but (output, handle) elsewhere — confirm intended
        ordering.
    """
    LOG.info('Check if the value still existing for handle:{}'.format(handle))
    cli = 'getcapability -cap 1 -pr ' + str(handle)
    output = ''
    try:
        output = run_cmd(ssh_con, cli, output_handle=False,
                         fail_ok=fail_ok)[1]
    except Exception as e:
        # Best-effort: a failed query is treated as "value not found".
        LOG.warn('Caught exception: {}'.format(e))

    # The handle may appear with or without its '0x' prefix in the output.
    if str(handle) in output or str(handle)[2:] in output:
        if expecting is True:
            LOG.info('OK, found the value in persistent memory, handle:' +
                     handle + ' as expected')
            return True, handle, output
        else:
            LOG.warn(
                'Not expecting but got value for handle:{}, results:{}'.format(
                    handle, output))
            if fail_ok:
                return False, output, handle
            else:
                assert False, 'Not-expecting but find the persistent contents:' + str(
                    handle)
    else:
        if expecting is True:
            LOG.warn('Expecting but could not find the persistent values')
            if fail_ok:
                return False, output, handle
            assert False, 'Could not find the persistent values, while expecting them'
        else:
            LOG.info(
                'OK, as expected, no value in persistent memory, handle:' +
                handle)
            return True, output, handle
def create_portforwarding_rules_for_vms(vm_mgmt_ips, router_id, protocol,
                                        for_ssh=False):
    """
    Creates port-forwarding rules for vms. The public port is selected based
    on protocol for the purpose of this test:
        tcp - if for_ssh is True [8080, 8081, ..., 8089], otherwise [8090,
            8091, ..., 8099]
        udp - [8100, 8101, ..., 8109]
    This functions creates max 10 rules for the purpose this test

    Args:
        vm_mgmt_ips (dict): {vm_di: mgmt_ip}
        router_id (str): Id of tenant router where the portforwarding rules
            are created
        protocol (str): tcp/udp
        for_ssh(bool): valid only with tcp protocol, otherwise ignored.

    Returns: (tuple) code, dict { vm_id: { 'pf_id': <pf_id>,
        'public_port': <public_port>} }

    Raises:
        exceptions.InvalidStructure: on missing/invalid arguments
    """
    if vm_mgmt_ips is None or not isinstance(vm_mgmt_ips, dict) or len(vm_mgmt_ips) == 0 or router_id is None \
            or protocol is None:
        msg = "Value for vm_mgmt_ips, router_id, and protocol must be specified "
        LOG.warn(msg)
        raise exceptions.InvalidStructure(msg)

    # Default inside port is HTTP; ssh forwarding uses 22 instead.
    inside_port = 80
    if protocol == "tcp":
        if for_ssh:
            base_port = 8080
            inside_port = 22
        else:
            base_port = 8090
    elif protocol == "udp":
        base_port = 8100
    else:
        msg = "Invalid protocol value {} provided".format(protocol)
        LOG.warn(msg)
        raise exceptions.InvalidStructure(msg)

    vm_pfs = {}
    # zip with range(0, 10) caps the number of rules at 10.
    for key, i in zip(vm_mgmt_ips, range(0, 10)):
        vm_name = vm_helper.get_vm_name_from_id(key)
        public_port = str(base_port + i)
        LOG.info(
            "Creating port forwarding rule for VM: {}: protocol={}, inside_address={}, inside_port={},"
            "outside_port={}.".format(vm_name, protocol, vm_mgmt_ips[key][0],
                                      inside_port, public_port))
        rc, pf_id, msg = network_helper.create_port_forwarding_rule(
            router_id,
            inside_addr=vm_mgmt_ips[key][0],
            inside_port=str(inside_port),
            outside_port=public_port,
            protocol=protocol)
        assert rc == 0, "Port forwarding rule create failed for VM {}: {}".format(
            vm_name, msg)
        LOG.info("Port forwarding rule {} created for VM: {}".format(
            pf_id, vm_name))

        vm_pf_info = {
            'pf_id': pf_id,
            'private_port': str(inside_port),
            'public_port': public_port
        }
        vm_pfs[key] = vm_pf_info

    return vm_pfs
def fetch_cert_file(ssh_client, search_path=None):
    """
    Fetch a PEM certificate file from the build server and scp it onto the
    active controller.

    Args:
        ssh_client: ssh connection to the active controller
        search_path (str|None): path on the build server to search for
            '*.pem' files; a default lab-config path is used when None

    Returns (tuple): (0, <local path of fetched file>) on success,
        (non-zero, <error message>) otherwise.
    """
    save_cert_to = os.path.dirname(SecurityPath.ALT_CERT_PATH)
    code, output = ssh_client.exec_cmd('mkdir -p {}'.format(save_cert_to),
                                       fail_ok=True)
    if code != 0:
        # Bug fix: the original format string ended with 'error:' and no
        # placeholder, so the error output was silently dropped.
        msg = 'failed to create path for certificate files:{}, error:{}'.format(
            save_cert_to, output)
        LOG.warn(msg)
        return code, msg

    from_server = build_server.DEFAULT_BUILD_SERVER['ip']
    prompt = r'\[{}@.* \~\]\$'.format(TestFileServer.get_user())
    ssh_to_server = SSHFromSSH(ssh_client, from_server,
                               TestFileServer.get_user(),
                               TestFileServer.get_password(),
                               initial_prompt=prompt)
    ssh_to_server.connect(retry=5)

    if search_path is None:
        search_path = os.path.join(BuildServerPath.DEFAULT_HOST_BUILD_PATH,
                                   BuildServerPath.LAB_CONF_DIR_PREV)
    search_cmd = "\\find {} -maxdepth 5 -type f -name '*.pem'".format(
        search_path)
    code, output = ssh_to_server.exec_cmd(search_cmd, fail_ok=True)

    lab_name = ProjVar.get_var('lab')['name']
    LOG.info('Get the PEM for current lab ({}) first'.format(lab_name))
    if code == 0 and output:
        # Prefer a cert whose parent directory matches the current lab name;
        # fall back to the first hit otherwise (for/else).
        for file in output.splitlines():
            exiting_lab_name = os.path.basename(os.path.dirname(file))
            if exiting_lab_name in lab_name or lab_name in exiting_lab_name:
                certificate_file = file
                break
        else:
            certificate_file = output.splitlines()[0]
    else:
        msg = 'failed to fetch cert-file from build server, tried path:{}, server:{}'.format(
            search_path, from_server)
        LOG.warn(msg)
        return -1, msg

    LOG.info(
        'found cert-file on build server, trying to scp to current active controller\ncert-file:{}'
        .format(certificate_file))

    scp_cmd = \
        'scp -oStrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null {} ' \
        '{}@{}:{}'.format(
            certificate_file, HostLinuxUser.get_user(),
            lab_info.get_lab_floating_ip(), save_cert_to)

    ssh_to_server.send(scp_cmd)
    timeout = 60
    output_index = ssh_to_server.expect(
        [ssh_to_server.prompt, Prompt.PASSWORD_PROMPT], timeout=timeout)
    # NOTE(review): expect() above is given only two patterns, so index 2 can
    # never be returned here — the host-key yes/no prompt is probably missing
    # from the pattern list. Preserved as-is; TODO confirm.
    if output_index == 2:
        ssh_to_server.send('yes')
        output_index = ssh_to_server.expect(
            [ssh_to_server.prompt, Prompt.PASSWORD_PROMPT], timeout=timeout)
    if output_index == 1:
        ssh_to_server.send(HostLinuxUser.get_password())
        output_index = ssh_to_server.expect(timeout=timeout)
    assert output_index == 0, "Failed to scp files"

    exit_code = ssh_to_server.get_exit_code()
    assert 0 == exit_code, "scp not fully succeeded"

    ssh_to_server.close()

    # Keep the fetched file under a '.bk' name so later steps can copy it.
    copied_cert_file = os.path.join(save_cert_to,
                                    os.path.basename(certificate_file))
    ssh_client.exec_cmd('ls -l {}; mv {} {}.bk'.format(copied_cert_file,
                                                       copied_cert_file,
                                                       copied_cert_file))

    return 0, copied_cert_file + '.bk'