def verify_configuration_changes(expected=True, connection=None):
    global file_changes
    error_messages = []
    for service, info in file_changes.items():
        for file_path, changes in info.items():
            backup_file = os.path.join(conf_backup_dir,
                                       os.path.basename(file_path))
            rc, output = connection.exec_sudo_cmd(
                'diff -b ' + file_path + ' ' + backup_file)
            # diff exit codes: 0 = files identical, 1 = files differ,
            # >1 = trouble (e.g. missing file)
            if rc == 1:
                error_message = check_changes(changes, output,
                                              expected=expected)
                if error_message:
                    LOG.error('Failed, ' + error_message)
                    error_messages.append(error_message)
            elif rc == 0:
                error_message = \
                    'Failed, no change for file:{}, rc:{}, output:{}'.format(
                        file_path, rc, output)
                LOG.error(error_message)
                error_messages.append(error_message)
            else:
                error_message = 'Failed, rc:{}, output:{}'.format(rc, output)
                error_messages.append(error_message)
                LOG.error(error_message)

    assert len(error_messages) == 0, \
        'Failed with errors:' + '\n'.join(error_messages)

    return True

def _force_unreserve_vlm_console(barcode):
    action = VlmAction.VLM_FORCE_UNRESERVE
    cmd = '{} {} -L {} -P {} -t {}'.format(VLM, action,
                                           TestFileServer.get_user(),
                                           TestFileServer.VLM_PASSWORD,
                                           barcode)

    attr_dict = _get_attr_dict_for_vlm_console(barcode=barcode, attr='all')
    LOG.info(attr_dict)
    reserved_by = attr_dict['Reserved By']
    reserve_note = attr_dict['Reserve Note']
    if not reserved_by:
        msg = "Target {} is not reserved. Do nothing".format(barcode)
        LOG.info(msg)
        return -1, msg
    elif reserved_by == local_host.get_user() or not reserve_note:
        LOG.info("Force unreserving target: {}".format(barcode))
        local_client().exec_cmd(cmd)
        reserved = _vlm_getattr(barcode, 'date')[1]
        if reserved:
            msg = "Failed to force unreserve target!"
            LOG.error(msg)
            return 1, msg
        else:
            msg = "Barcode {} was successfully unreserved".format(barcode)
            LOG.info(msg)
            return 0, msg
    else:
        msg = "Did not unreserve {} as it has a reservation note by {}: " \
              "{}".format(barcode, reserved_by, reserve_note)
        LOG.error(msg)
        return 2, msg

def is_controller_swacted(prev_active, prev_standby,
                          swact_start_timeout=MTCTimeout.KILL_PROCESS_SWACT_NOT_START,
                          swact_complete_timeout=MTCTimeout.KILL_PROCESS_SWACT_COMPLETE,
                          con_ssh=None):
    """
    Wait and check if the active controller of the system was swacted within
    the given time period

    Args:
        prev_active: previous active controller
        prev_standby: previous standby controller
        swact_start_timeout: check within this time frame if the swacting
            started
        swact_complete_timeout: check if the swacting (if any) completed
            within this time period
        con_ssh: ssh connection/client to the current active controller

    Returns (bool): True if the controllers swacted, False otherwise

    """
    LOG.info('Check if the controllers started to swact within:{}, and '
             'completed swacting within:{}'.format(swact_start_timeout,
                                                   swact_complete_timeout))

    code = -1
    host = prev_active
    for retry in range(1, 5):
        LOG.info('retry{:02d}: checking if swacting triggered, '
                 'prev-active-controller={}'.format(retry, prev_active))
        code = 0
        try:
            code, msg = host_helper.wait_for_swact_complete(
                host, con_ssh=con_ssh, fail_ok=True,
                swact_start_timeout=swact_start_timeout,
                swact_complete_timeout=swact_complete_timeout)

            if 0 == code:
                LOG.info('OK, host swacted, prev-active:{}, prev-standby:{}, '
                         'code:{}, message:{}'.format(
                             prev_active, prev_standby, code, msg))
                return True

            active, standby = system_helper.get_active_standby_controllers()
            if active == prev_standby and standby == prev_active:
                LOG.info('swacted?! prev-active:{} prev-standby:{}, '
                         'cur active:{}, cur standby:{}'.format(
                             prev_active, prev_standby, active, standby))
                return True
            break
        except Exception as e:
            LOG.warn('erred, indicating the system is in an unstable state, '
                     'probably because swacting is still in progress. '
                     'previous active-controller:{}, previous '
                     'standby-controller:{}\nerror message:{}'.format(
                         prev_active, prev_standby, e))
            if retry >= 4:
                LOG.error('Fail the test after retrying {} times; the system '
                          'remains in an unstable state, probably because '
                          'swacting is still in progress. previous '
                          'active-controller:{}, previous '
                          'standby-controller:{}\nerror message:{}'.format(
                              retry, prev_active, prev_standby, e))
                raise
            time.sleep(10)

    return 0 == code

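# Usage sketch (hypothetical flow): after killing a critical monitored
# process on the active controller, verify that a swact actually happened.
# active, standby = system_helper.get_active_standby_controllers()
# ... kill the monitored process on the active controller ...
# assert is_controller_swacted(active, standby, con_ssh=con_ssh)
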
def clear(self):
    if self.is_launched:
        LOG.error("Video recording is still running")
        return
    if not os.path.isfile(self.file_path):
        LOG.warning("%s is already absent", self.file_path)
        return
    os.remove(self.file_path)

def __exit__(self, *args):
    LOG.debug("Releasing lock")
    try:
        return self._lock.release()
    except RuntimeError:
        LOG.error("Lock did not release: it was already unlocked")
        raise
    except Exception:
        LOG.error("An unexpected error was caught while unlocking the lock")
        raise

def __unreserve(scope, lab=None):
    hosts_to_unreserve = VlmHostsReserved._get_hosts_reserved(scope)
    if hosts_to_unreserve:
        LOG.fixture_step("({}) Unreserve hosts: {}".format(
            scope, hosts_to_unreserve))
        try:
            vlm_helper.unreserve_hosts(hosts=hosts_to_unreserve, lab=lab)
        except VLMError as e:
            LOG.error(e)

    VlmHostsReserved._reset(scope)

def test_kernel_module_signatures():
    """
    Test kernel modules are properly signed on all stx hosts.

    Steps on each host:
        - 'cat /proc/sys/kernel/tainted', ensure value is 4096. If not, do
          following steps:
            - 'grep --color=never -i "module verification failed"
              /var/log/kern.log' to find out failed modules
            - 'modinfo <failed_module> | grep --color=never -E
              "sig|filename"' to display signing info for each module

    """
    hosts = system_helper.get_hosts()
    failed_hosts = {}

    for host in hosts:
        with host_helper.ssh_to_host(host) as host_ssh:
            LOG.tc_step(
                "Check for unsigned kernel modules on {}".format(host))
            output = host_ssh.exec_cmd('cat /proc/sys/kernel/tainted',
                                       fail_ok=False)[1]
            output_binary = '{0:b}'.format(int(output))
            unsigned_module_bit = '0'
            # 14th bit (from the right) flags an unsigned module
            if len(output_binary) >= 14:
                unsigned_module_bit = output_binary[-14]

            if unsigned_module_bit != '0':
                LOG.error(
                    "Kernel module verification(s) failed on {}. Collecting "
                    "more info".format(host))

                LOG.tc_step(
                    "Check kern.log for modules with failed verification")
                failed_modules = []
                err_out = host_ssh.exec_cmd(
                    'grep --color=never -i "module verification failed" '
                    '/var/log/kern.log')[1]
                for line in err_out.splitlines():
                    module = re.findall(
                        r'\] (.*): module verification failed',
                        line)[0].strip()
                    if module not in failed_modules:
                        failed_modules.append(module)

                failed_hosts[host] = failed_modules
                LOG.tc_step("Display signing info for {} failed kernel "
                            "modules: {}".format(host, failed_modules))
                for module in failed_modules:
                    host_ssh.exec_cmd('modinfo {} | grep --color=never -E '
                                      '"sig|filename"'.format(module))

    assert not failed_hosts, "Kernel module signature verification " \
                             "failed on: {}".format(failed_hosts)

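# Illustration of the taint-bit check above (not part of the test): a taint
# value of 4096 sets only bit 12 (out-of-tree module), leaving the
# unsigned-module bit clear, while 8192 sets bit 13. This helper is a
# hypothetical standalone sketch of the same check.
def _unsigned_module_taint_set(tainted_value):
    """Return True if the module-verification-failed taint bit is set."""
    bits = '{0:b}'.format(int(tainted_value))
    return len(bits) >= 14 and bits[-14] != '0'
# _unsigned_module_taint_set(4096)  -> False (out-of-tree modules only)
# _unsigned_module_taint_set(8192)  -> True  (module verification failed)
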
def verify_cert_file(ssh_client, certificate_file, expect_existing=False):
    returned_code, result = ssh_client.exec_cmd(
        'test -e {}'.format(certificate_file), fail_ok=True)

    if returned_code != 0:
        message = 'certificate file does not exist, file:{}'.format(
            certificate_file)
        LOG.info(message)
        assert not expect_existing, message
    else:
        message = 'Https certificate file still exists, it should have ' \
                  'been removed, file:{}, output:{}'.format(certificate_file,
                                                            result)
        if not expect_existing:
            LOG.error(message)
        assert expect_existing, message

def wait_for_thread_end(self, timeout=3600, fail_ok=False):
    """
    Waits for thread (self) to finish executing.
    All tests should wait for threads to end before proceeding to teardown,
    unless the thread is expected to continue, e.g. LOG.tc_step will not
    work during setup or teardown.
    Raises an error if the thread is still running after timeout.
    Args:
        timeout (int): how long to wait for the thread to finish.
            self.timeout is preferred.
        fail_ok (bool): fail_ok=False will raise an error if the wait times
            out, or fail the test if the thread exited due to an error

    Returns (tuple): (True, err) if the thread is no longer running,
        (False, err)/exception otherwise

    """
    if not self.is_alive():
        LOG.info("{} has already finished".format(self.name))
        if self._err:
            if not fail_ok:
                raise ThreadingError("Error in thread: {}".format(
                    self._err))
            LOG.error("Error found in thread call {}".format(self._err))
        return True, self._err

    if not timeout:
        timeout = self.timeout

    LOG.info("Wait for {} to finish".format(self.name))
    self.join(timeout)

    if not fail_ok:
        assert not self._err, "{} ran into an error: {}".format(
            self.name, self._err)

    if not self.is_alive():
        LOG.info("{} has finished".format(self.name))
    else:
        # Thread didn't finish before timeout
        LOG.error("{} did not finish within timeout".format(self.name))
        if fail_ok:
            return False, self._err
        raise ThreadingError(
            TIMEOUT_ERR.format(self.func, self.args, self.kwargs))

    return True, self._err

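# Usage sketch (hypothetical; assumes this method lives on an MThread-style
# wrapper with a start_thread() method, as self.func/self.args above imply):
# thread = MThread(vm_helper.boot_vm, name='vm-booter')
# thread.start_thread()
# ...  # do other test steps while the thread runs
# thread.wait_for_thread_end(timeout=900, fail_ok=False)
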
def is_vm_filesystem_rw(vm_id, rootfs='vda', vm_image_name=None):
    """
    Check whether the given filesystem(s) are mounted read-write in the VM
    Args:
        vm_id: id of the VM to check
        rootfs (str|list): device name(s) to check, e.g. 'vda'
        vm_image_name (None|str): guest image name; defaults to the default
            guest image

    Returns (bool): True if every given filesystem is mounted rw

    """
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id, timeout=240)

    if vm_image_name is None:
        vm_image_name = GuestImages.DEFAULT['guest']

    router_host = dhcp_host = None
    try:
        LOG.info(
            "---------Collecting router and dhcp agent host info-----------")
        router_host = network_helper.get_router_host()
        mgmt_net = network_helper.get_mgmt_net_id()
        dhcp_host = network_helper.get_network_agents(field='Host',
                                                      network=mgmt_net)

        if isinstance(rootfs, str):
            rootfs = [rootfs]

        for fs in rootfs:
            vm_helper.mount_attached_volume(vm_id=vm_id, rootfs=fs)

        with vm_helper.ssh_to_vm_from_natbox(
                vm_id, vm_image_name=vm_image_name,
                retry_timeout=300) as vm_ssh:
            for fs in rootfs:
                cmd = "mount | grep {} | grep rw | wc -l".format(fs)
                cmd_output = vm_ssh.exec_sudo_cmd(cmd)[1]
                if cmd_output != '1':
                    LOG.info("Filesystem /dev/{} is not rw for VM: "
                             "{}".format(fs, vm_id))
                    return False
            return True
    except exceptions.SSHRetryTimeout:
        LOG.error("Failed to ssh, collecting vm console log.")
        vm_helper.get_console_logs(vm_ids=vm_id)
        LOG.info("Router host: {}. dhcp agent host: {}".format(
            router_host, dhcp_host))
        raise

def get_pmon_process_info(name, host, conf_file=None, con_ssh=None):
    """
    Get process info from its PMON config file
    Args:
        name (str): name of the PMON process
        host (str): host on which the PMON process is running
        con_ssh: connection to the active controller
        conf_file (str): configuration file for the PMON process

    Returns (dict): settings of the process

    """
    LOG.info('Get PMON process information for {}'.format(name))

    file_name = conf_file if conf_file else '{}.conf'.format(name)

    cmd = 'cat {}'.format(os.path.join(PMON_PROC_CONF_DIR, file_name))

    with host_helper.ssh_to_host(host, con_ssh=con_ssh) as con0_ssh:
        code, output = con0_ssh.exec_sudo_cmd(cmd)

    if 0 != code or not output.strip():
        LOG.error(
            'Failed to read config file:{}/{} for PMON process:{} on '
            'host:{}, code:{}, message:{}'.format(
                PMON_PROC_CONF_DIR, file_name, name, host, code, output))
        return {}

    conf_parser = configparser.ConfigParser()
    conf_parser.read_file(StringIO(output))

    settings = {}
    if 'process' in conf_parser.sections():
        settings = {
            k.strip(): v.split(';')[0].strip()
            for k, v in conf_parser.items('process')
        }

    settings['interval'] = int(settings.get('interval', 5))
    settings['debounce'] = int(settings.get('debounce', 20))
    LOG.debug('process settings:{}'.format(settings))

    return settings

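# Standalone sketch of the parsing above (the sample field values are
# hypothetical; real pmon conf files may contain more fields). configparser
# does not strip inline ';' comments by default, which is why the dict
# comprehension above uses split(';')[0].
def _demo_pmon_conf_parsing():
    sample = ('[process]\n'
              'process  = sm   ; process name\n'
              'interval = 1    ; seconds between restart attempts\n'
              'debounce = 20   ; seconds to wait before declaring stable\n')
    parser = configparser.ConfigParser()
    parser.read_file(StringIO(sample))
    return {k.strip(): v.split(';')[0].strip()
            for k, v in parser.items('process')}
# _demo_pmon_conf_parsing()
# -> {'process': 'sm', 'interval': '1', 'debounce': '20'}
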
def create_flavors_and_images(request):
    # TODO: need to check with add_default_specs set to True on baremetal
    LOG.fixture_step("Creating flavor and image")
    fl_id = nova_helper.create_flavor(name=request.param['flavor_name'],
                                      vcpus=request.param['flavor_vcpus'],
                                      ram=request.param['flavor_ram'],
                                      root_disk=request.param['flavor_disk'],
                                      properties=request.param['properties'],
                                      is_public=True,
                                      add_default_specs=False,
                                      cleanup="module")[1]
    LOG.info("Using image file: {}".format(request.param['image_file']))
    im_id = glance_helper.create_image(
        name=request.param['image_name'],
        source_image_file=request.param['image_file'],
        disk_format=request.param['disk_format'],
        cleanup="module")[1]
    return {"flavor": fl_id, "image": im_id}

def get_columns(table_, headers):
    if not isinstance(headers, (list, set)):
        headers = [headers]

    all_headers = table_['headers']
    if not set(headers).issubset(all_headers):
        LOG.error('Unknown column(s): {}'.format(
            list(set(headers) - set(all_headers))))
        return []

    selected_column_positions = [
        i for i, header in enumerate(all_headers) if header in headers
    ]

    results = []
    for row in table_['values']:
        results.append([row[i] for i in selected_column_positions])

    return results

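# Usage sketch for get_columns (hypothetical table data). Note that the
# selected columns come back in the table's own header order, not in the
# order they were requested.
# table_ = {'headers': ['uuid', 'name', 'status'],
#           'values': [['u1', 'compute-0', 'enabled'],
#                      ['u2', 'compute-1', 'degraded']]}
# get_columns(table_, ['status', 'name'])
# -> [['compute-0', 'enabled'], ['compute-1', 'degraded']]
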
def _make_sure_user_exist(user_name, sudoer=False, secondary_group=False,
                          password_expiry_days=90,
                          password_expiry_warn_days=2,
                          delete_if_existing=True):
    """
    Make sure an LDAP user with the specified name exists; create one if not.
    Args:
        user_name (str): the user name of the LDAP user
        sudoer (bool): whether to create the user as a sudoer
        secondary_group (bool): whether the user belongs to a secondary group
        password_expiry_days (int):
        password_expiry_warn_days (int):
        delete_if_existing (bool): delete the existing user if True,
            otherwise keep the user

    Returns (tuple):
        bool - True if successful, False otherwise
        user_info (dict) - user settings

    """
    code, user_info = theLdapUserManager.create_ldap_user(
        user_name,
        check_if_existing=True,
        delete_if_existing=delete_if_existing,
        sudoer=sudoer,
        secondary_group=secondary_group,
        password_expiry_days=password_expiry_days,
        password_expiry_warn_days=password_expiry_warn_days)

    if code > 0:
        LOG.error(
            'Failed to make sure the LDAP user {} exists, code {}'.format(
                user_name, code))
        return False, user_info

    return True, user_info

def get_if_info(host):
    if_info = {}
    try:
        if_table = host_helper.get_host_interfaces_table(host)
        index_name = if_table['headers'].index('name')
        index_type = if_table['headers'].index('type')
        index_uses_ifs = if_table['headers'].index('uses i/f')
        index_used_by_ifs = if_table['headers'].index('used by i/f')
        index_class = if_table['headers'].index('class')
        index_attributes = if_table['headers'].index('attributes')

        for value in if_table['values']:
            name = value[index_name]
            if_type = value[index_type]
            uses_ifs = eval(value[index_uses_ifs])
            used_by_ifs = eval(value[index_used_by_ifs])
            if_class = value[index_class]
            network_types = [if_class]
            attributes = value[index_attributes].split(',')

            if name in if_info:
                LOG.warn('NIC {} already appeared! Duplicate of '
                         'NIC:"{}"'.format(name, if_info[name]))
            else:
                if_info[name] = {
                    'mtu': int(re.split('MTU=', attributes[0])[1]),
                    'uses_ifs': uses_ifs,
                    'used_by_ifs': used_by_ifs,
                    'type': if_type,
                    'network_type': network_types
                }
    except Exception as e:
        # IndexError and any other lookup/parsing failure are handled the
        # same way: fail the test with the error message
        LOG.error('Failed to get oam-interface name/type, error '
                  'message:{}'.format(e))
        assert False, 'Failed to get oam-interface name/type, error ' \
                      'message:{}'.format(e)

    assert if_info, 'Cannot get interface information'

    return if_info

def validate_pipelines(data):
    """
    This function validates that the pipelines contain the correct data
    types, as defined by the API doc.
    """
    # We should probably report which field is of the wrong type on failure.
    # The json objects don't seem to have a 'meters' attribute anymore:
    # type(data["meters"]) is not list or \
    if type(data["name"]) is not str \
            or type(data["location"]) is not str \
            or type(data["compress"]) is not bool \
            or type(data["enabled"]) is not bool \
            or type(data["backup_count"]) is not int \
            or type(data["max_bytes"]) is not int:
        LOG.error("Received wrong data type")
        LOG.error("Pipelines data type test: FAILED")
        return 1
    return 0

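# Hypothetical example payload for validate_pipelines: the field values are
# made up, but the keys and types match what the check above requires.
def _demo_validate_pipelines():
    sample = {
        'name': 'csv_file',
        'location': '/opt/backups/pipelines/csv_file',  # path assumed
        'compress': True,
        'enabled': True,
        'backup_count': 5,
        'max_bytes': 10000000,
    }
    return validate_pipelines(sample)  # -> 0 (all types correct)
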
def _power_off(barcode_, power_off_event_, timeout_, output_queue):
    client = local_client()
    if power_off_event_.wait(timeout=timeout_):
        rc, output = _vlm_exec_cmd(VlmAction.VLM_TURNOFF, barcode_,
                                   reserve=False, client=client, count=2)
        rtn = (rc, output)
    else:
        err_msg = "Timed out waiting for power_off_event to be set"
        LOG.error(err_msg)
        rtn = (2, err_msg)

    if 0 == rtn[0]:
        LOG.info("{} powered off successfully".format(barcode_))
    else:
        LOG.error("Failed to power off {}.".format(barcode_))

    output_queue.put({barcode_: rtn})

def wait_for_event(self, timeout=3600, fail_ok=False):
    """
    Waits for this Events object to have its flag set to True
    Args:
        timeout: how long to wait for the event flag, in seconds
        fail_ok: return False instead of raising on timeout

    Returns (bool): True if the event flag was set, False/exception otherwise

    """
    if self.is_set():
        LOG.info("Event \"{}\" has already been set".format(self.message))
        return True

    if not timeout:
        LOG.warning(
            "No timeout was specified. This can lead to waiting indefinitely")

    if not is_multi_thread_active() and not timeout:
        LOG.error(
            "There are no other running threads that can set this event to "
            "true. This would wait indefinitely")
        raise ThreadingError(INFINITE_WAIT_EVENT_EXPECTED)

    LOG.info("Waiting for event \"{}\" flag to be set to true".format(
        self.message))
    if not threading.Event.wait(self, timeout):
        if fail_ok:
            LOG.error(
                "Timed out waiting for event \"{}\" flag to be set to "
                "True".format(self.message))
            return False
        raise ThreadingError(EVENT_TIMEOUT)

    if self.msg_lock.acquire(False):
        # only one message should appear once the event is set
        LOG.info("Threads continuing")
        time.sleep(1)
        self.msg_lock.release()

    return True

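# Usage sketch (hypothetical; assumes this method lives on an Events
# subclass of threading.Event with a 'message' attribute, as the code
# above implies):
# event = Events("VM reboot finished")
# ... a worker thread calls event.set() when the reboot completes ...
# event.wait_for_event(timeout=300, fail_ok=True)
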
def generate_hash(ssh_con):
    LOG.info('Generating hash code into a file')

    string_to_hash = 'hello'
    output_file = 'hashed_output.data'
    hash_command = 'tss2_hash -ic "' + string_to_hash + '" -oh ' + output_file

    code, output = ssh_con.exec_cmd(hash_command)
    msg = 'TPM command returned code:' + str(code) + ', output:' + \
          str(output) + ', command:' + hash_command
    if 0 == code:
        LOG.info(msg)
        # check the output file
        assert ssh_con.file_exists(output_file), \
            'Failed to generate hash-output file:' + output_file
        LOG.info('OK, the hashed output file was successfully generated.')
    else:
        LOG.error('Failed: ' + msg)
        assert False, msg

    return code, output

def _vlm_exec_cmd(action, barcode, reserve=True, fail_ok=False, client=None,
                  count=1):
    if action not in VLM_CMDS:
        msg = '"{}" is an invalid action.'.format(action)
        msg += " Valid actions: {}".format(str(VLM_CMDS))
        raise ValueError(msg)

    if reserve:
        if int(barcode) not in _vlm_findmine():
            # reserve barcode
            if _reserve_vlm_console(barcode)[0] > 0:
                msg = "Failed to reserve target {}".format(barcode)
                if fail_ok:
                    LOG.info(msg)
                    return 1, msg
                else:
                    raise exceptions.VLMError(msg)

    if not client:
        client = local_client()

    output = None
    for i in range(count):
        output = client.exec_cmd('{} {} -t {}'.format(VLM, action,
                                                      barcode))[1]
        if i < count - 1:
            # pause between repeated attempts (not after the last one)
            time.sleep(1)

    # the vlm CLI prints "1" on success
    if output != "1":
        msg = 'Failed to execute "{}" on target {}. Output: {}'.format(
            action, barcode, output)
        LOG.error(msg)
        return 1, msg

    return 0, None

def _reserve_vlm_console(barcode, note=None):
    cmd = '{} {} -t {}'.format(VLM, VlmAction.VLM_RESERVE, barcode)
    if note:
        cmd += ' -n "{}"'.format(note)

    reserved_barcodes = local_client().exec_cmd(cmd)[1]
    if str(barcode) not in reserved_barcodes or "Error" in reserved_barcodes:
        # check if the node is already reserved by this user
        attr_dict = _get_attr_dict_for_vlm_console(barcode=barcode,
                                                   attr='all')
        reserved_by = attr_dict['Reserved By']
        local_user = local_host.get_user()
        if reserved_by != local_user:
            msg = "Target {} is not reserved by {}".format(barcode,
                                                           local_user)
            LOG.error(msg)
            return 1, msg
        else:
            msg = "Barcode {} already reserved".format(barcode)
            LOG.info(msg)
            return -1, msg
    else:
        msg = "Barcode {} reserved".format(barcode)
        LOG.info(msg)
        return 0, msg

def check_warn_msg(self, msg_import='', existing_profiles=None):
    err_msg = re.compile(
        r'error: Storage profile can only be imported into a system with '
        r'Ceph backend')
    warn_msg = re.compile(
        r'warning: Local Storage profile (\w+) already exists and is not '
        r'imported.')

    if 'warn' in msg_import or 'error' in msg_import:
        for line in msg_import.splitlines():
            match = warn_msg.match(line)
            if match:
                failed_profile_name = match.group(1)
                if not self.verify_storprofile_existing(
                        file_name=failed_profile_name,
                        existing_profiles=existing_profiles):
                    LOG.error('storprofile {} does not exist but was still '
                              'rejected from import'.format(
                                  failed_profile_name))
                    return 1
                else:
                    LOG.info('OK, {} already exists, hence it failed to '
                             'import'.format(failed_profile_name))
            else:
                match = err_msg.match(line)
                if match:
                    if self.is_storage_node():
                        LOG.error('storprofile was rejected even though '
                                  'this is a storage lab')
                        return 1
                    else:
                        LOG.info('OK, storage-profiles were rejected '
                                 'because this is a non-storage lab')
                        return 0

    return 0

def collect_software_logs(con_ssh=None, lab_ip=None):
    if not con_ssh:
        con_ssh = ControllerClient.get_active_controller()
    LOG.info("Collecting all hosts logs...")
    con_ssh.exec_cmd('source /etc/platform/openrc', get_exit_code=False)
    con_ssh.send('collect all')

    expect_list = ['.*password for .*:', 'collecting data.', con_ssh.prompt]
    index_1 = con_ssh.expect(expect_list, timeout=20)
    if index_1 == 2:
        LOG.error("Something is wrong with collect all. Check ssh console "
                  "log for detail.")
        return
    elif index_1 == 0:
        con_ssh.send(con_ssh.password)
        con_ssh.expect('collecting data')

    index_2 = con_ssh.expect(['/scratch/ALL_NODES.*', con_ssh.prompt],
                             timeout=1200)
    if index_2 == 0:
        output = con_ssh.cmd_output
        con_ssh.expect()
        logpath = re.findall('.*(/scratch/ALL_NODES_.*.tar).*', output)[0]
        LOG.info(
            "\n################### TiS server log path: {}".format(logpath))
    else:
        LOG.error("Collecting logs failed. No ALL_NODES logs found.")
        return

    if lab_ip is None:
        lab_ip = ProjVar.get_var('LAB')['floating ip']
    dest_path = ProjVar.get_var('LOG_DIR')
    try:
        LOG.info("Copying log file from lab {} to local {}".format(
            lab_ip, dest_path))
        scp_to_local(source_path=logpath, source_server=lab_ip,
                     dest_path=dest_path, timeout=300)
        LOG.info("{} is successfully copied to local directory: {}".format(
            logpath, dest_path))
    except Exception as e:
        LOG.warning("Failed to copy log file to localhost.")
        LOG.error(e, exc_info=True)

def are_monitor_pods_running(system_type, con_ssh=None,
                             auth_info=Tenant.get('admin_platform'),
                             matching_dict=PODS_LABEL_MATCHING_DICT):
    # Get all the pods for stx-monitor
    monitor_pods = kube_helper.get_pods(field=('NAME', 'NODE'),
                                        namespace="monitor", strict=False,
                                        con_ssh=con_ssh)
    LOG.info("Running pods for stx-monitor: %s" % monitor_pods)

    # Make a dictionary of which hosts are assigned to which stx-monitor
    # labels. e.g.
    #
    # {
    #     'daemon_set': ['controller-0', 'controller-1'],
    #     'elastic-client': ['controller-0', 'controller-1'],
    #     'elastic-controller': ['controller-0', 'controller-1'],
    #     ...
    # }
    #
    host_list = system_helper.get_host_list_data(
        columns=["hostname", "personality"], con_ssh=con_ssh,
        auth_info=auth_info)

    labels_to_host_dict = {}
    for host in (_host for _host in host_list if _host.get('hostname')):
        hostname = host.get('hostname')
        personality = host.get('personality')

        if personality and personality in str(MONITORING_HOSTS):
            # Add the daemon set custom label; this is a special label only
            # for this labels_to_host_dict
            hosts_for_label = labels_to_host_dict.get('daemon_set', [])
            hosts_for_label.append(hostname)
            labels_to_host_dict.update({'daemon_set': hosts_for_label})

        # Add the host's assigned labels
        labels = host_helper.get_host_labels_info(hostname, con_ssh=con_ssh,
                                                  auth_info=auth_info)
        for label_name, label_status in labels.items():
            if label_status == 'enabled':
                hosts_for_label = labels_to_host_dict.get(label_name, [])
                hosts_for_label.append(hostname)
                labels_to_host_dict.update({label_name: hosts_for_label})

    LOG.info('labels_running_hosts:{}'.format(labels_to_host_dict))

    # For each label currently assigned on the system, get the matching
    # POD names from matching_dict
    for label, hosts_for_label in labels_to_host_dict.items():
        LOG.debug('----------')
        LOG.debug('label:{} hosts:{}'.format(label, hosts_for_label))

        pod_details = matching_dict.get(label)
        if pod_details is None:
            # Label has no matching pods to verify; skip it
            continue

        # Get the list of pod names we need to search for; a label can have
        # more than one pod.
        for partial_pod_name, running_type in pod_details.items():
            LOG.info('-----')
            LOG.info('partial_pod_name:{} running_type:{}'.format(
                partial_pod_name, running_type))

            inst_found_count = 0
            for host in hosts_for_label:
                if is_pod_running_on_host(monitor_pods, host,
                                          partial_pod_name):
                    # The pod was found; increment the number of instances
                    # running on all hosts for this pod
                    inst_found_count += 1

            # Special case for AIO-DX and mon-elasticsearch-master-x
            if partial_pod_name == 'mon-elasticsearch-master-' and \
                    system_type == SysType.AIO_DX and inst_found_count == 1:
                LOG.info('Pod {} only needs to run one instance for '
                         'AIO-DX'.format(partial_pod_name))
            # Some pods only run one instance even if the label is on
            # multiple hosts
            elif inst_found_count == 1 and \
                    running_type == POD_RUNNING_ONE_INSTANCE:
                LOG.info('Pod {} only needs to run one instance'.format(
                    partial_pod_name))
            # Pod did not match the number of hosts it's supposed to run on
            elif inst_found_count != len(hosts_for_label):
                LOG.error('Pod check for {} failed, missing '
                          'instances'.format(partial_pod_name))
                return False

            LOG.info('Check for pod {} SUCCESS'.format(partial_pod_name))

    return True

def test_dpdk_live_migrate_latency(ovs_dpdk_1_core, launch_instances,
                                   no_simplex, no_duplex):
    con_ssh = ssh.ControllerClient.get_active_controller()
    prev_st = None
    prev_et = None
    res = list()

    for i in range(20):
        LOG.tc_step("Start of iter {}".format(i))

        vm_host = vm_helper.get_vm_host(launch_instances)

        cmd_get_pod_name = ("kubectl get pods -n openstack | "
                            "grep --color=never nova-compute-{} | "
                            "awk '{{print$1}}'".format(vm_host))
        pod_name = con_ssh.exec_cmd(cmd=cmd_get_pod_name)[1].strip()

        cmd_get_start_date = (
            "kubectl -n openstack logs {} -c nova-compute | "
            "grep --color=never 'instance: {}' | "
            "grep --color=never 'pre_live_migration on destination host' | "
            "tail -1 | "
            "awk '{{print $1 \" \" $2}}'".format(pod_name, launch_instances))

        cmd_get_end_date = (
            "kubectl -n openstack logs {} -c nova-compute | "
            "grep --color=never 'instance: {}' | "
            "egrep --color=never "
            "'Migrating instance to [a-zA-Z]+-[0-9] finished successfully' | "
            "tail -1 | "
            "awk '{{print $1 \" \" $2}}'".format(pod_name, launch_instances))

        vm_helper.live_migrate_vm(vm_id=launch_instances)

        st = con_ssh.exec_cmd(cmd=cmd_get_start_date)[1]
        et = con_ssh.exec_cmd(cmd=cmd_get_end_date)[1]
        st_date = datetime.datetime.strptime(st, '%Y-%m-%d %H:%M:%S.%f')
        et_date = datetime.datetime.strptime(et, '%Y-%m-%d %H:%M:%S.%f')

        if i == 0:
            prev_st = st_date
            prev_et = et_date
        elif i > 0:
            if st_date <= prev_st or et_date <= prev_et:
                msg = ("new start time {} is less than or equal to the old "
                       "start time {},\nor new end time {} is less than or "
                       "equal to the old end time {}".format(
                           st_date, prev_st, et_date, prev_et))
                LOG.error(msg)
                raise Exception(msg)
            else:
                prev_st = st_date
                prev_et = et_date

        diff = et_date - st_date
        LOG.info("\nstart time = {}\nend time = {}".format(st, et))
        LOG.info("\ndiff = {}".format(diff))
        res.append(diff)

    def calc_avg(lst):
        rtrn_sum = datetime.timedelta()
        for i in lst:
            LOG.info("Iter {}: {}".format(lst.index(i), i))
            rtrn_sum += i
        return rtrn_sum / len(lst)

    final_res = calc_avg(res)
    LOG.info("Avg time is : {}".format(final_res))

def _delete_resources(resources, scope):
    # global SIMPLEX_RECOVERED
    # if not SIMPLEX_RECOVERED and system_helper.is_simplex():
    #     LOG.fixture_step('{} Ensure simplex host is up before cleaning '
    #                      'up'.format(scope))
    #     host_helper.recover_simplex(fail_ok=True)
    #     SIMPLEX_RECOVERED = True

    def __del_aggregate(aggregate_, **kwargs):
        nova_helper.remove_hosts_from_aggregate(aggregate=aggregate_,
                                                check_first=False, **kwargs)
        return nova_helper.delete_aggregates(names=aggregate_, **kwargs)

    # List resources in proper order if there are dependencies!
    del_list = [
        # resource, del_fun, fun_params, whether to delete all resources
        # together.
        ('port_chain', network_helper.delete_sfc_port_chain,
         {'check_first': True}, False),
        ('flow_classifier', network_helper.delete_flow_classifier,
         {'check_first': True}, False),
        ('vm', vm_helper.delete_vms, {'delete_volumes': False}, True),
        ('vm_with_vol', vm_helper.delete_vms, {'delete_volumes': True},
         True),
        ('vol_snapshot', cinder_helper.delete_volume_snapshots, {}, True),
        ('volume', cinder_helper.delete_volumes, {}, True),
        ('volume_type', cinder_helper.delete_volume_types, {}, True),
        ('volume_qos', cinder_helper.delete_volume_qos, {}, True),
        ('flavor', nova_helper.delete_flavors, {}, True),
        ('image', glance_helper.delete_images, {}, True),
        ('server_group', nova_helper.delete_server_groups, {}, True),
        ('floating_ip', network_helper.delete_floating_ips, {}, True),
        ('trunk', network_helper.delete_trunks, {}, True),
        ('heat_stack', heat_helper.delete_stack, {}, False),
        ('port_pair_group', network_helper.delete_sfc_port_pair_group,
         {'check_first': True}, False),
        ('port_pair', network_helper.delete_sfc_port_pairs,
         {'check_first': True}, True),
        ('port', network_helper.delete_port, {}, False),
        ('router', network_helper.delete_router, {}, False),
        ('subnet', network_helper.delete_subnets, {}, True),
        ('network_qos', network_helper.delete_qos, {}, False),
        ('network', network_helper.delete_network, {}, False),
        ('security_group_rule', network_helper.delete_security_group_rules,
         {}, True),
        ('security_group', network_helper.delete_security_group, {}, False),
        ('aggregate', __del_aggregate, {}, False),
        ('datanetwork', system_helper.delete_data_network, {}, False),
    ]

    err_msgs = []
    for item in del_list:
        resource_type, del_fun, fun_kwargs, del_all = item
        resource_ids = resources.get(resource_type, [])
        if not resource_ids:
            continue

        LOG.fixture_step("({}) Attempt to delete following {}: "
                         "{}".format(scope, resource_type, resource_ids))
        if 'auth_info' not in fun_kwargs:
            fun_kwargs['auth_info'] = Tenant.get('admin')

        if del_all:
            resource_ids = [resource_ids]
        for resource_id in resource_ids:
            try:
                code, msg = del_fun(resource_id, fail_ok=True,
                                    **fun_kwargs)[0:2]
                if code > 0:
                    err_msgs.append(msg)
            except exceptions.TiSError as e:
                err_msgs.append(e.__str__())

    # Attempt all deletions before raising exception.
    if err_msgs:
        LOG.error("ERROR: Failed to delete resource(s). \nDetails: "
                  "{}".format(err_msgs))

def select(self, telnet_conn=None, index=None, pattern=None, tag=None,
           curser_move=1):
    if not self.options:
        try:
            if self.option_identifier:
                self.find_options(
                    telnet_conn, option_identifier=self.option_identifier)
            else:
                self.find_options(telnet_conn)
        except TypeError:
            LOG.error("{} has no options".format(self.name))
            raise

    if index is pattern is tag is None:
        raise ValueError("index, pattern or tag has to be provided to "
                         "determine the option to select")

    option = None
    if index is not None:
        option = self.options[index]
    elif pattern is not None:
        for item in self.options:
            if hasattr(pattern, "search"):
                # compiled regex pattern
                if pattern.search(item.name):
                    option = item
                    break
            else:
                LOG.info("checking pattern option = {} pattern = {}".format(
                    item.name, pattern))
                if pattern in item.name:
                    option = item
                    break

    if not option and tag is not None:
        LOG.info("checking tag option matching tag {}".format(tag))
        for item in self.options:
            if item.tag is not None:
                LOG.info("option tag {}".format(item.tag))
                if tag == item.tag:
                    option = item
                    break

    if not option:
        raise TelnetError("Unable to determine option to select")

    LOG.info("Attempt to select {} option {} index {}".format(
        self.name, option.name, option.index))
    if option.key == "Enter" or option.key == "Return":
        while self.index != option.index:
            LOG.info("Current index = {}".format(self.index))
            if option.index > self.index:
                self.move_down(telnet_conn, curser_move=curser_move)
            else:
                self.move_up(telnet_conn, curser_move=curser_move)

    option.enter(telnet_conn)
    self.index = 0

def check_numa_affinity(self, msg_prefx='', retries=3, retry_interval=20):
    LOG.tc_step('Check PCIPT/SRIOV numa/irq-cpu-affinity/alias on VM after '
                '{}'.format(msg_prefx))

    vm_pci_infos, vm_topology = vm_helper.get_vm_pcis_irqs_from_hypervisor(
        self.vm_id)
    assert vm_pci_infos, "No pci_devices info found"

    # pci_addr_list = vm_pci_infos.pop('pci_addr_list')
    # LOG.debug('after {}: pci addr list for VM:\nVM ID={}\n'
    #           'PCI-ADDR-LIST:{}\n'.format(
    #               msg_prefx, self.vm_id, pci_addr_list))

    # pci_numa_affinity, pci_irq_affinity_mask, pci_alias
    if self.pci_numa_affinity == 'required' and \
            (self.pci_irq_affinity_mask is not None or
             self.pci_alias is not None):
        numa_nodes_for_pcis = sorted(
            list(set([v['node'] for v in vm_pci_infos.values()])))
        vm_numa_nodes = sorted(
            [top_for_numa['node'] for top_for_numa in vm_topology])
        if len(numa_nodes_for_pcis) > 1:
            LOG.warn('after {}: PCIs on multiple NUMA nodes: {}'.format(
                msg_prefx, numa_nodes_for_pcis))

        assert set(numa_nodes_for_pcis) <= set(vm_numa_nodes), \
            'after {}: 1st Numa Nodes for PCIs differ from those of CPU, ' \
            'PCIs:{}, CPUs:{}'.format(msg_prefx, numa_nodes_for_pcis,
                                      vm_numa_nodes)
        LOG.debug('OK, after {}: numa node for PCI is the same as numa node '
                  'for CPU'.format(msg_prefx))

    # 'pci-passthrough', 'pci-sriov'
    if self.vif_model == 'pci-passthrough':
        assert 'PF' in [v['type'] for v in vm_pci_infos.values()], \
            '{}: No PF/PCI-passthrough device found while having NIC of ' \
            'type:{}'.format(msg_prefx, self.vif_model)
        LOG.debug('OK, after {}: PCI of type:{} is created'.format(
            msg_prefx, self.vif_model))

    if self.vif_model == 'pci-sriov':
        assert 'VF' in [v['type'] for v in vm_pci_infos.values()], \
            '{}: No VF/PCI-sriov device found while having NIC of ' \
            'type:{}'.format(msg_prefx, self.vif_model)
        LOG.debug('OK, after {}: PCI of type:{} is created'.format(
            msg_prefx, self.vif_model))

    expected_num_pci_alias = 0
    if self.pci_alias is not None:
        expected_num_pci_alias += int(self.pci_alias)
        if expected_num_pci_alias < 1:
            LOG.error('{}: zero or less number of PCI Alias specified in '
                      'extra-specs:{}'.format(msg_prefx,
                                              expected_num_pci_alias))

    expected_num_pci_alias += 1 if self.vif_model in ['pci-sriov'] else 0

    if expected_num_pci_alias > 0:
        cnt_vf = len([
            v['type'] for v in vm_pci_infos.values() if v['type'] == 'VF'
        ])
        assert cnt_vf == expected_num_pci_alias, \
            '{}: Mismatched number of PCI Alias, expected:{}, ' \
            'actual:{}'.format(msg_prefx, expected_num_pci_alias, cnt_vf)
        LOG.debug('OK, after {}: correct number of PCI alias/devices ({}) '
                  'are created'.format(msg_prefx, cnt_vf))

    if self.pci_irq_affinity_mask is not None:
        count = 0
        cpus_matched = False
        while not cpus_matched and count < retries:
            count += 1
            indices_to_pcpus = parse_cpu_list(self.pci_irq_affinity_mask)
            vm_pcpus = []
            for top_per_numa in vm_topology:
                vm_pcpus += top_per_numa['pcpus']

            expected_pcpus_for_irqs = sorted(
                [vm_pcpus[i] for i in indices_to_pcpus])

            cpus_matched = True
            for pci_info in vm_pci_infos.values():
                if 'cpulist' in pci_info and expected_pcpus_for_irqs != \
                        sorted(pci_info['cpulist']):
                    LOG.warn('Mismatched CPU list after {}: '
                             'expected/affin-mask cpu list:{}, actual:{}, '
                             'pci_info:{}'.format(
                                 msg_prefx, expected_pcpus_for_irqs,
                                 pci_info['cpulist'], pci_info))
                    LOG.warn('retries:{}'.format(count))
                    cpus_matched = False
                    break

            vm_pci_infos.clear()
            vm_topology.clear()
            time.sleep(retry_interval)
            vm_pci_infos, vm_topology = \
                vm_helper.get_vm_pcis_irqs_from_hypervisor(self.vm_id)
            # vm_pci_infos.pop('pci_addr_list')

        assert cpus_matched, \
            '{}: CPU list does not match the expected mask after {} ' \
            'retries'.format(msg_prefx, count)
        LOG.info('after {}: pci_irq_affinity_mask checking passed after '
                 'retries:{}\n'.format(msg_prefx, count))

    LOG.info('OK, after {}: check_numa_affinity passed'.format(msg_prefx))
    return True

def _rsync_files_to_con1(con_ssh=None, central_region=False,
                         file_to_check=None):
    region = 'RegionOne' if central_region else None
    auth_info = Tenant.get('admin_platform', dc_region=region)
    if less_than_two_controllers(auth_info=auth_info, con_ssh=con_ssh):
        LOG.info("Less than two controllers on system. Skip copying file to "
                 "controller-1.")
        return

    LOG.info("rsync test files from controller-0 to controller-1 if not "
             "already done")
    stx_home = HostLinuxUser.get_home()
    if not file_to_check:
        file_to_check = '{}/images/tis-centos-guest.img'.format(stx_home)
    try:
        with host_helper.ssh_to_host("controller-1",
                                     con_ssh=con_ssh) as con_1_ssh:
            if con_1_ssh.file_exists(file_to_check):
                LOG.info(
                    "Test files already exist on controller-1. Skip rsync.")
                return
    except Exception as e:
        LOG.error("Cannot ssh to controller-1. Skip rsync. "
                  "\nException caught: {}".format(e.__str__()))
        return

    cmd = "rsync -avr -e 'ssh -o UserKnownHostsFile=/dev/null -o " \
          "StrictHostKeyChecking=no ' " \
          "{}/* controller-1:{}".format(stx_home, stx_home)

    timeout = 1800
    with host_helper.ssh_to_host("controller-0",
                                 con_ssh=con_ssh) as con_0_ssh:
        LOG.info("rsync files from controller-0 to controller-1...")
        con_0_ssh.send(cmd)

        end_time = time.time() + timeout
        while time.time() < end_time:
            index = con_0_ssh.expect(
                [con_0_ssh.prompt, PASSWORD_PROMPT, Prompt.ADD_HOST],
                timeout=timeout, searchwindowsize=100)
            if index == 2:
                con_0_ssh.send('yes')

            if index == 1:
                con_0_ssh.send(HostLinuxUser.get_password())

            if index == 0:
                output = int(con_0_ssh.exec_cmd('echo $?')[1])
                # rsync exit code 23 (partial transfer) is acceptable here
                if output in [0, 23]:
                    LOG.info("Test files are successfully copied to "
                             "controller-1 from controller-0")
                    break
                else:
                    raise exceptions.SSHExecCommandFailed(
                        "Failed to rsync files from controller-0 to "
                        "controller-1")
        else:
            raise exceptions.TimeoutException(
                "Timed out rsync files to controller-1")

def get_auth_token(region=None, auth_info=Tenant.get('admin_platform'),
                   use_dnsname=True):
    """
    Get an authentication token from keystone
    Args:
        region(str): the cloud region for getting the keystone token
        auth_info:
        use_dnsname(bool): True to use the dns name instead of the IP when
            performing the rest request

    Returns (str|None): Authentication token

    """
    keystone_endpoint = keystone_helper.get_endpoints(
        field='URL', service_name='keystone', interface="public",
        region=region, auth_info=auth_info)[0]
    keystone_url = '{}/{}'.format(keystone_endpoint, 'auth/tokens')
    if use_dnsname:
        lab_ip = common.get_lab_fip(region=region)
        lab_dns_name = common.get_dnsname(region=region)
        keystone_url = keystone_url.replace(lab_ip, lab_dns_name)

    LOG.info(
        'Get authentication token from keystone url {}'.format(keystone_url))
    headers = {'Content-type': 'application/json'}
    body = {
        'auth': {
            'identity': {
                'methods': ['password'],
                'password': {
                    'user': {
                        'domain': {'name': 'Default'},
                        'name': 'admin',
                        'password': '******'
                    }
                }
            },
            'scope': {
                'project': {
                    'name': 'admin',
                    'domain': {'name': 'Default'}
                }
            }
        }
    }
    try:
        req = requests.post(url=keystone_url, headers=headers,
                            data=json.dumps(body), verify=False)
    except Exception as e:
        LOG.error('Error trying to get a token')
        LOG.debug(e)
        return None

    LOG.debug(
        '\n{} {}\nHeaders: {}\nBody: {}\nResponse code: {}\nResponse body: '
        '{}'.format(req.request.method, req.request.url, req.request.headers,
                    req.request.body, req.status_code, req.text))
    LOG.info('Status: [{}]'.format(req.status_code))
    req.raise_for_status()
    return req.headers.get('X-Subject-Token')

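# Usage sketch (hypothetical): fetch a token, then pass it in the standard
# 'X-Auth-Token' header on a follow-up keystone-authenticated REST call.
# token = get_auth_token(region='RegionOne')
# if token:
#     resp = requests.get(some_api_url,  # URL assumed, not from the source
#                         headers={'X-Auth-Token': token}, verify=False)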