def wait(self, timeout=None):
    """
    Block the thread until the barrier is passed or broken.
    BrokenBarrierError is raised if the timeout expires.

    Args:
        timeout (int): max wait time at the barrier. Preferred over
            self.timeout, if given

    Returns (int): number from 0 to (parties - 1), randomly assigned and
        unique for each thread that waited. -1 if the thread did not wait

    """
    self.barrier_status()
    if self.broken:
        return -1
    if not timeout:
        if not self.timeout:
            LOG.warning("This thread does not have a timeout for wait()")
        else:
            timeout = self.timeout
    try:
        LOG.info("Start waiting at barrier")
        id_ = threading.Barrier.wait(self, timeout)
        # only one thread will report the barrier being passed
        if id_ == 0:
            self._barrier_passed()
        return id_
    except threading.BrokenBarrierError:
        LOG.info("Barrier broke before {} threads were waiting".format(
            self.parties))
        raise
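# Illustrative usage sketch (not part of the original module): the method
# above is presumed to live on a threading.Barrier subclass, here
# hypothetically named ThreadBarrier; the worker body stands in for real
# test logic.
def _example_barrier_wait():
    barrier = ThreadBarrier(parties=3, timeout=60)  # hypothetical subclass

    def worker():
        # Each thread blocks here until all 3 parties arrive; exactly one
        # thread is assigned index 0 and reports the barrier being passed.
        index = barrier.wait(timeout=30)
        LOG.info("Released from barrier with index {}".format(index))

    workers = [threading.Thread(target=worker) for _ in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()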
def simplex_host_upgrade(con_ssh=None, fail_ok=False):
    """
    Handle the simplex host-upgrade cli.

    Args:
        con_ssh (SSHClient):
        fail_ok (bool):

    Returns (tuple):
        (0, <success_msg>)
        (1, <err_msg>)   # cli rejected, applicable if fail_ok is true

    """
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()

    cmd = "source /etc/nova/openrc; system host-upgrade controller-0"
    con_ssh.send(cmd)
    index = con_ssh.expect([con_ssh.prompt, Prompt.YES_N_PROMPT])
    if index == 1:
        # confirmation prompt appeared; answer it before checking result
        con_ssh.send('yes')
        con_ssh.expect(con_ssh.prompt)
        return 0, "host upgrade success"

    err_msg = "CLI system host upgrade rejected"
    LOG.warning(err_msg)
    if fail_ok:
        return 1, err_msg
    else:
        raise exceptions.CLIRejected(err_msg)
def stop(self):
    if not self.is_launched:
        LOG.warning('Video recording is already stopped')
        return

    # Ask the recorder to finish gracefully first
    self._popen.send_signal(signal.SIGINT)

    def terminate_avconv():
        # Give the recorder up to 10 seconds to exit on its own, then
        # terminate it forcefully.
        limit = time.time() + 10
        while time.time() < limit:
            time.sleep(0.1)
            if self._popen.poll() is not None:
                LOG.debug("Video stopped")
                return
        LOG.info("Killing video recorder process")
        os.kill(self._popen.pid, signal.SIGTERM)

    t = Thread(target=terminate_avconv)
    t.start()
    self._popen.communicate()
    t.join()
    self.is_launched = False
def wait_for_upgrade_states(states, timeout=60, check_interval=6,
                            fail_ok=False):
    """
    Wait for the upgrade to reach one of the given state(s).

    Args:
        states (str|list|tuple): expected upgrade state(s)
        timeout (int): max wait time in seconds
        check_interval (int): polling interval in seconds
        fail_ok (bool): whether to raise on timeout

    Returns (bool): True if an expected state was reached, False otherwise
        (only when fail_ok=True)

    """
    if not states:
        raise ValueError("Expected upgrade state(s) have to be specified "
                         "via argument states")
    if isinstance(states, str):
        states = [states]

    end_time = time.time() + timeout
    state_match = False
    while time.time() < end_time:
        table_ = system_upgrade_show()[1]
        act_state = table_parser.get_value_two_col_table(table_, "state")
        if act_state in states:
            state_match = True
            break
        time.sleep(check_interval)

    if state_match:
        return True
    msg = "{} state was not reached".format(states)
    if fail_ok:
        LOG.warning(msg)
        return False
    raise exceptions.TimeoutException(msg)
def find_options_(self, telnet_conn,
                  end_of_menu=r"[A|a]utomatic(ally)?( boot)? in|Press \[Tab\] to edit".encode(),
                  option_identifier=r"(\dm?\)\s[\w]+)|Boot from hard drive\s+|([\w]+\s)+\s+> ".encode(),
                  newline=r'(\x1b\[\d+;\d+H)+'.encode()):
    super().find_options(telnet_conn, end_of_menu=end_of_menu,
                         option_identifier=option_identifier,
                         newline=newline)
    # TODO: this is a wasteful way to initialize the Options.
    self.options = [
        KickstartOption(name=option.name, index=option.index,
                        key=option.key) for option in self.options
    ]
    for option in self.options:
        # TODO: would like to make this more general, but it's impossible
        # to determine the prompt
        matches = re.search(r"([A-Za-z\-]{2,}\s)+\s", option.name)
        option_name = matches.group(0).strip() if matches else option.name
        LOG.info("Kickstart option: {}".format(option.name))

        if " >" in option.name.lower() or "options" in option.name.lower():
            if "controller configuration" in option.name.lower():
                kwargs = getattr(bios.BootMenus.Kickstart,
                                 'Controller_Configuration')
            elif "console" in option.name.lower():
                kwargs = getattr(bios.BootMenus.Kickstart, 'Console')
            else:
                if hasattr(bios.BootMenus.Kickstart, option_name):
                    kwargs = getattr(bios.BootMenus.Kickstart, option_name)
                else:
                    matching_attrs = [
                        attr for attr in dir(bios.BootMenus.Kickstart)
                        if attr in option_name
                    ]
                    if matching_attrs:
                        kwargs = getattr(bios.BootMenus.Kickstart,
                                         matching_attrs.pop())
                    else:
                        LOG.warning(
                            "The option {} has a submenu but its attributes "
                            "are not defined in the Kickstart menu "
                            "dictionary".format(option_name))
                        kwargs = None
            sub_menu = KickstartMenu(name=option_name, kwargs=kwargs)
            LOG.info("Option {} sub_menu {}".format(option_name,
                                                    sub_menu.name))
            option.sub_menu = sub_menu

    current_option = self.get_current_option(telnet_conn)
    LOG.info("Kickstart current option: {}; index {}".format(
        current_option.name, current_option.index))
    self.index = current_option.index
def wait_for_image_status(image_id, status='active',
                          timeout=ImageTimeout.STATUS_CHANGE,
                          check_interval=3, fail_ok=True, con_ssh=None,
                          auth_info=None):
    """
    Wait for an image to reach the given status.

    Args:
        image_id (str):
        status (str): expected image status
        timeout (int): max wait time in seconds
        check_interval (int): polling interval in seconds
        fail_ok (bool): whether to raise on timeout
        con_ssh (SSHClient):
        auth_info (dict):

    Returns (bool): True if the expected status was reached, False
        otherwise (only when fail_ok=True)

    """
    actual_status = None
    end_time = time.time() + timeout
    while time.time() < end_time:
        actual_status = get_image_values(image_id, fields='status',
                                         auth_info=auth_info,
                                         con_ssh=con_ssh)[0]
        if status.lower() == actual_status.lower():
            LOG.info("Image {} has reached status: {}".format(
                image_id, status))
            return True
        time.sleep(check_interval)

    msg = "Timed out waiting for image {} status to change to {}. Actual " \
          "status: {}".format(image_id, status, actual_status)
    if fail_ok:
        LOG.warning(msg)
        return False
    raise exceptions.TimeoutException(msg)
def _wait_for_heat_stack_deleted(stack_name=None, timeout=120,
                                 check_interval=3, con_ssh=None,
                                 auth_info=None):
    """
    Wait for the given heat stack to be deleted.

    Args:
        stack_name (str): Heat stack name to check for state
        timeout (int): max wait time in seconds
        check_interval (int): polling interval in seconds
        con_ssh (SSHClient): If None, active controller ssh will be used.
        auth_info (dict): Tenant dict. If None, primary tenant will be
            used.

    Returns (bool): True if the stack was deleted within timeout, False
        otherwise

    """
    LOG.info("Waiting for {} to be deleted...".format(stack_name))
    end_time = time.time() + timeout
    while time.time() < end_time:
        stack_status = get_stack_status(stack=stack_name,
                                        auth_info=auth_info,
                                        con_ssh=con_ssh, fail_ok=True)
        if not stack_status:
            return True
        elif stack_status[0] == HeatStackStatus.DELETE_FAILED:
            LOG.warning('Heat stack in DELETE_FAILED state')
            return False
        time.sleep(check_interval)

    msg = "Heat stack {} did not get deleted within timeout".format(
        stack_name)
    LOG.warning(msg)
    return False
def delete_stack(stack, fail_ok=False, check_first=False, con_ssh=None,
                 auth_info=None):
    """
    Delete the given heat stack for a given tenant.

    Args:
        con_ssh (SSHClient): If None, active controller ssh will be used.
        fail_ok (bool):
        check_first (bool): whether or not to check the stack existence
            before attempting to delete
        auth_info (dict): Tenant dict. If None, primary tenant will be
            used.
        stack (str): Given name for the heat stack

    Returns (tuple): Status and msg of the heat stack deletion.

    """
    if not stack:
        raise ValueError("stack_name is not provided.")

    if check_first:
        if not get_stack_status(stack, con_ssh=con_ssh,
                                auth_info=auth_info, fail_ok=True):
            msg = "Heat stack {} doesn't exist on the system. Do " \
                  "nothing.".format(stack)
            LOG.info(msg)
            return -1, msg

    LOG.info("Deleting Heat Stack %s", stack)
    exitcode, output = cli.openstack('stack delete -y', stack,
                                     ssh_client=con_ssh, fail_ok=fail_ok,
                                     auth_info=auth_info)
    if exitcode > 0:
        LOG.warning("Delete heat stack request rejected.")
        return 1, output

    if not _wait_for_heat_stack_deleted(stack_name=stack,
                                        auth_info=auth_info):
        stack_id = get_stack_values(stack=stack, fields='id',
                                    auth_info=auth_info,
                                    con_ssh=con_ssh)[0]
        get_stack_resources(stack=stack_id, auth_info=auth_info,
                            con_ssh=con_ssh)

        msg = "heat stack {} is not removed after stack-delete.".format(
            stack)
        if fail_ok:
            LOG.warning(msg)
            return 2, msg
        raise exceptions.HeatError(msg)

    succ_msg = "Heat stack {} is successfully deleted.".format(stack)
    LOG.info(succ_msg)
    return 0, succ_msg
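# Illustrative usage sketch (not part of the original module): shows the
# (code, msg) contract of delete_stack() above. The stack name is
# hypothetical.
def _example_delete_stack():
    code, msg = delete_stack('demo-stack', fail_ok=True, check_first=True)
    if code == -1:
        LOG.info("Stack did not exist; nothing was deleted")
    elif code == 0:
        LOG.info(msg)
    else:
        # 1: cli rejected the delete; 2: stack lingered after stack-delete
        LOG.warning("Stack deletion problem ({}): {}".format(code, msg))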
def wait_for_running_pods_ready(pod_names=None, namespace=None,
                                all_namespaces=False, labels=None,
                                timeout=300, fail_ok=False, con_ssh=None,
                                exclude=False, strict=False, **kwargs):
    """
    Wait for Running pods to be Ready, such as 1/1, 3/3

    Args:
        pod_names (str|list|None): pod name(s) to check
        namespace (str|None): namespace to check in
        all_namespaces (bool): check all namespaces if True
        labels: label selector(s) to filter pods
        timeout (int): max wait time in seconds
        fail_ok (bool): whether to raise if some pods are still unready
        con_ssh (SSHClient):
        exclude (bool): whether to exclude the given pod_names
        strict (bool): whether to match pod names strictly
        **kwargs: extra filters passed to get_pods

    Returns (bool):

    """
    unready_pods = get_unready_running_pods(namespace=namespace,
                                            all_namespaces=all_namespaces,
                                            pod_names=pod_names,
                                            labels=labels,
                                            exclude=exclude, strict=strict,
                                            con_ssh=con_ssh, **kwargs)
    if not unready_pods:
        return True

    end_time = time.time() + timeout
    while time.time() < end_time:
        pods_info = get_pods(field=('NAME', 'READY'), namespace=namespace,
                             all_namespaces=all_namespaces,
                             pod_names=unready_pods, con_ssh=con_ssh)
        for pod_info in pods_info:
            pod_name, pod_ready = pod_info
            ready_count, total_count = pod_ready.split('/')
            if ready_count == total_count:
                unready_pods.remove(pod_name)

        if not unready_pods:
            return True
        # poll every few seconds instead of spinning
        time.sleep(3)

    msg = "Some pods are not ready within {}s: {}".format(
        timeout, unready_pods)
    LOG.warning(msg)
    if fail_ok:
        return False
    raise exceptions.KubeError(msg)
def ping_server(self, server, ping_count=5, timeout=60, fail_ok=False,
                retry=0):
    """
    Ping a server from this ssh session.

    Args:
        server (str): server ip to ping
        ping_count (int):
        timeout (int): max time to wait for ping response in seconds
        fail_ok (bool): whether to raise exception if packet loss rate is
            100%
        retry (int): number of times to retry on full packet loss

    Returns (tuple): (packet_loss_rate (int), untransmitted_packets (int))
        packet_loss_rate is a percentage, such as 100, 25, 0

    """
    output = packet_loss_rate = None
    for i in range(max(retry + 1, 1)):
        cmd = 'ping -c {} {}'.format(ping_count, server)
        code, output = self.exec_cmd(cmd=cmd, expect_timeout=timeout,
                                     fail_ok=True)
        if code != 0:
            packet_loss_rate = 100
        else:
            packet_loss_rate = re.findall(PING_LOSS_RATE, output)[-1]
            packet_loss_rate = int(packet_loss_rate)
            if packet_loss_rate < 100:
                if packet_loss_rate > 0:
                    LOG.warning(
                        "Some packets dropped when ping from {} ssh "
                        "session to {}. Packet loss rate: {}%".format(
                            self.host, server, packet_loss_rate))
                else:
                    LOG.info("All packets received by {}".format(server))
                break

        LOG.info("retry in 3 seconds")
        time.sleep(3)
    else:
        msg = "Ping from {} to {} failed.".format(self.host, server)
        if not fail_ok:
            raise exceptions.LocalHostError(msg)
        else:
            LOG.warning(msg)

    transmitted_packets = re.findall(r"(\d+) packets transmitted,", output)
    if transmitted_packets:
        untransmitted_packets = int(ping_count) - int(
            transmitted_packets[0])
    else:
        untransmitted_packets = ping_count

    return packet_loss_rate, untransmitted_packets
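# Illustrative usage sketch (not part of the original module): ping_server()
# above is a method of an ssh client class, so ssh_client here is assumed to
# be such an instance; the target ip is hypothetical.
def _example_ping_server(ssh_client):
    loss_rate, untransmitted = ssh_client.ping_server(
        '192.168.204.1', ping_count=5, retry=2, fail_ok=True)
    if loss_rate == 100:
        LOG.warning("Ping failed; {} packet(s) were never "
                    "transmitted".format(untransmitted))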
def start(self):
    if self.is_launched:
        LOG.warning('Video recording is already running')
        return

    LOG.info('Record video via %s', ' '.join(self._cmd))
    # discard the recorder's stdout/stderr instead of leaking an open
    # file handle to /dev/null
    self._popen = subprocess.Popen(self._cmd, stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
    self.is_launched = True
def login_as_linux_user(user, password, host, cmd='whoami',
                        expecting_fail=False):
    if is_on_action_controller(host):
        LOG.info('Login to the active controller:{}\n'.format(host))
        if user != HostLinuxUser.get_user():
            skip('Login to the active controller (will not skip if '
                 'controller-1 is active), host:{}, user:{}'.format(
                     host, user))
            return False, ''

    if user == 'sysadmin':
        LOG.info('Login to the host:{} as "sysadmin"!\n'.format(host))

    LOG.info('Attempt to login to host:{}, user:{}, password:{}\n'.format(
        host, user, password))
    # todo: if host is the active-controller, ssh_to_host will ignore
    # username and use 'sysadmin', which leads to error
    cmd = '(date; uuid; hostname; {}) 2>/dev/null'.format(cmd)
    try:
        with host_helper.ssh_to_host(host, username=user,
                                     password=password) as conn:
            code, output = conn.exec_cmd(cmd, fail_ok=True)
            LOG.info('code={}, output={}\n'.format(code, output))
            if 0 != code:
                msg = 'Failed to execute cmd:{} on host:{} as user:{}, ' \
                      'password:{}'.format(cmd, host, user, password)
                LOG.info(msg)
                assert expecting_fail, msg
                return False, output
            else:
                assert not expecting_fail, \
                    'Expecting login to fail but it succeeded: host:{} as ' \
                    'user:{} with password:{}'.format(host, user, password)
                return True, output
    except Exception as e:
        msg = 'Expecting to login but failed with exception:{}'.format(e)
        assert expecting_fail, msg
        if 'Permission denied' not in str(e):
            LOG.warning('Login as {}/{} failed without a Permission denied '
                        'error.'.format(user, password))
        else:
            LOG.info('Failed to login as expected on host:{}, user:{}, '
                     'password:{}, due to "Permission denied"'.format(
                         host, user, password))
        return False, str(e)
def clear(self):
    if self.is_launched:
        LOG.error("Video recording is still running")
        return
    if not os.path.isfile(self.file_path):
        LOG.warning("%s is already absent", self.file_path)
        return
    os.remove(self.file_path)
def delete_flavors(flavors, check_first=True, fail_ok=False, con_ssh=None,
                   auth_info=Tenant.get('admin')):
    """
    Delete given flavor(s)

    Args:
        flavors (list|str): id(s) of flavor(s) to delete
        check_first (bool)
        fail_ok (bool): whether to raise exception if any flavor fails to
            delete
        con_ssh (SSHClient):
        auth_info (dict):

    Returns (tuple):
        (-1, 'None of the flavor(s) exists. Do nothing.')
        (0, 'Flavor is successfully deleted')
        (1, <std_out>)
        (2, "Flavor <flavor_id> still exists on system after deleted.")

    """
    if isinstance(flavors, str):
        flavors = [flavors]

    if check_first:
        existing_flavors = get_flavors(con_ssh=con_ssh,
                                       auth_info=auth_info)
        flavors = list(set(flavors) & set(existing_flavors))
        if not flavors:
            msg = "None of the given flavors exist. Do nothing."
            LOG.info(msg)
            return -1, msg

    LOG.info("Flavor(s) to delete: {}".format(flavors))
    code, output = cli.openstack('flavor delete', ' '.join(flavors),
                                 ssh_client=con_ssh, fail_ok=fail_ok,
                                 auth_info=auth_info)
    if code > 0:
        return 1, output

    existing_flavors = get_flavors(con_ssh=con_ssh, auth_info=auth_info)
    flavors_still_exist = list(set(flavors) & set(existing_flavors))
    if flavors_still_exist:
        err_msg = "Flavor(s) still exist after deletion: {}".format(
            flavors_still_exist)
        LOG.warning(err_msg)
        if fail_ok:
            return 2, err_msg
        else:
            raise exceptions.FlavorError(err_msg)

    success_msg = "Flavor(s) deleted successfully."
    LOG.info(success_msg)
    return 0, success_msg
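# Illustrative usage sketch (not part of the original module): exercises the
# return-code contract of delete_flavors() above with hypothetical flavor
# ids.
def _example_delete_flavors():
    code, msg = delete_flavors(['flavor-1', 'flavor-2'], fail_ok=True)
    # -1: none existed, 0: deleted, 1: cli rejected, 2: still exist after
    assert code in (-1, 0), msg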
def sys_lock_unlock_hosts(number_of_hosts_to_lock):
    """
    Test the evacuation of vms due to compute lock/unlock
    :return:
    """
    # identify hosts with at least 5 vms
    vms_by_compute_dic = vm_helper.get_vms_per_host()
    compute_to_lock = []
    vms_to_check = []
    hosts_threads = []
    timeout = 1000
    for k, v in vms_by_compute_dic.items():
        if len(v) >= 5:
            compute_to_lock.append(k)
            vms_to_check.append(v)

    if not compute_to_lock:
        skip("There are no computes with 5 or more vms")

    if len(compute_to_lock) > number_of_hosts_to_lock:
        compute_to_lock = compute_to_lock[0:number_of_hosts_to_lock]
        vms_to_check = vms_to_check[0:number_of_hosts_to_lock]
    else:
        LOG.warning("There are only {} computes available with at least "
                    "5 vms".format(len(compute_to_lock)))

    for host in compute_to_lock:
        new_thread = MThread(host_helper.lock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()

    LOG.tc_step("Verify lock succeeded and vms still in good state")
    for vm_list in vms_to_check:
        vm_helper.wait_for_vms_values(vms=vm_list, fail_ok=False)

    for host, vms in zip(compute_to_lock, vms_to_check):
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert vm_host != host, \
                "VM is still on {} after lock".format(host)
            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    hosts_threads = []
    for host in compute_to_lock:
        new_thread = MThread(host_helper.unlock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()
def post(container=None, object_=None, read_acl=None, write_acl=None,
         sync_to=None, sync_key=None, meta=None, header=None,
         con_ssh=None, fail_ok=False):
    """
    Update the metadata of a container or object. If the container is not
    found, it will be created automatically.

    Args:
        container (str): the name of the container to post to
        object_ (str): the name of the object to post to
        read_acl (str): Read ACL for containers. Quick summary of ACL
            syntax: .r:*, .r:-.example.com, .r:www.example.com, account1
            (v1.0 identity API only), account1:*, account2:user2 (v2.0+
            identity API).
        write_acl (str): Write ACL for containers. Quick summary of ACL
            syntax: account1 (v1.0 identity API only), account1:*,
            account2:user2 (v2.0+ identity API).
        sync_to (str): Sync To for containers, for multi-cluster
            replication.
        sync_key (str): Sync Key for containers, for multi-cluster
            replication.
        meta (dict): metadata items to set, in
            {<metadata_name>: <value>, ...} format
        header (dict): customized request headers to set, in
            {<header_name>: <value>, ...} format
        con_ssh:
        fail_ok:

    Returns:

    """
    args_ = ''
    if read_acl:
        args_ += " --read-acl {}".format(read_acl)
    if write_acl:
        args_ += " --write-acl {}".format(write_acl)
    if sync_to:
        args_ += " --sync-to {}".format(sync_to)
    if sync_key:
        args_ += " --sync-key {}".format(sync_key)
    if meta:
        for k, v in meta.items():
            args_ += " --meta {}:{}".format(k, v)
    if header:
        for k, v in header.items():
            args_ += " --header {}:{}".format(k, v)
    if container:
        args_ += " {}".format(container)
    if object_:
        args_ += " {}".format(object_)

    rc, out = cli.swift('post', args_, ssh_client=con_ssh, fail_ok=True)
    if rc == 0:
        return 0, "Swift post executed successfully"
    else:
        msg = "swift post cli failed: {}".format(out)
        LOG.warning(msg)
        if fail_ok:
            return rc, msg
        else:
            raise exceptions.SwiftError(msg)
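# Illustrative usage sketch (not part of the original module): gives a
# hypothetical container a public-read ACL and a custom metadata item via
# post() above.
def _example_swift_post():
    code, msg = post(container='backups', read_acl='.r:*',
                     meta={'Owner': 'tenant1'}, fail_ok=True)
    if code != 0:
        LOG.warning(msg)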
def get_suitable_hypervisors():
    """
    Get low latency hypervisors with HT-off

    TODO: the following settings should be checked, but most of them cannot
    be easily done automatically
        # Processor Configuration
        # Hyper-Threading = Disabled
        # Power & Performance
        # Policy = Performance
        # Workload = Balanced
        # P-States
        # SpeedStep = Enabled
        # Turbo Boost = Enabled
        # Energy Efficient Turbo = Disabled
        # C-States
        # CPU C-State = Disabled
        # Acoustic and Performance
        # Fan Profile = Performance
    """
    global testable_hypervisors

    LOG.fixture_step(
        'Check if the lab meets conditions required by this test case')
    hypervisors = host_helper.get_hypervisors()

    for hypervisor in hypervisors:
        personality, subfunc = system_helper.get_host_values(
            hypervisor, ('personality', 'subfunctions'))
        personalities = subfunc + personality
        if not personalities or 'lowlatency' not in personalities:
            continue

        cpu_info, num_threads, vm_cores, num_cores = get_cpu_info(
            hypervisor)
        if cpu_info and 'topology' in cpu_info and \
                cpu_info['topology']['threads'] == 1:
            if num_threads != 1:
                LOG.warning(
                    'conflicting info: num_threads={}, while '
                    'cpu_info.threads={}'.format(
                        num_threads, cpu_info['topology']['threads']))
            testable_hypervisors[hypervisor] = {
                'personalities': personalities,
                'cpu_info': cpu_info,
                'vm_cores': vm_cores,
                'num_cores': num_cores,
                'for_host_test': False,
                'for_vm_test': False,
            }
        else:
            LOG.warning(
                'hypervisor:{} has HT-on, skipping it'.format(hypervisor))

    return testable_hypervisors.keys()
def verify_swift_object_setup():
    LOG.info("Verifying swift endpoints...")
    port = '7480'
    endpoints_url = keystone_helper.get_endpoints(
        field='URL', service_name='swift', interface='public')[0]
    LOG.info("Swift public endpoint url: {}".format(endpoints_url))
    url_port = endpoints_url.split(':')[2].split('/')[0].strip()
    if url_port != port:
        LOG.warning("Swift endpoint uses unexpected port {}. Expected "
                    "port is {}.".format(url_port, port))
        return False

    LOG.info("Verifying if swift object pools are setup...")
    if 'ceph' in storage_helper.get_storage_backends():
        con_ssh = ControllerClient.get_active_controller()
        cmd = "rados df | awk 'NR>1 && NR<11 {print $1}'"
        rc, output = con_ssh.exec_cmd(cmd, fail_ok=True)
        LOG.info("Swift object pools:{}".format(output))
        if rc == 0:
            pools = output.split('\n')
            if set(SWIFT_POOLS).issubset(pools):
                LOG.info("Swift object pools: {} are set...".format(
                    SWIFT_POOLS))
            else:
                LOG.info("Expected Swift object pools: {} are NOT set. "
                         "Pools = {}".format(SWIFT_POOLS, pools))
                return False
        else:
            return False

    LOG.info("Verifying if swift object service (ceph-radosgw) is listed "
             "via 'sudo sm-dump' on the active controller...")
    cmd = "sm-dump | grep ceph-radosgw | awk ' {print $1\" \" $2\" \" $3}'"
    con_ssh = ControllerClient.get_active_controller()
    rc, output = con_ssh.exec_sudo_cmd(cmd, fail_ok=True)
    if rc == 0 and "ceph-radosgw enabled-active enabled-active" in output:
        LOG.info("swift object service (ceph-radosgw) is listed via "
                 "'sudo sm-dump' on the active controller...")
    else:
        LOG.warning("Unable to verify Swift object service "
                    "ceph-radosgw: {}.".format(output))
        return False

    return True
def abort_upgrade(con_ssh=None, timeout=60, fail_ok=False):
    """
    Abort the upgrade

    Args:
        con_ssh (SSHClient):
        timeout (int)
        fail_ok (bool):

    Returns (tuple):
        (0, "Upgrade aborting")
        (1, <err_msg>)   # cli rejected or abort failed, applicable if
                         # fail_ok is true

    """
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()

    cmd = "source /etc/nova/openrc; system upgrade-abort"
    con_ssh.send(cmd)
    end_time = time.time() + timeout
    rc = 1
    while time.time() < end_time:
        index = con_ssh.expect([con_ssh.prompt, Prompt.YES_N_PROMPT],
                               timeout=timeout)
        if index == 1:
            con_ssh.send('yes')
            index = con_ssh.expect([con_ssh.prompt, Prompt.CONFIRM_PROMPT],
                                   timeout=timeout)
            if index == 1:
                con_ssh.send('abort')
                index = con_ssh.expect(
                    [con_ssh.prompt, Prompt.CONFIRM_PROMPT],
                    timeout=timeout)
        if index == 0:
            rc = con_ssh.exec_cmd("echo $?")[0]
            con_ssh.flush()
            break

    if rc != 0:
        err_msg = "CLI system upgrade-abort rejected"
        LOG.warning(err_msg)
        if fail_ok:
            return 1, err_msg
        else:
            raise exceptions.CLIRejected(err_msg)

    table_ = system_upgrade_show()[1]
    state = table_parser.get_value_two_col_table(table_, "state")
    if "aborting" in state:
        return 0, "Upgrade aborting"
    else:
        err_msg = "Upgrade abort failed"
        if fail_ok:
            LOG.warning(err_msg)
            return 1, err_msg
        else:
            raise exceptions.CLIRejected(err_msg)
def check_for_upgrade_abort():
    upgrade_info = dict()
    lab = InstallVars.get_install_var('LAB')
    upgrade_info['LAB'] = lab
    table_ = upgrade_helper.system_upgrade_show()[1]
    LOG.info("Upgrade show {}".format(table_))
    if "No upgrade in progress" in table_:
        LOG.warning("No upgrade in progress, cannot be aborted")
        return 1, None

    upgrade_release = table_parser.get_value_two_col_table(
        table_, "to_release")
    current_release = table_parser.get_value_two_col_table(
        table_, "from_release")
    upgraded_hostnames = upgrade_helper.get_upgraded_host_names(
        upgrade_release)
    upgraded = len(upgraded_hostnames)
    upgrade_info['current_release'] = current_release
    upgrade_info['upgrade_release'] = upgrade_release
    upgrade_info['upgraded_hostnames'] = upgraded_hostnames

    if upgraded >= 2:
        LOG.warning("Both controllers are upgraded; full system "
                    "installation is required to abort: {} ".format(
                        upgraded_hostnames))
        return 2, upgrade_info
    elif upgraded == 1:
        LOG.warning("Only one controller is upgraded; in-service abort is "
                    "possible: {} ".format(upgraded_hostnames))
        return 0, upgrade_info
    else:
        LOG.warning("No host is upgraded.")
        return 3, upgrade_info
def tables(output_lines, combine_multiline_entry=False):
    """Find all ascii-tables in output and parse them.

    Returns a list of tables parsed from cli output as dicts.
    (see OutputParser.table())

    And, if found, a label key (a separate line preceding the table) is
    added to each table's dict.

    Returns (list):
    """
    tables_ = []
    table_ = []
    label = None

    start = False
    header = False

    if not isinstance(output_lines, list):
        output_lines = output_lines.split('\n')

    for line in output_lines:
        if delimiter_line.match(line):
            if not start:
                start = True
            elif not header:
                # we are now past the header area
                header = True
            else:
                # table ends here
                start = header = False
                table_.append(line)

                parsed = table(
                    table_, combine_multiline_entry=combine_multiline_entry)
                parsed['label'] = label
                tables_.append(parsed)

                table_ = []
                label = None
                continue
        if start:
            table_.append(line)
        else:
            if label is None:
                label = line
            else:
                LOG.warning('Invalid line between tables: %s' % line)
    if len(table_) > 0:
        LOG.warning('Missing end of table')

    return tables_
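# Illustrative usage sketch (not part of the original module): feeds a
# minimal two-column ascii table, as typically printed by a cli, into
# tables() above.
def _example_tables():
    output = '\n'.join([
        '+----------+--------+',
        '| Property | Value  |',
        '+----------+--------+',
        '| state    | active |',
        '+----------+--------+',
    ])
    parsed = tables(output)
    # parsed holds one table dict; its 'label' is None because no
    # plain-text line preceded the table in the output.
    assert len(parsed) == 1 and parsed[0]['label'] is None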
def get_symlink(ssh_client, file_path):
    # 'grep --color=never ""' matches every line while stripping any color
    # escape codes from the ls output
    code, output = ssh_client.exec_cmd(
        'ls -l {} | grep --color=never ""'.format(file_path))
    if code != 0:
        LOG.warning('{} not found!'.format(file_path))
        return None

    res = re.findall('> (.*)', output)
    if not res:
        LOG.warning('No symlink found for {}'.format(file_path))
        return None

    link = res[0].strip()
    return link
def delete_imported_load(load_version=None, con_ssh=None, fail_ok=False):
    load_id = get_imported_load_id(load_version=load_version,
                                   con_ssh=con_ssh)

    rc, output = cli.system('load-delete', load_id, ssh_client=con_ssh,
                            fail_ok=True)
    if rc == 1:
        return 1, output

    if not wait_for_delete_imported_load(load_id, con_ssh=con_ssh,
                                         fail_ok=True):
        err_msg = "Unable to delete imported load {}".format(load_id)
        LOG.warning(err_msg)
        if fail_ok:
            return 1, err_msg
        else:
            raise exceptions.HostError(err_msg)

    return 0, "Imported load {} is successfully deleted".format(load_id)
def wait_for_nodes_ready(hosts=None, timeout=120, check_interval=5, con_ssh=None, fail_ok=False): """ Wait for hosts in ready state via kubectl get nodes Args: hosts (None|list|str|tuple): Wait for all hosts ready if None is specified timeout: check_interval: con_ssh: fail_ok: Returns (tuple): (True, None) (False, <nodes_not_ready>(list)) """ if hosts and isinstance(hosts, str): hosts = [hosts] end_time = time.time() + timeout nodes_not_ready = None while time.time() < end_time: nodes_not_ready = get_nodes(status='Ready', field='NAME', exclude=True, con_ssh=con_ssh, fail_ok=True) if nodes_not_ready and hosts: nodes_not_ready = list(set(nodes_not_ready) & set(hosts)) if nodes_not_ready: LOG.info('{} not ready yet'.format(nodes_not_ready)) elif nodes_not_ready is not None: LOG.info("All nodes are ready{}".format( ': {}'.format(hosts) if hosts else '')) return True, None time.sleep(check_interval) msg = '{} are not ready within {}s'.format(nodes_not_ready, timeout) LOG.warning(msg) if fail_ok: return False, nodes_not_ready else: raise exceptions.KubeError(msg)
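# Illustrative usage sketch (not part of the original module): waits for two
# hypothetical hosts to reach Ready in kubectl, tolerating failure.
def _example_wait_for_nodes_ready():
    ready, not_ready = wait_for_nodes_ready(
        hosts=['controller-0', 'compute-1'], timeout=180, fail_ok=True)
    if not ready:
        LOG.warning("Nodes still not ready: {}".format(not_ready))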
def __init__(self, parties, action=None, timeout=180): """ Args: parties (int): number of threads to wait for action (function): additional function to call when barrier breaks timeout (int): max wait time """ if len(get_multi_threads()) + 1 < parties: LOG.warning( "This barrier will wait for more threads than are currently running" ) threading.Barrier.__init__(self, parties, action, timeout) self.timeout = timeout LOG.info("Created a barrier waiting for {} threads".format(parties))
def wait_for_environment_status(env_id, status, timeout=180,
                                check_interval=6, fail_ok=False):
    """
    Wait for the Murano environment to reach the given deployment status(es)

    Args:
        env_id: Murano environment id
        status (str|list): expected deployment status(es)
        timeout (int): max wait time in seconds
        check_interval (int): polling interval in seconds
        fail_ok (bool): whether to raise on timeout

    Returns (tuple): (<status_matched>(bool), <actual_status>)

    """
    end_time = time.time() + timeout
    if not status:
        raise ValueError("Expected deployment status(es) have to be "
                         "specified via argument status")
    if isinstance(status, str):
        status = [status]

    status_match = False
    act_status, prev_status = None, None
    while time.time() < end_time:
        act_status = get_environment_status(env_id)
        if act_status != prev_status:
            LOG.info("Current Murano environment deploy status = "
                     "{}".format(act_status))
            prev_status = act_status
        if act_status in status:
            status_match = True
            break
        time.sleep(check_interval)

    if status_match:
        return True, act_status

    msg = "Environment id {} did not reach {} status within specified " \
          "time".format(env_id, status)
    LOG.warning(msg)
    if fail_ok:
        return False, act_status
    else:
        raise exceptions.MuranoError(msg)
def update_stack(stack_name, params_string, fail_ok=False, con_ssh=None,
                 auth_info=None, timeout=300):
    """
    Update the given heat stack for a given tenant.

    Args:
        con_ssh (SSHClient): If None, active controller ssh will be used.
        fail_ok (bool):
        params_string: Parameters to pass to the heat update cmd.
            ex: -f <stack.yaml> -P IMAGE=tis <stack_name>
        auth_info (dict): Tenant dict. If None, primary tenant will be
            used.
        stack_name (str): Given name for the heat stack
        timeout (int)

    Returns (tuple): Status and msg of the heat stack update.

    """
    if not params_string:
        raise ValueError("Parameters not provided.")

    LOG.info("Update Heat Stack %s", params_string)
    exitcode, output = cli.heat('stack-update', params_string,
                                ssh_client=con_ssh, fail_ok=fail_ok,
                                auth_info=auth_info)
    if exitcode == 1:
        LOG.warning("Update heat stack request rejected.")
        return 1, output

    LOG.info(
        "Wait for Heat Stack Status to reach UPDATE_COMPLETE for stack %s",
        stack_name)
    res, msg = wait_for_heat_status(stack_name=stack_name,
                                    status=HeatStackStatus.UPDATE_COMPLETE,
                                    auth_info=auth_info,
                                    fail_ok=fail_ok, timeout=timeout)
    if not res:
        return 2, msg

    LOG.info("Stack {} updated successfully".format(stack_name))
    return 0, stack_name
def check_alarms(before_alarms, timeout=300, auth_info=Tenant.get('admin_platform'), con_ssh=None, fail_ok=False): after_alarms = system_helper.get_alarms(auth_info=auth_info, con_ssh=con_ssh) new_alarms = [] check_interval = 5 for item in after_alarms: if item not in before_alarms: alarm_id, entity_id = item.split('::::') if alarm_id == EventLogID.CPU_USAGE_HIGH: check_interval = 45 elif alarm_id == EventLogID.NTP_ALARM: # NTP alarm handling LOG.info("NTP alarm found, checking ntpq stats") host = entity_id.split('host=')[1].split('.ntp')[0] system_helper.wait_for_ntp_sync(host=host, fail_ok=False, auth_info=auth_info, con_ssh=con_ssh) continue new_alarms.append((alarm_id, entity_id)) res = True remaining_alarms = None if new_alarms: LOG.info("New alarms detected. Waiting for new alarms to clear.") res, remaining_alarms = \ system_helper.wait_for_alarms_gone(new_alarms, fail_ok=True, timeout=timeout, check_interval=check_interval, auth_info=auth_info, con_ssh=con_ssh) if not res: msg = "New alarm(s) found and did not clear within {} seconds. " \ "Alarm IDs and Entity IDs: {}".format(timeout, remaining_alarms) LOG.warning(msg) if not fail_ok: assert res, msg return res, remaining_alarms
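# Illustrative usage sketch (not part of the original module): shows the
# capture-before / check-after pattern check_alarms() above is built around.
# do_disruptive_operation is a hypothetical stand-in for the action under
# test; before_alarms entries are assumed to be in the
# '<alarm_id>::::<entity_id>' form the function splits on.
def _example_check_alarms():
    before_alarms = system_helper.get_alarms()
    do_disruptive_operation()
    res, remaining = check_alarms(before_alarms, timeout=300, fail_ok=True)
    if not res:
        LOG.warning("New alarms did not clear: {}".format(remaining))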
def backup_cinder_volumes(backup_info):
    """
    Backup cinder volumes

    Args:
        backup_info - settings for doing system backup

    Returns: None

    """
    LOG.tc_step("Cinder Volumes backup ...")
    backup_dest = backup_info.get('backup_dest', None)
    dest_server = backup_info.get('dest_server', None)
    copy_to_usb = backup_info.get('copy_to_usb', None)
    cinder_backup = backup_info.get('cinder_backup', False)
    if not is_cinder_export_supported():
        LOG.warning('cinder export is NOT supported on this load, forced '
                    'to use "cinder backup-xxxx"')
        cinder_backup = True

    vol_ids = cinder_helper.get_volumes(auth_info=Tenant.get('admin'),
                                        status='Available')
    vol_ids += cinder_helper.get_volumes(auth_info=Tenant.get('admin'),
                                         status='in-use')
    if len(vol_ids) > 0:
        LOG.info("Exporting cinder volumes: {}".format(vol_ids))
        exported = install_helper.export_cinder_volumes(
            backup_dest=backup_dest,
            backup_dest_path=backup_info['backup_dest_full_path'],
            dest_server=dest_server, copy_to_usb=copy_to_usb,
            con_ssh=backup_info['con_ssh'], cinder_backup=cinder_backup)

        assert len(exported) > 0, "No volumes were successfully exported"
        assert len(exported) == len(vol_ids), \
            "Some volumes failed export: {}".format(
                set(vol_ids) - set(exported))
    else:
        LOG.info("No cinder volumes are in Available or in-use states in "
                 "the system; skipping cinder volume export...")
def wait_for_upgrade_activate_complete(timeout=300, check_interval=60,
                                       fail_ok=False):
    upgrade_state = ''
    end_time = time.time() + timeout
    while time.time() < end_time:
        upgrade_state = get_upgrade_state()
        if "activation-complete" in upgrade_state:
            LOG.info('Upgrade activation-complete')
            return True
        time.sleep(check_interval)

    err_msg = "Upgrade activation did not complete after waiting for {} " \
              "seconds. Current state is {}".format(timeout, upgrade_state)
    if fail_ok:
        LOG.warning(err_msg)
        return False
    raise exceptions.TimeoutException(err_msg)