def login_all_targets(self, udev_timeout=_UDEV_TIMEOUT, iface_name="default"):
    """
    Do an iSCSI login to all discovered targets

    Args:
        udev_timeout: (int) the default udev timeout is 120 seconds, which
            can become a problem with high volumes of
        iface_name: (str) iSCSI interface to scope the login to
    """
    targets = [
        target for target in DISCOVERED_TARGETS[self.system.name]
        if target.iface == iface_name
    ]
    log.debug("{}, iface: {}, targets: {}, DISCOVERED_TARGETS: {}".format(
        self.system.name, iface_name, len(targets),
        len(DISCOVERED_TARGETS[self.system.name])))
    if len(targets) == 0:
        msg = "Num of discovered targets with iface_name {} is 0".format(
            iface_name)
        log.warning(msg)
    funcs_list = []
    args_list = []
    # Log in only to the targets discovered on the requested iface.
    for target in targets:
        funcs_list.append(self.login)
        args_list.append((target.iqn, target.ip, 3260, target.iface))
    pt = Parallel(funcs=funcs_list, args_list=args_list,
                  max_workers=self.max_iscsi_worker)
    pt.run_threads()
    cmd = "sleep 1 ; udevadm trigger ; sleep 1 ;" \
          " udevadm settle --timeout=%s" % udev_timeout
    with _UDEV_SEMIS[self.system.name]:
        self.system.run_cmd_check(cmd)
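# The Parallel helper used throughout these functions is not defined in this
# section. The class below is a minimal sketch of the interface the call
# sites assume (funcs, args_list, kwargs_list, max_workers, run_threads),
# built on concurrent.futures; it is an illustration, not the project's
# actual implementation.
from concurrent.futures import ThreadPoolExecutor


class ParallelSketch(object):
    """Run each func with its matching args/kwargs on a thread pool."""

    def __init__(self, funcs, args_list=None, kwargs_list=None,
                 max_workers=None):
        self._funcs = funcs
        self._args_list = args_list or [() for _ in funcs]
        self._kwargs_list = kwargs_list or [{} for _ in funcs]
        self._max_workers = max_workers or max(len(funcs), 1)

    def run_threads(self):
        # Submit everything, then surface the first exception (if any) so
        # callers can wrap run_threads() in try/except, as they do above.
        with ThreadPoolExecutor(max_workers=self._max_workers) as pool:
            futures = [
                pool.submit(fn, *args, **kwargs)
                for fn, args, kwargs in zip(self._funcs, self._args_list,
                                            self._kwargs_list)
            ]
            for future in futures:
                future.result()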
def wait_for_all_nodes_online(self):
    """ Waits until all nodes in the cluster are in the running or
    recovered state. """
    nodes = self._clusterutil.list_nodes()
    funcs_list = list()
    args_list = list()
    for node in nodes:
        funcs_list.append(self.wait_for_node_online)
        args_list.append([node])
    parent = Parallel(funcs=funcs_list, args_list=args_list)
    parent.run_threads()
    logger.info("All nodes are online")
def rotate_logs(self):
    """ Force log rotation on all clients """
    def _force_log_rotation_on_client(client):
        logger.info("Forcing log rotation on client {}".format(client.name))
        client.force_log_rotation()

    funcs = list()
    args = list()
    for client in self.clients:
        funcs.append(_force_log_rotation_on_client)
        args.append([client])
    parent = Parallel(funcs=funcs, args_list=args, max_workers=len(funcs))
    parent.run_threads()
def upload(self, logdir):
    # TODO kill this if it takes too long! exec_command timeout?
    self._logger.info("Beginning cluster log upload. This may take some time.")
    fns = [self._process_node for _ in self._whitebox_list]
    args_list = [[node, logdir] for node in self._whitebox_list]
    Parallel(fns, args_list=args_list,
             max_workers=len(self._whitebox_list)).run_threads()
def upload(self, testcase):
    if not self._client_list:
        logger.debug("No client systems found, nothing to upload")
        return
    fns = [self._process_client for _ in self._client_list]
    args_list = [[client, testcase] for client in self._client_list]
    Parallel(fns, args_list=args_list,
             max_workers=len(self._client_list)).run_threads()
def make_parallel(fn, args, results):
    """ Builds a Parallel object that runs fn(arg) for each arg and appends
    every arg for which fn returns False to the caller-supplied results
    list. """
    def run_fn(fn, arg, result_list):
        if fn(arg) is False:
            result_list.append(arg)
        return

    # Do not rebind `results` here: callers rely on the list they passed in
    # being populated with the failing args.
    funcs = [run_fn for arg in args]
    args = [(fn, arg, results) for arg in args]
    return Parallel(funcs, args_list=args)
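# Hedged usage sketch for make_parallel: _FakeHost is a made-up stand-in for
# the real client/node objects used in main() below (which expose
# is_pingable() and .name); anything for which the check returns False ends
# up in the caller-supplied results list.
def _make_parallel_usage_example():
    class _FakeHost(object):
        def __init__(self, name, up):
            self.name = name
            self._up = up

        def is_pingable(self):
            return self._up

    hosts = [_FakeHost("host-a", True), _FakeHost("host-b", False)]
    unreachable = list()
    make_parallel(lambda h: h.is_pingable(), hosts, unreachable).run_threads()
    # Only the host whose check returned False is collected.
    assert [h.name for h in unreachable] == ["host-b"]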
def stop(self):
    """ Stop all IO objects """
    fn_list = []
    for io in self._io_list:
        fn_list.append(io.stop)
    self._should_run = False
    if self._thread is not None:
        self._thread.join()
    Parallel(fn_list).run_threads()
    self.__io_has_been_stopped = True
def _log_time(self):
    """
    As a means of keeping time straight we want to log the current view of
    time from everything that may have generated logs. This includes:
        Executors, where the test is running.
        Clients, where the IO is generated from.
    """
    funcs = list()
    funcs.append(self._log_executor_time)
    funcs.append(self._log_all_clients_time)
    Parallel(funcs=funcs, max_workers=len(funcs)).run_threads()
def clients_can_reach_ips(opts):
    """ Checks every client in parallel against all of the cluster IPs """
    ips = get_access_vips(opts)
    if opts.all_ips:
        ips.extend(get_all_ips(opts))
    funcs = list()
    args = list()
    for client in qalib.client.list_from_equipment(opts.equipment,
                                                   required=False):
        funcs.append(can_client_reach_ips)
        args.append([client, ips])
    parent = Parallel(funcs=funcs, args_list=args)
    try:
        parent.run_threads()
        return True
    except ValueError:
        cluster = opts.equipment.get_cluster()
        describe_cluster(cluster)
        return False
def destroy_all_ifaces(self):
    """Destroys all ifaces that were created on the host."""
    funcs = list()
    args = list()
    non_deletable_ifaces = ["default", "iser"]
    for iface in self.list_ifaces():
        if iface in non_deletable_ifaces:
            continue
        funcs.append(self.destroy_iface)
        args.append([iface])
    workers = min(16, len(funcs))
    Parallel(funcs=funcs, args_list=args, max_workers=workers).run_threads()
def logout_all_targets(self):
    """ Log out of all targets which were logged into by this object """
    # For iface-scoped targets, do batch processing. We do this only for
    # non-default ifaces.
    if self.system.name in ISCSI_LOGGEDIN_TARGETS:
        ifaces = []
        for target in ISCSI_LOGGEDIN_TARGETS[self.system.name]:
            if target.iface != "default":
                ifaces.append(target.iface)
        unique_ifaces = list(set(ifaces))
        for iface in unique_ifaces:
            # For unique interfaces, we can assume that only this client
            # instance was used to do the iscsi login, so we can just log
            # them all out with a single command:
            cmd = ["iscsiadm", "-m node", "-I", iface, "-u"]
            cmd_str = ' '.join(cmd)
            with _ISCSIADM_SEMIS[self.system.name]:
                ret, out = self.system.run_cmd(cmd_str)
            # No error or exception raised, so remove the entries from the
            # list.
            if ret == 0 and self.system.name in ISCSI_LOGGEDIN_TARGETS:
                self.remove_target_from_iscsi_loggedin_targets(iface)
    # Now do logouts individually for targets using the default interface.
    # Run them in parallel.
    funcs_list = []
    args_list = []
    if (self.system.name in ISCSI_LOGGEDIN_TARGETS and
            ISCSI_LOGGEDIN_TARGETS[self.system.name]):
        for target in ISCSI_LOGGEDIN_TARGETS[self.system.name][:]:
            funcs_list.append(self.logout)
            args_list.append((target.iqn, target.ip, 3260, target.iface))
        del ISCSI_LOGGEDIN_TARGETS[self.system.name]
    pt = Parallel(funcs_list, args_list, max_workers=self.max_iscsi_worker)
    pt.run_threads()
def _log_all_clients_time(self):
    """
    Logs the client time in native time format; this will need to be
    revisited for Windows or ESXi support.
    """
    clients = qalib.client.list_from_equipment(self.equipment, required=False)
    funcs = list()
    args = list()
    for client in clients:
        funcs.append(self._log_client_time)
        args.append([client])
    Parallel(funcs=funcs, args_list=args,
             max_workers=len(funcs)).run_threads()
def configure_clients_from_client_si_map(self):
    """
    Configure each client with its subset of si_list based on
    client_si_map
    """
    # initialize initiator groups
    self.add_client_initiators_to_storage_instances()
    client_si_map = self.get_client_and_si_mapping()
    _client_attach_funcs = []
    _client_attach_args = []
    success = False
    try:
        for client, si_list in client_si_map.viewitems():
            # this is needed to handle fewer clients than SI's
            if len(si_list) > 0:
                _client_attach_funcs.append(
                    self._configure_clients_from_si_list)
                _client_attach_args.append([client, si_list])
        p = Parallel(_client_attach_funcs,
                     args_list=_client_attach_args,
                     max_workers=len(client_si_map.keys()))
        p.run_threads()
        success = True
    finally:
        # Note: `client` here refers to the last client seen in the loop
        # above, not necessarily the one whose login failed.
        if not success:
            raise EnvironmentError(
                "iscsi login failed from client {}: [{}]".format(
                    client.name, self._client_ifaces[client]))
        else:
            logger.debug(
                "From client {}: [{}], iSCSI login succeeded".format(
                    client.name, self._client_ifaces[client]))
    return True
def ensure_cluster_ready(self):
    """ Run cluster health-checks required before beginning testing """
    logger.info("Waiting for all nodes to be online")
    self.cluster_util.health.wait_for_all_nodes_online()

    logger.info("Running network diagnostics")
    checks = ["interfaces", "ntp"]
    for check in checks:
        if not check_cluster_config_passes(
                self.nodes[0], check, additional_flags=["parallel"]):
            raise RuntimeError(
                "check_cluster_config {} --parallel failed".format(check))

    logger.info("Can clients reach VIPs?")
    # TODO[jsp]: more of this can probably be
    # moved into can_client_reach_ips
    vips = list()
    for vip in self.sdk.system.network.access_vip.get()["network_paths"]:
        vips.append(vip["ip"])
    if not vips:
        raise RuntimeError("No VIPs detected on cluster")
    funcs = list()
    args = list()
    for client in self.clients:
        funcs.append(can_client_reach_ips)
        args.append([client, vips])
    parent = Parallel(funcs=funcs, args_list=args)
    parent.run_threads()

    logger.info("Can equipment be cleaned up?")
    logger.info("...cleaning clients")
    funcs = list()
    args = list()
    for client in self.clients:
        funcs.append(lambda c: c.force_cleanup_all())
        args.append([client])
    parent = Parallel(funcs=funcs, args_list=args,
                      max_workers=len(self.clients))
    parent.run_threads()
    logger.info("...cleaning cluster")
    self.cluster_util.force_clean()
    logger.info("All cluster health checks completed successfully")
def start(self):
    """ Start all IO objects """
    if self.__io_has_been_started:
        raise ValueError("start() cannot be called more than once")
    fn_list = []
    for io in self._io_list:
        fn_list.append(io.start)
    success = False
    self._should_run = True
    try:
        Parallel(fn_list, max_workers=self.__max_workers).run_threads()
        success = True
    finally:
        if not success:
            self.stop()
    self.__io_has_been_started = True
    return self
def logout_all_sessions(self):
    """ Logs out of all active sessions on the host. """
    funcs = list()
    kwargs_list = list()
    for session_info in self.list_session_info():
        funcs.append(self.logout)
        kwargs = {
            "iqn": session_info["target_iqn"],
            "ip": session_info["target_ip"],
            "port": session_info["target_port"],
            "iface_name": None
        }
        kwargs_list.append(kwargs)
    workers = min(16, len(funcs))
    Parallel(funcs=funcs, kwargs_list=kwargs_list,
             max_workers=workers).run_threads()
def discover_all_targets(self, targets, iface_name="default",
                         iscsi_redirect=None):
    """
    Performs iscsi discovery on a list of target addresses.

    Parameters:
        targets (list) - list of tuples with the following values:
            (ip, port, iqn)
        iface_name (str) - interface name to limit discovery on

    Raises:
        EnvironmentError - If iqn is not discovered
    """
    if iscsi_redirect is True:
        # Save all the iqns from targets to tgt_iqns, then pass tgt_iqns to
        # a single discover() call.
        tgt_iqns = []
        for target in targets:
            (tgt_ip, tgt_port, tgt_iqn) = target
            tgt_iqns.append(tgt_iqn)
        self.discover(ip=tgt_ip, iqn=tgt_iqns, port=tgt_port,
                      iface_name=iface_name)
    else:
        funcs = list()
        kwargs = list()
        for target in targets:
            (tgt_ip, tgt_port, tgt_iqn) = target
            funcs.append(self.discover)
            kwargs.append({
                "ip": tgt_ip,
                "iqn": tgt_iqn,
                "port": tgt_port,
                "iface_name": iface_name
            })
        Parallel(funcs=funcs, kwargs_list=kwargs).run_threads()
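# Hedged usage sketch for discover_all_targets: the addresses and IQNs are
# placeholder values showing only the (ip, port, iqn) tuple shape the
# docstring above describes. `iscsi_client` in the commented call stands in
# for an instance of the class these iscsi helpers live on; it is not
# defined in this section.
_example_targets = [
    ("192.0.2.10", 3260, "iqn.2005-03.org.example:tgt-0"),
    ("192.0.2.11", 3260, "iqn.2005-03.org.example:tgt-1"),
]
# iscsi_client.discover_all_targets(_example_targets, iface_name="default")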
def _thread_attach_all_targets(self, redirect_mode=None, redirect_ip=None):
    """Allocate a thread for each client to log in."""
    funcs = list()
    kwargs = list()
    for client in self._clients:
        # update all targets with iface names before sending them out
        # we need to get all targets
        funcs.append(client.iscsi.attach_all_targets)
        kwargs.append({
            "targets": self._targets,
            "iface_name": self._client_ifaces[client],
            "redirect_mode": redirect_mode,
            "redirect_ip": redirect_ip
        })
    Parallel(funcs=funcs, kwargs_list=kwargs,
             max_workers=len(self._clients)).run_threads()
    for client in self._clients:
        # TODO parallelize this block if needed
        if not self._block_io:
            client.filesystem.format_all_volumes(self.volumes)
            client.filesystem.mount_all_volumes(self.volumes)
            client.rescan_iscsi_bus()
def can_client_reach_ips(client, ips):
    """Will raise if the client cannot reach a VIP"""
    failed_ips = list()
    funcs = list()
    args = list()
    for ip in ips:
        funcs.append(can_client_reach_ip)
        args.append([client, ip, failed_ips])
    Parallel(funcs=funcs, args_list=args).run_threads()
    if failed_ips:
        success_ips = set(ips) - set(failed_ips)
        msg = ("{} is not able to ping {} ips, but is able to ping {} ips. "
               "Failed IPs:\n".format(client.name, len(failed_ips),
                                      len(success_ips)))
        for ip in failed_ips:
            msg += "{}\n".format(ip)
        logger.error(msg)
        raise ValueError(msg)
    else:
        msg = "Client {} is able to ping {} ips.".format(client.name, len(ips))
        logger.info(msg)
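# can_client_reach_ip is called above but not defined in this section; the
# sketch below shows only the assumed contract (ping the IP from the client
# and record failures in the shared list). The ping command line and the
# (ret, out) return value of client.run_cmd() are assumptions, not the
# project's actual helper.
def _can_client_reach_ip_sketch(client, ip, failed_ips):
    ret, _out = client.run_cmd("ping -c 3 -W 2 {}".format(ip))
    if ret != 0:
        failed_ips.append(ip)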
def main():
    opts = get_args()
    sys.excepthook = log_exceptions
    setup_logging()
    requests.packages.urllib3.disable_warnings(
        requests.packages.urllib3.exceptions.InsecureRequestWarning)
    logging.getLogger("requests").setLevel(logging.DEBUG)

    logger.info("Reading cluster details...")
    cluster = opts.equipment.get_cluster(required=True)
    node_ips = cluster.get_server_ip_list()
    nodes = map(
        lambda n: wb.from_hostname(
            n, username=cluster.admin_user, password=cluster.admin_password),
        node_ips)
    clients = qalib.client.list_from_equipment(opts.equipment, required=False)

    # BEGIN PRE-CLUSTER SETUP
    logger.info("Beginning pre-deploy checks")

    logger.info("Are clients reachable?")
    unpingable_clients = list()
    parent = make_parallel(lambda c: c.is_pingable(), clients,
                           unpingable_clients)
    parent.run_threads()
    if unpingable_clients:
        raise RuntimeError("Couldn't ping these client(s): {}".format(
            ", ".join(c.name for c in unpingable_clients)))

    logger.info("Are we able to login on client systems?")

    def signin_client(client):
        # this is basically client.run_cmd
        shell = client._system_conn._get_shell()
        try:
            shell.exec_command("true")
            return True
        except paramiko.ssh_exception.AuthenticationException as _err:
            return False

    login_failed_clients = list()
    parent = make_parallel(signin_client, clients, login_failed_clients)
    parent.run_threads()
    if login_failed_clients:
        raise RuntimeError("Unable to login to client(s): {}".format(
            ", ".join(c.name for c in login_failed_clients)))

    logger.info("Installing required packages on clients")
    funcs = list()
    args = list()
    for client in clients:
        funcs.append(lambda c: c.setup())
        args.append([client])
    parent = Parallel(funcs=funcs, args_list=args)
    parent.run_threads()

    logger.info("Are storage nodes reachable?")
    unpingable_nodes = list()
    parent = make_parallel(lambda n: n.is_pingable(), nodes, unpingable_nodes)
    parent.run_threads()
    if unpingable_nodes:
        raise RuntimeError("Unable to ping node(s): {}".format(
            ", ".join(n.ip for n in unpingable_nodes)))

    logger.info("Can we sign-in to all nodes as admin?")
    failed_login_nodes = list()

    def login_node(node):
        ssh = SSH(node.ip, username=cluster.admin_user,
                  password=cluster.admin_password)
        try:
            tp = ssh._open_transport()
            tp.close()
            return True
        except paramiko.ssh_exception.AuthenticationException:
            return False

    parent = make_parallel(login_node, nodes, failed_login_nodes)
    parent.run_threads()
    if failed_login_nodes:
        raise RuntimeError("Authentication failed: {}".format(
            ", ".join(n.ip for n in failed_login_nodes)))

    logger.info("Is REST interface up for all nodes?")

    def is_rest_up(node, max_tries=3, wait_time=10):
        tries = 0
        path = "http://{}:8500".format(node.ip)
        # TODO[jsp]: probably smarter to allow requests to handle max tries
        # and backoff/wait time
        while tries < max_tries:
            tries += 1
            try:
                resp = requests.get(path)
                if resp.status_code == 200:
                    return True
            except requests.exceptions.ConnectTimeout as err:
                logger.debug(str(err))
            time.sleep(wait_time)
        return False

    rest_not_up = list()
    parent = make_parallel(is_rest_up, nodes, rest_not_up)
    parent.run_threads()
    if rest_not_up:
        raise RuntimeError("REST interface not up: {}".format(
            ", ".join(n.ip for n in rest_not_up)))

    logger.info("Verifying cluster network")

    def check_ifaces(node):
        return qalib.preflight.check_cluster_config_passes(node, "interfaces")

    nodes_w_network_problems = list()
    parent = make_parallel(check_ifaces, nodes, nodes_w_network_problems)
    parent.run_threads()
    if nodes_w_network_problems:
        raise RuntimeError("Network check failed on node(s): {}".format(
            ", ".join(n.ip for n in nodes_w_network_problems)))

    logger.info("Pre-deploy work complete!")

    logger.info("Deploying cluster")
    deploy_cluster(opts.init, nodes[0])
    if len(nodes) > 1:
        funcs = list()
        args = list()
        requests_session = requests.Session()
        retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[111])
        # TODO[jsp]: consider also using this adapter for init
        adapter = HTTPAdapter(max_retries=retries)
        requests_session.mount("http://", adapter)
        for node in nodes[1:]:
            funcs.append(join_cluster)
            args.append([cluster.name, node, requests_session])
        parent = Parallel(funcs=funcs, args_list=args)
        parent.run_threads()

    logger.info("Waiting for cluster to form")
    sdk = get_cluster_api(cluster)
    wait_for_cluster(sdk, node_ips)
    logger.info("Cluster formed correctly.")

    logger.info("Beginning post-deploy checks.")
    preflight_helper = qalib.preflight.ClusterReady(opts.equipment)
    preflight_helper.ensure_cluster_ready()
    logger.info("Your cluster is ready.")