def _node_reboot(self):
    storage_hostname = (g.config["gluster_servers"]
                        [self.gluster_servers[0]]["storage"])

    cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
    ret, out, err = g.run(storage_hostname, cmd)

    self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

    if ret != 255:
        err_msg = "failed to reboot host %s error: %s" % (
            storage_hostname, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    try:
        g.ssh_close_connection(storage_hostname)
    except Exception as e:
        g.log.error("failed to close connection with host %s"
                    " with error: %s" % (storage_hostname, e))
        raise

    # added sleep as node will restart after 3 sec
    time.sleep(3)

    for w in Waiter(timeout=600, interval=10):
        try:
            if g.rpyc_get_connection(storage_hostname, user="root"):
                g.rpyc_close_connection(storage_hostname, user="root")
                break
        except Exception as err:
            g.log.info("exception while getting connection: '%s'" % err)

    if w.expired:
        error_msg = ("exceeded timeout 600 sec, node '%s' is "
                     "not reachable" % storage_hostname)
        g.log.error(error_msg)
        raise ExecutionError(error_msg)

    # wait for the gluster pod to be in 'Running' state
    self._wait_for_gluster_pod_to_be_ready()

    # glusterd and gluster-blockd services should be up and running
    service_names = ("glusterd", "gluster-blockd", "tcmu-runner")
    for gluster_pod in self.gluster_pod_list:
        for service in service_names:
            g.log.info("gluster_pod - '%s' : gluster_service '%s'" % (
                gluster_pod, service))
            check_service_status_on_pod(
                self.oc_node, gluster_pod, service, "running")
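
# Illustrative usage sketch (assumes a test class providing the
# 'gluster_servers', 'oc_node' and 'gluster_pod_list' attributes used
# above; the test name below is hypothetical):
#
#     def test_restart_node_and_verify_services(self):
#         self._node_reboot()  # reboots the first gluster server and
#                              # waits for the pod and its services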
def check_if_dir_is_filled(mnode, dirname, percent_to_fill, timeout=3600):
    """Check if the directory is filled up to the given percentage.

    Args:
        mnode (str): node on which the directory resides
        dirname (str): absolute path of the directory
        percent_to_fill (int): percentage up to which the volume
            should be filled

    Kwargs:
        timeout (int): overall time to wait, in seconds, for the
            directory to fill up to the given percentage

    Returns:
        bool: True if the directory is filled with the given percent,
            False otherwise.

    Example:
        check_if_dir_is_filled("abc.com", "/mnt/glusterfs", 10)
    """
    flag = 0
    count = 0
    while count < timeout:
        output = get_disk_usage(mnode, dirname)
        used = output['used_percent']

        if int(percent_to_fill) > int(used):
            g.log.info("Directory %s used percent: %s" % (dirname, used))
            time.sleep(5)
            count = count + 5
        else:
            g.log.info("Directory %s is filled with given percent already"
                       % dirname)
            g.rpyc_close_connection(host=mnode)
            flag = 1
            break

    if flag:
        g.log.info("Directory is filled with given percentage")
        return True
    g.log.info("Timeout reached before filling directory with given"
               " percentage")
    return False
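
# Illustrative usage (hostname and mount path are hypothetical): wait up
# to 30 minutes for a mount to reach 60% utilization before proceeding.
#
#     if not check_if_dir_is_filled("server1.example.com",
#                                   "/mnt/glusterfs", 60, timeout=1800):
#         g.log.error("Volume did not reach 60% utilization in time")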
def list_files(mnode, dir_path, parse_str="", user="root"):
    """List files from the given directory path.

    Example:
        list_files("abc.com", "/root/dir1/")

    Args:
        mnode (str): Node on which cmd has to be executed.
        dir_path (str): directory path name

    Kwargs:
        parse_str (str): substring of the filename to be fetched
        user (str): username. Defaults to 'root' user.

    Returns:
        list: files with absolute path names, on success.
        NoneType: None if command execution fails or a parse error occurs.
    """
    try:
        conn = g.rpyc_get_connection(mnode, user=user)
        if conn is None:
            g.log.error("Unable to get connection to 'root' of node %s"
                        % mnode)
            return None

        filepaths = []
        for root, directories, files in conn.modules.os.walk(dir_path):
            for filename in files:
                if parse_str and parse_str not in filename:
                    continue
                filepath = conn.modules.os.path.join(root, filename)
                filepaths.append(filepath)
        return filepaths
    except Exception:
        g.log.error("Exception occurred in list_files()")
        return None
    finally:
        g.rpyc_close_connection(host=mnode, user=user)
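
# Illustrative usage (node name and path are hypothetical): fetch only
# the log files whose names contain 'glusterd' from a remote node.
#
#     glusterd_logs = list_files("server1.example.com",
#                                "/var/log/glusterfs",
#                                parse_str="glusterd")
#     if glusterd_logs is None:
#         g.log.error("Failed to list files")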
def node_reboot_by_command(node, timeout=600, wait_step=10):
    """Reboot node and wait for it to come back up within the given timeout.

    Args:
        node (str): Node which needs to be rebooted.
        timeout (int): Seconds to wait for the node to come back up.
        wait_step (int): Interval in seconds between checks of the
            node's status.
    """
    cmd = "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'"
    ret, out, err = g.run(node, cmd)
    if ret != 255:
        err_msg = "failed to reboot host '%s' error %s" % (node, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    try:
        g.ssh_close_connection(node)
    except Exception as e:
        g.log.error("failed to close connection with host %s "
                    "with error: %s" % (node, e))
        raise

    # added sleep as node will restart after 3 sec
    time.sleep(3)

    for w in waiter.Waiter(timeout=timeout, interval=wait_step):
        try:
            if g.rpyc_get_connection(node, user="root"):
                g.rpyc_close_connection(node, user="root")
                return
        except Exception as err:
            g.log.info("exception while getting connection: '%s'" % err)
    if w.expired:
        error_msg = ("exceeded timeout %s sec, node '%s' is "
                     "not reachable" % (timeout, node))
        g.log.error(error_msg)
        raise exceptions.ExecutionError(error_msg)
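
# Illustrative usage (hostname is hypothetical): reboot a node and poll
# every 5 seconds, failing if it is not back within 5 minutes.
#
#     node_reboot_by_command("node1.example.com", timeout=300, wait_step=5)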
def append_string_to_file(mnode, filename, str_to_add_in_file, user="root"):
    """Append the given string to the file.

    Example:
        append_string_to_file("abc.def.com", "/var/log/messages",
                              "test_1_string")

    Args:
        mnode (str): Node on which cmd has to be executed.
        filename (str): absolute file path to append the string to
        str_to_add_in_file (str): string to be added to the file, which is
            used as a start and stop string for parsing the file in
            search_pattern_in_file().

    Kwargs:
        user (str): username. Defaults to 'root' user.

    Returns:
        bool: True on success, False otherwise.
    """
    try:
        conn = g.rpyc_get_connection(mnode, user=user)
        if conn is None:
            g.log.error("Unable to get connection to 'root' of node %s"
                        " in append_string_to_file()" % mnode)
            return False

        with conn.builtin.open(filename, 'a') as _filehandle:
            _filehandle.write(str_to_add_in_file)
        return True
    except IOError:
        g.log.error("Exception occurred while adding string to "
                    "file %s in append_string_to_file()", filename)
        return False
    finally:
        g.rpyc_close_connection(host=mnode, user=user)
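
# Illustrative usage (hostname is hypothetical): drop start/stop markers
# into a log so search_pattern_in_file() can scope its search.
#
#     append_string_to_file("server1.example.com", "/var/log/messages",
#                           "TEST_CASE_1_START\n")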
def setUpClass(cls):
    """Setup nfs-ganesha cluster tests."""

    # Check if gdeploy is installed on the glusto-tests management node.
    ret, _, _ = g.run_local("gdeploy --version")
    if ret != 0:
        raise ConfigError("Please install gdeploy to run the scripts")

    GlusterBaseClass.setUpClass.im_func(cls)

    # Check if enable_nfs_ganesha is set in the config file
    if not cls.enable_nfs_ganesha:
        raise ConfigError("Please enable nfs ganesha in config")

    # Read num_of_nfs_ganesha_nodes from the config file and create
    # the nfs ganesha cluster accordingly
    cls.num_of_nfs_ganesha_nodes = int(cls.num_of_nfs_ganesha_nodes)
    cls.servers_in_nfs_ganesha_cluster = (
        cls.servers[:cls.num_of_nfs_ganesha_nodes])
    cls.vips_in_nfs_ganesha_cluster = (
        cls.vips[:cls.num_of_nfs_ganesha_nodes])

    # Create nfs ganesha cluster if it does not exist already
    if (is_nfs_ganesha_cluster_exists(
            cls.servers_in_nfs_ganesha_cluster[0])):
        if is_nfs_ganesha_cluster_in_healthy_state(
                cls.servers_in_nfs_ganesha_cluster[0]):
            g.log.info("Nfs-ganesha Cluster exists and is in healthy "
                       "state. Skipping cluster creation...")
        else:
            g.log.info("Nfs-ganesha Cluster exists and is not in "
                       "healthy state.")
            g.log.info("Tearing down existing cluster which is not in "
                       "healthy state")
            ganesha_ha_file = ("/var/run/gluster/shared_storage/"
                               "nfs-ganesha/ganesha-ha.conf")

            g.log.info("Collecting server details of existing "
                       "nfs ganesha cluster")
            conn = g.rpyc_get_connection(
                cls.servers_in_nfs_ganesha_cluster[0], user="root")
            if conn is None:
                tmp_node = cls.servers_in_nfs_ganesha_cluster[0]
                raise ExecutionError("Unable to get connection to 'root' "
                                     "of node %s " % tmp_node)
            if not conn.modules.os.path.exists(ganesha_ha_file):
                raise ExecutionError("Unable to locate %s"
                                     % ganesha_ha_file)
            with conn.builtin.open(ganesha_ha_file, "r") as fh:
                ganesha_ha_contents = fh.read()
            g.rpyc_close_connection(
                host=cls.servers_in_nfs_ganesha_cluster[0], user="root")
            servers_in_existing_cluster = re.findall(
                r'VIP_(.*)\=.*', ganesha_ha_contents)

            ret = teardown_nfs_ganesha_cluster(
                servers_in_existing_cluster, force=True)
            if not ret:
                raise ExecutionError("Failed to teardown nfs "
                                     "ganesha cluster")
            g.log.info("Existing cluster got teardown successfully")

            g.log.info("Creating nfs-ganesha cluster of %s nodes"
                       % str(cls.num_of_nfs_ganesha_nodes))
            g.log.info("Nfs-ganesha cluster node info: %s"
                       % cls.servers_in_nfs_ganesha_cluster)
            g.log.info("Nfs-ganesha cluster vip info: %s"
                       % cls.vips_in_nfs_ganesha_cluster)
            ret = create_nfs_ganesha_cluster(
                cls.servers_in_nfs_ganesha_cluster,
                cls.vips_in_nfs_ganesha_cluster)
            if not ret:
                raise ExecutionError("Failed to create "
                                     "nfs-ganesha cluster")
    else:
        g.log.info("Creating nfs-ganesha cluster of %s nodes"
                   % str(cls.num_of_nfs_ganesha_nodes))
        g.log.info("Nfs-ganesha cluster node info: %s"
                   % cls.servers_in_nfs_ganesha_cluster)
        g.log.info("Nfs-ganesha cluster vip info: %s"
                   % cls.vips_in_nfs_ganesha_cluster)
        ret = create_nfs_ganesha_cluster(
            cls.servers_in_nfs_ganesha_cluster,
            cls.vips_in_nfs_ganesha_cluster)
        if not ret:
            raise ExecutionError("Failed to create "
                                 "nfs-ganesha cluster")

    if is_nfs_ganesha_cluster_in_healthy_state(
            cls.servers_in_nfs_ganesha_cluster[0]):
        g.log.info("Nfs-ganesha Cluster exists and is in healthy state")
    else:
        raise ExecutionError("Nfs-ganesha Cluster setup Failed")

    ret = set_nfs_ganesha_client_configuration(cls.clients)
    if not ret:
        raise ExecutionError("Failed to do client nfs ganesha "
                             "configuration")

    # Make sure servers and clients can resolve each other via /etc/hosts
    for server in cls.servers:
        for client in cls.clients:
            cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                   "echo \"%s %s\" >> /etc/hosts; fi"
                   % (client, socket.gethostbyname(client), client))
            ret, _, _ = g.run(server, cmd)
            if ret != 0:
                g.log.error("Failed to add entry of client %s in "
                            "/etc/hosts of server %s"
                            % (client, server))

    for client in cls.clients:
        for server in cls.servers:
            cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                   "echo \"%s %s\" >> /etc/hosts; fi"
                   % (server, socket.gethostbyname(server), server))
            ret, _, _ = g.run(client, cmd)
            if ret != 0:
                g.log.error("Failed to add entry of server %s in "
                            "/etc/hosts of client %s"
                            % (server, client))
def test_metadata_self_heal(self):
    """
    Test MetaData Self-Heal (heal command)

    Description:
    - set the volume options
      "metadata-self-heal": "off"
      "entry-self-heal": "off"
      "data-self-heal": "off"
    - create IO
    - set the volume option "self-heal-daemon": "off"
    - bring down all brick processes from the selected set
    - change the permissions, ownership and group of the files under
      the "test_meta_data_self_heal" folder
    - get arequal before getting bricks online
    - bring bricks online
    - set the volume option "self-heal-daemon": "on"
    - check daemons and start healing
    - check if heal is completed
    - check for split-brain
    - get arequal after getting bricks online and compare it with the
      arequal from before getting bricks online
    - check that group and user are 'qa'
    """
    # pylint: disable=too-many-locals,too-many-statements
    # Setting options
    g.log.info('Setting options...')
    options = {"metadata-self-heal": "off",
               "entry-self-heal": "off",
               "data-self-heal": "off"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options')
    g.log.info("Options 'metadata-self-heal', 'entry-self-heal' and "
               "'data-self-heal' are set to 'off' successfully")

    # Creating files on client side
    all_mounts_procs = []
    test_meta_data_self_heal_folder = 'test_meta_data_self_heal'
    g.log.info("Generating data for %s:%s",
               self.mounts[0].client_system, self.mounts[0].mountpoint)

    # Create files
    g.log.info('Creating files...')
    command = ("cd %s/ ; "
               "mkdir %s ;"
               "cd %s/ ;"
               "for i in `seq 1 50` ; "
               "do dd if=/dev/urandom of=test.$i bs=10k count=1 ; "
               "done ;"
               % (self.mounts[0].mountpoint,
                  test_meta_data_self_heal_folder,
                  test_meta_data_self_heal_folder))

    proc = g.run_async(self.mounts[0].client_system, command,
                       user=self.mounts[0].user)
    all_mounts_procs.append(proc)

    # Validate IO
    self.assertTrue(
        validate_io_procs(all_mounts_procs, self.mounts),
        "IO failed on some of the clients")

    # Setting options
    g.log.info('Setting options...')
    options = {"self-heal-daemon": "off"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options')
    g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

    # Select bricks to bring offline
    bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
        self.mnode, self.volname))
    bricks_to_bring_offline = filter(None, (
        bricks_to_bring_offline_dict['hot_tier_bricks'] +
        bricks_to_bring_offline_dict['cold_tier_bricks'] +
        bricks_to_bring_offline_dict['volume_bricks']))

    # Bring bricks offline
    g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
    ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                    bricks_to_bring_offline)

    ret = are_bricks_offline(self.mnode, self.volname,
                             bricks_to_bring_offline)
    self.assertTrue(ret, 'Bricks %s are not offline'
                    % bricks_to_bring_offline)
    g.log.info('Bringing bricks %s offline is successful',
               bricks_to_bring_offline)

    # Changing the permissions, ownership and the group
    # of the files under the "test_meta_data_self_heal" folder
    g.log.info("Modifying data for %s:%s",
               self.mounts[0].client_system, self.mounts[0].mountpoint)

    # Change permissions to 444
    g.log.info('Changing permissions...')
    command = ("cd %s/%s/ ; "
               "chmod -R 444 *"
               % (self.mounts[0].mountpoint,
                  test_meta_data_self_heal_folder))
    ret, out, err = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, err)
    g.log.info('Permissions are changed successfully')

    # Change the ownership to qa
    g.log.info('Changing the ownership...')
    command = ("cd %s/%s/ ; "
               "chown -R qa *"
               % (self.mounts[0].mountpoint,
                  test_meta_data_self_heal_folder))
    ret, out, err = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, err)
    g.log.info('Ownership is changed successfully')

    # Change the group to qa
    g.log.info('Changing the group...')
    command = ("cd %s/%s/ ; "
               "chgrp -R qa *"
               % (self.mounts[0].mountpoint,
                  test_meta_data_self_heal_folder))
    ret, out, err = g.run(self.mounts[0].client_system, command)
    self.assertEqual(ret, 0, err)
    g.log.info('Group is changed successfully')

    # Get arequal before getting bricks online
    g.log.info('Getting arequal before getting bricks online...')
    ret, result_before_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal before getting bricks online '
               'is successful')

    # Bring bricks online
    g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
    ret = bring_bricks_online(self.mnode, self.volname,
                              bricks_to_bring_offline)
    self.assertTrue(ret, 'Failed to bring bricks %s online' %
                    bricks_to_bring_offline)
    g.log.info('Bringing bricks %s online is successful',
               bricks_to_bring_offline)

    # Setting options
    g.log.info('Setting options...')
    options = {"self-heal-daemon": "on"}
    ret = set_volume_options(self.mnode, self.volname, options)
    self.assertTrue(ret, 'Failed to set options')
    g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

    # Wait for volume processes to be online
    g.log.info("Wait for volume processes to be online")
    ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
    self.assertTrue(ret, ("Volume process %s not online "
                          "despite waiting for 5 minutes", self.volname))
    g.log.info("Successful in waiting for volume %s processes to be "
               "online", self.volname)

    # Verify all processes of the volume are online
    g.log.info("Verifying volume's all process are online")
    ret = verify_all_process_of_volume_are_online(self.mnode,
                                                  self.volname)
    self.assertTrue(ret, ("Volume %s : All process are not online"
                          % self.volname))
    g.log.info("Volume %s : All process are online", self.volname)

    # Wait for self-heal-daemons to be online
    g.log.info("Waiting for self-heal-daemons to be online")
    ret = is_shd_daemonized(self.all_servers)
    self.assertTrue(ret, "No self-heal daemon process found")
    g.log.info("All self-heal-daemons are online")

    # Start healing
    ret = trigger_heal(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not started')
    g.log.info('Healing is started')

    # Monitor heal completion
    ret = monitor_heal_completion(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal has not yet completed')

    # Check if heal is completed
    ret = is_heal_complete(self.mnode, self.volname)
    self.assertTrue(ret, 'Heal is not complete')
    g.log.info('Heal is completed successfully')

    # Check for split-brain
    ret = is_volume_in_split_brain(self.mnode, self.volname)
    self.assertFalse(ret, 'Volume is in split-brain state')
    g.log.info('Volume is not in split-brain state')

    # Get arequal after getting bricks online
    g.log.info('Getting arequal after getting bricks online...')
    ret, result_after_online = collect_mounts_arequal(self.mounts)
    self.assertTrue(ret, 'Failed to get arequal')
    g.log.info('Getting arequal after getting bricks online '
               'is successful')

    # Checking arequals before bringing bricks online
    # and after bringing bricks online
    self.assertItemsEqual(result_before_online, result_after_online,
                          'Checksums are not equal')
    g.log.info('Checksums before bringing bricks online '
               'and after bringing bricks online are equal')

    # Add servers and the client to a single dict to check permissions
    nodes_to_check = {}
    all_bricks = get_all_bricks(self.mnode, self.volname)
    for brick in all_bricks:
        node, brick_path = brick.split(':')
        nodes_to_check[node] = brick_path
    nodes_to_check[self.mounts[0].client_system] = \
        self.mounts[0].mountpoint

    # Checking for user and group
    for node in nodes_to_check:
        # Get file list
        command = ("cd %s/%s/ ; "
                   "ls"
                   % (nodes_to_check[node],
                      test_meta_data_self_heal_folder))
        ret, out, err = g.run(node, command)
        file_list = out.split()

        g.log.info('Checking for user and group on %s...', node)
        conn = g.rpyc_get_connection(node)
        if conn is None:
            raise Exception("Unable to get connection on node %s" % node)

        for file_name in file_list:
            file_to_check = '%s/%s/%s' % (nodes_to_check[node],
                                          test_meta_data_self_heal_folder,
                                          file_name)

            g.log.info('Checking for permissions, user and group for %s',
                       file_name)

            # Check for permissions
            permissions = oct(
                conn.modules.os.stat(file_to_check).st_mode)[-3:]
            self.assertEqual(permissions, '444',
                             'Permissions %s is not equal to 444'
                             % permissions)
            g.log.info("Permissions are '444' for %s", file_name)

            # Check for user
            uid = conn.modules.os.stat(file_to_check).st_uid
            username = conn.modules.pwd.getpwuid(uid).pw_name
            self.assertEqual(username, 'qa',
                             'User %s is not equal qa' % username)
            g.log.info("User is 'qa' for %s", file_name)

            # Check for group
            gid = conn.modules.os.stat(file_to_check).st_gid
            groupname = conn.modules.grp.getgrgid(gid).gr_name
            self.assertEqual(groupname, 'qa',
                             'Group %s is not equal qa' % groupname)
            g.log.info("Group is 'qa' for %s", file_name)

        g.rpyc_close_connection(host=node)
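
# Note: the permission/owner verification above runs os.stat() on the
# remote node through rpyc. A minimal standalone sketch of the same check
# (hostname and brick path are hypothetical):
#
#     conn = g.rpyc_get_connection("server1.example.com")
#     mode = oct(conn.modules.os.stat("/bricks/brick0/file1").st_mode)[-3:]
#     assert mode == '444'
#     g.rpyc_close_connection(host="server1.example.com")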
def mount_volume(volname, mtype, mpoint, mserver, mclient, options='',
                 smbuser=None, smbpasswd=None, user='root'):
    """Mount the gluster volume with specified options.

    Args:
        volname (str): Name of the volume to mount.
        mtype (str): Protocol to be used to mount.
        mpoint (str): Mountpoint dir.
        mserver (str): Server to mount.
        mclient (str): Client from which it has to be mounted.

    Kwargs:
        options (str): Options for the mount command.
        smbuser (str): SMB USERNAME. Used with mtype = 'cifs'
        smbpasswd (str): SMB PASSWD. Used with mtype = 'cifs'
        user (str): Super user of the node mclient

    Returns:
        tuple: Tuple containing three elements (ret, out, err).
            (0, '', '') if already mounted.
            (1, '', '') if setup_samba_service fails in case of smb.
            (ret, out, err) of mount command execution otherwise.
    """
    if is_mounted(volname, mpoint, mserver, mclient, mtype, user):
        g.log.debug("Volume %s is already mounted at %s"
                    % (volname, mpoint))
        return (0, '', '')

    if options != '':
        options = "-o %s" % options

    if mtype == 'smb':
        if smbuser is None or smbpasswd is None:
            g.log.error("smbuser and smbpasswd to be passed as parameters "
                        "for cifs mounts")
            return (1, '', '')

        mcmd = ("net use %s \\\\%s\\gluster-%s "
                % (mpoint, mserver, volname) +
                " /user:%s " % smbuser + '"' + smbpasswd + '"')
        mcmd = powershell(mcmd)

        ret, out, err = g.run(mclient, mcmd, user=user)
        if ret != 0:
            g.log.error("net use command failed on windows client %s "
                        "failed: %s" % (mclient, err))
            return (ret, out, err)

        if out.startswith('Drive'):
            drv_ltr = out.split(' ')[1]
            g.log.info("Samba share mount success on windows client %s. "
                       "Share is : %s" % (mclient, drv_ltr))
            return (ret, drv_ltr, err)

        g.log.error("net use command successful but error in mount of "
                    "samba share for windows client %s for reason %s"
                    % (mclient, err))
        return (1, out, err)

    if mtype == 'nfs':
        if not options:
            options = "-o vers=3"
        elif options and 'vers' not in options:
            options = options + ",vers=3"

    if mserver:
        mcmd = ("mount -t %s %s %s:/%s %s"
                % (mtype, options, mserver, volname, mpoint))
    else:
        mcmd = ("mount -t %s %s %s %s"
                % (mtype, options, volname, mpoint))

    if mtype == 'cifs':
        if smbuser is None or smbpasswd is None:
            g.log.error("smbuser and smbpasswd to be passed as parameters "
                        "for cifs mounts")
            return (1, '', '')

        # Check if the client is running RHEL. If so, add specific options.
        cifs_options = ""
        try:
            conn = g.rpyc_get_connection(mclient, user=user)
            if conn is None:
                g.log.error("Unable to get connection to %s on node %s"
                            " in mount_volume()", user, mclient)
                return (1, '', '')

            os, version, name = conn.modules.platform.linux_distribution()
            if "Santiago" in name:
                cifs_options = "sec=ntlmssp"
        except Exception as e:
            g.log.error("Exception occurred while getting the platform "
                        "of node %s: %s", mclient, str(e))
            return (1, '', '')
        finally:
            g.rpyc_close_connection(host=mclient, user=user)

        mcmd = ("mount -t cifs -o username=%s,password=%s,%s "
                "\\\\\\\\%s\\\\gluster-%s %s"
                % (smbuser, smbpasswd, cifs_options, mserver,
                   volname, mpoint))

    # Create mount dir
    _, _, _ = g.run(mclient,
                    "test -d %s || mkdir -p %s" % (mpoint, mpoint),
                    user=user)

    # Create mount
    return g.run(mclient, mcmd, user=user)
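
# Illustrative usage (names are hypothetical): mount a volume over NFS;
# the helper injects '-o vers=3' automatically when no version is given.
#
#     ret, out, err = mount_volume("testvol", "nfs", "/mnt/testvol",
#                                  "server1.example.com",
#                                  "client1.example.com")
#     assert ret == 0, "Mount failed: %s" % err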
def setup_nfs_ganesha(cls):
    """
    Create nfs-ganesha cluster if it does not exist.
    Set client configurations for nfs-ganesha.

    Returns:
        True(bool): If setup is successful
        False(bool): If setup is failure
    """
    # pylint: disable = too-many-statements, too-many-branches
    # pylint: disable = too-many-return-statements
    cluster_exists = is_nfs_ganesha_cluster_exists(
        cls.servers_in_nfs_ganesha_cluster[0])
    if cluster_exists:
        is_healthy = is_nfs_ganesha_cluster_in_healthy_state(
            cls.servers_in_nfs_ganesha_cluster[0])

        if is_healthy:
            g.log.info("Nfs-ganesha Cluster exists and is in healthy "
                       "state. Skipping cluster creation...")
        else:
            g.log.info("Nfs-ganesha Cluster exists and is not in "
                       "healthy state.")
            g.log.info("Tearing down existing cluster which is not in "
                       "healthy state")
            ganesha_ha_file = ("/var/run/gluster/shared_storage/"
                               "nfs-ganesha/ganesha-ha.conf")

            g.log.info("Collecting server details of existing "
                       "nfs ganesha cluster")
            conn = g.rpyc_get_connection(
                cls.servers_in_nfs_ganesha_cluster[0], user="root")
            if not conn:
                tmp_node = cls.servers_in_nfs_ganesha_cluster[0]
                g.log.error("Unable to get connection to 'root' of node"
                            " %s", tmp_node)
                return False
            if not conn.modules.os.path.exists(ganesha_ha_file):
                g.log.error("Unable to locate %s", ganesha_ha_file)
                return False
            with conn.builtin.open(ganesha_ha_file, "r") as fhand:
                ganesha_ha_contents = fhand.read()
            g.rpyc_close_connection(
                host=cls.servers_in_nfs_ganesha_cluster[0], user="root")
            servers_in_existing_cluster = re.findall(
                r'VIP_(.*)\=.*', ganesha_ha_contents)

            ret = teardown_nfs_ganesha_cluster(
                servers_in_existing_cluster, force=True)
            if not ret:
                g.log.error("Failed to teardown unhealthy ganesha "
                            "cluster")
                return False

            g.log.info("Existing unhealthy cluster got teardown "
                       "successfully")

    if (not cluster_exists) or (not is_healthy):
        g.log.info("Creating nfs-ganesha cluster of %s nodes"
                   % str(cls.num_of_nfs_ganesha_nodes))
        g.log.info("Nfs-ganesha cluster node info: %s"
                   % cls.servers_in_nfs_ganesha_cluster)
        g.log.info("Nfs-ganesha cluster vip info: %s"
                   % cls.vips_in_nfs_ganesha_cluster)

        ret = create_nfs_ganesha_cluster(
            cls.ganesha_servers_hostname,
            cls.vips_in_nfs_ganesha_cluster)
        if not ret:
            g.log.error("Creation of nfs-ganesha cluster failed")
            return False

    if not is_nfs_ganesha_cluster_in_healthy_state(
            cls.servers_in_nfs_ganesha_cluster[0]):
        g.log.error("Nfs-ganesha cluster is not healthy")
        return False
    g.log.info("Nfs-ganesha Cluster exists and is in healthy state")

    ret = configure_ports_on_clients(cls.clients)
    if not ret:
        g.log.error("Failed to configure ports on clients")
        return False

    ret = ganesha_client_firewall_settings(cls.clients)
    if not ret:
        g.log.error("Failed to do firewall setting in clients")
        return False

    # Make sure servers and clients can resolve each other via /etc/hosts
    for server in cls.servers:
        for client in cls.clients:
            cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                   "echo \"%s %s\" >> /etc/hosts; fi"
                   % (client, socket.gethostbyname(client), client))
            ret, _, _ = g.run(server, cmd)
            if ret != 0:
                g.log.error("Failed to add entry of client %s in "
                            "/etc/hosts of server %s"
                            % (client, server))

    for client in cls.clients:
        for server in cls.servers:
            cmd = ("if [ -z \"$(grep -R \"%s\" /etc/hosts)\" ]; then "
                   "echo \"%s %s\" >> /etc/hosts; fi"
                   % (server, socket.gethostbyname(server), server))
            ret, _, _ = g.run(client, cmd)
            if ret != 0:
                g.log.error("Failed to add entry of server %s in "
                            "/etc/hosts of client %s"
                            % (server, client))
    return True
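
# Illustrative usage sketch (assumes a test base class carrying the
# 'servers_in_nfs_ganesha_cluster', 'vips_in_nfs_ganesha_cluster' and
# 'clients' attributes referenced above):
#
#     if not setup_nfs_ganesha(cls):
#         raise ExecutionError("nfs-ganesha setup failed")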
def get_disk_usage(mnode, path, user="root"):
    """Get disk usage of the given path.

    Args:
        mnode (str): node on which the path resides
        path (str): path for which disk usage is to be calculated

    Kwargs:
        user (str): username. Defaults to 'root' user.

    Returns:
        dict: disk usage in dict format, on success
        NoneType: None, on failure

    Example:
        get_disk_usage("abc.com", "/mnt/glusterfs")
    """
    inst = random.randint(10, 100)
    conn = g.rpyc_get_connection(mnode, user=user, instance=inst)
    if conn is None:
        g.log.error("Failed to get rpyc connection")
        return None
    try:
        cmd = 'stat -f ' + path
        p = conn.modules.subprocess.Popen(cmd, shell=True,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
        out, err = p.communicate()
        ret = p.returncode
        if ret != 0:
            g.log.error("Failed to execute stat command")
            return None
    finally:
        # Close the connection on all paths, not just on success
        g.rpyc_close_connection(host=mnode, user=user, instance=inst)

    res = ''.join(out)
    match = re.match(r'.*Block size:\s(\d+).*Blocks:\sTotal:\s(\d+)\s+?'
                     r'Free:\s(\d+)\s+?Available:\s(\d+).*Inodes:\s'
                     r'Total:\s(\d+)\s+?Free:\s(\d+)', res, re.S)
    if match is None:
        g.log.error("Regex mismatch in get_disk_usage()")
        return None

    keys = ['b_size', 'b_total', 'b_free', 'b_avail', 'i_total', 'i_free']
    val = list(match.groups())
    info = dict(zip(keys, val))

    usage_info = dict()
    usage_info['total'] = ((int(info['b_total']) * int(info['b_size'])) /
                           ONE_GB_BYTES)
    usage_info['free'] = ((int(info['b_free']) * int(info['b_size'])) /
                          ONE_GB_BYTES)
    usage_info['used_percent'] = (
        100 - (100.0 * usage_info['free'] / usage_info['total']))
    usage_info['total_inode'] = int(info['i_total'])
    usage_info['free_inode'] = int(info['i_free'])
    usage_info['used_percent_inode'] = (
        100 - (100.0 * usage_info['free_inode'] /
               usage_info['total_inode']))
    usage_info['used'] = usage_info['total'] - usage_info['free']
    usage_info['used_inode'] = (usage_info['total_inode'] -
                                usage_info['free_inode'])
    return usage_info
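
# Illustrative usage (hostname and path are hypothetical): sizes are
# reported in GB and percentages as floats.
#
#     usage = get_disk_usage("server1.example.com", "/mnt/glusterfs")
#     if usage and usage['used_percent'] > 90.0:
#         g.log.warn("Mount is over 90% full")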
def create_nfs_passwordless_ssh(mnode, gnodes, guser='root'):
    """
    Enable key-based SSH authentication without password on all the
    HA nodes.

    Args:
        mnode (str): Hostname of the ganesha maintenance node.
        gnodes (list): Hostnames of all ganesha nodes including the
            maintenance node.
        guser (str): User for setting up passwordless ssh.

    Returns:
        True(bool): On success
        False(bool): On failure
    """
    loc = "/var/lib/glusterd/nfs/"
    mconn_inst = random.randint(20, 100)
    mconn = g.rpyc_get_connection(host=mnode, instance=mconn_inst)

    if not mconn.modules.os.path.isfile('/root/.ssh/id_rsa'):
        # Generate a key pair on mnode if not already present
        if not mconn.modules.os.path.isfile('%s/secret.pem' % loc):
            ret, _, _ = g.run(
                mnode, "ssh-keygen -f %s/secret.pem -q -N ''" % loc)
            if ret != 0:
                g.log.error("Failed to generate the secret pem file")
                return False
            g.log.info("Key generated on %s" % mnode)
    else:
        mconn.modules.shutil.copyfile("/root/.ssh/id_rsa",
                                      "%s/secret.pem" % loc)
        g.log.info("Copying the id_rsa.pub to secret.pem.pub")
        mconn.modules.shutil.copyfile("/root/.ssh/id_rsa.pub",
                                      "%s/secret.pem.pub" % loc)

    # Create passwordless ssh from mnode to all ganesha nodes
    for gnode in gnodes:
        gconn_inst = random.randint(20, 100)
        gconn = g.rpyc_get_connection(gnode, user=guser,
                                      instance=gconn_inst)
        try:
            glocal = gconn.modules.os.path.expanduser('~')
            gfhand = gconn.builtin.open("%s/.ssh/authorized_keys"
                                        % glocal, "a")
            with mconn.builtin.open("/root/.ssh/id_rsa.pub", 'r') as fhand:
                for line in fhand:
                    gfhand.write(line)
            gfhand.close()
        except Exception as exep:
            g.log.error("Exception occurred while trying to establish "
                        "passwordless ssh from %s@%s to %s@%s. "
                        "Exception: %s"
                        % ('root', mnode, guser, gnode, exep))
            return False
        finally:
            g.rpyc_close_connection(
                host=gnode, user=guser, instance=gconn_inst)

    g.rpyc_close_connection(host=mnode, instance=mconn_inst)

    # Copy the ssh key pair from mnode to all the nodes in the
    # Ganesha-HA cluster
    g.log.info("Copy the ssh key pair from %s to other nodes in the "
               "Ganesha-HA cluster" % mnode)
    for gnode in gnodes:
        # Add ganesha nodes to known_hosts
        g.run(mnode, "ssh-keyscan -H %s >> ~/.ssh/known_hosts" % gnode)
        if gnode != mnode:
            cmd = ("scp -i %s/secret.pem %s/secret.* %s@%s:%s/"
                   % (loc, loc, guser, gnode, loc))
            ret, _, _ = g.run(mnode, cmd)
            if ret != 0:
                g.log.error("Failed to copy the ssh key pair from "
                            "%s to %s", mnode, gnode)
                return False
    return True
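
# Illustrative usage (hostnames are hypothetical): the maintenance node
# must be included in the node list so the key pair lands on every HA node.
#
#     ha_nodes = ["node1.example.com", "node2.example.com",
#                 "node3.example.com"]
#     if not create_nfs_passwordless_ssh(ha_nodes[0], ha_nodes):
#         g.log.error("Failed to set up passwordless ssh")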
def enable_pvc_resize(master_node):
    '''
    This function edits the /etc/origin/master/master-config.yaml file
    to enable the pv_resize feature and restarts the atomic-openshift
    service on the master node.

    Args:
        master_node (str): hostname of the master node on which the
            master-config.yaml file is to be edited

    Returns:
        bool: True if successful, otherwise raises an exception
    '''
    version = get_openshift_version()
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    try:
        conn = g.rpyc_get_connection(master_node, user="root")
        if conn is None:
            err_msg = ("Failed to get rpyc connection of node %s"
                       % master_node)
            g.log.error(err_msg)
            raise ExecutionError(err_msg)

        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'r') as f:
            data = yaml.load(f)
            dict_add = data['admissionConfig']['pluginConfig']
            if "PersistentVolumeClaimResize" in dict_add:
                g.log.info("master-config.yaml file is already edited")
                return True
            dict_add['PersistentVolumeClaimResize'] = {
                'configuration': {
                    'apiVersion': 'v1',
                    'disable': 'false',
                    'kind': 'DefaultAdmissionConfig'}}
            data['admissionConfig']['pluginConfig'] = dict_add
            kube_config = data['kubernetesMasterConfig']
            for key in ('apiServerArguments', 'controllerArguments'):
                kube_config[key] = (
                    kube_config.get(key)
                    if isinstance(kube_config.get(key), dict) else {})
                value = ['ExpandPersistentVolumes=true']
                kube_config[key]['feature-gates'] = value

        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'w+') as f:
            yaml.dump(data, f, default_flow_style=False)
    except Exception as err:
        raise ExecutionError("failed to edit master-config.yaml file "
                             "%s on %s" % (err, master_node))
    finally:
        g.rpyc_close_connection(master_node, user="root")

    g.log.info("successfully edited master-config.yaml file "
               "%s" % master_node)

    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)

    # Wait for the API service to be ready after the restart
    for w in waiter.Waiter(timeout=120, interval=1):
        try:
            cmd_run("oc get nodes", master_node)
            return True
        except AssertionError:
            continue
    err_msg = "Exceeded 120s timeout waiting for OCP API to start responding."
    g.log.error(err_msg)
    raise ExecutionError(err_msg)
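
# Illustrative usage (hostname is hypothetical): enable_pvc_resize()
# raises on failure, so callers typically just invoke it before running
# resize tests.
#
#     enable_pvc_resize("master.example.com")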