def get_pool_acl_list(self, uuid):
    """Get the daos pool acl list by running ``dmg pool get-acl``.

    Args:
        uuid (str): pool uuid passed to the ``--pool`` option.

    Returns:
        list: the dmg output lines that hold a user entry
            ("A::<name>@:<perms>") or a group entry
            ("A:G:<name>@:<perms>"), in the order they were printed.

    """
    dmg = DmgCommand(os.path.join(self.prefix, "bin"))
    dmg.request.value = "pool"
    dmg.action.value = "get-acl --pool " + uuid

    port = self.params.get("port", "/run/server_config/*")
    host_port_pairs = ",".join(
        "{}:{}".format(server, port) for server in self.hostlist_servers)
    dmg.hostlist.update(host_port_pairs, "dmg.hostlist")
    output = dmg.run()

    # Each supported entry kind: (required line prefix, pattern to confirm).
    entry_kinds = (
        ("A::", r"A::(.+)@:(.*)"),
        ("A:G:", r"A:G:(.+)@:(.*)"),
    )
    acl_entries = []
    for text in output.stdout.splitlines():
        for prefix, pattern in entry_kinds:
            if text.startswith(prefix):
                if re.search(pattern, text):
                    acl_entries.append(text)
                # A line can only match one prefix
                break
    return acl_entries
def update_pool_acl_entry(self, uuid, action, entry):
    """Update or delete a daos pool acl entry with the dmg tool.

    Args:
        uuid (str): pool uuid.
        action (str): "update" to run ``dmg pool update-acl --entry`` or
            "delete" to run ``dmg pool delete-acl --principal``.
        entry (str): pool acl entry (for "update") or principal (for
            "delete") to be applied.

    Returns:
        None. Any other action fails the test through self.fail().

    """
    # Compare by value: 'is' only matches when both strings happen to be the
    # same object, which is not guaranteed for strings built at runtime.
    if action == "delete":
        acl_action = "delete-acl --pool " + uuid + " --principal " + entry
    elif action == "update":
        acl_action = "update-acl --pool " + uuid + " --entry " + entry
    else:
        acl_action = None
        self.fail("##update_pool_acl_entry, action: {} is not supported."
                  "\n supported action: update, delete.".format(action))

    dmg = DmgCommand(os.path.join(self.prefix, "bin"))
    dmg.request.value = "pool"
    dmg.action.value = acl_action

    port = self.params.get("port", "/run/server_config/*")
    servers_with_ports = [
        "{}:{}".format(host, port) for host in self.hostlist_servers]
    dmg.hostlist.update(",".join(servers_with_ports), "dmg.hostlist")
    result = dmg.run()
    self.log.info(
        " At update_pool_acl_entry, dmg.run result=\n %s", result)
def test_dmg_nvme_scan_basic(self):
    """Run the basic dmg nvme storage scan.

    JIRA ID: DAOS-2485

    Test Description:
        Test basic dmg functionality to scan the nvme storage on the
        system.

    :avocado: tags=all,tiny,pr,dmg,nvme_scan,basic
    """
    # Build the dmg command from the test parameters
    dmg = DmgCommand(os.path.join(self.prefix, "bin"))
    dmg.get_params(self)

    # Point the command at every server, using the configured port
    port = self.params.get("port", "/run/server_config/*")
    dmg.hostlist = [
        "{}:{}".format(server, port) for server in self.hostlist_servers]

    try:
        dmg.run()
    except process.CmdError as error:
        self.fail("dmg command failed: {}".format(error))
class NvmeHealth(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """Test Class Description: To validate NVMe health test cases.

    :avocado: recursive
    """

    def test_monitor_for_large_pools(self):
        """Jira ID: DAOS-4722.

        Test Description:
            Test Health monitor for large number of pools.

        Use Case:
            This tests will create the 40 number of pools and verify the
            dmg list-pools, device-health and nvme-health works for all
            pools.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium
        :avocado: tags=nvme
        :avocado: tags=nvme_health
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        pool_count = self.params.get("number_of_pools", '/run/pool/*')
        used_fraction = self.params.get(
            "pool_used_percentage", '/run/pool/*')
        used_fraction = used_fraction / 100
        storage = self.get_max_storage_sizes()

        # Split the usable share of the storage evenly between the pools;
        # index 0 of the storage sizes is used for SCM and index 1 for NVMe
        nvme_per_pool = int((storage[1] * used_fraction) / pool_count)
        scm_per_pool = int((storage[0] * used_fraction) / pool_count)

        # Create the large number of pools
        self.pool = []
        for number in range(pool_count):
            self.log.info("-- Creating pool number = %s", number)
            new_pool = self.get_pool(create=False)
            self.pool.append(new_pool)
            new_pool.scm_size.update(scm_per_pool, "scm_size")
            new_pool.nvme_size.update(nvme_per_pool, "nvme_size")
            new_pool.create()

        # Initialize the dmg command
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        allow_insecure = self.server_managers[0].get_config_value(
            "allow_insecure")
        self.dmg.insecure.update(allow_insecure, "dmg.insecure")

        # List all pools: dmg storage query list-pools
        self.dmg.set_sub_command("storage")
        storage_cmd = self.dmg.sub_command_class
        storage_cmd.set_sub_command("query")
        storage_cmd.sub_command_class.set_sub_command("list-pools")
        for server in self.hostlist_servers:
            self.dmg.hostlist = server
            try:
                listing = self.dmg.run()
            except CommandFailure as error:
                self.fail("dmg command failed: {}".format(error))
            # Verify all pools UUID listed as part of query
            for item in self.pool:
                pool_uuid = item.uuid.lower()
                if pool_uuid not in listing.stdout_text:
                    self.fail(
                        'Pool uuid {} not found in smd query'.format(
                            pool_uuid))

        # Get the device ID from all the servers.
        device_ids = get_device_ids(self.dmg, self.hostlist_servers)

        # Get the device health
        for server in device_ids:
            self.dmg.hostlist = server
            for device in device_ids[server]:
                try:
                    health = self.dmg.storage_query_device_health(device)
                except CommandFailure as error:
                    self.fail(
                        "dmg get device states failed {}".format(error))
                if 'State:NORMAL' not in health.stdout_text:
                    self.fail("device {} on host {} is not NORMAL".format(
                        device, server))

        # Get the nvme-health
        try:
            self.dmg.storage_scan_nvme_health()
        except CommandFailure as error:
            self.fail(
                "dmg storage scan --nvme-health failed {}".format(error))
def pool_acl_verification(self, current_user_acl, read, write):
    """Verify daos pool security with an acl file.

    Steps:
        (1)Setup dmg tool for creating a pool
        (2)Generate acl file with permissions
        (3)Create a pool with acl
        (4)Verify the pool create status
        (5)Get the pool's acl list
        (6)Verify pool read operation
        (7)Verify pool write operation
        (8)Cleanup user and destroy pool

    Args:
        current_user_acl: acl with read write access credential.
        read: expecting read permission.
        write: expecting write permission.

    Returns:
        None. The test is failed (testlog reported and execution stopped)
        when the pool create output cannot be used.

    """
    # (1)Create daos_shell command
    create_cmd = DmgCommand(os.path.join(self.prefix, "bin"))
    create_cmd.get_params(self)
    port = self.params.get("port", "/run/server_config/*", 10001)
    acl_file_name = self.params.get(
        "acl_file", "/run/pool_acl/*", "acl_test.txt")
    acl_file = os.path.join(self.tmp, acl_file_name)
    num_user = self.params.get("num_user", "/run/pool_acl/*")
    num_group = self.params.get("num_group", "/run/pool_acl/*")
    hostlist = ",".join(
        "{}:{}".format(server, port) for server in self.hostlist_servers)
    create_cmd.hostlist.update(hostlist, "dmg.hostlist")
    self.log.info(" (1)dmg= %s", create_cmd)

    # (2)Generate acl file with permissions
    self.log.info(" (2)Generate acl file with user/group permissions")
    permission_list = self.create_pool_acl(
        num_user, num_group, current_user_acl, acl_file)

    # (3)Create a pool with acl
    self.log.info(" (3)Create a pool with acl")
    create_cmd.action_command.acl_file.value = acl_file
    create_cmd.exit_status_exception = False
    create_result = create_cmd.run()

    # (4)Verify the pool create status
    self.log.info(" (4)dmg.run() result=\n%s", create_result)
    if create_result.stderr != "":
        self.fail("##(4)Unable to parse pool uuid and svc.")
    uuid, svc = dmg_utils.get_pool_uuid_service_replicas_from_stdout(
        create_result.stdout)

    # (5)Get the pool's acl list
    #    dmg pool get-acl --pool <UUID>
    self.log.info(" (5)Get a pool's acl list by: "
                  "dmg pool get-acl --pool --hostlist")
    pool_acl_list = self.get_pool_acl_list(uuid)
    self.log.info(" pool original permission_list: %s", permission_list)
    self.log.info(" pool get_acl permission_list: %s", pool_acl_list)

    # (6)Verify pool read operation
    #    daos pool query --pool <uuid>
    self.log.info(" (6)Verify pool read by: daos pool query --pool")
    self.verify_pool_readwrite(svc, uuid, "read", expect=read)

    # (7)Verify pool write operation
    #    daos continer create --pool <uuid>
    self.log.info(" (7)Verify pool write by: daos continer create --pool")
    self.verify_pool_readwrite(svc, uuid, "write", expect=write)

    # (8)Cleanup user and destroy pool
    self.log.info(" (8)Cleanup user and destroy pool")
    self.cleanup_user_group(num_user, num_group)
    destroy_cmd = DmgCommand(os.path.join(self.prefix, "bin"))
    destroy_cmd.request.value = "pool"
    destroy_cmd.action.value = "destroy --pool={}".format(uuid)
    destroy_cmd.hostlist.update(hostlist, "dmg.hostlist")
    destroy_cmd.run()
class NvmeHealth(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """Test Class Description: To validate NVMe health test cases.

    :avocado: recursive
    """

    @skipForTicket("DAOS-7011")
    def test_monitor_for_large_pools(self):
        """Jira ID: DAOS-4722.

        Test Description:
            Test Health monitor for large number of pools.

        Use Case:
            This tests will create the 40 number of pools and verify the
            dmg list-pools, device-health and nvme-health works for all
            pools.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=nvme_health
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        pool_count = self.params.get("number_of_pools", '/run/pool/*')

        # Stop the servers to run SPDK too to get the server capacity
        self.stop_servers()
        nvme_capacity = self.get_nvme_max_capacity()
        self.start_servers()

        # Create the pools from 80% of the available storage space
        nvme_per_pool = int((nvme_capacity * 0.80) / pool_count)

        # Create the large number of pools
        self.pool = []
        for _ in range(pool_count):
            new_pool = TestPool(self.context, self.get_dmg_command())
            new_pool.get_params(self)
            # SCM size is 10% of NVMe
            scm_per_pool = int(nvme_per_pool * 0.10)
            new_pool.scm_size.update('{}'.format(scm_per_pool))
            new_pool.nvme_size.update('{}'.format(nvme_per_pool))
            new_pool.create()
            self.pool.append(new_pool)

        # Initialize the dmg command
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        allow_insecure = self.server_managers[0].get_config_value(
            "allow_insecure")
        self.dmg.insecure.update(allow_insecure, "dmg.insecure")

        # List all pools: dmg storage query list-pools
        self.dmg.set_sub_command("storage")
        storage_cmd = self.dmg.sub_command_class
        storage_cmd.set_sub_command("query")
        storage_cmd.sub_command_class.set_sub_command("list-pools")
        for server in self.hostlist_servers:
            self.dmg.hostlist = server
            try:
                listing = self.dmg.run()
            except CommandFailure as error:
                self.fail("dmg command failed: {}".format(error))
            # Verify all pools UUID listed as part of query
            for item in self.pool:
                pool_uuid = item.uuid.lower()
                if pool_uuid not in listing.stdout_text:
                    self.fail(
                        'Pool uuid {} not found in smd query'.format(
                            pool_uuid))

        # Get the device ID from all the servers.
        device_ids = get_device_ids(self.dmg, self.hostlist_servers)

        # Get the device health
        for server in device_ids:
            self.dmg.hostlist = server
            for device in device_ids[server]:
                try:
                    health = self.dmg.storage_query_device_health(device)
                except CommandFailure as error:
                    self.fail(
                        "dmg get device states failed {}".format(error))
                if 'State:NORMAL' not in health.stdout_text:
                    self.fail("device {} on host {} is not NORMAL".format(
                        device, server))

        # Get the nvme-health
        try:
            self.dmg.storage_scan_nvme_health()
        except CommandFailure as error:
            self.fail(
                "dmg storage scan --nvme-health failed {}".format(error))
class CSumErrorLog(DaosCoreBase):
    """Test Class Description: Verify checksum error counters.

    This test runs daos_test -z (Checksum tests) and verifies whether
    Checksum Error Counters are incremented in the NVME device due to
    checksum fault injection.

    :avocado: recursive
    """

    # pylint: disable=too-many-instance-attributes
    def setUp(self):
        """Prepare a ``dmg storage query`` command for the first server."""
        super(CSumErrorLog, self).setUp()
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.hostlist = self.hostlist_servers[0]
        self.dmg.insecure.update(
            self.server_managers[0].get_config_value("allow_insecure"),
            "dmg.insecure")
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")

    def get_nvme_device_id(self):
        """Get the first device UUID listed by ``dmg storage query smd``.

        Returns:
            str: second field of the first output line starting with
                "UUID:", or None if no such line is printed.

        """
        query = self.dmg.sub_command_class.sub_command_class
        query.set_sub_command("smd")
        query.sub_command_class.devices.value = True
        query.sub_command_class.pools.value = True
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))

        for line in result.stdout.splitlines():
            line = line.strip()
            if line.startswith("UUID:"):
                return line.split()[1]
        return None

    def get_checksum_error_value(self, device_id=None):
        """Get the checksum error count of a device.

        Runs ``dmg storage query blobstore-health`` for the device and
        reads the third field of the first output line starting with
        "Checksum".

        Args:
            device_id (str, optional): device UUID to query. The test is
                failed if it is None.

        Returns:
            int: checksum error count. The test is failed if the dmg output
                has no "Checksum" line, rather than returning None that
                would silently defeat the later counter comparison.

        """
        if device_id is None:
            self.fail("No device id provided")

        query = self.dmg.sub_command_class.sub_command_class
        query.set_sub_command("blobstore-health")
        query.sub_command_class.devuuid.value = "{}".format(device_id)
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))

        for line in result.stdout.splitlines():
            line = line.strip()
            if line.startswith("Checksum"):
                return int(line.split()[2])

        self.fail(
            "No checksum error count found for device {}".format(device_id))
        return None

    def test_csum_error_logging(self):
        """Verify the checksum error counter increases after the subtest.

        Test ID: DAOS-3927

        Test Description:
            Write Avocado Test to verify single data after pool/container
            disconnect/reconnect.

        :avocado: tags=all,pr,hw,medium,ib2,csum_error_log
        """
        dev_id = self.get_nvme_device_id()
        self.log.info("%s", dev_id)
        csum = self.get_checksum_error_value(dev_id)
        self.log.info("Checksum Errors : %d", csum)
        DaosCoreBase.run_subtest(self)
        csum_latest = self.get_checksum_error_value(dev_id)
        self.log.info("Checksum Errors : %d", csum_latest)
        self.assertTrue(csum_latest > csum,
                        "Checksum Error Log not incremented")
        self.log.info("Checksum Error Logging Test Passed")
class CSumErrorLog(DaosCoreBase):
    """Test Class Description: Verify checksum error counters.

    This test runs daos_test -z (Checksum tests) and verifies whether
    Checksum Error Counters are incremented in the NVME device due to
    checksum fault injection.

    :avocado: recursive
    """

    # pylint: disable=too-many-instance-attributes
    def setUp(self):
        """Prepare a ``dmg storage query`` command for the first server."""
        super(CSumErrorLog, self).setUp()
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.hostlist = self.hostlist_servers[0]
        self.dmg.insecure.update(
            self.server_managers[0].get_config_value("allow_insecure"),
            "dmg.insecure")
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")

    def _run_and_get_first_device(self):
        """Run self.dmg and return the first SMD device from its JSON output.

        The test is failed if the command raises process.CmdError or if the
        output reports any host errors.

        Returns:
            dict: first entry of the first non-empty
                host_storage_map[*]['storage']['smd_info']['devices'] list,
                or None if every host reports no devices.

        """
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))

        data = json.loads(result.stdout)
        # Truthiness also copes with a JSON null in place of an empty map
        if data['host_errors']:
            self.fail("dmg command failed: {}".format(data['host_errors']))

        for host_storage in data['host_storage_map'].values():
            devices = host_storage['storage']['smd_info']['devices']
            if devices:
                return devices[0]
        return None

    def get_nvme_device_id(self):
        """Get the first device UUID from ``dmg storage query list-devices``.

        Returns:
            str: 'uuid' of the first device found, or None if no host
                reports any device.

        """
        self.dmg.json.value = True
        query = self.dmg.sub_command_class.sub_command_class
        query.set_sub_command("list-devices")
        device = self._run_and_get_first_device()
        if device is None:
            return None
        return device['uuid']

    def get_checksum_error_value(self, device_id=None):
        """Get the checksum error count of a device.

        Runs ``dmg storage query device-health`` for the device with JSON
        output enabled.

        Args:
            device_id (str, optional): device UUID to query. The test is
                failed if it is None.

        Returns:
            int: the device's ['health']['checksum_errors'] value. The test
                is failed if no device is reported, rather than returning
                None that would silently defeat the counter comparison.

        """
        if device_id is None:
            self.fail("No device id provided")

        self.dmg.json.value = True
        query = self.dmg.sub_command_class.sub_command_class
        query.set_sub_command("device-health")
        query.sub_command_class.uuid.value = device_id
        device = self._run_and_get_first_device()
        if device is None:
            self.fail(
                "No health data returned for device {}".format(device_id))
        return device['health']['checksum_errors']

    def test_csum_error_logging(self):
        """Verify the checksum error counter increases after the subtest.

        Test ID: DAOS-3927

        Test Description:
            Write Avocado Test to verify single data after pool/container
            disconnect/reconnect.

        :avocado: tags=all,pr,hw,medium,ib2,csum_error_log
        """
        dev_id = self.get_nvme_device_id()
        self.log.info("%s", dev_id)
        csum = self.get_checksum_error_value(dev_id)
        self.log.info("Checksum Errors : %d", csum)
        DaosCoreBase.run_subtest(self)
        csum_latest = self.get_checksum_error_value(dev_id)
        self.log.info("Checksum Errors : %d", csum_latest)
        self.assertTrue(csum_latest > csum,
                        "Checksum Error Log not incremented")
        self.log.info("Checksum Error Logging Test Passed")
class TestPool(TestDaosApiBase):
    """A class for functional testing of DaosPools objects."""

    # Constants to define whether to use API or dmg to create and destroy
    # pool.
    USE_API = "API"
    USE_DMG = "dmg"

    def __init__(self, context, log=None, cb_handler=None, dmg_bin_path=None):
        # pylint: disable=unused-argument
        """Initialize a TestPool object.

        Note: 'log' is now a defunct argument and will be removed in the
            future

        Args:
            context (DaosContext): context passed to the DaosPool and
                DaosServer objects created by this class
            log (logging): logging object used to report the pool status
            cb_handler (CallbackHandler, optional): callback object to use
                with the API methods. Defaults to None.
            dmg_bin_path (str, optional): directory where dmg is installed.
                Required to create/destroy the pool with dmg. Defaults to
                None.
        """
        super(TestPool, self).__init__("/run/pool/*", cb_handler)
        self.context = context
        self.uid = os.geteuid()
        self.gid = os.getegid()

        self.mode = BasicParameter(None)
        self.name = BasicParameter(None)            # server group name
        self.svcn = BasicParameter(None)
        self.target_list = BasicParameter(None)
        self.scm_size = BasicParameter(None)
        self.nvme_size = BasicParameter(None)
        # Set USE_API to use API or USE_DMG to use dmg. If it's not set, API
        # is used.
        self.control_method = BasicParameter(self.USE_API, self.USE_API)
        uname = getpass.getuser()
        gname = grp.getgrnam(uname)[0]
        self.username = BasicParameter(uname, uname)
        self.groupname = BasicParameter(gname, gname)

        self.pool = None
        self.uuid = None
        self.info = None
        self.svc_ranks = None
        self.connected = False

        self.dmg = None
        # Required to use dmg. It defined the directory where dmg is
        # installed. Use self.basepath + '/install/bin' in the test
        self.dmg_bin_path = dmg_bin_path
        if dmg_bin_path is not None:
            # We make dmg as the member of this class because the test would
            # have more flexibility over the usage of the command.
            self.dmg = DmgCommand(self.dmg_bin_path)
            self.dmg.insecure.value = True
            self.dmg.request.value = "pool"

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def create(self):
        """Create a pool with either API or dmg.

        To use dmg, the test needs to set control_method.value to USE_DMG
        prior to calling this method. The recommended way is to specify the
        pool block in yaml. For example,

            pool:
                control_method: dmg

        This tells this method to use dmg. The test also needs to set
        dmg_bin_path through the constructor if dmg is used. For example,

            self.pool = TestPool(self.context,
                                 dmg_bin_path=self.basepath + '/install/bin')

        If it wants to use --nsvc option, it needs to set the value to
        svcn.value. Otherwise, 1 is used. If it wants to use --group, it
        needs to set groupname.value. If it wants to use --user, it needs to
        set username.value. If it wants to add other options, directly set
        it to self.dmg.action_command. Refer dmg_utils.py pool_create method
        for more details.

        To test the negative case on create, the test needs to catch
        CommandFailure for dmg and DaosApiError for API. Thus, we need to
        make more than one line modification to the test only for this
        purpose. Currently, pool_svc is the only test that needs this
        change.

        Raises:
            DaosTestError: if dmg is requested but dmg_bin_path was not
                given to the constructor.
        """
        self.destroy()
        if self.target_list.value is not None:
            self.log.info(
                "Creating a pool on targets %s", self.target_list.value)
        else:
            self.log.info("Creating a pool")

        self.pool = DaosPool(self.context)

        if self.control_method.value == self.USE_API:
            kwargs = {
                "mode": self.mode.value,
                "uid": self.uid,
                "gid": self.gid,
                "scm_size": self.scm_size.value,
                "group": self.name.value
            }
            # Only pass the optional arguments that have been set
            for key in ("target_list", "svcn", "nvme_size"):
                value = getattr(self, key).value
                if value is not None:
                    kwargs[key] = value
            self._call_method(self.pool.create, kwargs)

            self.svc_ranks = [
                int(self.pool.svc.rl_ranks[index])
                for index in range(self.pool.svc.rl_nr)
            ]
        else:
            if self.dmg is None:
                raise DaosTestError(
                    "self.dmg is None. dmg_bin_path needs to be set through "
                    "the constructor of TestPool to create pool with dmg.")

            # Currently, there is one test that creates the pool over the
            # subset of the server hosts; pool/evict_test. To do so, the
            # test needs to set the rank(s) to target_list.value starting
            # from 0. e.g., if you're using 4 server hosts; wolf-1, wolf-2,
            # wolf-3, and wolf-4, and want to create a pool over the first
            # two hosts; wolf-1 and 2, then set the list [0, 1] to
            # target_list.value. We'll convert it to the comma separated
            # string and set it to dmg. For instance, [0, 1] will result in
            # dmg pool create -r 0,1. If you don't set target_list.value, -r
            # won't be used, in which case the pool is created over all the
            # server hosts.
            if self.target_list.value is None:
                ranks_comma_separated = None
            else:
                ranks_comma_separated = ",".join(
                    str(rank) for rank in self.target_list.value)

            # Call the dmg pool create command
            self.dmg.action.value = "create"
            self.dmg.get_action_command()
            # uid/gid used in API correspond to --user and --group in dmg.
            # group, or self.name.value, used in API is called server group
            # and it's different from the group name passed in to --group.
            # Server group isn't used in dmg. We don't pass it into the
            # command, but we'll still use it to set self.pool.group
            self.dmg.action_command.group.value = self.groupname.value
            self.dmg.action_command.user.value = self.username.value
            self.dmg.action_command.scm_size.value = self.scm_size.value
            self.dmg.action_command.ranks.value = ranks_comma_separated
            self.dmg.action_command.nsvc.value = self.svcn.value
            create_result = self.dmg.run()
            self.log.info("Result stdout = %s", create_result.stdout)
            self.log.info("Result exit status = %s", create_result.exit_status)

            # Get UUID and service replica from the output
            uuid_svc = get_pool_uuid_service_replicas_from_stdout(
                create_result.stdout)
            new_uuid = uuid_svc[0]
            service_replica = uuid_svc[1]

            # 3. Create DaosPool object. The process is similar to the one
            # in DaosPool.create, but there are some modifications
            if self.name.value is None:
                self.pool.group = None
            else:
                self.pool.group = ctypes.create_string_buffer(
                    self.name.value)
            # Modification 1: Use the length of service_replica returned by
            # dmg to calculate rank_t. Note that we assume we always get a
            # single number. I'm not sure if we ever get multiple numbers,
            # but in that case, we need to modify this implementation to
            # create a list out of the multiple numbers possibly separated
            # by comma
            service_replicas = [int(service_replica)]
            rank_t = ctypes.c_uint * len(service_replicas)
            # Modification 2: Use the service_replicas list to generate
            # rank. In DaosPool, we first use some garbage 999999 values and
            # let DAOS set the correct values, but we can't do that here, so
            # we need to set the correct rank value by ourself
            rank = rank_t(*service_replicas)
            rl_ranks = ctypes.POINTER(ctypes.c_uint)(rank)
            # Modification 3: Similar to 1. Use the length of
            # service_replicas list instead of self.svcn.value
            self.pool.svc = daos_cref.RankList(
                rl_ranks, len(service_replicas))

            # 4. Set UUID and attached to the DaosPool object
            self.pool.set_uuid_str(new_uuid)
            self.pool.attached = 1

        self.uuid = self.pool.get_uuid_str()

    @fail_on(DaosApiError)
    def connect(self, permission=1):
        """Connect to the pool.

        Args:
            permission (int, optional): connect permission; used as the bit
                position of the connect flag (flags = 1 << permission).
                Defaults to 1.

        Returns:
            bool: True if the pool has been connected; False if the pool was
                already connected or the pool is not defined.

        """
        if self.pool and not self.connected:
            kwargs = {"flags": 1 << permission}
            self.log.info(
                "Connecting to pool %s with permission %s (flag: %s)",
                self.uuid, permission, kwargs["flags"])
            self._call_method(self.pool.connect, kwargs)
            self.connected = True
            return True
        return False

    @fail_on(DaosApiError)
    def disconnect(self):
        """Disconnect from connected pool.

        Returns:
            bool: True if the pool has been disconnected; False if the pool
                was already disconnected or the pool is not defined.

        """
        if self.pool and self.connected:
            self.log.info("Disonnecting from pool %s", self.uuid)
            self._call_method(self.pool.disconnect, {})
            self.connected = False
            return True
        return False

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def destroy(self, force=1):
        """Destroy the pool with either API or dmg.

        It uses control_method member previously set, so if you want to use
        the other way for some reason, update it before calling this method.

        Args:
            force (int, optional): force flag. Defaults to 1.

        Returns:
            bool: True if the pool has been destroyed; False if the pool is
                not defined or control_method is neither USE_API nor
                USE_DMG.

        """
        if self.pool:
            self.disconnect()
            self.log.info("Destroying pool %s", self.uuid)

            if self.control_method.value == self.USE_API:
                if self.pool.attached:
                    self._call_method(self.pool.destroy, {"force": force})
            elif self.control_method.value == self.USE_DMG:
                if self.pool.attached:
                    self.dmg.action.value = "destroy"
                    self.dmg.get_action_command()
                    self.dmg.action_command.pool.value = self.uuid
                    self.dmg.action_command.force.value = force
                    self.dmg.run()
            else:
                self.log.error("Cannot destroy pool! Use USE_API or USE_DMG")
                return False

            self.pool = None
            self.uuid = None
            self.info = None
            self.svc_ranks = None
            return True

        return False

    @fail_on(DaosApiError)
    def get_info(self):
        """Query the pool for information.

        Connects to the pool if needed and sets the self.info attribute.
        Does nothing if the pool is not defined.
        """
        if self.pool:
            self.connect()
            self._call_method(self.pool.pool_query, {})
            self.info = self.pool.pool_info

    def check_pool_info(self, pi_uuid=None, pi_ntargets=None, pi_nnodes=None,
                        pi_ndisabled=None, pi_map_ver=None, pi_leader=None,
                        pi_bits=None):
        # pylint: disable=unused-argument
        """Check the pool info attributes.

        Note:
            Arguments may also be provided as a string with a number
            preceeded by '<', '<=', '>', or '>=' for other comparisions
            besides the default '=='.

        Args:
            pi_uuid (str, optional): pool uuid. Defaults to None.
            pi_ntargets (int, optional): number of targets. Defaults to None.
            pi_nnodes (int, optional): number of nodes. Defaults to None.
            pi_ndisabled (int, optional): number of disabled. Defaults to
                None.
            pi_map_ver (int, optional): pool map version. Defaults to None.
            pi_leader (int, optional): pool leader. Defaults to None.
            pi_bits (int, optional): pool bits. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all
                the specified values match; False otherwise

        """
        self.get_info()
        # locals() is evaluated once, before any other local is bound, so it
        # only holds self and the expected pi_* arguments.
        checks = [
            (key,
             c_uuid_to_str(getattr(self.info, key))
             if key == "pi_uuid" else getattr(self.info, key),
             val)
            for key, val in locals().items()
            if key != "self" and val is not None]
        return self._check_info(checks)

    def check_pool_space(self, ps_free_min=None, ps_free_max=None,
                         ps_free_mean=None, ps_ntargets=None, ps_padding=None):
        """Check the pool info space attributes.

        Note:
            Arguments may also be provided as a string with a number
            preceeded by '<', '<=', '>', or '>=' for other comparisions
            besides the default '=='.

        Args:
            ps_free_min (list, optional): minimum free space per device.
                Defaults to None.
            ps_free_max (list, optional): maximum free space per device.
                Defaults to None.
            ps_free_mean (list, optional): mean free space per device.
                Defaults to None.
            ps_ntargets (int, optional): number of targets. Defaults to None.
            ps_padding (int, optional): space padding. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all
                the specified values match; False otherwise

        """
        self.get_info()
        per_device = (
            ("ps_free_min", ps_free_min),
            ("ps_free_max", ps_free_max),
            ("ps_free_mean", ps_free_mean),
        )
        scalars = (
            ("ps_ntargets", ps_ntargets),
            ("ps_padding", ps_padding),
        )
        checks = []
        for key, val in per_device:
            # Only list arguments are checked, one entry per device index
            if isinstance(val, list):
                for index, item in enumerate(val):
                    checks.append((
                        "{}[{}]".format(key, index),
                        getattr(self.info.pi_space, key)[index],
                        item))
        for key, val in scalars:
            if val is not None:
                checks.append((key, getattr(self.info.pi_space, key), val))
        return self._check_info(checks)

    def check_pool_daos_space(self, s_total=None, s_free=None):
        # pylint: disable=unused-argument
        """Check the pool info daos space attributes.

        Note:
            Arguments may also be provided as a string with a number
            preceeded by '<', '<=', '>', or '>=' for other comparisions
            besides the default '=='.

        Args:
            s_total (list, optional): total space per device. Defaults to
                None.
            s_free (list, optional): free space per device. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all
                the specified values match; False otherwise

        """
        self.get_info()
        # locals() is evaluated once, before any other local is bound, so it
        # only holds self and the expected s_* arguments.
        checks = [
            ("{}_{}".format(key, index),
             getattr(self.info.pi_space.ps_space, key)[index],
             item)
            for key, val in locals().items()
            if key != "self" and val is not None
            for index, item in enumerate(val)]
        return self._check_info(checks)

    def check_rebuild_status(self, rs_version=None, rs_seconds=None,
                             rs_errno=None, rs_done=None, rs_padding32=None,
                             rs_fail_rank=None, rs_toberb_obj_nr=None,
                             rs_obj_nr=None, rs_rec_nr=None, rs_size=None):
        # pylint: disable=unused-argument
        # pylint: disable=too-many-arguments
        """Check the pool info rebuild attributes.

        Note:
            Arguments may also be provided as a string with a number
            preceeded by '<', '<=', '>', or '>=' for other comparisions
            besides the default '=='.

        Args:
            rs_version (int, optional): rebuild version. Defaults to None.
            rs_seconds (int, optional): rebuild seconds. Defaults to None.
            rs_errno (int, optional): rebuild error number. Defaults to None.
            rs_done (int, optional): rebuild done flag. Defaults to None.
            rs_padding32 (int, optional): padding. Defaults to None.
            rs_fail_rank (int, optional): rebuild fail target. Defaults to
                None.
            rs_toberb_obj_nr (int, optional): number of objects to be
                rebuilt. Defaults to None.
            rs_obj_nr (int, optional): number of rebuilt objects.
                Defaults to None.
            rs_rec_nr (int, optional): number of rebuilt records.
                Defaults to None.
            rs_size (int, optional): size of all rebuilt records.
                Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all
                the specified values match; False otherwise

        """
        self.get_info()
        # locals() is evaluated once, before any other local is bound, so it
        # only holds self and the expected rs_* arguments.
        checks = [
            (key, getattr(self.info.pi_rebuild_st, key), val)
            for key, val in locals().items()
            if key != "self" and val is not None]
        return self._check_info(checks)

    def rebuild_complete(self):
        """Determine if the pool rebuild is complete.

        Also logs the current rebuild status.

        Returns:
            bool: True if pool rebuild is complete; False otherwise

        """
        self.display_pool_rebuild_status()
        return self.info.pi_rebuild_st.rs_done == 1

    def wait_for_rebuild(self, to_start, interval=1):
        """Wait for the rebuild to start or end.

        Args:
            to_start (bool): whether to wait for rebuild to start or end
            interval (int): number of seconds to wait in between rebuild
                completion checks
        """
        self.log.info(
            "Waiting for rebuild to %s ...",
            "start" if to_start else "complete")
        # Waiting for the start ends once the done flag clears; waiting for
        # the end finishes once the done flag is set.
        while self.rebuild_complete() == to_start:
            self.log.info(
                "  Rebuild %s ...",
                "has not yet started" if to_start else "in progress")
            sleep(interval)
        self.log.info(
            "Rebuild %s detected", "start" if to_start else "completion")

    @fail_on(DaosApiError)
    def start_rebuild(self, ranks, daos_log):
        """Kill the specific server ranks using this pool.

        Args:
            ranks (list): a list of daos server ranks (int) to kill
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the server ranks have been killed and the ranks
                have been excluded from the pool; False if the pool is
                undefined

        """
        msg = "Killing DAOS ranks {} from server group {}".format(
            ranks, self.name.value)
        self.log.info(msg)
        daos_log.info(msg)
        for rank in ranks:
            server = DaosServer(self.context, self.name.value, rank)
            self._call_method(server.kill, {"force": 1})
        return self.exclude(ranks, daos_log)

    @fail_on(DaosApiError)
    def exclude(self, ranks, daos_log):
        """Manually exclude a rank from this pool.

        Args:
            ranks (list): a list daos server ranks (int) to exclude
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the ranks were excluded from the pool; False if
                the pool is undefined

        """
        if self.pool:
            msg = "Excluding server ranks {} from pool {}".format(
                ranks, self.uuid)
            self.log.info(msg)
            daos_log.info(msg)
            self._call_method(self.pool.exclude, {"rank_list": ranks})
            return True
        return False

    def check_files(self, hosts):
        """Check if pool files exist on the specified list of hosts.

        Args:
            hosts (list): list of hosts

        Returns:
            bool: True if the files for this pool exist on each host; False
                otherwise

        """
        return check_pool_files(self.log, hosts, self.uuid.lower())

    def write_file(self, orterun, processes, hostfile, size, timeout=60):
        """Write a file to the pool.

        Args:
            orterun (str): full path to the orterun command
            processes (int): number of processes to launch
            hostfile (str): value passed to the orterun --hostfile option
            size (int): size of the file to create in bytes
            timeout (int, optional): number of seconds before timing out the
                command. Defaults to 60 seconds.

        Returns:
            process.CmdResult: command execution result

        """
        self.log.info("Writing %s bytes to pool %s", size, self.uuid)
        env = {
            "DAOS_POOL": self.uuid,
            "DAOS_SVCL": "1",
            "DAOS_SINGLETON_CLI": "1",
            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
        }
        load_mpi("openmpi")
        # write_some_data.py is expected next to this module
        current_path = os.path.dirname(os.path.abspath(__file__))
        command = "{} --np {} --hostfile {} {} {} testfile".format(
            orterun, processes, hostfile,
            os.path.join(current_path, "write_some_data.py"), size)
        return process.run(command, timeout, True, False, "both", True, env)

    def get_pool_daos_space(self):
        """Get the pool info daos space attributes as a dictionary.

        Returns:
            dict: a dictionary of lists of the daos space attributes

        """
        self.get_info()
        keys = ("s_total", "s_free")
        return {key: getattr(self.info.pi_space.ps_space, key) for key in keys}

    def display_pool_daos_space(self, msg=None):
        """Display the pool info daos space attributes.

        Args:
            msg (str, optional): optional text to include in the output.
                Defaults to None.
        """
        daos_space = self.get_pool_daos_space()
        sizes = [
            "{}[{}]={}".format(key, index, item)
            for key in sorted(daos_space.keys())
            for index, item in enumerate(daos_space[key])
        ]
        self.log.info(
            "Pool %s space%s:\n  %s", self.uuid,
            " " + msg if isinstance(msg, str) else "", "\n  ".join(sizes))

    def get_pool_rebuild_status(self):
        """Get the pool info rebuild status attributes as a dictionary.

        Returns:
            dict: a dictionary of the rebuild status attributes

        """
        self.get_info()
        keys = (
            "rs_version", "rs_pad_32", "rs_errno", "rs_done",
            "rs_toberb_obj_nr", "rs_obj_nr", "rs_rec_nr")
        return {key: getattr(self.info.pi_rebuild_st, key) for key in keys}

    def display_pool_rebuild_status(self):
        """Display the pool info rebuild status attributes."""
        status = self.get_pool_rebuild_status()
        self.log.info(
            "Pool rebuild status: %s",
            ", ".join(
                ["{}={}".format(key, status[key]) for key in sorted(status)]))

    def read_data_during_rebuild(self, container):
        """Read data from the container while rebuild is active.

        Args:
            container (TestContainer): container from which to read data

        Returns:
            bool: True if all the data is read sucessfully befoire rebuild
                completes; False otherwise

        """
        container.open()
        self.log.info(
            "Reading objects in container %s during rebuild", self.uuid)

        # Attempt to read all of the data from the container during rebuild
        index = 0
        status = read_incomplete = index < len(container.written_data)
        while not self.rebuild_complete() and read_incomplete:
            try:
                status &= container.written_data[index].read_object(container)
            except DaosTestError as error:
                self.log.error(str(error))
                status = False
            index += 1
            read_incomplete = index < len(container.written_data)

        # Verify that all of the container data was read successfully
        if read_incomplete:
            self.log.error(
                "Rebuild completed before all the written data could be read")
            status = False
        elif not status:
            self.log.error("Errors detected reading data during rebuild")
        return status
def verify_pool_acl_prim_sec_groups(self, pool_acl_list, acl_file,
                                    uuid, svc):
    '''
    Description:
        Verify daos pool acl access with primary and secondary
        groups access permission.
    Args:
        pool_acl_list: pool acl entry list; one entry per secondary
            group is appended to this list in place.
        acl_file: acl file to be used.
        uuid: daos pool uuid.
        svc: daos pool svc.
    Return:
        None.
    '''
    # All of the test parameters live under the same yaml branch
    test_path = "/run/pool_acl/primary_secondary_group_test/*"
    primary_grp_perm = self.params.get("pg_permission", test_path)[0]
    sec_group = self.params.get("secondary_group_name", test_path)
    sec_group_perm = self.params.get("sg_permission", test_path)
    sec_group_rw = self.params.get("sg_read_write", test_path)
    current_group = grp.getgrgid(os.getegid())[0]

    for group in sec_group:
        add_del_user(self.hostlist_clients, "groupadd", group)

    # Always remove the secondary groups created above, even when one of
    # the steps below fails, so they are not left behind on the clients.
    try:
        cmd = "usermod -G " + ",".join(sec_group)
        self.log.info(" (8-1)verify_pool_acl_prim_sec_groups, cmd= %s", cmd)
        add_del_user(self.hostlist_clients, cmd, current_group)

        self.log.info(
            " (8-2)Before update sec_group permission, "
            "pool_acl_list= %s", pool_acl_list)
        for group, permission in zip(sec_group, sec_group_perm):
            # "none" in the test parameters stands for an empty permission
            if permission == "none":
                permission = ""
            pool_acl_list.append(acl_entry("group", group, permission))
        self.log.info(
            " (8-3)After update sec_group permission, "
            "pool_acl_list= %s", pool_acl_list)
        self.log.info(" pool acl_file= %s", acl_file)
        create_acl_file(acl_file, pool_acl_list)

        # Modify primary-group permission for secondary-group test: the
        # primary group entry is replaced by one with an empty permission
        grp_entry = acl_entry("group", current_group, primary_grp_perm)
        new_grp_entry = acl_entry("group", current_group, "")
        self.modify_acl_file_entry(acl_file, grp_entry, new_grp_entry)

        # dmg pool overwrite-acl --pool <uuid> --acl-file <file>
        dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        dmg.request.value = "pool"
        dmg.action.value = "overwrite-acl --pool={} --acl-file={}".format(
            uuid, acl_file)
        port = self.params.get("port", "/run/server_config/*", 10001)
        servers_with_ports = [
            "{}:{}".format(host, port) for host in self.hostlist_servers]
        dmg.hostlist.update(",".join(servers_with_ports), "dmg.hostlist")
        self.log.info(" (8-4)dmg= %s", dmg)
        result = dmg.run()
        self.log.info(" (8-5)dmg.run() result=\n %s", result)

        # Verify pool read operation
        # daos pool query --pool <uuid>
        self.log.info(" (8-6)Verify pool read by: daos pool query --pool")
        exp_read = sec_group_rw[0]
        self.verify_pool_readwrite(svc, uuid, "read", expect=exp_read)

        # Verify pool write operation
        # daos container create --pool <uuid>
        self.log.info(
            " (8-7)Verify pool write by: daos continer create pool")
        exp_write = sec_group_rw[1]
        self.verify_pool_readwrite(svc, uuid, "write", expect=exp_write)
    finally:
        for group in sec_group:
            add_del_user(self.hostlist_clients, "groupdel", group)