Пример #1
0
    def get_pool_acl_list(self, uuid):
        """Collect the ACL entry lines printed by ``dmg pool get-acl``.

        The command is sent to every host in ``self.hostlist_servers`` using
        the port read from the ``/run/server_config/*`` test parameters.
        Only output lines that start with ``A::`` (user entries) or ``A:G:``
        (group entries) and that contain a ``<principal>@:<permissions>``
        part are kept; every other line is ignored.

        Args:
            uuid: pool uuid; concatenated as text onto the ``--pool`` option.

        Returns:
            list: the matching output lines, unmodified, in output order.
        """
        dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        dmg.request.value = "pool"
        dmg.action.value = "get-acl --pool " + uuid
        port = self.params.get("port", "/run/server_config/*")
        host_port_pairs = []
        for server in self.hostlist_servers:
            host_port_pairs.append("{}:{}".format(server, port))
        dmg.hostlist.update(",".join(host_port_pairs), "dmg.hostlist")
        output_lines = dmg.run().stdout.splitlines()

        # Each supported entry prefix is paired with the pattern a line
        # carrying that prefix has to satisfy in order to be reported.
        entry_patterns = (
            ("A::", r"A::(.+)@:(.*)"),
            ("A:G:", r"A:G:(.+)@:(.*)"),
        )
        acl_entries = []
        for acl_line in output_lines:
            for prefix, pattern in entry_patterns:
                if acl_line.startswith(prefix):
                    if re.search(pattern, acl_line):
                        acl_entries.append(acl_line)
                    # The prefixes are mutually exclusive.
                    break
        return acl_entries
Пример #2
0
 def update_pool_acl_entry(self, uuid, action, entry):
     """Update or delete one pool ACL entry with the dmg tool.

     Runs ``dmg pool update-acl --pool <uuid> --entry <entry>`` or
     ``dmg pool delete-acl --pool <uuid> --principal <entry>`` against
     every host in ``self.hostlist_servers`` and logs the command result.

     Args:
         uuid: pool uuid; concatenated as text onto the ``--pool`` option.
         action: either "update" or "delete". Any other value fails the
             test through ``self.fail``.
         entry: the ACL entry to set (for "update") or the principal whose
             entry is removed (for "delete").

     Returns:
         None.
     """
     dmg = DmgCommand(os.path.join(self.prefix, "bin"))
     dmg.request.value = "pool"
     # Compare by value. The identity operator ("is") only matches equal
     # strings when the interpreter happens to share the same object, so a
     # valid action built at runtime could end up in the "not supported"
     # branch.
     if action == "delete":
         dmg.action.value = "delete-acl --pool " + uuid
         dmg.action.value += " --principal " + entry
     elif action == "update":
         dmg.action.value = "update-acl --pool " + uuid
         dmg.action.value += " --entry " + entry
     else:
         self.fail("##update_pool_acl_entry, action: {} is not supported."
                   "\n  supported action: update, delete.".format(action))
     port = self.params.get("port", "/run/server_config/*")
     servers_with_ports = [
         "{}:{}".format(host, port) for host in self.hostlist_servers
     ]
     dmg.hostlist.update(",".join(servers_with_ports), "dmg.hostlist")
     result = dmg.run()
     self.log.info(
         " At update_pool_acl_entry, dmg.run result=\n %s", result)
Пример #3
0
    def test_dmg_nvme_scan_basic(self):
        """Run the dmg command defined by the test parameters on all servers.

        JIRA ID: DAOS-2485
        Test Description: Test basic dmg functionality to scan the nvme
        storage on the system.

        :avocado: tags=all,tiny,pr,dmg,nvme_scan,basic
        """
        # Build the command and load its settings from the test parameters.
        dmg_cmd = DmgCommand(os.path.join(self.prefix, "bin"))
        dmg_cmd.get_params(self)

        # Address every server as "<host>:<port>".
        server_port = self.params.get("port", "/run/server_config/*")
        targets = []
        for server in self.hostlist_servers:
            targets.append("{}:{}".format(server, server_port))
        dmg_cmd.hostlist = targets

        # A command error is reported as a test failure.
        try:
            dmg_cmd.run()
        except process.CmdError as error:
            self.fail("dmg command failed: {}".format(error))
Пример #4
0
class NvmeHealth(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: To validate NVMe health test cases
    :avocado: recursive
    """
    def test_monitor_for_large_pools(self):
        """Jira ID: DAOS-4722.

        Test Description: Test Health monitor for large number of pools.
        Use Case: Create the configured number of pools (number_of_pools)
                  from the configured share (pool_used_percentage) of the
                  available storage, then verify that dmg list-pools,
                  device-health and nvme-health work for all pools.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,medium
        :avocado: tags=nvme
        :avocado: tags=nvme_health
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        no_of_pools = self.params.get("number_of_pools", '/run/pool/*')
        pool_capacity = self.params.get("pool_used_percentage", '/run/pool/*')
        # Divide by a float so that an integer percentage is not truncated to
        # zero by integer division when running under Python 2.
        pool_capacity = pool_capacity / 100.0
        storage = self.get_max_storage_sizes()

        # Split the usable share of the storage evenly between the pools.
        # storage[0] feeds the SCM size and storage[1] the NVMe size.
        single_pool_nvme_size = int((storage[1] * pool_capacity) / no_of_pools)
        single_pool_scm_size = int((storage[0] * pool_capacity) / no_of_pools)

        self.pool = []
        # Create the large number of pools. Each pool is stored in self.pool
        # before it is created.
        for _pool in range(no_of_pools):
            self.log.info("-- Creating pool number = %s", _pool)
            self.pool.append(self.get_pool(create=False))
            self.pool[-1].scm_size.update(single_pool_scm_size, "scm_size")
            self.pool[-1].nvme_size.update(single_pool_nvme_size, "nvme_size")
            self.pool[-1].create()

        # Initialize the dmg command, mirroring the server's allow_insecure
        # setting.
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.insecure.update(
            self.server_managers[0].get_config_value("allow_insecure"),
            "dmg.insecure")

        # List all pools: dmg storage query list-pools, one host at a time.
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")
        self.dmg.sub_command_class.sub_command_class.set_sub_command(
            "list-pools")
        for host in self.hostlist_servers:
            self.dmg.hostlist = host
            try:
                result = self.dmg.run()
            except CommandFailure as error:
                self.fail("dmg command failed: {}".format(error))
            # Verify all pool UUIDs are listed as part of the query.
            for pool in self.pool:
                if pool.uuid.lower() not in result.stdout_text:
                    self.fail('Pool uuid {} not found in smd query'.format(
                        pool.uuid.lower()))

        # Get the device IDs from all the servers.
        device_ids = get_device_ids(self.dmg, self.hostlist_servers)

        # Every device on every host must report the NORMAL state.
        for host in device_ids:
            self.dmg.hostlist = host
            for _dev in device_ids[host]:
                try:
                    result = self.dmg.storage_query_device_health(_dev)
                except CommandFailure as error:
                    self.fail("dmg get device states failed {}".format(error))
                if 'State:NORMAL' not in result.stdout_text:
                    self.fail("device {} on host {} is not NORMAL".format(
                        _dev, host))

        # Get the nvme-health; only command success is checked here.
        try:
            self.dmg.storage_scan_nvme_health()
        except CommandFailure as error:
            self.fail("dmg storage scan --nvme-health failed {}".format(error))
Пример #5
0
    def pool_acl_verification(self, current_user_acl, read, write):
        """Verify daos pool security when the pool is created with an acl file.

        Steps:
            (1) Set up the dmg tool for creating a pool
            (2) Generate an acl file with permissions
            (3) Create a pool with the acl file
            (4) Verify the pool create status
            (5) Get the pool's acl list
            (6) Verify the pool read operation
            (7) Verify the pool write operation
            (8) Clean up users/groups and destroy the pool

        Args:
            current_user_acl: acl with read write access credential.
            read: expected read permission.
            write: expected write permission.

        Returns:
            None. The test is failed when the pool cannot be created; the
            read/write checks are delegated to verify_pool_readwrite.
        """
        # (1)Create daos_shell command
        create_cmd = DmgCommand(os.path.join(self.prefix, "bin"))
        create_cmd.get_params(self)
        port = self.params.get("port", "/run/server_config/*", 10001)
        acl_file_name = self.params.get(
            "acl_file", "/run/pool_acl/*", "acl_test.txt")
        acl_file = os.path.join(self.tmp, acl_file_name)
        num_user = self.params.get("num_user", "/run/pool_acl/*")
        num_group = self.params.get("num_group", "/run/pool_acl/*")
        hostlist = ",".join(
            ["{}:{}".format(host, port) for host in self.hostlist_servers])
        create_cmd.hostlist.update(hostlist, "dmg.hostlist")
        self.log.info("  (1)dmg= %s", create_cmd)

        # (2)Generate acl file with permissions
        self.log.info("  (2)Generate acl file with user/group permissions")
        permission_list = self.create_pool_acl(
            num_user, num_group, current_user_acl, acl_file)

        # (3)Create a pool with acl
        self.log.info("  (3)Create a pool with acl")
        create_cmd.action_command.acl_file.value = acl_file
        # The outcome is judged from stderr below, not from an exception.
        create_cmd.exit_status_exception = False
        create_result = create_cmd.run()

        # (4)Verify the pool create status
        self.log.info("  (4)dmg.run() result=\n%s", create_result)
        if create_result.stderr != "":
            self.fail("##(4)Unable to parse pool uuid and svc.")
        uuid, svc = dmg_utils.get_pool_uuid_service_replicas_from_stdout(
            create_result.stdout)

        # (5)Get the pool's acl list
        #    dmg pool get-acl --pool <UUID>
        self.log.info("  (5)Get a pool's acl list by: "
                      "dmg pool get-acl --pool --hostlist")
        pool_acl_list = self.get_pool_acl_list(uuid)
        self.log.info("   pool original permission_list: %s", permission_list)
        self.log.info("   pool get_acl  permission_list: %s", pool_acl_list)

        # (6)Verify pool read operation
        #    daos pool query --pool <uuid>
        self.log.info("  (6)Verify pool read by: daos pool query --pool")
        self.verify_pool_readwrite(svc, uuid, "read", expect=read)

        # (7)Verify pool write operation
        #    daos container create --pool <uuid>
        self.log.info("  (7)Verify pool write by: daos continer create --pool")
        self.verify_pool_readwrite(svc, uuid, "write", expect=write)

        # (8)Cleanup user and destroy pool
        self.log.info("  (8)Cleanup user and destroy pool")
        self.cleanup_user_group(num_user, num_group)
        destroy_cmd = DmgCommand(os.path.join(self.prefix, "bin"))
        destroy_cmd.request.value = "pool"
        destroy_cmd.action.value = "destroy --pool={}".format(uuid)
        destroy_cmd.hostlist.update(hostlist, "dmg.hostlist")
        destroy_cmd.run()
Пример #6
0
class NvmeHealth(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: To validate NVMe health test cases
    :avocado: recursive
    """
    @skipForTicket("DAOS-7011")
    def test_monitor_for_large_pools(self):
        """Jira ID: DAOS-4722.

        Test Description: Test Health monitor for large number of pools.
        Use Case: Create the configured number of pools from 80% of the NVMe
                  capacity and verify that dmg list-pools, device-health and
                  nvme-health work for all pools.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=nvme_health
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        pool_count = self.params.get("number_of_pools", '/run/pool/*')

        # The servers are stopped while the NVMe capacity is read and are
        # restarted right afterwards.
        self.stop_servers()
        nvme_capacity = self.get_nvme_max_capacity()
        self.start_servers()

        # Each pool gets an equal slice of 80% of the NVMe capacity and an
        # SCM size of 10% of its NVMe size.
        nvme_per_pool = int((nvme_capacity * 0.80) / pool_count)
        scm_per_pool = int(nvme_per_pool * 0.10)

        self.pool = []
        # A pool is stored in self.pool only once it has been created.
        for _ in range(pool_count):
            new_pool = TestPool(self.context, self.get_dmg_command())
            new_pool.get_params(self)
            new_pool.scm_size.update(str(scm_per_pool))
            new_pool.nvme_size.update(str(nvme_per_pool))
            new_pool.create()
            self.pool.append(new_pool)

        # Set up the dmg command with the server's allow_insecure setting.
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        allow_insecure = self.server_managers[0].get_config_value(
            "allow_insecure")
        self.dmg.insecure.update(allow_insecure, "dmg.insecure")

        # dmg storage query list-pools, run against one host at a time.
        self.dmg.set_sub_command("storage")
        storage_cmd = self.dmg.sub_command_class
        storage_cmd.set_sub_command("query")
        storage_cmd.sub_command_class.set_sub_command("list-pools")
        for server in self.hostlist_servers:
            self.dmg.hostlist = server
            try:
                result = self.dmg.run()
            except CommandFailure as error:
                self.fail("dmg command failed: {}".format(error))
            # Every created pool has to show up in the query output.
            listed = result.stdout_text
            for created_pool in self.pool:
                pool_uuid = created_pool.uuid.lower()
                if pool_uuid not in listed:
                    self.fail(
                        'Pool uuid {} not found in smd query'.format(
                            pool_uuid))

        # Device IDs of all servers, keyed by host.
        device_ids = get_device_ids(self.dmg, self.hostlist_servers)

        # Each device of each host has to report the NORMAL state.
        for server in device_ids:
            self.dmg.hostlist = server
            for device in device_ids[server]:
                try:
                    result = self.dmg.storage_query_device_health(device)
                except CommandFailure as error:
                    self.fail("dmg get device states failed {}".format(error))
                if 'State:NORMAL' not in result.stdout_text:
                    self.fail(
                        "device {} on host {} is not NORMAL".format(
                            device, server))

        # The nvme-health scan only has to complete without a failure.
        try:
            self.dmg.storage_scan_nvme_health()
        except CommandFailure as error:
            self.fail("dmg storage scan --nvme-health failed {}".format(error))
Пример #7
0
class CSumErrorLog(DaosCoreBase):
    """
    Test Class Description: This test runs
    daos_test -z (Checksum tests) and verifies
    whether Checksum Error Counters are incremented
    in the NVME device due to checksum fault injection.
    :avocado: recursive
    """
    # pylint: disable=too-many-instance-attributes
    def setUp(self):
        """Prepare a ``dmg storage query`` command aimed at the first server."""
        super(CSumErrorLog, self).setUp()
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.hostlist = self.hostlist_servers[0]
        allow_insecure = self.server_managers[0].get_config_value(
            "allow_insecure")
        self.dmg.insecure.update(allow_insecure, "dmg.insecure")
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")

    def get_nvme_device_id(self):
        """Return the first device UUID printed by the ``smd`` query.

        Returns:
            str: second whitespace-separated field of the first output line
                starting with "UUID:", or None when no such line exists.
        """
        query_cmd = self.dmg.sub_command_class.sub_command_class
        query_cmd.set_sub_command("smd")
        smd_cmd = query_cmd.sub_command_class
        smd_cmd.devices.value = True
        smd_cmd.pools.value = True
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))
        for raw_line in result.stdout.splitlines():
            stripped = raw_line.strip()
            if re.search("^UUID:", stripped):
                return stripped.split()[1]
        return None

    def get_checksum_error_value(self, device_id=None):
        """Return the checksum error count reported for a device.

        Args:
            device_id: device uuid passed to the ``blobstore-health`` query.
                The test is failed when it is None.

        Returns:
            int: third whitespace-separated field of the first output line
                starting with "Checksum", or None when no such line exists.
        """
        if device_id is None:
            self.fail("No device id provided")
            return None
        query_cmd = self.dmg.sub_command_class.sub_command_class
        query_cmd.set_sub_command("blobstore-health")
        query_cmd.sub_command_class.devuuid.value = "{}".format(device_id)
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))
        for raw_line in result.stdout.splitlines():
            stripped = raw_line.strip()
            if re.search("^Checksum", stripped):
                return int(stripped.split()[2])
        return None

    def test_csum_error_logging(self):
        """
        Test ID: DAOS-3927
        Test Description: Write Avocado Test to verify single data after
                          pool/container disconnect/reconnect.
        :avocado: tags=all,pr,hw,medium,ib2,csum_error_log
        """
        device_uuid = self.get_nvme_device_id()
        self.log.info("%s", device_uuid)
        errors_before = self.get_checksum_error_value(device_uuid)
        self.log.info("Checksum Errors : %d", errors_before)
        # The subtest is expected to raise the device's checksum error count.
        DaosCoreBase.run_subtest(self)
        errors_after = self.get_checksum_error_value(device_uuid)
        self.log.info("Checksum Errors : %d", errors_after)
        self.assertTrue(errors_after > errors_before,
                        "Checksum Error Log not incremented")
        self.log.info("Checksum Error Logging Test Passed")
Пример #8
0
class CSumErrorLog(DaosCoreBase):
    """
    Test Class Description: This test runs
    daos_test -z (Checksum tests) and verifies
    whether Checksum Error Counters are incremented
    in the NVME device due to checksum fault injection.
    :avocado: recursive
    """

    # pylint: disable=too-many-instance-attributes
    def setUp(self):
        """Prepare a ``dmg storage query`` command aimed at the first server."""
        super(CSumErrorLog, self).setUp()
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.hostlist = self.hostlist_servers[0]
        allow_insecure = self.server_managers[0].get_config_value(
            "allow_insecure")
        self.dmg.insecure.update(allow_insecure, "dmg.insecure")
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")

    def get_nvme_device_id(self):
        """Return the uuid of the first device found by ``list-devices``.

        The command is run with JSON output. The test is failed when the
        reply contains host errors.

        Returns:
            str: uuid of the first device of the first host entry that has a
                non-empty device list, or None when there is none.
        """
        self.dmg.json.value = True
        query_cmd = self.dmg.sub_command_class.sub_command_class
        query_cmd.set_sub_command("list-devices")
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))

        reply = json.loads(result.stdout)
        host_errors = reply['host_errors']
        if len(host_errors) > 0:
            self.fail("dmg command failed: {}".format(host_errors))
        for host_storage in reply['host_storage_map'].values():
            devices = host_storage['storage']['smd_info']['devices']
            if devices:
                return devices[0]['uuid']
        return None

    def get_checksum_error_value(self, device_id=None):
        """Return the checksum error count from ``device-health``.

        The command is run with JSON output. The test is failed when the
        reply contains host errors.

        Args:
            device_id: device uuid passed to the ``device-health`` query.
                The test is failed when it is None.

        Returns:
            the ``checksum_errors`` health value of the first device of the
                first host entry that has a non-empty device list, or None
                when there is none.
        """
        if device_id is None:
            self.fail("No device id provided")
            return None
        self.dmg.json.value = True
        query_cmd = self.dmg.sub_command_class.sub_command_class
        query_cmd.set_sub_command("device-health")
        query_cmd.sub_command_class.uuid.value = device_id
        try:
            result = self.dmg.run()
        except process.CmdError as details:
            self.fail("dmg command failed: {}".format(details))

        reply = json.loads(result.stdout)
        host_errors = reply['host_errors']
        if len(host_errors) > 0:
            self.fail("dmg command failed: {}".format(host_errors))
        for host_storage in reply['host_storage_map'].values():
            devices = host_storage['storage']['smd_info']['devices']
            if devices:
                return devices[0]['health']['checksum_errors']
        return None

    def test_csum_error_logging(self):
        """
        Test ID: DAOS-3927
        Test Description: Write Avocado Test to verify single data after
                          pool/container disconnect/reconnect.
        :avocado: tags=all,pr,hw,medium,ib2,csum_error_log
        """
        device_uuid = self.get_nvme_device_id()
        self.log.info("%s", device_uuid)
        errors_before = self.get_checksum_error_value(device_uuid)
        self.log.info("Checksum Errors : %d", errors_before)
        # The subtest is expected to raise the device's checksum error count.
        DaosCoreBase.run_subtest(self)
        errors_after = self.get_checksum_error_value(device_uuid)
        self.log.info("Checksum Errors : %d", errors_after)
        self.assertTrue(errors_after > errors_before,
                        "Checksum Error Log not incremented")
        self.log.info("Checksum Error Logging Test Passed")
Пример #9
0
class TestPool(TestDaosApiBase):
    """A class for functional testing of DaosPools objects."""
    # Constants to define whether to use API or dmg to create and destroy
    # pool.
    USE_API = "API"
    USE_DMG = "dmg"

    def __init__(self, context, log=None, cb_handler=None, dmg_bin_path=None):
        # pylint: disable=unused-argument
        """Initialize a TestPool object.

        Note: 'log' is a defunct argument that is not used here and will be
        removed in the future.

        Args:
            context (DaosContext): context handed to the DaosPool object when
                the pool is created.
            log (logging): unused.
            cb_handler (CallbackHandler, optional): callback object to use with
                the API methods. Defaults to None.
            dmg_bin_path (str, optional): directory where dmg is installed,
                e.g. self.basepath + '/install/bin' in the test. Required to
                create or destroy the pool with dmg. Defaults to None, in
                which case no dmg command object is created.
        """
        super(TestPool, self).__init__("/run/pool/*", cb_handler)
        self.context = context
        self.uid = os.geteuid()
        self.gid = os.getegid()

        # Pool parameters; all of them start out unset.
        self.mode = BasicParameter(None)
        self.name = BasicParameter(None)  # server group name
        self.svcn = BasicParameter(None)
        self.target_list = BasicParameter(None)
        self.scm_size = BasicParameter(None)
        self.nvme_size = BasicParameter(None)
        # USE_API selects the API and USE_DMG selects dmg for creating and
        # destroying the pool; the API is used unless this is changed.
        self.control_method = BasicParameter(self.USE_API, self.USE_API)
        # Owner names: the current user and the group entry that is looked
        # up under the user's name.
        user_name = getpass.getuser()
        group_name = grp.getgrnam(user_name)[0]
        self.username = BasicParameter(user_name, user_name)
        self.groupname = BasicParameter(group_name, group_name)

        # State that is filled in once a pool exists.
        self.pool = None
        self.uuid = None
        self.info = None
        self.svc_ranks = None
        self.connected = False

        # The dmg command is kept as a member so that a test has more
        # flexibility over the usage of the command.
        self.dmg_bin_path = dmg_bin_path
        self.dmg = None
        if dmg_bin_path is not None:
            dmg_command = DmgCommand(self.dmg_bin_path)
            dmg_command.insecure.value = True
            dmg_command.request.value = "pool"
            self.dmg = dmg_command

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def create(self):
        """Create a pool with either the API or dmg.

        Any pool currently held by this object is destroyed first.

        The API is used while control_method.value is USE_API. To use dmg,
        set control_method.value to USE_DMG before calling this method; the
        recommended way is the pool block in the yaml, for example,

        pool:
            control_method: dmg

        dmg also requires dmg_bin_path to be given to the constructor, e.g.

        self.pool = TestPool(self.context,
                             dmg_bin_path=self.basepath + '/install/bin')

        With dmg, svcn.value is passed as the nsvc option, groupname.value
        as the group, username.value as the user, scm_size.value as the scm
        size and target_list.value (a list of ranks) as a comma separated
        ranks option. Other options can be set directly on
        self.dmg.action_command. Refer to the dmg_utils.py pool_create
        method for more details.

        To test the negative case on create, the test needs to catch
        CommandFailure for dmg and DaosApiError for the API.

        Raises:
            DaosTestError: if dmg is selected but no dmg command object was
                set up by the constructor.
        """
        self.destroy()
        targets = self.target_list.value
        if targets is None:
            self.log.info("Creating a pool")
        else:
            self.log.info("Creating a pool on targets %s", targets)
        self.pool = DaosPool(self.context)

        if self.control_method.value == self.USE_API:
            api_args = {
                "mode": self.mode.value,
                "uid": self.uid,
                "gid": self.gid,
                "scm_size": self.scm_size.value,
                "group": self.name.value
            }
            # Optional arguments are only passed on when they are set.
            for name in ("target_list", "svcn", "nvme_size"):
                optional = getattr(self, name).value
                if optional is not None:
                    api_args[name] = optional
            self._call_method(self.pool.create, api_args)

            rank_list = self.pool.svc
            self.svc_ranks = [
                int(rank_list.rl_ranks[position])
                for position in range(rank_list.rl_nr)
            ]
        else:
            if self.dmg is None:
                raise DaosTestError(
                    "self.dmg is None. dmg_bin_path needs to be set through "
                    "the constructor of TestPool to create pool with dmg.")
            # target_list.value holds the ranks (starting from 0) of the
            # servers the pool is created on, e.g. [0, 1] for the first two
            # of four server hosts. It is handed to dmg as the comma
            # separated string "0,1". When it is not set, no ranks are
            # passed and the pool is created over all the server hosts.
            if targets is None:
                ranks_option = None
            else:
                ranks_option = ",".join(str(rank) for rank in targets)

            # Call the dmg pool create command.
            self.dmg.action.value = "create"
            self.dmg.get_action_command()
            # uid/gid of the API correspond to the user and group of dmg.
            # The server group (self.name.value) is a different thing from
            # the group passed to dmg: it is not part of the command and is
            # only used to set self.pool.group further down.
            create_cmd = self.dmg.action_command
            create_cmd.group.value = self.groupname.value
            create_cmd.user.value = self.username.value
            create_cmd.scm_size.value = self.scm_size.value
            create_cmd.ranks.value = ranks_option
            create_cmd.nsvc.value = self.svcn.value
            create_result = self.dmg.run()
            self.log.info("Result stdout = %s", create_result.stdout)
            self.log.info("Result exit status = %s", create_result.exit_status)

            # UUID and service replica are parsed from the command output.
            parsed = get_pool_uuid_service_replicas_from_stdout(
                create_result.stdout)
            new_uuid = parsed[0]
            service_replica = parsed[1]

            # Fill in the DaosPool object. This is similar to what
            # DaosPool.create does, with the modifications noted below.
            server_group = self.name.value
            self.pool.group = (
                None if server_group is None
                else ctypes.create_string_buffer(server_group))
            # Modification 1: the rank array is sized from the service
            # replicas reported by dmg. A single number is assumed; several
            # numbers (possibly comma separated) would need a real list.
            replica_ranks = [int(service_replica)]
            rank_array_type = ctypes.c_uint * len(replica_ranks)
            # Modification 2: the array is filled with the actual ranks,
            # because nothing overwrites placeholder values on this path.
            rank_array = rank_array_type(*replica_ranks)
            rank_pointer = ctypes.POINTER(ctypes.c_uint)(rank_array)
            # Modification 3: the rank count is the number of replicas found
            # instead of self.svcn.value.
            self.pool.svc = daos_cref.RankList(
                rank_pointer, len(replica_ranks))

            # Set the UUID and mark the DaosPool object as attached.
            self.pool.set_uuid_str(new_uuid)
            self.pool.attached = 1

        self.uuid = self.pool.get_uuid_str()

    @fail_on(DaosApiError)
    def connect(self, permission=1):
        """Connect to the pool unless it is already connected.

        Args:
            permission (int, optional): connect permission; the connect flag
                is 1 shifted left by this value. Defaults to 1.

        Returns:
            bool: True if the pool has been connected; False if the pool was
                already connected or the pool is not defined.

        """
        if not self.pool or self.connected:
            return False
        flags = 1 << permission
        self.log.info(
            "Connecting to pool %s with permission %s (flag: %s)",
            self.uuid, permission, flags)
        self._call_method(self.pool.connect, {"flags": flags})
        self.connected = True
        return True

    @fail_on(DaosApiError)
    def disconnect(self):
        """Disconnect from the pool if it is currently connected.

        Returns:
            bool: True if the pool has been disconnected; False if the pool was
                already disconnected or the pool is not defined.

        """
        if not (self.pool and self.connected):
            return False
        self.log.info("Disonnecting from pool %s", self.uuid)
        self._call_method(self.pool.disconnect, {})
        self.connected = False
        return True

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def destroy(self, force=1):
        """Destroy the pool with either the API or dmg.

        The way the pool is destroyed follows the current control_method
        value, so update it before calling this method if the other way is
        wanted. The pool is disconnected first. The destroy call itself is
        only issued for an attached pool, but the pool state kept by this
        object is cleared either way.

        Args:
            force (int, optional): force flag. Defaults to 1.

        Returns:
            bool: True if the pool has been destroyed; False if the pool is not
                defined or the control method is neither USE_API nor USE_DMG.
        """
        if not self.pool:
            return False

        self.disconnect()
        self.log.info("Destroying pool %s", self.uuid)
        method = self.control_method.value
        if method not in (self.USE_API, self.USE_DMG):
            self.log.error("Cannot destroy pool! Use USE_API or USE_DMG")
            return False

        if self.pool.attached:
            if method == self.USE_API:
                self._call_method(self.pool.destroy, {"force": force})
            else:
                self.dmg.action.value = "destroy"
                self.dmg.get_action_command()
                destroy_cmd = self.dmg.action_command
                destroy_cmd.pool.value = self.uuid
                destroy_cmd.force.value = force
                self.dmg.run()

        # Forget everything that referred to the destroyed pool.
        self.pool = None
        self.uuid = None
        self.info = None
        self.svc_ranks = None
        return True

    @fail_on(DaosApiError)
    def get_info(self):
        """Query the pool and store the result in the self.info attribute.

        Nothing happens when no pool is defined. The pool is connected first
        if it is not connected yet.
        """
        if not self.pool:
            return
        self.connect()
        self._call_method(self.pool.pool_query, {})
        self.info = self.pool.pool_info

    def check_pool_info(self,
                        pi_uuid=None,
                        pi_ntargets=None,
                        pi_nnodes=None,
                        pi_ndisabled=None,
                        pi_map_ver=None,
                        pi_leader=None,
                        pi_bits=None):
        """Check the pool info attributes.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            pi_uuid (str, optional): pool uuid. Defaults to None.
            pi_ntargets (int, optional): number of targets. Defaults to None.
            pi_nnodes (int, optional): number of nodes. Defaults to None.
            pi_ndisabled (int, optional): number of disabled. Defaults to None.
            pi_map_ver (int, optional): pool map version. Defaults to None.
            pi_leader (int, optional): pool leader. Defaults to None.
            pi_bits (int, optional): pool bits. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        expected = (
            ("pi_uuid", pi_uuid),
            ("pi_ntargets", pi_ntargets),
            ("pi_nnodes", pi_nnodes),
            ("pi_ndisabled", pi_ndisabled),
            ("pi_map_ver", pi_map_ver),
            ("pi_leader", pi_leader),
            ("pi_bits", pi_bits),
        )
        self.get_info()
        checks = []
        for key, val in expected:
            if val is None:
                continue
            actual = getattr(self.info, key)
            if key == "pi_uuid":
                # The uuid is converted to a string before it is compared
                actual = c_uuid_to_str(actual)
            checks.append((key, actual, val))
        return self._check_info(checks)

    def check_pool_space(self,
                         ps_free_min=None,
                         ps_free_max=None,
                         ps_free_mean=None,
                         ps_ntargets=None,
                         ps_padding=None):
        """Check the pool info space attributes.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            ps_free_min (list, optional): minimum free space per device.
                Defaults to None.
            ps_free_max (list, optional): maximum free space per device.
                Defaults to None.
            ps_free_mean (list, optional): mean free space per device.
                Defaults to None.
            ps_ntargets (int, optional): number of targets. Defaults to None.
            ps_padding (int, optional): space padding. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        per_device = (
            ("ps_free_min", ps_free_min),
            ("ps_free_max", ps_free_max),
            ("ps_free_mean", ps_free_mean),
        )
        single_value = (
            ("ps_ntargets", ps_ntargets),
            ("ps_padding", ps_padding),
        )

        self.get_info()
        checks = []

        # Per-device values: one check per index of the expected sequence.
        # Anything that is not a list/tuple (including None) is ignored.
        for key, val in per_device:
            if isinstance(val, (list, tuple)):
                for index, item in enumerate(val):
                    checks.append((
                        "{}[{}]".format(key, index),
                        getattr(self.info.pi_space, key)[index],
                        item))

        # Single values: each check is one (name, actual, expected) tuple
        for key, val in single_value:
            if val is not None:
                checks.append((key, getattr(self.info.pi_space, key), val))

        return self._check_info(checks)

    def check_pool_daos_space(self, s_total=None, s_free=None):
        """Check the pool info daos space attributes.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            s_total (list, optional): total space per device. Defaults to None.
            s_free (list, optional): free space per device. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        expected = (("s_total", s_total), ("s_free", s_free))
        self.get_info()
        checks = []
        for key, val in expected:
            if val is None:
                continue
            # One check per index of the expected per-device values
            for index, item in enumerate(val):
                actual = getattr(self.info.pi_space.ps_space, key)[index]
                checks.append(("{}_{}".format(key, index), actual, item))
        return self._check_info(checks)

    def check_rebuild_status(self,
                             rs_version=None,
                             rs_seconds=None,
                             rs_errno=None,
                             rs_done=None,
                             rs_padding32=None,
                             rs_fail_rank=None,
                             rs_toberb_obj_nr=None,
                             rs_obj_nr=None,
                             rs_rec_nr=None,
                             rs_size=None):
        # pylint: disable=too-many-arguments
        """Check the pool info rebuild attributes.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            rs_version (int, optional): rebuild version. Defaults to None.
            rs_seconds (int, optional): rebuild seconds. Defaults to None.
            rs_errno (int, optional): rebuild error number. Defaults to None.
            rs_done (int, optional): rebuild done flag. Defaults to None.
            rs_padding32 (int, optional): padding. Defaults to None.
            rs_fail_rank (int, optional): rebuild fail target. Defaults to None.
            rs_toberb_obj_nr (int, optional): number of objects to be rebuilt.
                Defaults to None.
            rs_obj_nr (int, optional): number of rebuilt objects.
                Defaults to None.
            rs_rec_nr (int, optional): number of rebuilt records.
                Defaults to None.
            rs_size (int, optional): size of all rebuilt records.
                Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        # NOTE(review): get_pool_rebuild_status() reads "rs_pad_32" while this
        # method looks up "rs_padding32" - confirm the actual attribute name.
        expected = (
            ("rs_version", rs_version),
            ("rs_seconds", rs_seconds),
            ("rs_errno", rs_errno),
            ("rs_done", rs_done),
            ("rs_padding32", rs_padding32),
            ("rs_fail_rank", rs_fail_rank),
            ("rs_toberb_obj_nr", rs_toberb_obj_nr),
            ("rs_obj_nr", rs_obj_nr),
            ("rs_rec_nr", rs_rec_nr),
            ("rs_size", rs_size),
        )
        self.get_info()
        checks = []
        for key, val in expected:
            if val is not None:
                checks.append((key, getattr(self.info.pi_rebuild_st, key), val))
        return self._check_info(checks)

    def rebuild_complete(self):
        """Report whether the pool rebuild has finished.

        The rebuild status is displayed (which refreshes self.info) before the
        rs_done flag is inspected.

        Returns:
            bool: True if the rebuild status rs_done flag equals 1; False
                otherwise

        """
        self.display_pool_rebuild_status()
        done_flag = self.info.pi_rebuild_st.rs_done
        return done_flag == 1

    def wait_for_rebuild(self, to_start, interval=1):
        """Block until the rebuild has started or has completed.

        The rebuild status is polled until rebuild_complete() no longer equals
        to_start, sleeping between polls.

        Args:
            to_start (bool): True to wait for the rebuild to start; False to
                wait for the rebuild to complete
            interval (int): number of seconds to sleep between rebuild status
                checks. Defaults to 1.
        """
        if to_start:
            goal, waiting, detected = "start", "has not yet started", "start"
        else:
            goal, waiting, detected = "complete", "in progress", "completion"

        self.log.info("Waiting for rebuild to %s ...", goal)
        while self.rebuild_complete() == to_start:
            self.log.info("  Rebuild %s ...", waiting)
            sleep(interval)
        self.log.info("Rebuild %s detected", detected)

    @fail_on(DaosApiError)
    def start_rebuild(self, ranks, daos_log):
        """Kill the given server ranks and exclude them from this pool.

        Args:
            ranks (list): a list of daos server ranks (int) to kill
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: the result of exclude() for the same ranks, i.e. True if the
                ranks were excluded from the pool; False if the pool is
                undefined

        """
        group = self.name.value
        msg = "Killing DAOS ranks {} from server group {}".format(
            ranks, group)
        for logger in (self.log, daos_log):
            logger.info(msg)

        # Force-kill every requested rank before excluding them from the pool
        for rank in ranks:
            target = DaosServer(self.context, group, rank)
            self._call_method(target.kill, {"force": 1})
        return self.exclude(ranks, daos_log)

    @fail_on(DaosApiError)
    def exclude(self, ranks, daos_log):
        """Manually exclude server ranks from this pool.

        Args:
            ranks (list): a list of daos server ranks (int) to exclude
            daos_log (DaosLog): object for logging messages

        Returns:
            bool: True if the ranks were excluded from the pool; False if the
                pool is undefined

        """
        if not self.pool:
            return False

        msg = "Excluding server ranks {} from pool {}".format(
            ranks, self.uuid)
        for logger in (self.log, daos_log):
            logger.info(msg)
        self._call_method(self.pool.exclude, {"rank_list": ranks})
        return True

    def check_files(self, hosts):
        """Check whether this pool's files exist on the given hosts.

        The pool uuid is lower-cased before it is handed to
        check_pool_files().

        Args:
            hosts (list): list of hosts

        Returns:
            bool: True if the files for this pool exist on each host; False
                otherwise

        """
        pool_uuid = self.uuid.lower()
        return check_pool_files(self.log, hosts, pool_uuid)

    def write_file(self, orterun, processes, hostfile, size, timeout=60):
        """Write a file to the pool by running write_some_data.py via orterun.

        Args:
            orterun (str): full path to the orterun command
            processes (int): number of processes to launch
            hostfile (str): value passed to the orterun --hostfile option
            size (int): size of the file to create in bytes
            timeout (int, optional): number of seconds before timing out the
                command. Defaults to 60 seconds.

        Returns:
            process.CmdResult: command execution result

        """
        self.log.info("Writing %s bytes to pool %s", size, self.uuid)

        # Environment handed to the launched command; it is built before the
        # MPI module is loaded, exactly as the statements are ordered here.
        run_env = {
            "DAOS_POOL": self.uuid,
            "DAOS_SVCL": "1",
            "DAOS_SINGLETON_CLI": "1",
            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
        }
        load_mpi("openmpi")

        # The writer script is expected next to this module
        script = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "write_some_data.py")
        command = "{} --np {} --hostfile {} {} {} testfile".format(
            orterun, processes, hostfile, script, size)

        # NOTE(review): the positional arguments presumably map to
        # verbose=True, ignore_status=False, allow_output_check="both",
        # shell=True, env=run_env - confirm against the avocado version used.
        return process.run(
            command, timeout, True, False, "both", True, run_env)

    def get_pool_daos_space(self):
        """Collect the pool info daos space attributes.

        The pool info is refreshed before the attributes are read.

        Returns:
            dict: the "s_total" and "s_free" attributes of the pool space
                information, keyed by attribute name

        """
        self.get_info()
        daos_space = {}
        for key in ("s_total", "s_free"):
            daos_space[key] = getattr(self.info.pi_space.ps_space, key)
        return daos_space

    def display_pool_daos_space(self, msg=None):
        """Log the pool info daos space attributes.

        Each entry is logged as "<attribute>[<index>]=<value>", with the
        attributes in sorted order.

        Args:
            msg (str, optional): optional text to include in the output; it is
                ignored unless it is a str. Defaults to None.
        """
        daos_space = self.get_pool_daos_space()
        sizes = []
        for key in sorted(daos_space):
            for index, item in enumerate(daos_space[key]):
                sizes.append("{}[{}]={}".format(key, index, item))

        suffix = " " + msg if isinstance(msg, str) else ""
        self.log.info(
            "Pool %s space%s:\n  %s", self.uuid, suffix, "\n  ".join(sizes))

    def get_pool_rebuild_status(self):
        """Collect the pool info rebuild status attributes.

        The pool info is refreshed before the attributes are read.

        Returns:
            dict: the selected rebuild status attributes, keyed by attribute
                name

        """
        self.get_info()
        status = {}
        for key in ("rs_version", "rs_pad_32", "rs_errno", "rs_done",
                    "rs_toberb_obj_nr", "rs_obj_nr", "rs_rec_nr"):
            status[key] = getattr(self.info.pi_rebuild_st, key)
        return status

    def display_pool_rebuild_status(self):
        """Log the pool rebuild status as sorted "name=value" pairs."""
        status = self.get_pool_rebuild_status()
        pairs = []
        for key in sorted(status):
            pairs.append("{}={}".format(key, status[key]))
        self.log.info("Pool rebuild status: %s", ", ".join(pairs))

    def read_data_during_rebuild(self, container):
        """Read the container's written data while the rebuild is active.

        Entries of container.written_data are read one at a time for as long
        as the rebuild has not completed.

        Args:
            container (TestContainer): container from which to read data

        Returns:
            bool: True if all the data is read successfully before the rebuild
                completes; False otherwise

        """
        container.open()
        # NOTE(review): the message says "container" but logs this pool's
        # uuid - confirm which identifier is intended.
        self.log.info("Reading objects in container %s during rebuild",
                      self.uuid)

        # Read one entry per rebuild status check until either the rebuild
        # completes or every entry has been read
        position = 0
        pending = position < len(container.written_data)
        status = pending
        while not self.rebuild_complete() and pending:
            entry = container.written_data[position]
            try:
                status &= entry.read_object(container)
            except DaosTestError as error:
                self.log.error(str(error))
                status = False
            position += 1
            pending = position < len(container.written_data)

        # Entries left over mean the rebuild finished before the reads did
        if pending:
            self.log.error(
                "Rebuild completed before all the written data could be read")
            status = False
        elif not status:
            self.log.error("Errors detected reading data during rebuild")
        return status
Пример #10
0
    def verify_pool_acl_prim_sec_groups(self, pool_acl_list, acl_file,\
        uuid, svc):
        '''
        Description:
            Verify daos pool acl access with primary and secondary
            groups access permission.

            The secondary groups are created on the client hosts, an acl
            entry per secondary group is appended to pool_acl_list, the acl
            file is written and the primary group entry in it is replaced by
            one with an empty permission, the pool acl is overwritten with
            dmg, and pool read/write access is verified. The secondary
            groups are deleted again at the end.
        Args:
            pool_acl_list: pool acl entry list; the secondary group entries
                are appended to this list in place.
            acl_file: acl file to be used.
            uuid: daos pool uuid.
            svc:  daos pool svc.
        Return:
            None.
        '''
        # NOTE(review): these three values are read from "/run/pool_acl/*" and
        # then reassigned below from
        # "/run/pool_acl/primary_secondary_group_test/*" before being used.
        sec_group = self.params.get("secondary_group_name", "/run/pool_acl/*")
        sec_group_perm = self.params.get("sg_permission", "/run/pool_acl/*")
        sec_group_rw = self.params.get("sg_read_write", "/run/pool_acl/*")
        # Name of the effective (primary) group of the current process
        user_gid = os.getegid()
        current_group = grp.getgrgid(user_gid)[0]
        primary_grp_perm = self.params.get(\
            "pg_permission", "/run/pool_acl/primary_secondary_group_test/*")[0]
        sec_group = self.params.get(\
            "secondary_group_name", \
            "/run/pool_acl/primary_secondary_group_test/*")
        sec_group_perm = self.params.get(\
            "sg_permission", "/run/pool_acl/primary_secondary_group_test/*")
        sec_group_rw = self.params.get(\
            "sg_read_write", "/run/pool_acl/primary_secondary_group_test/*")
        # Same lookup as current_group above (effective group name)
        l_group = grp.getgrgid(os.getegid())[0]
        # Create every secondary group on the client hosts
        for group in sec_group:
            add_del_user(self.hostlist_clients, "groupadd", group)
        # NOTE(review): the group name is passed as the last add_del_user
        # argument of the usermod command - presumably the user name matches
        # its primary group name; confirm.
        cmd = "usermod -G " + ",".join(sec_group)
        self.log.info("  (8-1)verify_pool_acl_prim_sec_groups, cmd= %s", cmd)
        add_del_user(self.hostlist_clients, cmd, l_group)

        self.log.info(
            "  (8-2)Before update sec_group permission,\
            pool_acl_list= %s", pool_acl_list)
        # Append one group acl entry per secondary group; the "none"
        # permission is translated to an empty permission string
        for group, permission in zip(sec_group, sec_group_perm):
            if permission == "none":
                permission = ""
            n_acl = acl_entry("group", group, permission)
            pool_acl_list.append(n_acl)

        self.log.info(
            "  (8-3)After update sec_group permission,\
            pool_acl_list= %s", pool_acl_list)
        self.log.info("      pool acl_file= %s", acl_file)
        create_acl_file(acl_file, pool_acl_list)

        # Modify the primary-group permission for the secondary-group test:
        # replace the primary group's entry with an empty-permission entry
        grp_entry = acl_entry("group", current_group, primary_grp_perm)
        new_grp_entry = acl_entry("group", current_group, "")
        self.modify_acl_file_entry(acl_file, grp_entry, new_grp_entry)

        # dmg pool overwrite-acl --pool <uuid> --acl-file <file>
        dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        dmg.request.value = "pool"
        dmg.action.value = "overwrite-acl --pool={} --acl-file={}".\
            format(uuid, acl_file)
        port = self.params.get("port", "/run/server_config/*", 10001)
        servers_with_ports = [
            "{}:{}".format(host, port) for host in self.hostlist_servers
        ]
        dmg.hostlist.update(",".join(servers_with_ports), "dmg.hostlist")
        self.log.info("  (8-4)dmg= %s", dmg)
        result = dmg.run()
        self.log.info("  (8-5)dmg.run() result=\n %s", result)

        # Verify pool read operation
        # daos pool query --pool <uuid>
        self.log.info("  (8-6)Verify pool read by: daos pool query --pool")
        # First sg_read_write item is the expected read result
        exp_read = sec_group_rw[0]
        self.verify_pool_readwrite(svc, uuid, "read", expect=exp_read)

        # Verify pool write operation
        # daos container create --pool <uuid>
        self.log.info("  (8-7)Verify pool write by: daos continer create pool")
        # Second sg_read_write item is the expected write result
        exp_write = sec_group_rw[1]
        self.verify_pool_readwrite(svc, uuid, "write", expect=exp_write)

        # Remove the secondary groups created at the start of the test
        for group in sec_group:
            add_del_user(self.hostlist_clients, "groupdel", group)