Example #1
    def test_pool_query_ior(self):
        """
        JIRA ID: DAOS-2976

        Test Description: Test that pool query command will properly and
        accurately show the size changes once there is content in the pool.

        :avocado: tags=all,small,daily_regression,hw,dmg,pool_query,basic
        :avocado: tags=poolquerywrite
        """
        # Store original pool info
        out_b = self.get_pool_query_info(self.uuid)
        self.log.info("==>   Pool info before write: \n%s", out_b)

        # Run ior
        self.log.info("==>   Write data to pool.")
        self.run_ior_with_pool()

        # Check pool written data
        out_a = self.get_pool_query_info(self.uuid)
        self.log.info("==>   Pool info after write: \n%s", out_a)

        # The file should have been written into nvme, compare info
        bytes_orig_val = human_to_bytes(out_b["nvme"]["free"])
        bytes_curr_val = human_to_bytes(out_a["nvme"]["free"])
        if bytes_orig_val <= bytes_curr_val:
            self.fail(
                "NVMe free space after write ({}) should be less than before "
                "({})".format(out_a["nvme"]["free"], out_b["nvme"]["free"]))
Example #2
    def get_pool_list(self, quantity, scm_ratio, nvme_ratio, svcn=None):
        """Get a list of TestPool objects.

        Set each TestPool's scm_size and nvme_size attributes using the
        specified ratios and the largest SCM or NVMe size common to all the
        configured servers.

        Args:
            quantity (int): number of TestPool objects to create
            scm_ratio (float): percentage of the maximum SCM capacity to use
                for the pool sizes, e.g. 0.9 for 90%
            nvme_ratio (float): percentage of the maximum NVMe capacity to use
                for the pool sizes, e.g. 0.9 for 90%. Specifying None will
                setup each pool without NVMe.
            svcn (int): Number of pool service replicas. The default value
                of None will use the default set on the server.

        Returns:
            list: a list of TestPool objects equal in length to the quantity
                specified, each configured with the same SCM and NVMe sizes.

        """
        sizes = self.get_max_pool_sizes(
            scm_ratio, 1 if nvme_ratio is None else nvme_ratio)
        pool_list = [
            self.get_pool(create=False, connect=False) for _ in range(quantity)
        ]
        for pool in pool_list:
            pool.svcn.update(svcn)
            pool.scm_size.update(bytes_to_human(sizes[0]), "scm_size")
            if nvme_ratio is not None:
                if sizes[1] is None:
                    self.fail(
                        "Unable to assign a max pool NVMe size; NVMe not "
                        "configured!")

                # The I/O server allocates NVMe storage on targets in multiples
                # of 1GiB per target.  A server with 8 targets will have a
                # minimum NVMe size of 8 GiB.  Specify the largest NVMe size in
                # GiB that can be used with the configured number of targets and
                # specified capacity in GiB.
                targets = self.server_managers[0].get_config_value("targets")
                increment = human_to_bytes("{}GiB".format(targets))
                nvme_multiple = increment
                while nvme_multiple + increment <= sizes[1]:
                    nvme_multiple += increment
                self.log.info(
                    "Largest NVMe multiple based on %s targets in %s: %s (%s)",
                    targets, str(sizes[1]), str(nvme_multiple),
                    bytes_to_human(nvme_multiple))
                pool.nvme_size.update(bytes_to_human(nvme_multiple),
                                      "nvme_size")

        return pool_list
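The while loop above that finds the largest usable NVMe multiple can be reduced to a single floor division; a minimal sketch of the same arithmetic (helper name hypothetical):

def largest_nvme_multiple(capacity, targets):
    """Largest multiple of (targets x 1GiB) that fits in capacity."""
    increment = targets * 2**30  # the I/O server allocates 1GiB per target
    return max(capacity // increment, 1) * increment

# With 8 targets and 100GiB of capacity, both approaches yield 96GiB:
assert largest_nvme_multiple(100 * 2**30, 8) == 96 * 2**30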
Example #3
    def verify_cont_ec_cell_size(self, expected_size):
        """
        Verify the container EC cell size property.

        Args:
            expected_size (int): expected container cell size
        """
        daos_cmd = self.get_daos_command()
        cont_prop = daos_cmd.container_get_prop(self.pool.uuid,
                                                self.container.uuid)
        cont_prop_stdout = cont_prop.stdout_text
        prop_list = cont_prop_stdout.split('\n')[1:]
        cont_index = [
            i for i, word in enumerate(prop_list)
            if word.startswith('EC Cell Size')
        ][0]
        cell_size = prop_list[cont_index].split('EC Cell Size')[1].strip()
        cont_cell_size = human_to_bytes(cell_size.replace(" ", ""))
        self.assertEqual(expected_size, cont_cell_size)
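The parsing above assumes the get-prop output is a table whose rows pair a property label with its value. A sketch against a hypothetical sample (the real daos container get-prop layout may differ):

# Hypothetical stdout shape assumed by the parser in this example
sample_stdout = (
    "Properties for the container:\n"
    "EC Cell Size        64 KiB\n"
    "Another Property    value\n"
)
prop_list = sample_stdout.split('\n')[1:]
line = next(ln for ln in prop_list if ln.startswith('EC Cell Size'))
cell_size = line.split('EC Cell Size')[1].strip()   # '64 KiB'
compact = cell_size.replace(" ", "")                # '64KiB'
# human_to_bytes('64KiB') would then yield 65536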
Example #4
        def get_host_capacity(key, device_names):
            """Get the total storage capacity per host rank.

            Args:
                key (str): the capacity type, e.g. "scm" or "nvme"
                device_names (list): the device names of this capacity type

            Returns:
                dict: a dictionary of total storage capacity per host rank

            """
            host_capacity = {}
            for host in data:
                device_sizes = []
                for device in data[host][key]:
                    if device in device_names:
                        device_sizes.append(
                            human_to_bytes(
                                data[host][key][device]["capacity"]))
                host_capacity[host] = sum(device_sizes)
            return host_capacity
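The nested loops above can also be written as a single dict comprehension. A self-contained sketch with a stubbed converter and sample data (both hypothetical, for illustration only):

def human_to_bytes(size):  # stub so the snippet runs standalone
    units = {"GB": 10**9, "TB": 10**12}
    return int(float(size[:-2]) * units[size[-2:]])

data = {
    "host1": {"nvme": {"nvme0": {"capacity": "1.5TB"},
                       "nvme1": {"capacity": "1.5TB"},
                       "nvme2": {"capacity": "0.5TB"}}},
}
device_names = ["nvme0", "nvme1"]  # only sum the configured devices

host_capacity = {
    host: sum(
        human_to_bytes(dev["capacity"])
        for name, dev in data[host]["nvme"].items()
        if name in device_names)
    for host in data
}
assert host_capacity["host1"] == 3 * 10**12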
Example #5
    def get_available_storage(self):
        """Get the available SCM and NVMe storage.

        Raises:
            ServerFailed: if there was an error stopping the servers

        Returns:
            list: a list of the maximum available SCM and NVMe sizes in bytes

        """
        def get_host_capacity(key, device_names):
            """Get the total storage capacity per host rank.

            Args:
                key (str): the capacity type, e.g. "scm" or "nvme"
                device_names (list): the device names of this capacity type

            Returns:
                dict: a dictionary of total storage capacity per host rank

            """
            host_capacity = {}
            for host in data:
                device_sizes = []
                for device in data[host][key]:
                    if device in device_names:
                        device_sizes.append(
                            human_to_bytes(
                                data[host][key][device]["capacity"]))
                host_capacity[host] = sum(device_sizes)
            return host_capacity

        # Default maximum bytes for SCM and NVMe
        storage = [0, 0]

        using_dcpm = self.manager.job.using_dcpm
        using_nvme = self.manager.job.using_nvme

        if using_dcpm or using_nvme:
            # Stop the DAOS IO servers in order to be able to scan the storage
            self.system_stop()

            # Scan all of the hosts for their SCM and NVMe storage
            self.dmg.hostlist = self._hosts
            data = self.dmg.storage_scan(verbose=True)
            self.dmg.hostlist = self.get_config_value("access_points")
            if self.dmg.result.exit_status != 0:
                raise ServerFailed("Error obtaining DAOS storage:\n{}".format(
                    self.dmg.result))

            # Restart the DAOS IO servers
            self.system_start()

        if using_dcpm:
            # Find the sizes of the configured SCM storage
            scm_devices = [
                os.path.basename(path)
                for path in self.get_config_value("scm_list") if path
            ]
            capacity = get_host_capacity("scm", scm_devices)
            for host in sorted(capacity):
                self.log.info("SCM capacity for %s: %s", host, capacity[host])
            # Use the minimum SCM storage across all servers
            storage[0] = capacity[min(capacity, key=capacity.get)]
        else:
            # Use the assigned scm_size
            scm_size = self.get_config_value("scm_size")
            storage[0] = human_to_bytes("{}GB".format(scm_size))

        if using_nvme:
            # Find the sizes of the configured NVMe storage
            capacity = get_host_capacity("nvme",
                                         self.get_config_value("bdev_list"))
            for host in sorted(capacity):
                self.log.info("NVMe capacity for %s: %s", host, capacity[host])
            # Use the minimum NVMe storage across all servers
            storage[1] = capacity[min(capacity, key=capacity.get)]

        self.log.info(
            "Total available storage:\n  SCM:  %s (%s)\n  NVMe: %s (%s)",
            str(storage[0]), bytes_to_human(storage[0], binary=False),
            str(storage[1]), bytes_to_human(storage[1], binary=False))
        return storage
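Selecting the per-host minimum with capacity[min(capacity, key=capacity.get)] is equivalent to taking the minimum over the values; a one-line check:

capacity = {"host1": 3183575302144, "host2": 6367150604288}
assert capacity[min(capacity, key=capacity.get)] == min(capacity.values())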
Example #6
    def autosize_pool_params(self, size, tier_ratio, scm_size, nvme_size,
                             min_targets=1, quantity=1):
        """Update any pool size parameter ending in a %.

        Use the current NVMe and SCM storage sizes to assign values to the size,
        scm_size, and or nvme_size dmg pool create arguments which end in "%".
        The numerical part of these arguments will be used to assign a value
        that is X% of the available storage capacity.  The updated size and
        nvme_size arguments will be assigned values that are multiples of 1GiB
        times the number of targets assigned to each server engine.  If needed
        the number of targets will be reduced (to not exceed min_targets) in
        order to support the requested size.  An optional number of expected
        pools (quantity) can also be specified to divide the available storage
        capacity.

        Note: depending upon the inputs this method may return dmg pool create
            parameter combinations that are not supported, e.g. tier_ratio +
            nvme_size.  This is intended to allow testing of these combinations.

        Args:
            size (object): the str, int, or None value for the dmg pool create size parameter.
            tier_ratio (object): the int or None value for the dmg pool create tier_ratio parameter.
            scm_size (object): the str, int, or None value for the dmg pool
                create scm_size parameter.
            nvme_size (object): the str, int, or None value for the dmg pool
                create nvme_size parameter.
            min_targets (int, optional): the minimum number of targets per
                engine that can be configured. Defaults to 1.
            quantity (int, optional): Number of pools to account for in the size
                calculations. The pool size returned is only for a single pool.
                Defaults to 1.

        Raises:
            ServerFailed: if there was an error obtaining auto-sized TestPool parameters.
            AutosizeCancel: if a valid pool parameter size could not be obtained

        Returns:
            dict: the parameters for a TestPool object.

        """
        # Adjust any pool size parameter by the requested percentage
        params = {"tier_ratio": tier_ratio}
        adjusted = {"size": size, "scm_size": scm_size, "nvme_size": nvme_size}
        keys = [
            key for key in ("size", "scm_size", "nvme_size")
            if adjusted[key] is not None and str(adjusted[key]).endswith("%")]
        if keys:
            # Verify the minimum number of targets configured per engine
            targets = min(self.manager.job.get_engine_values("targets"))
            if targets < min_targets:
                raise ServerFailed(
                    "Minimum target quantity ({}) exceeds current target "
                    "quantity ({})".format(min_targets, targets))

            self.log.info("-" * 100)
            pool_msg = "{} pool{}".format(quantity, "s" if quantity > 1 else "")
            self.log.info(
                "Autosizing TestPool parameters ending with a \"%%\" for %s:",
                pool_msg)
            for key in ("size", "scm_size", "nvme_size"):
                self.log.info("  - %-9s : %s (%s)", key, adjusted[key], key in keys)

            # Determine the largest SCM and NVMe pool sizes that can be used
            # with this server configuration with an optionally applied ratio.
            try:
                available_storage = self.get_available_storage()
            except ServerFailed as error:
                raise ServerFailed("Error obtaining available storage") from error

            # Determine the SCM and NVMe size limits for the size and tier_ratio
            # arguments for the total number of engines
            if tier_ratio is None:
                # Use the default value if not provided
                tier_ratio = 6
            engine_qty = len(self.manager.job.engine_params) * len(self._hosts)
            available_storage["size"] = min(
                engine_qty * available_storage["nvme"],
                (engine_qty * available_storage["scm"]) / float(tier_ratio / 100)
            )
            available_storage["tier_ratio"] = available_storage["size"] * float(tier_ratio / 100)
            self.log.info(
                "Largest storage size available for %s engines with a %.2f%% "
                "tier_ratio:", engine_qty, tier_ratio)
            self.log.info(
                "  - NVME     : %s",
                get_display_size(available_storage["size"]))
            self.log.info(
                "  - SCM      : %s",
                get_display_size(available_storage["tier_ratio"]))
            self.log.info(
                "  - COMBINED : %s",
                get_display_size(available_storage["size"] + available_storage["tier_ratio"]))

            # Apply any requested percentages to the pool parameters
            available = {
                "size": {"size": available_storage["size"], "type": "NVMe"},
                "scm_size": {"size": available_storage["scm"], "type": "SCM"},
                "nvme_size": {"size": available_storage["nvme"], "type": "NVMe"}
            }
            self.log.info("Adjusted pool sizes for %s:", pool_msg)
            for key in keys:
                try:
                    ratio = int(str(adjusted[key]).replace("%", ""))
                except ValueError as error:
                    raise ServerFailed(
                        "Invalid '{}' format: {}".format(key, adjusted[key])) from error
                adjusted[key] = (available[key]["size"] * float(ratio / 100)) / quantity
                self.log.info(
                    "  - %-9s : %-4s storage adjusted by %.2f%%: %s",
                    key, available[key]["type"], ratio,
                    get_display_size(adjusted[key]))

            # Display the pool size increment value for each size argument
            increment = {
                "size": human_to_bytes("1GiB"),
                "scm_size": human_to_bytes("16MiB"),
                "nvme_size": human_to_bytes("1GiB")}
            self.log.info("Increment sizes per target:")
            for key in keys:
                self.log.info("  - %-9s : %s", key, get_display_size(increment[key]))

            # Adjust the size to use a SCM/NVMe target multiplier
            self.log.info("Pool sizes adjusted to fit by increment sizes:")
            adjusted_targets = targets
            for key in keys:
                multiplier = math.floor(adjusted[key] / increment[key])
                params[key] = multiplier * increment[key]
                self.log.info(
                    "  - %-9s : %s * %s = %s",
                    key, multiplier, increment[key],
                    get_display_size(params[key]))
                if multiplier < adjusted_targets:
                    adjusted_targets = multiplier
                    if adjusted_targets < min_targets:
                        raise AutosizeCancel(
                            "Unable to autosize the {} pool parameter due to "
                            "exceeding the minimum of {} targets: {}".format(
                                key, min_targets, adjusted_targets))
                if key == "size":
                    tier_ratio_size = params[key] * float(tier_ratio / 100)
                    self.log.info(
                        "  - %-9s : %.2f%% tier_ratio = %s",
                        key, tier_ratio, get_display_size(tier_ratio_size))
                    params[key] += tier_ratio_size
                    self.log.info(
                        "  - %-9s : NVMe + SCM = %s",
                        key, get_display_size(params[key]))
                params[key] = bytes_to_human(params[key], binary=True)

            # Reboot the servers if a reduced number of targets is required
            if adjusted_targets < targets:
                self.log.info(
                    "Updating targets per server engine: %s -> %s",
                    targets, adjusted_targets)
                self.set_config_value("targets", adjusted_targets)
                self.stop()
                self.start()

            self.log.info("-" * 100)

        return params
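The size and tier_ratio limits computed above reduce to a few lines of arithmetic. A worked sketch with illustrative numbers (2 engines, 1TiB of NVMe and 100GiB of SCM available per engine, the default 6% tier_ratio; all values hypothetical):

engine_qty = 2
nvme_per_engine = 2**40           # 1 TiB
scm_per_engine = 100 * 2**30      # 100 GiB
tier_ratio = 6                    # percent of the pool size held in SCM

# The pool size is capped by total NVMe, or by total SCM scaled up by
# the tier ratio, whichever is smaller.
size_limit = min(
    engine_qty * nvme_per_engine,
    (engine_qty * scm_per_engine) / (tier_ratio / 100))
scm_needed = size_limit * (tier_ratio / 100)

# 200GiB / 0.06 is roughly 3.26TiB, so the 2TiB of NVMe is the cap here
assert size_limit == 2 * 2**40
assert scm_needed == 2 * 2**40 * 0.06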
Example #7
    def test_io_sys_admin(self):
        """
        Test Description: Generate large data sets over various middleware,
                          then perform various system admin and datamover
                          operations.
        :avocado: tags=all,deployment,full_regression
        :avocado: tags=hw,large
        :avocado: tags=datamover,ior,mdtest
        :avocado: tags=iosysadmin
        """
        # local param
        new_test_user = self.params.get("new_user", "/run/container_acl/*")
        new_test_group = self.params.get("new_group", "/run/container_acl/*")

        dmg = self.get_dmg_command()
        daos = self.get_daos_command()

        secTestBase.add_del_user(self.hostlist_clients, "useradd",
                                 new_test_user)
        secTestBase.add_del_user(self.hostlist_clients, "groupadd",
                                 new_test_group)

        for idx in range(1, 4):
            self.add_pool_qty(1,
                              namespace="/run/pool_{}/".format(idx),
                              create=False)
            PoolTestBase.check_pool_creation(self, 60)
            self.pool[-1].connect()
            for cont_idx in range(1, 4):
                self.add_container_qty(
                    1,
                    self.pool[-1],
                    namespace="/run/container_{}/".format(cont_idx))
                daos.container_set_owner(self.pool[-1].uuid,
                                         self.container[-1].uuid,
                                         new_test_user, new_test_group)

            daos.container_list(self.pool[-1].uuid)
            self.destroy_containers(self.container)
            self.container = None
            self.destroy_pools(self.pool)
            self.pool = None

        # dmg storage scan
        dmg.storage_scan()
        dmg.system_query()
        dmg.system_leader_query()

        # write large data sets
        self.run_file_count()
        # create snapshot
        self.container[-1].create_snap()
        # overwrite the last ior file
        self.ior_cmd.signature.update('456')
        self.run_ior_with_pool(create_pool=False, create_cont=False)

        nvme_free_space_before_snap_destroy = self.get_free_space()[1]
        # delete snapshot
        self.container[-1].destroy_snap(epc=self.container[-1].epoch)
        # Now check if the space is returned back.
        counter = 1
        returned_space = (self.get_free_space()[1] -
                          nvme_free_space_before_snap_destroy)

        data_written = (int(self.ppn) *
                        human_to_bytes(self.ior_cmd.block_size.value))
        while returned_space < int(data_written):
            # try to wait for 4 x 60 secs for aggregation to be completed or
            # else exit the test with a failure.
            if counter > 4:
                self.log.info("Free space before snapshot destroy: %s",
                              nvme_free_space_before_snap_destroy)
                self.log.info("Free space when test terminated: %s",
                              self.get_free_space()[1])
                self.fail("Aggregation did not complete as expected")

            time.sleep(60)
            returned_space = (self.get_free_space()[1] -
                              nvme_free_space_before_snap_destroy)
            counter += 1

        self.log.info("#####Starting FS_COPY Test")
        self.run_dm_activities_with_ior("FS_COPY", pool=self.pool)
        self.log.info("#####Starting DCP Test")
        self.run_dm_activities_with_ior("DCP", pool=self.pool)
        self.log.info("#####Starting DSERIAL Test")
        self.run_dm_activities_with_ior("DSERIAL", pool=self.pool)
        self.log.info("#####Completed all Datamover tests")
        self.container.pop(0)
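The retry loop above that waits for aggregation is an instance of a generic bounded-poll pattern; a minimal sketch (helper name hypothetical):

import time

def wait_until(predicate, retries=4, delay=60):
    """Poll predicate() up to retries+1 times, sleeping delay seconds
    between attempts; return True as soon as it is satisfied."""
    for attempt in range(retries + 1):
        if predicate():
            return True
        if attempt < retries:
            time.sleep(delay)
    return False

# e.g.: wait_until(lambda: free_space() - baseline >= data_written)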
Example #8
    def test_aggregation_io_small(self):
        """Jira ID: DAOS-3750.

        Test Description:
            Purpose of this test is to run ior with a < 4K transfer size
            and verify the data is initially written into SCM and later
            moved to NVMe SSDs by aggregation.

        :avocado: tags=all,full_regression,hw,large,aggregate,daosio
        :avocado: tags=aggregateiosmall
        """
        # Create pool and container
        self.update_ior_cmd_with_pool()

        # Since the transfer size is 1K, the objects will be inserted
        # into SCM
        scm_index = 0
        ssd_index = 1
        block_size = human_to_bytes(self.params.get("block_size", "/run/ior/*"))
        num_processes = self.params.get("np", "/run/ior/client_processes/*")
        total_ior = block_size * num_processes

        pool_info = self.pool.get_pool_daos_space()
        initial_scm_free_space = pool_info["s_free"][scm_index]
        initial_ssd_free_space = pool_info["s_free"][ssd_index]
        self.log.info(
            "Initial SCM Free Space = {}".format(initial_scm_free_space))
        self.log.info(
            "Initial SSD Free Space = {}".format(initial_ssd_free_space))

        # Disable the aggregation
        self.log.info("Disabling the aggregation")
        self.pool.set_property("reclaim", "disabled")

        # Run ior
        self.run_ior_with_pool()
        pool_info = self.pool.get_pool_daos_space()
        scm_free_space_after_ior = pool_info["s_free"][scm_index]
        ssd_free_space_after_ior = pool_info["s_free"][ssd_index]
        self.log.info(
            "SCM Free Space after ior = {}".format(scm_free_space_after_ior))
        self.log.info(
            "SSD Free Space after ior = {}".format(ssd_free_space_after_ior))

        self.log.info(
            "Comparing SCM free space after ior (%s) against the initial "
            "free space minus the data written (%s)",
            scm_free_space_after_ior, initial_scm_free_space - total_ior)
        self.assertLessEqual(
            scm_free_space_after_ior, (initial_scm_free_space - total_ior),
            "SCM free space did not decrease by at least the amount written")

        self.log.info("Checking that nothing has been moved to SSD")
        self.assertEqual(
            ssd_free_space_after_ior, initial_ssd_free_space,
            "Detected data moved to SSD after running IOR")

        # Enable the aggregation
        self.log.info("Enabling the aggregation")
        self.pool.set_property("reclaim", "time")
        # wait 90 seconds for files to get old enough for aggregation +
        # 90 seconds for aggregation to start and finish
        wait_time = 180
        self.log.info("Waiting for {} seconds".format(wait_time))
        time.sleep(wait_time)

        pool_info = self.pool.get_pool_daos_space()
        scm_free_space_after_aggregate = pool_info["s_free"][scm_index]
        ssd_free_space_after_aggregate = pool_info["s_free"][ssd_index]

        self.log.info("Checking the data is moved to SSD after aggregation")
        self.log.info(
            "{} == {}".format(
                (initial_ssd_free_space - total_ior),
                ssd_free_space_after_aggregate))
        self.assertEqual(
            (initial_ssd_free_space - total_ior),
            ssd_free_space_after_aggregate,
            "No data detected in SSD after aggregation")
        self.log.info("Checking the SCM space is reclaimed")
        self.log.info(
            "{} > {}".format(
                scm_free_space_after_aggregate, scm_free_space_after_ior))
        self.assertGreater(
            scm_free_space_after_aggregate, scm_free_space_after_ior,
            "SCM space has not been reclaimed")
Example #9
    def get_storage_capacity(self, engine_params):
        """Get the configured SCM and NVMe storage per server engine.

        Only sums up capacities of devices that have been specified in the
        server configuration file.

        Args:
            engine_params (list): a list of configuration parameters for each
                engine

        Raises:
            ServerFailed: if output from the dmg storage scan is missing or
                not in the expected format

        Returns:
            dict: a dictionary of each engine's smallest SCM and NVMe storage
                capacity in bytes, e.g.
                    {
                        "scm":  [3183575302144, 6367150604288],
                        "nvme": [1500312748032, 1500312748032]
                    }

        """
        self._check_information("storage", "HostStorage")

        device_capacity = {"nvme": {}, "scm": {}}
        try:
            for entry in self.storage["response"]["HostStorage"].values():
                # Collect a list of sizes for each NVMe device
                if entry["storage"]["nvme_devices"]:
                    for device in entry["storage"]["nvme_devices"]:
                        if device["pci_addr"] not in device_capacity["nvme"]:
                            device_capacity["nvme"][device["pci_addr"]] = []
                        device_capacity["nvme"][device["pci_addr"]].append(0)
                        for namespace in device["namespaces"]:
                            device_capacity["nvme"][device["pci_addr"]][-1] += \
                                namespace["size"]

                # Collect a list of sizes for each SCM device
                if entry["storage"]["scm_namespaces"]:
                    for device in entry["storage"]["scm_namespaces"]:
                        if device["blockdev"] not in device_capacity["scm"]:
                            device_capacity["scm"][device["blockdev"]] = []
                        device_capacity["scm"][device["blockdev"]].append(
                            device["size"])

        except KeyError as error:
            raise ServerFailed(
                "ServerInformation: Error obtaining storage data") from error

        self.log.info("Detected device capacities:")
        for category in sorted(device_capacity):
            for device in sorted(device_capacity[category]):
                sizes = [
                    get_display_size(size)
                    for size in device_capacity[category][device]
                ]
                self.log.info("  %-4s for %s : %s", category.upper(), device,
                              sizes)

        # Determine what storage is currently configured for each engine
        storage_capacity = {"scm": [], "nvme": []}
        for engine_param in engine_params:
            # Get the NVMe storage configuration for this engine
            bdev_list = engine_param.get_value("bdev_list")
            storage_capacity["nvme"].append(0)
            for device in bdev_list:
                if device in device_capacity["nvme"]:
                    storage_capacity["nvme"][-1] += min(
                        device_capacity["nvme"][device])

            # Get the SCM storage configuration for this engine
            scm_size = engine_param.get_value("scm_size")
            scm_list = engine_param.get_value("scm_list")
            if scm_list:
                storage_capacity["scm"].append(0)
                for device in scm_list:
                    scm_dev = os.path.basename(device)
                    if scm_dev in device_capacity["scm"]:
                        storage_capacity["scm"][-1] += min(
                            device_capacity["scm"][scm_dev])
            else:
                storage_capacity["scm"].append(
                    human_to_bytes("{}GB".format(scm_size)))

        self.log.info("Detected engine capacities:")
        for category in sorted(storage_capacity):
            sizes = [
                get_display_size(size) for size in storage_capacity[category]
            ]
            self.log.info("  %-4s : %s", category.upper(), sizes)

        return storage_capacity
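A short usage sketch for the returned dictionary, e.g. when sizing a pool that must fit on every engine (numbers taken from the docstring example above):

storage_capacity = {
    "scm": [3183575302144, 6367150604288],
    "nvme": [1500312748032, 1500312748032],
}
# A pool striped across all engines is bounded by the smallest engine
scm_limit = min(storage_capacity["scm"])     # 3183575302144
nvme_limit = min(storage_capacity["nvme"])   # 1500312748032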