Example #1
0
class DaosSnapshotTest(TestWithServers):
    """
    Test Class Description:
        Test daos container create-snap, list-snaps, and destroy-snap. Test
        steps:
        1. Create snapshots. Obtain the epoch value each time we create a
            snapshot.
        2. List the snapshots, obtain the epoch values, and compare against
            those returned during create-snap.
        3. Destroy all the snapshots one by one.
        4. List and verify that there's no snapshot.

        Test destroy-snap --epcrange.
        1. Create snapshots.
        2. List the snapshots, obtain the epoch values, and compare against
            those returned during create-snap.
        3. Destroy all the snapshots with --epcrange. Use the first epoch for B
            and the last epoch for E.
        4. List and verify that there's no snapshot.

    Note that we keep the test steps basic due to time constraint. Add more
    cases if we see bugs around this feature.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a DaosSnapshotTest object."""
        super(DaosSnapshotTest, self).__init__(*args, **kwargs)
        # Created on demand by prepare_pool_container(); None until then.
        self.daos_cmd = None

    def create_snapshot(self, pool_uuid, cont_uuid, count):
        """Create snapshots and return the epoch values obtained from stdout.

        Args:
            pool_uuid (str): Pool UUID.
            cont_uuid (str): Container UUID.
            count (int): Number of snapshots to create.

        Returns:
            list: Epochs obtained from stdout, one per created snapshot.
        """
        epochs = []
        for _ in range(count):
            epochs.append(
                self.daos_cmd.container_create_snap(
                    pool=pool_uuid, cont=cont_uuid)["epoch"])
        return epochs

    def prepare_pool_container(self):
        """Create a pool and a container and prepare for the test cases."""
        self.daos_cmd = DaosCommand(self.bin)
        self.add_pool(connect=False)
        self.add_container(self.pool)

    def create_verify_snapshots(self, count):
        """Create and list to verify that the snapshots are created.

        Args:
            count (int): Number of snapshots to create.

        Returns:
            list: Sorted epochs of the snapshots created.
        """
        # Create the requested number of snapshots.
        expected_epochs = self.create_snapshot(pool_uuid=self.pool.uuid,
                                               cont_uuid=self.container.uuid,
                                               count=count)
        expected_epochs.sort()
        # Lazy %-style logging: the message is only formatted when emitted.
        self.log.info("Expected Epochs = %s", expected_epochs)

        # List the snapshots and verify their epochs match those returned
        # during create-snap.
        actual_epochs = self.daos_cmd.container_list_snaps(
            pool=self.pool.uuid, cont=self.container.uuid)["epochs"]
        actual_epochs.sort()
        self.log.info("Actual Epochs = %s", actual_epochs)
        self.assertEqual(expected_epochs, actual_epochs)

        return actual_epochs

    def test_create_list_delete(self):
        """JIRA ID: DAOS-4872

        Test Description:
            Test daos container create-snap, list-snaps, destroy-snap

        Use Cases:
            See test cases in the class description.

        :avocado: tags=all,small,control,full_regression,daos_snapshot
        """
        self.prepare_pool_container()

        # Create snapshots.
        snapshot_count = self.params.get("snapshot_count",
                                         "/run/stress_test/*/")
        self.log.info("Creating %s snapshots", snapshot_count)
        actual_epochs = self.create_verify_snapshots(snapshot_count)

        # Destroy all the snapshots one by one.
        for epoch in actual_epochs:
            self.daos_cmd.container_destroy_snap(pool=self.pool.uuid,
                                                 cont=self.container.uuid,
                                                 epc=epoch)

        # List and verify that there's no snapshot left.
        epochs = self.daos_cmd.container_list_snaps(pool=self.pool.uuid,
                                                    cont=self.container.uuid)
        self.assertFalse(epochs)

    @skipForTicket("DAOS-4691")
    def test_epcrange(self):
        """JIRA ID: DAOS-4872

        Test Description:
            Test --epcrange. See class description.

        Use Cases:
            See class description.

        :avocado: tags=all,small,container,full_regression,daos_snapshot_range
        """
        self.prepare_pool_container()

        # Create snapshots.
        snapshot_count = self.params.get("snapshot_count",
                                         "/run/stress_test/*/")
        self.log.info("Creating %s snapshots", snapshot_count)
        actual_epochs = self.create_verify_snapshots(snapshot_count)

        # Destroy all snapshots in a single call with --epcrange; B is the
        # first (smallest) epoch and E is the last (largest).
        epcrange = "{}-{}".format(actual_epochs[0], actual_epochs[-1])
        self.daos_cmd.container_destroy_snap(pool=self.pool.uuid,
                                             cont=self.container.uuid,
                                             epcrange=epcrange)

        # List and verify that there's no snapshot left.
        epochs = self.daos_cmd.container_list_snaps(pool=self.pool.uuid,
                                                    cont=self.container.uuid)
        self.assertFalse(epochs)
Example #2
0
    def test_snapshot_aggregation(self):
        """JIRA ID: DAOS-3751.

        Test Description:
            Verify snapshot aggregation with 2 servers and 6 clients (CI limit).
            Write the same data to the pool twice.  Create a snapshot between
            the writes and confirm that deleting the snapshot reduces the pool
            capacity by half.

        :avocado: tags=all,pr,daily_regression
        :avocado: tags=hw,large
        :avocado: tags=container,snap
        :avocado: tags=snapshot_aggregation
        """
        self.dmg = self.get_dmg_command()
        daos = DaosCommand(self.bin)

        # Create a pool and a container that spans the 2 servers.
        self.update_ior_cmd_with_pool()
        self.pool.get_info()
        self.pool.set_query_data()
        # NOTE(review): update_free_space() is defined elsewhere; the indexing
        # below ([0], [1], [2], [-1]) implies each call appends one SCM/NVMe
        # free-space sample to self.free_space — confirm against the helper.
        self.update_free_space()
        self.log.info(
            "Pool free space before writes:\n  SCM:  %s\n  NVMe: %s",
            self.free_space["scm"][-1], self.free_space["nvme"][-1])

        # Disable the aggregation so the overwritten data cannot be reclaimed
        # until the snapshot is deleted and aggregation is re-enabled below.
        self.pool.set_property("reclaim", "disabled")

        # Run an IOR job that writes >4k sequential blocks for a few minutes
        self.processes = len(self.hostlist_clients)
        manager = self.get_ior_job_manager_command()
        self.run_ior(manager, self.processes)

        # Get the capacity of the pool after running IOR: sample [1] (after
        # the first write) must show less free space than baseline sample [0].
        self.update_free_space()
        self.log.info(
            "Pool free space after first write:\n  SCM:  %s\n  NVMe: %s",
            self.free_space["scm"][-1], self.free_space["nvme"][-1])
        self.assertLess(
            self.free_space["scm"][1]["api"],
            self.free_space["scm"][0]["api"],
            "SCM free pool space was not reduced by the initial write")
        self.assertLess(
            self.free_space["nvme"][1]["api"],
            self.free_space["nvme"][0]["api"],
            "NVMe free pool space was not reduced by the initial write")

        # Create a snapshot of the container once the IOR job completes.
        self.container.create_snap()
        self.log.info("Created container snapshot: %s", self.container.epoch)

        # Run the same IOR job to cause an overwrite; bump the signature so
        # the second pass writes different data than the first.
        self.ior_cmd.signature.value += 333
        self.run_ior(self.get_ior_job_manager_command(), self.processes)

        # Verify that the utilized capacity of the pool has increased: sample
        # [2] (after the overwrite) must show less free space than sample [1].
        self.update_free_space()
        self.log.info(
            "Pool free space after second write:\n  SCM:  %s\n  NVMe: %s",
            self.free_space["scm"][-1], self.free_space["nvme"][-1])
        self.assertLess(
            self.free_space["scm"][2]["api"],
            self.free_space["scm"][1]["api"],
            "SCM free pool space was not reduced by the overwrite")
        self.assertLess(
            self.free_space["nvme"][2]["api"],
            self.free_space["nvme"][1]["api"],
            "NVMe free pool space was not reduced by the overwrite")

        # Re-enable the aggregation ("time" mode).  Temporarily clear
        # use_label so set_property addresses the pool by UUID, exercising
        # the UUID code path.
        self.pool.use_label = False
        self.pool.set_property("reclaim", "time")
        self.pool.use_label = True

        # Delete the snapshot so aggregation can reclaim the overwritten data.
        daos.container_destroy_snap(
            pool=self.pool.uuid,
            cont=self.container.uuid, epc=self.container.epoch)

        # Wait for aggregation to start and finish: poll every 20 seconds
        # until NVMe free space returns to its post-first-write level, or
        # until more than 140 seconds have elapsed (at most 8 polls / 160 s).
        space_reclaimed = False
        time_exceeded = False
        sleep_time = 20
        loop_count = 0
        while not space_reclaimed and not time_exceeded:
            loop_count += 1
            self.log.info(
                "Waiting for %s seconds for aggregation to finish - loop %s",
                sleep_time, loop_count)
            time.sleep(sleep_time)

            # Update the utilized capacity of the pool
            self.pool.get_info()
            self.pool.set_query_data()
            self.update_free_space()
            self.log.info(
                "Pool free space %s seconds after deleting the snapshot:"
                "\n  SCM:  %s\n  NVMe: %s", sleep_time * loop_count,
                self.free_space["scm"][-1], self.free_space["nvme"][-1])

            # Determine if the utilized NVMe capacity of the pool has been
            # reduced back to the capacity after the first ior write.
            # NOTE(review): this relies on EXACT equality of the free-space
            # values; fragile if aggregation overshoots — confirm intended.
            space_reclaimed = self.free_space["nvme"][1]["api"] == \
                self.free_space["nvme"][-1]["api"]

            # Determine if the time has exceeded
            time_exceeded = sleep_time * loop_count > 140

        if not space_reclaimed:
            self.fail(
                "Pool free space was not restored by the aggregation after "
                "snapshot deletion")

        self.log.info(
            "Pool free space restored by the aggregation after snapshot "
            "deletion")