def test_recovery_unplug_cache_fs(cache_mode, cls, filesystem, direct):
    """
        title: Test for recovery after cache drive removal - test with filesystem.
        description: |
          Verify that unflushed data can be safely recovered after, when SSD drive is removed
          after write completion - test with filesystem.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    # NOTE(review): a second function with this exact name is visible later in this file
    # view; if both live in the same module the later definition replaces this one —
    # confirm which variant is meant to be collected.
    with TestRun.step("Prepare devices"):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)])
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)
        # Recorded now; compared against the checksum of the recovered file at the end.
        source_file_md5 = source_file.md5sum()

    with TestRun.step("Create filesystem on core device."):
        core_device.create_filesystem(filesystem)

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode, cls)
        core = cache.add_core(core_device)

    with TestRun.step("Mount CAS device."):
        core.mount(mount_point)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path, target=test_file_path,
                  size=test_file_size, direct="oflag" if direct else None)
        TestRun.LOGGER.info(str(core.get_statistics()))

    with TestRun.step("Unmount CAS device."):
        core.unmount()

    with TestRun.step("Unplug cache device."):
        cache_disk.unplug()
        TestRun.LOGGER.info(f"List caches:\n{casadm.list_caches().stdout}")
        TestRun.LOGGER.info("Dirty blocks on cache: "
                            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache."):
        # A regular stop is expected to be refused while the cache device is unplugged;
        # only after that refusal is the stop retried with no_data_flush=True.
        try:
            cache.stop()
        except CmdException as e:
            TestRun.LOGGER.info(str(e.output))
            try:
                cache.stop(no_data_flush=True)
            except CmdException as e1:
                # The no-flush stop is expected to report errors; verify they are the known ones.
                cli_messages.check_stderr_msg(e1.output, cli_messages.stop_cache_errors)
            else:
                # A clean no-flush stop is unexpected, but it is only reported as a warning.
                TestRun.LOGGER.warning(
                    "Expected stopping cache with errors with --no-flush flag.")
        else:
            TestRun.fail("Stopping the cache should be aborted without --no-flush flag.")

    with TestRun.step("Plug missing cache device."):
        TestRun.LOGGER.info(str(casadm.list_caches(by_id_path=False)))
        cache_disk.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        TestRun.LOGGER.info("Dirty blocks on cache: "
                            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Mount core device."):
        core_device.mount(mount_point)

    with TestRun.step("Copy file from core device and check md5sum."):
        copy_file(source=test_file_path, target=target_file.full_path,
                  size=test_file_size, direct="iflag" if direct else None)
        target_file_md5 = target_file.md5sum()
        compare_files(source_file_md5, target_file_md5)

    with TestRun.step("Unmount core device and remove files."):
        core_device.unmount()
        # Best-effort cleanup: on some OSes files at /tmp location are automatically removed
        # after DUT hard reset. Each file is removed on its own so that a failure on the
        # first one neither skips the second one nor fails the test.
        for test_file in (target_file, source_file):
            try:
                test_file.remove()
            except Exception as e:
                TestRun.LOGGER.info(f"Could not remove {test_file.full_path}: {e}")
def test_recovery_flush_reset_raw(cache_mode):
    """
        title: Recovery after reset during cache flushing - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered, when reset was pressed during
          data flushing on raw device.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_drive = TestRun.disks['cache']
        core_drive = TestRun.disks['core']
        cache_drive.create_partitions([Size(2, Unit.GibiByte)])
        core_drive.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_part = cache_drive.partitions[0]
        core_part = core_drive.partitions[0]
        # Links are captured before the reset and used afterwards to refresh device paths.
        core_link = core_part.get_device_link("/dev/disk/by-id")
        cache_link = cache_part.get_device_link("/dev/disk/by-id")

    with TestRun.step("Create test files."):
        src_file, dst_file = create_test_files(test_file_size)

    with TestRun.step("Setup cache and add core."):
        cache = casadm.start_cache(cache_part, cache_mode)
        exported_obj = cache.add_core(core_part)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Copy file to CAS."):
        copy_file(
            source=src_file.full_path,
            target=exported_obj.path,
            size=test_file_size,
            direct="oflag",
        )

    with TestRun.step("Sync and flush buffers."):
        os_utils.sync()
        hdparm_result = TestRun.executor.run(f"hdparm -f {exported_obj.path}")
        if hdparm_result.exit_code != 0:
            raise CmdException("Error during hdparm", hdparm_result)

    with TestRun.step("Trigger flush."):
        flush_command = cli.flush_cache_cmd(str(cache.cache_id))
        TestRun.executor.run_in_background(flush_command)

    with TestRun.step("Hard reset DUT during data flushing."):
        power_cycle_dut(wait_for_flush_begin=True, core_device=core_part)
        # Re-resolve both device paths through the by-id links after the power cycle.
        cache_part.path = cache_link.get_target()
        core_part.path = core_link.get_target()

    with TestRun.step(
            "Copy file from core and check if current md5sum is different than "
            "before restart."):
        core_path_after_reset = core_link.get_target()
        copy_file(
            source=core_path_after_reset,
            target=dst_file.full_path,
            size=test_file_size,
            direct="iflag",
        )
        compare_files(src_file, dst_file, should_differ=True)

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_part)
        dirty_blocks = cache.get_dirty_blocks()
        if dirty_blocks == Size.zero():
            TestRun.fail("There are no dirty blocks on cache device.")

    with TestRun.step("Stop cache with dirty data flush."):
        core_writes_before = core_part.get_io_stats().sectors_written
        cache.stop()
        core_writes_after = core_part.get_io_stats().sectors_written
        # The stop must have written something to the core device.
        if core_writes_before >= core_writes_after:
            TestRun.fail(
                "No data was flushed after stopping cache started with load option."
            )

    with TestRun.step(
            "Copy test file from core device to temporary location. "
            "Compare it with the first version – they should be the same."):
        core_path_after_flush = core_link.get_target()
        copy_file(
            source=core_path_after_flush,
            target=dst_file.full_path,
            size=test_file_size,
            direct="iflag",
        )
        compare_files(src_file, dst_file)

    with TestRun.step("Cleanup core device and remove test files."):
        dst_file.remove()
        src_file.remove()
def test_recovery_unplug_cache_fs(cache_mode, cls, filesystem, direct):
    """
        title: Test for recovery after cache drive removal - test with filesystem.
        description: |
          Verify that unflushed data can be safely recovered after, when SSD drive is removed
          after write completion - test with filesystem.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    # NOTE(review): a function with this exact name is also visible earlier in this file
    # view; if both live in the same module this definition replaces the earlier one —
    # confirm which variant is meant to be kept.

    # Direct I/O flags for the write to CAS and the read-back from the core device.
    write_flag = "oflag" if direct else None
    read_flag = "iflag" if direct else None

    with TestRun.step("Prepare devices"):
        cache_drive = TestRun.disks['cache']
        core_drive = TestRun.disks['core']
        cache_drive.create_partitions([Size(2, Unit.GibiByte)])
        core_drive.create_partitions([Size(16, Unit.GibiByte)])
        cache_part = cache_drive.partitions[0]
        core_part = core_drive.partitions[0]

    with TestRun.step("Create test files."):
        src_file, dst_file = create_test_files(test_file_size)

    with TestRun.step("Create filesystem on core device."):
        core_part.create_filesystem(filesystem)

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_part, cache_mode, cls)
        exported_obj = cache.add_core(core_part)

    with TestRun.step("Mount CAS device."):
        exported_obj.mount(mount_point)

    with TestRun.step("Copy file to CAS."):
        copy_file(
            source=src_file.full_path,
            target=test_file_path,
            size=test_file_size,
            direct=write_flag,
        )
        core_stats = exported_obj.get_statistics()
        TestRun.LOGGER.info(str(core_stats))

    with TestRun.step("Unmount CAS device."):
        exported_obj.unmount()

    with TestRun.step("Unplug cache device."):
        cache_drive.unplug()
        cache_listing = casadm.list_caches().stdout
        TestRun.LOGGER.info(f"List caches:\n{cache_listing}")
        dirty_after_unplug = cache.get_dirty_blocks().get_value(Unit.Blocks4096)
        TestRun.LOGGER.info(f"Dirty blocks on cache: {dirty_after_unplug}")

    with TestRun.step("Stop cache."):
        cache.stop()

    with TestRun.step("Plug missing cache device."):
        cache_drive.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_part)
        dirty_after_load = cache.get_dirty_blocks().get_value(Unit.Blocks4096)
        TestRun.LOGGER.info(f"Dirty blocks on cache: {dirty_after_load}")

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Mount core device."):
        core_part.mount(mount_point)

    with TestRun.step("Copy file from core device and check md5sum."):
        copy_file(
            source=test_file_path,
            target=dst_file.full_path,
            size=test_file_size,
            direct=read_flag,
        )
        compare_files(src_file, dst_file)

    with TestRun.step("Unmount core device and remove files."):
        core_part.unmount()
        dst_file.remove()
        src_file.remove()
def test_recovery_flush_reset_fs(cache_mode, fs):
    """
        title: Recovery after reset during cache flushing - test on filesystem.
        description: |
          Verify that unflushed data can be safely recovered, when reset was pressed during
          data flushing on filesystem.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        # Links are captured before the reset and used afterwards to refresh device paths.
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

    with TestRun.step(f"Create {fs} filesystem on core."):
        core_device.create_filesystem(fs)

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    # Path of the test file under the mount point; the same path is used for the write
    # through the CAS device and for the read-back from the core device.
    mounted_test_file = os.path.join(mount_point, "source_test_file")

    # Udev is disabled part-way through the test and must be re-enabled even when a later
    # step fails; the flag tracks whether that re-enable is still pending.
    udev_disabled = False
    try:
        with TestRun.step("Setup cache and add core."):
            cache = casadm.start_cache(cache_device, cache_mode)
            Udev.disable()
            udev_disabled = True
            core = cache.add_core(core_device)
            cache.set_cleaning_policy(CleaningPolicy.nop)
            cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

        with TestRun.step("Mount CAS device."):
            core.mount(mount_point)

        with TestRun.step("Copy file to CAS."):
            copy_file(source=source_file.full_path, target=mounted_test_file,
                      size=test_file_size, direct="oflag")

        with TestRun.step("Unmount CAS device."):
            core.unmount()

        with TestRun.step("Trigger flush."):
            TestRun.executor.run_in_background(
                cli.flush_cache_cmd(f"{cache.cache_id}"))

        with TestRun.step("Hard reset DUT during data flushing."):
            power_cycle_dut(wait_for_flush_begin=True, core_device=core_device)
            # Re-resolve both device paths through the by-id links after the power cycle.
            cache_device.path = cache_device_link.get_target()
            core_device.path = core_device_link.get_target()

        with TestRun.step("Load cache."):
            cache = casadm.load_cache(cache_device)
            if cache.get_dirty_blocks() == Size.zero():
                TestRun.fail("There are no dirty blocks on cache device.")

        with TestRun.step("Stop cache with dirty data flush."):
            core_writes_before = core_device.get_io_stats().sectors_written
            cache.stop()
            # The stop must have written something to the core device.
            if core_writes_before >= core_device.get_io_stats().sectors_written:
                TestRun.fail(
                    "No data was flushed after stopping cache started with load option."
                )

        with TestRun.step("Mount core device."):
            core_device.mount(mount_point)

        with TestRun.step(
                "Copy test file from core device to temporary location. "
                "Compare it with the first version – they should be the same."):
            copy_file(source=mounted_test_file, target=target_file.full_path,
                      size=test_file_size, direct="iflag")
            compare_files(source_file, target_file)

        with TestRun.step("Unmount core device and remove test files."):
            core_device.unmount()
            target_file.remove()
            source_file.remove()
            Udev.enable()
            udev_disabled = False
    finally:
        # Only reached with the flag still set when a step above failed after Udev.disable().
        if udev_disabled:
            Udev.enable()
def test_recovery_unplug_cache_raw(cache_mode, cls):
    """
        title: Test for recovery after cache drive removal - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered after, when SSD drive is removed
          after write completion - test on raw device.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    with TestRun.step("Prepare devices"):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)])
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)
        # Recorded now; compared against the checksum of the recovered file at the end.
        source_file_md5 = source_file.md5sum()

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode, cls)
        core = cache.add_core(core_device)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path, target=core.path,
                  size=test_file_size, direct="oflag")
        TestRun.LOGGER.info(str(core.get_statistics()))

    with TestRun.step("Unplug cache device."):
        cache_disk.unplug()
        TestRun.LOGGER.info(f"List caches:\n{casadm.list_caches().stdout}")
        TestRun.LOGGER.info(
            "Dirty blocks on cache: "
            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache."):
        cache.stop()

    with TestRun.step("Plug missing cache device."):
        cache_disk.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        TestRun.LOGGER.info(
            "Dirty blocks on cache: "
            f"{cache.get_dirty_blocks().get_value(Unit.Blocks4096)}")

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Copy file from core device and check md5sum."):
        # Read straight from the core device path, after the cache has been stopped.
        copy_file(source=core_device.path, target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        target_file_md5 = target_file.md5sum()
        compare_files(source_file_md5, target_file_md5)

    with TestRun.step("Cleanup core device and remove test files."):
        # Best-effort cleanup: on some OSes files at /tmp location are automatically removed
        # after DUT hard reset. Each file is removed on its own so that a failure on the
        # first one neither skips the second one nor fails the test.
        for test_file in (target_file, source_file):
            try:
                test_file.remove()
            except Exception as e:
                TestRun.LOGGER.info(f"Could not remove {test_file.full_path}: {e}")