def wait_for_flushing(cache: Cache, core: Core, timeout: timedelta = timedelta(seconds=30)):
    """Poll casadm until a flushing-progress query for ``core`` succeeds.

    The progress of ``core`` on ``cache`` is queried repeatedly. A ``CmdException``
    raised by the query is swallowed and the query is retried; the first query that
    does not raise ends the wait.

    Args:
        cache: cache whose ``cache_id`` is passed to the progress query.
        core: core whose ``core_id`` is passed to the progress query.
        timeout: maximum time to keep retrying (30 seconds by default).

    Raises:
        Exception: if no progress query succeeded before ``timeout`` elapsed.
    """
    began_at = datetime.now()
    while True:
        elapsed = datetime.now() - began_at
        if elapsed >= timeout:
            break
        try:
            casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
        except CmdException:
            # Query failed - presumably the flush is not reported yet; try again.
            continue
        else:
            return
    raise Exception("Management flush not started!")
def test_concurrent_cores_flush(cache_mode):
    """
        title: Fail to flush two cores simultaneously.
        description: |
          CAS should return an error on attempt to flush second core if there is already
          one flush in progress.
        pass_criteria:
          - No system crash.
          - First core flushing should finish successfully.
          - It should not be possible to run flushing command on cores within
            the same cache simultaneously.
    """
    with TestRun.step("Prepare cache and core."):
        cache_disk = TestRun.disks['cache']
        cache_disk.create_partitions([cache_size])
        cache_partition = cache_disk.partitions[0]

        core_disk = TestRun.disks['core']
        # Two core partitions, each twice the size of the cache partition.
        core_disk.create_partitions([cache_size * 2] * 2)
        first_core_partition = core_disk.partitions[0]
        second_core_partition = core_disk.partitions[1]

    with TestRun.step("Start cache."):
        cache = casadm.start_cache(cache_partition, cache_mode, force=True)

    with TestRun.step("Disable cleaning and sequential cutoff."):
        cache.set_cleaning_policy(CleaningPolicy.nop)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Add both core devices to cache."):
        first_core = cache.add_core(first_core_partition)
        second_core = cache.add_core(second_core_partition)

    def first_core_flush_progress():
        # Single place for the progress query that is polled in several steps below.
        return casadm_parser.get_flushing_progress(cache.cache_id, first_core.core_id)

    with TestRun.step("Run workload on concurrent cores."):
        block_size = Size(4, Unit.MebiByte)
        # Each core receives half of the cache size worth of random data.
        count = int(cache_size.value / 2 / block_size.value)

        background_dd = (
            Dd()
            .output(first_core.path)
            .input("/dev/urandom")
            .block_size(block_size)
            .count(count)
        )
        background_dd_pid = background_dd.run_in_background()

        foreground_dd = (
            Dd()
            .output(second_core.path)
            .input("/dev/urandom")
            .block_size(block_size)
            .count(count)
        )
        foreground_dd.run()

    with TestRun.step("Check if both DD operations finished."):
        # Only the background dd is awaited here: its /proc entry is listed once per
        # second until the listing fails.
        proc_entry_listing = f"ls /proc/{background_dd_pid}"
        while TestRun.executor.run(proc_entry_listing).exit_code == 0:
            sleep(1)

    with TestRun.step("Check if both cores contain dirty blocks."):
        if int(first_core.get_dirty_blocks()) == 0:
            TestRun.fail("The first core does not contain dirty blocks.")
        if int(second_core.get_dirty_blocks()) == 0:
            TestRun.fail("The second core does not contain dirty blocks.")
        second_core_dirty_before = int(second_core.get_dirty_blocks())

    with TestRun.step("Start flushing the first core."):
        TestRun.executor.run_in_background(
            cli.flush_core_cmd(str(cache.cache_id), str(first_core.core_id))
        )

    with TestRun.step("Wait some time and start flushing the second core."):
        sleep(2)
        # Busy-wait until the first core's flush is reported as at least 40% done.
        while first_core_flush_progress() < 40:
            pass

        try:
            second_core.flush_core()
            TestRun.fail("The first core is flushing right now so flush attempt of the second core "
                         "should fail.")
        except CmdException:
            TestRun.LOGGER.info("The first core is flushing right now so the second core's flush "
                                "fails as expected.")

    with TestRun.step("Wait for the first core to finish flushing."):
        try:
            while first_core_flush_progress() < 100:
                pass
        except CmdException:
            # A failing progress query is taken here as "the flush is over".
            TestRun.LOGGER.info("The first core is not flushing dirty data anymore.")

    with TestRun.step("Check number of dirty data on both cores."):
        if int(first_core.get_dirty_blocks()) > 0:
            TestRun.LOGGER.error("The quantity of dirty cache lines on the first core "
                                 "after completed flush should be zero.")

        second_core_dirty_after = int(second_core.get_dirty_blocks())
        if second_core_dirty_before != second_core_dirty_after:
            TestRun.LOGGER.error("The quantity of dirty cache lines on the second core "
                                 "after failed flush should not change.")

    with TestRun.step("Stop cache."):
        cache.stop()
def test_interrupt_cache_stop(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache stopping interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache's stop interruption.
        pass_criteria:
          - No system crash.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_partition, core_partition = prepare()

    iterations = TestRun.iteration(
        range(iterations_per_config),
        f"Reload cache configuration {iterations_per_config} times.",
    )
    for _ in iterations:

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_partition, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_partition.create_filesystem(filesystem)
            core = cache.add_core(core_partition)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            md5_before_interrupt = test_file.md5sum()

        with TestRun.step("Get number of dirty data on exported object before interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            dirty_before_interrupt = cache.get_dirty_blocks()

        with TestRun.step("Unmount core."):
            core.unmount()

        with TestRun.step("Start stopping cache."):
            stop_pid = TestRun.executor.run_in_background(cli.stop_cmd(str(cache.cache_id)))
            sleep(2)

        with TestRun.step("Interrupt cache stopping."):
            # Busy-wait until flushing progress is reported as at least 50%, then
            # send SIGINT to the backgrounded stop command.
            while casadm_parser.get_flushing_progress(cache.cache_id, core.core_id) < 50:
                pass
            TestRun.executor.run(f"kill -s SIGINT {stop_pid}")

        with TestRun.step("Check number of dirty data on exported object after interruption."):
            dirty_after_interrupt = cache.get_dirty_blocks()
            if dirty_after_interrupt >= dirty_before_interrupt:
                TestRun.LOGGER.error(
                    "Quantity of dirty lines after cache stop interruption "
                    "should be lower.")
            if int(dirty_after_interrupt) == 0:
                TestRun.LOGGER.error(
                    "Quantity of dirty lines after cache stop interruption "
                    "should not be zero.")

        with TestRun.step("Stop cache."):
            cache.stop()

        with TestRun.step("Mount core device."):
            core_partition.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            md5_after_interrupt = test_file.md5sum()
            if md5_before_interrupt != md5_after_interrupt:
                TestRun.LOGGER.error("Md5 sums before and after interrupting"
                                     " cache stop are different.")

        with TestRun.step("Unmount core device."):
            core_partition.unmount()
def test_interrupt_cache_mode_switch_delayed(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache mode switching delayed interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache mode switching
          interruption with delay.
        pass_criteria:
          - No system crash.
          - Cache mode cannot be interrupted with delay.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after cache mode switching is zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    for _ in TestRun.iteration(
        range(iterations_per_config),
        f"Reload cache configuration {iterations_per_config} times.",
    ):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            test_file_md5sum_before = test_file.md5sum()

        with TestRun.step("Start switching cache mode."):
            # The switch is started in the background so that it can be signalled
            # while it is still in progress.
            flush_pid = TestRun.executor.run_in_background(
                cli.set_cache_mode_cmd(
                    str(CacheMode.DEFAULT.name.lower()), str(cache.cache_id), "yes"
                )
            )
            sleep(2)

        with TestRun.step("Send interruption signal."):
            # "Delayed" interruption: SIGINT is sent only after the reported flushing
            # progress has reached at least 50%.
            percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            while percentage < 50:
                percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            TestRun.executor.run(f"kill -s SIGINT {flush_pid}")

        with TestRun.step(
            "Get quantity of dirty data on exported object after sending interruption "
            "signal to cas to stop mode switching."
        ):
            # Per the pass criteria the late signal must not abort the switch, so no
            # dirty data may remain.
            if int(cache.get_dirty_blocks()) != 0:
                TestRun.LOGGER.error("Quantity of dirty lines should be zero now.")

        with TestRun.step("Check cache mode."):
            # Still being in the mode the cache was started with is reported as an error.
            if cache.get_cache_mode() == cache_mode:
                TestRun.LOGGER.error("Cache mode should have changed.")

        with TestRun.step("Unmount core and stop cache."):
            core.unmount()
            cache.stop()

        with TestRun.step("Mount core device."):
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            if test_file_md5sum_before != test_file.md5sum():
                # Message fixed: this test interrupts cache mode switching, not core removal.
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting cache mode switching are different."
                )

        with TestRun.step("Unmount core device."):
            core_part.unmount()