def test_move_when_corrupt_exists_at_dest(global_integration_cli_args,
                                          test_dataset: DatasetForTests,
                                          other_dataset: DatasetForTests,
                                          destination_path):
    """
    Move a dataset to a location that already exists but is invalid.

    It should see that the destination is corrupt and skip the move.
    """
    test_dataset.add_to_index()
    other_dataset.add_to_index()

    expected_new_path: Path = destination_path.joinpath(*test_dataset.path_offset)

    # Create a corrupt dataset at the destination.
    expected_new_path.parent.mkdir(parents=True)
    expected_new_path.write_text("invalid")

    original_index = freeze_index(test_dataset.collection.index_)

    # Move one path to destination_path. (Args are stringified: click expects strings.)
    res = _call_move(['--destination', str(destination_path), str(test_dataset.path)],
                     global_integration_cli_args)

    # The move script should complete successfully, but the dataset should be skipped.
    assert res.exit_code == 0, res.output
    print(res.output)

    now_index = freeze_index(test_dataset.collection.index_)
    assert original_index == now_index

    assert test_dataset.path.exists()

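# For context: a minimal sketch of the `_call_move` helper used above. It is
# assumed to wrap click's test runner around the move command; the
# `digitalearthau.move` import path and its `cli` attribute are assumptions,
# not confirmed from this file.
from click.testing import CliRunner

from digitalearthau import move


def _call_move_sketch(args, global_integration_cli_args):
    # Global args (e.g. environment/config flags) precede the command's own args.
    return CliRunner().invoke(
        move.cli,
        [str(arg) for arg in (*global_integration_cli_args, *args)],
        catch_exceptions=False,
    )
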
def test_remove_missing(test_dataset: DatasetForTests,
                        integration_test_data: Path,
                        other_dataset: DatasetForTests):
    """An on-disk dataset that's not indexed should be trashed when trash_missing=True"""
    register_base_directory(integration_test_data)
    trashed_path = test_dataset.base_path.joinpath(*_TRASH_PREFIX,
                                                   *test_dataset.path_offset)

    # Add a second dataset that's indexed. It should not be touched!
    other_dataset.add_to_index()
    assert other_dataset.path.exists()

    assert test_dataset.path.exists(), "On-disk location should exist before the test begins."
    assert not trashed_path.exists(), "Trashed file shouldn't exist yet."

    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[
            test_dataset.uri,
            other_dataset.path.as_uri(),
        ],
        expected_mismatches=[
            mm.DatasetNotIndexed(test_dataset.dataset, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, update_locations=True)
    )

    assert not test_dataset.path.exists(), "On-disk location should have been trashed."
    assert trashed_path.exists(), "Trashed file should exist after the sync."
    assert other_dataset.path.exists(), "A dataset outside the collection folder shouldn't be touched."

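# For reference, the trashed location asserted above is just the dataset's
# offset within its base path, re-rooted under the trash prefix. A minimal
# self-contained illustration (the '.trash' value and the example paths are
# assumptions, not values from this repo):
def _example_trash_path():
    trash_prefix = ('.trash',)
    base = Path('/data/collection')
    offset = ('LS8_SCENE', 'ga-metadata.yaml')
    # -> /data/collection/.trash/LS8_SCENE/ga-metadata.yaml
    return base.joinpath(*trash_prefix, *offset)
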
def _check_mismatch_fix(index: Index,
                        mismatches: Iterable[Mismatch],
                        expected_index_result: Mapping[DatasetLite, Iterable[str]],
                        fix_settings: dict):
    """Check that the index is correctly updated when fixing mismatches."""

    # First check that no change is made to the index if all fixes are set to False.
    starting_index = freeze_index(index)

    # Default settings are all false.
    fixes.fix_mismatches(mismatches, index)

    assert starting_index == freeze_index(index), \
        "Changes made to index despite all fix settings being false (index_missing=False etc)"

    # Now perform the fixes, and check that the result matches expectations.
    fixes.fix_mismatches(mismatches, index, **fix_settings)

    assert expected_index_result == freeze_index(index)

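# A rough sketch of what `freeze_index` is expected to produce: a snapshot of
# the index as a mapping of dataset -> indexed URIs, so that two index states
# can be compared with plain equality. The datacube calls below are believed
# correct, but this is an illustration, not the real implementation, and
# constructing a DatasetLite directly from a UUID is an assumption.
def _freeze_index_sketch(index: Index) -> Mapping[DatasetLite, Iterable[str]]:
    return {
        DatasetLite(dataset.id): tuple(index.datasets.get_locations(dataset.id))
        for dataset in index.datasets.search()
    }
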
def test_detect_corrupt_existing(test_dataset: DatasetForTests,
                                 integration_test_data: Path):
    """If a dataset exists but cannot be read, it should be reported as corrupt."""
    path = uri_to_local_path(test_dataset.uri)

    test_dataset.add_to_index()
    assert path.exists()

    # Overwrite the dataset with a corrupted file.
    os.unlink(str(path))
    with path.open('w') as f:
        f.write('corruption!')
    assert path.exists()

    # The indexed dataset still points at this (now corrupt) location.
    _check_sync(
        collection=test_dataset.collection,
        expected_paths=[test_dataset.uri],
        expected_mismatches=[
            # We don't know if it's the same dataset
            mm.UnreadableDataset(None, test_dataset.uri)
        ],
        # Unmodified index
        expected_index_result=freeze_index(test_dataset.collection.index_),
        cache_path=integration_test_data,
        fix_settings=dict(trash_missing=True, trash_archived=True, update_locations=True)
    )
    # If a dataset in the index points to the corrupt location, it shouldn't be
    # trashed even with trash_archived=True.
    assert path.exists(), "Corrupt dataset with a sibling in the index should not be trashed"

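# `uri_to_local_path` (from datacube.utils) converts a file:// URI into a local
# pathlib.Path, which is how the test above finds the on-disk file to corrupt.
# A quick self-check as illustration (POSIX-style paths assumed):
def test_uri_to_local_path_example():
    assert uri_to_local_path('file:///data/scene/ga-metadata.yaml') == \
        Path('/data/scene/ga-metadata.yaml')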