def test_staging_file(tmp_dir, dvc):
    """Staged file objects stay out of the local ODB until transferred.

    Stages a freshly generated workspace file, verifies the resulting object
    exists (and passes ``check``) only in the staging ODB, then transfers it
    to the local ODB and verifies it is valid in both places.
    """
    from dvc.data import check
    from dvc.data.stage import stage
    from dvc.data.transfer import transfer

    tmp_dir.gen("foo", "foo")
    foo_path = (tmp_dir / "foo").fs_path

    local_fs = LocalFileSystem()
    cache_odb = dvc.odb.local
    staged_odb, _, staged_obj = stage(cache_odb, foo_path, local_fs, "md5")

    # Right after staging, only the staging ODB knows about the object.
    assert not cache_odb.exists(staged_obj.hash_info)
    assert staged_odb.exists(staged_obj.hash_info)

    # check() against the local ODB must fail: nothing was written there yet.
    with pytest.raises(FileNotFoundError):
        check(cache_odb, staged_obj)
    check(staged_odb, staged_obj)

    transfer(staged_odb, cache_odb, {staged_obj.hash_info}, hardlink=True)

    # After the transfer the object must be valid in both ODBs
    # (local first, then staging).
    for odb in (cache_odb, staged_odb):
        check(odb, staged_obj)

    # The object's file must exist at its path inside the local ODB.
    cached_path = cache_odb.hash_to_path(staged_obj.hash_info.value)
    assert local_fs.exists(cached_path)
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Hash lookup through the repo filesystem for a dirty, then missing, file.

    While the tracked file is modified in the workspace, staging must yield
    the workspace hash. Once the file is removed from the workspace, the
    previously staged object becomes invalid and hash lookups fall back to
    the DVC cached hash.
    """
    from dvc.data import check
    from dvc.data.stage import get_file_hash
    from dvc.objects.errors import ObjectFormatError

    workspace_file = tmp_dir / "file"
    file_path = workspace_file.fs_path

    # Track the file with DVC, then dirty it in the workspace.
    tmp_dir.dvc_gen("file", "file")
    committed_hash = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")
    workspace_file.write_text("something")
    dirty_hash = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # --- file is modified in workspace ---
    # The repo filesystem must not report the DVC cached md5 for it, and
    # staging must produce the workspace hash rather than the cached one.
    repo_fs = RepoFileSystem(repo=dvc)
    dirty_info = repo_fs.info(file_path)
    assert dirty_info.get("md5") is None

    staging_odb, _, dirty_obj = stage(dvc.odb.local, file_path, repo_fs, "md5")
    assert dirty_obj.hash_info == dirty_hash
    check(staging_odb, dirty_obj)

    # --- file is removed in workspace ---
    # Any staged object referring to the modified workspace file is now
    # invalid.
    workspace_file.unlink()
    with pytest.raises(ObjectFormatError):
        check(staging_odb, dirty_obj)

    # With the workspace copy gone, lookups should return the DVC cached hash.
    assert repo_fs.info(file_path)["md5"] == committed_hash.value
    _, looked_up_hash = get_file_hash(
        file_path, repo_fs, "md5", state=dvc.state
    )
    assert looked_up_hash == committed_hash

    # tmp_dir/file can be staged even though it is missing in the workspace,
    # since repofs will use the DVC cached hash (and refer to the local cache
    # object).
    _, _, cached_obj = stage(dvc.odb.local, file_path, repo_fs, "md5")
    assert cached_obj.hash_info == committed_hash