def test_staging_file(tmp_dir, dvc):
    """Stage a single file, then transfer it from staging into the local ODB.

    Checks that the staged object is present only in the staging ODB until
    ``transfer`` is called, and that afterwards it passes ``check`` in both
    ODBs and exists on disk at the local ODB's path for its hash.
    """
    from dvc.data import check
    from dvc.data.stage import stage
    from dvc.data.transfer import transfer

    tmp_dir.gen("foo", "foo")
    local_fs = LocalFileSystem()
    cache = dvc.odb.local

    source_path = (tmp_dir / "foo").fs_path
    staging, _, staged = stage(cache, source_path, local_fs, "md5")

    # Right after staging, only the staging ODB knows about the object.
    assert not cache.exists(staged.hash_info)
    assert staging.exists(staged.hash_info)

    with pytest.raises(FileNotFoundError):
        check(cache, staged)
    check(staging, staged)

    wanted = {staged.hash_info}
    transfer(staging, cache, wanted, hardlink=True)

    # After the transfer the object must validate in both ODBs.
    check(cache, staged)
    check(staging, staged)

    cached_path = cache.hash_to_path(staged.hash_info.value)
    assert local_fs.exists(cached_path)
def pull(
    self,
    objs: Iterable["HashInfo"],
    jobs: Optional[int] = None,
    remote: Optional[str] = None,
    odb: Optional["ObjectDB"] = None,
):
    """Pull data items in a cloud-agnostic way.

    Transfers the requested objects from a source ODB into the repo's
    local ODB and returns whatever ``transfer`` returns.

    Args:
        objs: objects to pull from the cloud.
        jobs: number of jobs that can be running simultaneously.
        remote: optional name of remote to pull from.
            By default remote from core.remote config option is used.
        odb: optional ODB to pull from. Overrides remote.
    """
    from dvc.data.transfer import transfer

    # An explicitly supplied (truthy) ODB wins; otherwise resolve the
    # remote's ODB by name for the "pull" command.
    source_odb = odb or self.get_remote_odb(remote, "pull")

    # The local ODB is both the transfer destination and the cache ODB.
    local_cache = self.repo.odb.local

    return transfer(
        source_odb,
        local_cache,
        objs,
        jobs=jobs,
        src_index=get_index(source_odb),
        cache_odb=local_cache,
        verify=source_odb.verify,
    )
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Downloading and staging ``dir`` must leave out the nested subrepo.

    A subrepo is created at ``dir/subrepo`` inside the external repo. The
    test first checks that downloading ``dir`` yields only ``foo``, ``bar``
    and ``.gitignore``, then clears the cache and checks that staging and
    transferring ``dir`` produces exactly the expected cache entries.
    """
    nested_repo = erepo_dir / "dir" / "subrepo"
    make_subrepo(nested_repo, erepo_dir.scm)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with nested_repo.chdir():
        nested_repo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        out_dir = tmp_dir / "out"
        repo.repo_fs.download("dir", os.fspath(out_dir))

        expected = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert out_dir.read_text() == expected

        # clear cache to test saving to cache
        cache_root = tmp_dir / repo.odb.local.cache_dir
        remove(cache_root)
        clean_staging()
        makedirs(cache_root)

        staging_odb, _, staged_obj = stage(
            repo.odb.local,
            "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging_odb,
            repo.odb.local,
            {staged_obj.hash_info},
            shallow=False,
            hardlink=True,
        )

        # (subdirectory, file name) pairs expected under the cache root.
        expected_entries = (
            ("e1", "d9e8eae5374860ae025ec84cfd85c7.dir"),
            ("37", "b51d194a7513e45b56f6524f2d51f2"),
            ("94", "7d2b84e5aa88170e80dff467a5bfb6"),
            ("ac", "bd18db4cc2f85cedef654fccc4a4d8"),
        )
        expected_paths = {
            cache_root / prefix / name for prefix, name in expected_entries
        }
        assert set(cache_root.glob("??/*")) == expected_paths