def test(self):
    """Check that 'remote://' syntax is handled properly for local outs.

    A remote pointing at the current working directory is registered, then
    one file is added through a ``remote://`` URL and another through an
    absolute path; the path recorded in each DVC-file is verified.
    """
    workdir = os.getcwd()
    remote_name = "myremote"

    self.assertEqual(main(["remote", "add", remote_name, workdir]), 0)

    self.dvc = DvcRepo()

    # Added through the remote URL: the URL is expected verbatim as the path.
    remote_foo = f"remote://{remote_name}/{self.FOO}"
    self.assertEqual(main(["add", remote_foo]), 0)
    foo_contents = load_yaml("foo.dvc")
    self.assertEqual(foo_contents["outs"][0]["path"], remote_foo)

    # Added through an absolute path: the bare file name is expected.
    absolute_bar = os.path.join(workdir, self.BAR)
    self.assertEqual(main(["add", absolute_bar]), 0)
    bar_contents = load_yaml("bar.dvc")
    self.assertEqual(bar_contents["outs"][0]["path"], self.BAR)
def test(self):
    """Check that adding/removing the metric flag doesn't affect stage state.

    After each edit of the ``metric`` field in the DVC-file, reproducing the
    stage is expected to return no stages.
    """
    added = self.dvc.add(self.FOO)
    self.assertEqual(len(added), 1)
    self.assertIsNotNone(added[0])

    out_name = "file1"
    stage_fname = out_name + ".dvc"
    self.dvc.run(
        fname=stage_fname,
        outs_no_cache=[out_name],
        deps=[self.FOO, self.CODE],
        cmd=f"python {self.CODE} {self.FOO} {out_name}",
        single_stage=True,
    )

    # Baseline: right after the run nothing should be reproduced.
    self.assertEqual(len(self.dvc.reproduce(stage_fname)), 0)

    # Turn the metric flag on, then off again; neither edit may trigger
    # a reproduction.
    for metric_flag in (True, False):
        contents = load_yaml(stage_fname)
        contents["outs"][0]["metric"] = metric_flag
        dump_yaml(stage_fname, contents)

        self.assertEqual(len(self.dvc.reproduce(stage_fname)), 0)
def test_parse_yaml_invalid_unicode(tmp_dir):
    """Loading a YAML file with invalid UTF-8 must raise EncodingError."""
    bad_name = "invalid_utf8.yaml"
    # 0x80 is not a valid leading byte in UTF-8.
    tmp_dir.gen(bad_name, b"\x80some: stuff")

    with pytest.raises(EncodingError) as excinfo:
        load_yaml(tmp_dir / bad_name)

    error = excinfo.value
    assert bad_name in error.path
    assert error.encoding == "utf-8"
def test_commit_changed_md5(tmp_dir, dvc):
    """Committing a DVC-file with a tampered md5 fails unless forced."""
    tmp_dir.gen({"file": "file content"})
    [stage] = dvc.add("file", no_commit=True)

    # Overwrite the md5 stored in the DVC-file with a bogus value.
    contents = load_yaml(stage.path)
    contents["md5"] = "1111111111"
    dump_yaml(stage.path, contents)

    with pytest.raises(StageCommitError):
        dvc.commit(stage.path)

    # A forced commit goes through; afterwards no md5 key is expected.
    dvc.commit(stage.path, force=True)
    assert "md5" not in load_yaml(stage.path)
def test_default_wdir_is_not_written(self):
    """The wdir key must not be dumped when it is "." or not given."""
    # First run passes wdir="." explicitly, second run omits wdir.
    runs = (
        (self.FOO, {"wdir": "."}),
        (self.BAR, {}),
    )
    for out_name, wdir_kwargs in runs:
        stage = self.dvc.run(
            cmd=f"echo test > {out_name}",
            outs=[out_name],
            single_stage=True,
            **wdir_kwargs,
        )
        dumped = load_yaml(stage.relpath)
        self.assertNotIn(Stage.PARAM_WDIR, dumped)
def test_meta_is_preserved(tmp_dir, dvc):
    """A custom ``meta`` section must survive a load/dump round trip."""
    [stage] = tmp_dir.dvc_gen("foo", "foo content")

    # Inject a meta section into the DVC-file by hand.
    custom_meta = {"custom_key": 42}
    contents = load_yaml(stage.path)
    contents["meta"] = custom_meta
    dump_yaml(stage.path, contents)

    # Round trip through SingleStageFile: load the stage, dump it back.
    dvcfile = SingleStageFile(dvc, stage.path)
    dvcfile.dump(dvcfile.stage)

    assert load_yaml(stage.path)["meta"] == custom_meta
def test(self):
    """Move an added data file into a freshly created directory.

    Both the data and its DVC-file are expected to disappear from the old
    location and appear in the new one, with the recorded out path being
    the bare file name.
    """
    old_stage_file = self.DATA + DVC_FILE_SUFFIX
    self.assertEqual(main(["add", self.DATA]), 0)
    self.assertTrue(os.path.exists(old_stage_file))

    dest_dir = "data_dir2"
    os.makedirs(dest_dir)
    self.assertEqual(main(["move", self.DATA, dest_dir]), 0)

    data_name = os.path.basename(self.DATA)
    moved_data = os.path.join(dest_dir, data_name)
    moved_stage_file = moved_data + DVC_FILE_SUFFIX

    for gone in (self.DATA, old_stage_file):
        self.assertFalse(os.path.exists(gone))
    for present in (moved_data, moved_stage_file):
        self.assertTrue(os.path.exists(present))

    moved_contents = load_yaml(moved_stage_file)
    self.assertEqual(data_name, moved_contents["outs"][0]["path"])
def test_init(tmp_dir, dvc):
    """`exp init <cmd>` must write the expected 'default' stage to dvc.yaml."""
    tmp_dir.gen(
        {
            CmdExperimentsInit.CODE: {"copy.py": ""},
            "data": "data",
            "params.yaml": '{"foo": 1}',
            "dvclive": {},
            "plots": {},
        }
    )

    script = "python " + os.path.join(CmdExperimentsInit.CODE, "copy.py")
    assert main(["exp", "init", script]) == 0

    expected_stage = {
        "cmd": script,
        "deps": ["data", "src"],
        "metrics": [{"metrics.json": {"cache": False}}],
        "outs": ["models"],
        "params": ["foo"],
        "plots": [{"plots": {"cache": False}}],
    }
    written = load_yaml(tmp_dir / "dvc.yaml")
    assert written == {"stages": {"default": expected_stage}}
def test(self):
    """Forced checkout restores a moved file without touching the others.

    Inodes are compared before and after the checkout: the restored file is
    expected to get a new inode, while the second file keeps its own.
    """
    # Use copy to test for changes in the inodes
    self.assertEqual(main(["config", "cache.type", "copy"]), 0)
    self.assertEqual(0, main(["add", self.DATA_DIR]))

    dvcfile_path = self.DATA_DIR + DVC_FILE_SUFFIX
    dvcfile_contents = load_yaml(dvcfile_path)
    before = self.outs_info(dvcfile_contents)

    # Move instead of remove, to lock the inode assigned to
    # staged_files[0].path; if we were to use remove, we might end up with
    # the same inode assigned to the newly checked out file.
    shutil.move(before[0].path, "random_name")

    self.assertEqual(main(["checkout", "--force", dvcfile_path]), 0)

    after = self.outs_info(dvcfile_contents)

    self.assertEqual(len(before), len(after))
    self.assertEqual(before[0].path, after[0].path)
    self.assertNotEqual(before[0].inode, after[0].inode)
    self.assertEqual(before[1].inode, after[1].inode)
def test(self):
    """Reproduce a stage whose out and dep checksums were removed.

    Exactly one stage is expected to be reproduced.
    """
    contents = load_yaml(self.file1_stage)
    # Drop the checksum from the first out, then from the first dep.
    for section in (Stage.PARAM_OUTS, Stage.PARAM_DEPS):
        del contents[section][0][LocalFileSystem.PARAM_CHECKSUM]
    dump_yaml(self.file1_stage, contents)

    reproduced = self.dvc.reproduce(self.file1_stage)
    self.assertEqual(len(reproduced), 1)
def test(self):
    """Forced checkout must fail when out and dep checksums are missing."""
    contents = load_yaml(self.file1_stage)
    # Drop the checksum from the first out, then from the first dep.
    for section in (Stage.PARAM_OUTS, Stage.PARAM_DEPS):
        del contents[section][0][LocalTree.PARAM_CHECKSUM]
    dump_yaml(self.file1_stage, contents)

    with pytest.raises(CheckoutError):
        self.dvc.checkout(force=True)
def test_desc_is_preserved(tmp_dir, dvc):
    """Stage-level and out-level ``desc`` must survive a load/dump cycle."""
    [stage] = tmp_dir.dvc_gen("foo", "foo content")

    stage_desc = "test stage description"
    out_desc = "test out description"

    # Write both descriptions into the DVC-file by hand.
    contents = load_yaml(stage.path)
    contents["desc"] = stage_desc
    contents["outs"][0]["desc"] = out_desc
    dump_yaml(stage.path, contents)

    # Round trip through SingleStageFile: load the stage, dump it back.
    dvcfile = SingleStageFile(dvc, stage.path)
    dvcfile.dump(dvcfile.stage)

    reloaded = load_yaml(stage.path)
    assert reloaded["desc"] == stage_desc
    assert reloaded["outs"][0]["desc"] == out_desc
def test_migrates_v1_lockfile_to_v2_during_dump(tmp_dir, dvc, v1_repo_lock, caplog):
    """Reproducing with a v1 lockfile logs the migration and writes v2."""
    caplog.clear()
    with caplog.at_level(logging.INFO, logger="dvc.dvcfile"):
        assert dvc.reproduce()

    migration_message = "Migrating lock file 'dvc.lock' from v1 to v2"
    assert migration_message in caplog.messages

    # The v1 contents are expected under "stages", next to a schema marker.
    lock_contents = load_yaml(tmp_dir / "dvc.lock")
    assert lock_contents == {"stages": v1_repo_lock, "schema": "2.0"}
def test_params_with_false_values(tmp_dir, dvc, param_value):
    """Falsy param values should not be ignored by `status` on loading."""
    param_name = "param"
    params_dep = ParamsDependency(Stage(dvc), DEFAULT_PARAMS_FILE, [param_name])

    params_path = tmp_dir / DEFAULT_PARAMS_FILE
    params_path.write_text(f"{param_name}: {param_value}")

    # Fill the dependency from the file just written; status must be empty.
    loaded_params = load_yaml(DEFAULT_PARAMS_FILE)
    params_dep.fill_values(loaded_params)
    assert params_dep.status() == {}
def load(self):
    """Load and validate the lockfile.

    Returns:
        An empty dict when the file does not exist, otherwise whatever
        ``self.validate`` returns for the parsed YAML.

    Raises:
        LockfileCorruptedError: if validation raises StageFileFormatError.
    """
    if not self.exists():
        return {}

    raw = load_yaml(self.path, fs=self.repo.fs)
    try:
        return self.validate(raw, fname=self.relpath)
    except StageFileFormatError as exc:
        message = f"Lockfile '{self.relpath}' is corrupted."
        raise LockfileCorruptedError(message) from exc
def _load_cache(self, key, value):
    """Load the cached entry for ``key``/``value`` and validate it.

    Returns the result of running the lock-file stage schema over the parsed
    YAML, or None when the file is missing or corrupted. A corrupted file is
    reported with a warning and deleted.
    """
    cache_file = self._get_cache_path(key, value)

    try:
        raw = load_yaml(cache_file)
        return COMPILED_LOCK_FILE_STAGE_SCHEMA(raw)
    except FileNotFoundError:
        return None
    except (YAMLFileCorruptedError, Invalid):
        logger.warning("corrupted cache file '%s'.", relpath(cache_file))
        os.unlink(cache_file)
        return None
def load(self):
    """Load and validate the lockfile.

    Returns:
        An empty dict when the file does not exist, otherwise the parsed
        YAML data (returned as loaded, after validation has passed).

    Raises:
        LockfileCorruptedError: if validation raises StageFileFormatError.
            The original error is attached as the explicit cause.
    """
    if not self.exists():
        return {}

    data = load_yaml(self.path, tree=self.repo.tree)
    try:
        self.validate(data, fname=self.relpath)
    except StageFileFormatError as exc:
        # Chain explicitly so the traceback shows the validation failure as
        # the direct cause instead of an incidental error during handling.
        raise LockfileCorruptedError(
            f"Lockfile '{self.relpath}' is corrupted."
        ) from exc
    return data
def test(self):
    """A hand-edited md5 in the DVC-file must survive reload and dump."""
    added = self.dvc.add(self.FOO)
    self.assertEqual(len(added), 1)
    stage = added[0]
    self.assertIsNotNone(stage)

    # NOTE: checking that reloaded stage didn't change its checksum
    fake_md5 = "11111111111111111111111111111111"
    contents = load_yaml(stage.relpath)
    contents[stage.PARAM_MD5] = fake_md5
    dump_yaml(stage.relpath, contents)

    dvcfile = SingleStageFile(self.dvc, stage.relpath)
    reloaded = dvcfile.stage
    self.assertIsNotNone(reloaded)

    dvcfile.dump(reloaded)

    dumped = load_yaml(reloaded.relpath)
    self.assertEqual(dumped[reloaded.PARAM_MD5], fake_md5)
def _test(self):
    """Add a file through a link and check the path stored in its DVC-file.

    The stored path is expected to be the POSIX-style join of the link name
    and the data file name.
    """
    data_file_name, link_name = self._prepare_external_data()

    linked_path = os.path.join(link_name, data_file_name)
    self.assertEqual(0, main(["add", linked_path]))

    stage_file = data_file_name + DVC_FILE_SUFFIX
    self.assertTrue(os.path.exists(stage_file))

    contents = load_yaml(stage_file)
    expected_path = posixpath.join(link_name, data_file_name)
    self.assertEqual(expected_path, contents["outs"][0]["path"])
def test_checkout_executable(tmp_dir, dvc):
    """Checkout should honor ``isexec: true`` recorded for an out."""
    tmp_dir.dvc_gen("foo", "foo")

    # Mark the out as executable directly in the DVC-file.
    dvcfile_contents = load_yaml("foo.dvc")
    dvcfile_contents["outs"][0]["isexec"] = True
    dump_yaml("foo.dvc", dvcfile_contents)

    dvc.checkout("foo")

    exec_bit = os.stat("foo").st_mode & stat.S_IEXEC
    if os.name != "nt":
        assert exec_bit
    else:
        # NOTE: you can't set exec bits on Windows
        assert not exec_bit
def test_cyclic_graph_error(tmp_dir, dvc, run_copy):
    """Reproducing a stage that closes a dependency cycle must fail."""
    tmp_dir.gen("foo", "foo")

    # Build the chain foo -> bar -> baz -> foobar.
    copy_chain = (
        ("foo", "bar", "copy-foo-bar"),
        ("bar", "baz", "copy-bar-baz"),
        ("baz", "foobar", "copy-baz-foobar"),
    )
    for src, dst, stage_name in copy_chain:
        run_copy(src, dst, name=stage_name)

    # Hand-write a stage that produces foo from baz, closing the loop.
    pipeline = load_yaml(PIPELINE_FILE)
    pipeline["stages"]["copy-baz-foo"] = {
        "cmd": "echo baz > foo",
        "deps": ["baz"],
        "outs": ["foo"],
    }
    dump_yaml(PIPELINE_FILE, pipeline)

    with pytest.raises(CyclicGraphError):
        dvc.reproduce(":copy-baz-foo")
def test_add_executable(tmp_dir, dvc):
    """Adding an executable file records ``isexec`` and keeps the exec bit."""
    tmp_dir.gen("foo", "foo")

    # Turn on the exec bit before adding.
    current_mode = os.stat("foo").st_mode
    os.chmod("foo", current_mode | stat.S_IEXEC)

    dvc.add("foo")

    expected_out = {
        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
        "path": "foo",
        "size": 3,
        "isexec": True,
    }
    assert load_yaml("foo.dvc") == {"outs": [expected_out]}
    assert os.stat("foo").st_mode & stat.S_IEXEC
def _load_cache(self, key, value):
    """Load the cached entry for ``key``/``value`` and validate it.

    Returns the result of running the lock-file stage schema over the parsed
    YAML, or None when the file is missing or corrupted. A corrupted file is
    reported with a warning and deleted.
    """
    # Imports are kept local to this method, as in the original.
    from voluptuous import Invalid

    from dvc.schema import COMPILED_LOCK_FILE_STAGE_SCHEMA
    from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml

    cache_file = self._get_cache_path(key, value)

    try:
        raw = load_yaml(cache_file)
        return COMPILED_LOCK_FILE_STAGE_SCHEMA(raw)
    except FileNotFoundError:
        return None
    except (YAMLFileCorruptedError, Invalid):
        logger.warning("corrupted cache file '%s'.", relpath(cache_file))
        os.unlink(cache_file)
        return None
def test_ignored_in_checksum(self):
    """wdir is absent from the dumped stage, and the reloaded stage is unchanged."""
    stage = self.dvc.run(
        cmd=f"echo test > {self.FOO}",
        deps=[self.BAR],
        outs=[self.FOO],
        single_stage=True,
    )

    # Neither the in-memory dump nor the file on disk may carry wdir.
    in_memory = stage.dumpd()
    self.assertNotIn(Stage.PARAM_WDIR, in_memory)

    on_disk = load_yaml(stage.relpath)
    self.assertNotIn(Stage.PARAM_WDIR, on_disk)

    with self.dvc.lock:
        reloaded = SingleStageFile(self.dvc, stage.relpath).stage
        self.assertFalse(reloaded.changed())
def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog):
    """`status -c` must warn about an out whose md5 is missing."""
    stage = dvc.run(outs=["bar"], cmd="echo bar > bar", single_stage=True)
    assert main(["push"]) == 0

    # Strip the md5 of the only out from the stage file.
    stage_file = stage.relpath
    stage_contents = load_yaml(stage_file)
    del stage_contents["outs"][0]["md5"]
    dump_yaml(stage_file, stage_contents)

    with caplog.at_level(logging.WARNING, logger="dvc"):
        caplog.clear()
        assert main(["status", "-c"]) == 0

    expected_warning = (
        "Output 'bar'(stage: 'bar.dvc') is missing version info. "
        "Cache for it will not be collected. "
        "Use `dvc repro` to get your pipeline up to date."
    )
    assert expected_warning in caplog.text
def test_fname_changes_path_and_wdir(self):
    """A stage file placed in a subdirectory keeps wdir at the repo root.

    The in-memory stage is expected to have the root as wdir, and the dumped
    file is expected to store wdir as "..".
    """
    subdir = "dir"
    os.mkdir(os.path.join(self._root_dir, subdir))

    out_path = os.path.join(subdir, self.FOO)
    stage_fname = os.path.join(subdir, "stage" + DVC_FILE_SUFFIX)
    stage = self.dvc.run(
        cmd=f"echo test > {out_path}",
        outs=[out_path],
        fname=stage_fname,
        single_stage=True,
    )

    real_root = os.path.realpath(self._root_dir)
    self.assertEqual(stage.wdir, real_root)
    self.assertEqual(stage.path, os.path.join(real_root, stage_fname))

    # Check that it is dumped properly (relative to fname)
    dumped = load_yaml(stage.relpath)
    self.assertEqual(dumped[Stage.PARAM_WDIR], "..")
def get_running_exps(self) -> Dict[str, dict]:
    """Return info for running experiments.

    Every file found under ``<repo.tmp_dir>/<EXEC_PID_DIR>`` is treated as an
    executor pidfile; its base name without extension is used as the rev.

    Returns:
        A mapping from rev to the executor info dict (``info.to_dict()``).
        For executors with a ``git_url``, the rev's entry gets a ``"last"``
        key, and each truthy fetched rev gets an entry of its own.

    Pidfiles that raise OSError while being processed are skipped.
    """
    from dvc.utils.serialize import load_yaml

    from .executor.base import BaseExecutor, ExecutorInfo

    result = {}
    for pidfile in self.repo.fs.walk_files(
        os.path.join(self.repo.tmp_dir, self.EXEC_PID_DIR)
    ):
        rev, _ = os.path.splitext(os.path.basename(pidfile))

        try:
            info = ExecutorInfo.from_dict(load_yaml(pidfile))
            if rev == "workspace":
                # If we are appending to a checkpoint branch in a workspace
                # run, show the latest checkpoint as running.
                last_rev = self.scm.get_ref(EXEC_BRANCH)
                if last_rev:
                    result[last_rev] = info.to_dict()
                else:
                    result[rev] = info.to_dict()
            else:
                result[rev] = info.to_dict()
                if info.git_url:

                    def on_diverged(_ref: str, _checkpoint: bool):
                        # Always answers False when asked about a diverged
                        # ref during the fetch.
                        return False

                    for ref in BaseExecutor.fetch_exps(
                        self.scm,
                        info.git_url,
                        on_diverged=on_diverged,
                    ):
                        logger.debug(
                            "Updated running experiment '%s'.", ref
                        )
                        last_rev = self.scm.get_ref(ref)
                        result[rev]["last"] = last_rev
                        if last_rev:
                            # A fresh dict per key, so the "last" marker set
                            # on result[rev] is not shared with this entry.
                            result[last_rev] = info.to_dict()
        except OSError:
            # Deliberate best-effort: an unreadable pidfile is skipped.
            pass

    return result
def test_move_meta(tmp_dir, dvc):
    """A custom ``meta`` section must be carried over by ``dvc.move``."""
    [stage] = tmp_dir.dvc_gen("foo", "foo")

    # Inject a meta section into the DVC-file by hand.
    contents = load_yaml(stage.path)
    contents["meta"] = {"custom_key": 42}
    dump_yaml(stage.path, contents)

    dvc.move("foo", "bar")

    moved_text = (tmp_dir / "bar.dvc").read_text()
    print(moved_text)

    expected_text = textwrap.dedent(
        """\
        outs:
        - md5: acbd18db4cc2f85cedef654fccc4a4d8
          size: 3
          path: bar
        meta:
          custom_key: 42
    """
    )
    assert moved_text == expected_text
def test(self):
    """Move an added file into an existing directory.

    Both the file and its DVC-file are expected to disappear from the old
    location and appear inside the directory, with the recorded out path
    being the bare file name.
    """
    old_stage_file = self.FOO + DVC_FILE_SUFFIX
    self.assertEqual(main(["add", self.FOO]), 0)
    self.assertTrue(os.path.exists(old_stage_file))

    moved_foo = os.path.join(self.DATA_DIR, self.FOO)
    moved_stage_file = moved_foo + DVC_FILE_SUFFIX

    self.assertEqual(main(["move", self.FOO, self.DATA_DIR]), 0)

    for gone in (self.FOO, old_stage_file):
        self.assertFalse(os.path.exists(gone))
    for present in (moved_foo, moved_stage_file):
        self.assertTrue(os.path.exists(present))

    moved_contents = load_yaml(moved_stage_file)
    self.assertEqual(self.FOO, moved_contents["outs"][0]["path"])
def test_add(tmp_dir, dvc):
    """Adding a single file yields one stage with one out and no deps."""
    [stage] = tmp_dir.dvc_gen({"foo": "foo"})
    md5, _ = file_md5("foo")

    # The returned stage object itself.
    assert stage is not None
    assert isinstance(stage, Stage)
    assert os.path.isfile(stage.path)

    # Its shape: exactly one out, no deps, no command.
    assert len(stage.outs) == 1
    assert len(stage.deps) == 0
    assert stage.cmd is None

    # Hash bookkeeping.
    assert stage.outs[0].hash_info == HashInfo("md5", md5)
    assert stage.md5 is None

    expected_out = {
        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
        "path": "foo",
    }
    assert load_yaml("foo.dvc") == {"outs": [expected_out]}