Пример #1
0
    def test(self):
        """Forced checkout must fail on a corrupted directory cache entry.

        Adds ``DATA_DIR``, overwrites the cache file behind one of its
        entries and expects ``checkout(force=True)`` to raise
        ``CheckoutError``, after which that cache file no longer exists.
        """
        # NOTE: using 'copy' so that cache and link don't have same inode
        self.assertEqual(main(["config", "cache.type", "copy"]), 0)

        # Presumably re-created so the config change above is picked up.
        self.dvc = DvcRepo(".")
        added = self.dvc.add(self.DATA_DIR)
        self.assertEqual(len(added), 1)
        stage = added[0]
        self.assertEqual(len(stage.outs), 1)
        dir_out = stage.outs[0]

        # NOTE: modifying cache file for one of the files inside the directory
        # to check if dvc will detect that the cache is corrupted.
        local_cache = self.dvc.cache.local
        dir_entries = local_cache.load_dir_cache(dir_out.hash_info).items()
        _, entry_hash = next(dir_entries)
        cache_file = os.fspath(
            local_cache.tree.hash_to_path_info(entry_hash.value)
        )

        # Make the cache file writable before overwriting its contents.
        os.chmod(cache_file, 0o644)
        with open(cache_file, "w+") as corrupted:
            corrupted.write("1")

        with pytest.raises(CheckoutError):
            self.dvc.checkout(force=True)

        self.assertFalse(os.path.exists(cache_file))
Пример #2
0
    def test(self):
        """
        Check that 'remote' syntax is handled properly for local outs.

        The current working directory is registered as a remote; one file is
        added through a ``remote://`` URL and another through its absolute
        path, and the ``path`` recorded in each stage file is verified.
        """
        workdir = os.getcwd()
        remote_name = "myremote"

        self.assertEqual(main(["remote", "add", remote_name, workdir]), 0)

        self.dvc = DvcRepo()

        # Added via the remote URL: the stage file must keep the URL as is.
        foo_url = f"remote://{remote_name}/{self.FOO}"
        self.assertEqual(main(["add", foo_url]), 0)
        foo_stage = load_yaml("foo.dvc")
        self.assertEqual(foo_stage["outs"][0]["path"], foo_url)

        # Added via an absolute path: the stage file must record self.BAR.
        bar_abs = os.path.join(workdir, self.BAR)
        self.assertEqual(main(["add", bar_abs]), 0)
        bar_stage = load_yaml("bar.dvc")
        self.assertEqual(bar_stage["outs"][0]["path"], self.BAR)
Пример #3
0
    def test(self):
        """Cover import-url, run with a remote dependency, and pull over HTTP.

        Imports ``self.FOO`` from a static file server twice (default handler,
        then ``ContentMD5Handler``), runs a stage depending on a remote file,
        and finally pulls the imported output back from the "mycache" remote.
        """
        imported = "imported_file"

        # Import
        with StaticFileServer() as httpd:
            source_url = urljoin(self.get_remote(httpd.server_port), self.FOO)
            import_stage = self.dvc.imp_url(source_url, imported)

        self.assertTrue(os.path.exists(imported))
        self.assertTrue(filecmp.cmp(imported, self.FOO, shallow=False))

        self.dvc.remove("imported_file.dvc")

        # Same import again, served through ContentMD5Handler this time.
        with StaticFileServer(handler_class=ContentMD5Handler) as httpd:
            source_url = urljoin(self.get_remote(httpd.server_port), self.FOO)
            import_stage = self.dvc.imp_url(source_url, imported)

        self.assertTrue(os.path.exists(imported))
        self.assertTrue(filecmp.cmp(imported, self.FOO, shallow=False))

        # Run --deps
        with StaticFileServer() as httpd:
            remote_url = self.get_remote(httpd.server_port)

            cache_dirname = str(uuid.uuid4())
            cache_url = urljoin(remote_url, cache_dirname)

            cache_added = main(["remote", "add", "mycache", cache_url])
            remote_added = main(["remote", "add", "myremote", remote_url])
            self.assertEqual(cache_added, 0)
            self.assertEqual(remote_added, 0)

            self.dvc = DvcRepo(".")

            dependency_url = urljoin(remote_url, self.BAR)
            produced = "remote_file"

            # The stage command is a one-line script that creates the output.
            script = 'open("{}", "w+")'.format(produced)
            with open("create-output.py", "w") as script_file:
                script_file.write(script)

            run_stage = self.dvc.run(
                deps=[dependency_url],
                outs=[produced],
                cmd="python create-output.py",
            )
            self.assertTrue(run_stage is not None)
            self.assertTrue(os.path.exists(produced))

            # Pull
            self.dvc.remove(import_stage.path, outs_only=True)
            self.assertFalse(os.path.exists(imported))

            # Relocate the local cache under the name the "mycache" remote
            # URL ends with; presumably the server then serves it -- confirm.
            shutil.move(self.local_cache, cache_dirname)
            self.assertFalse(os.path.exists(self.local_cache))

            self.dvc.pull([import_stage.path], remote="mycache")

            self.assertTrue(os.path.exists(imported))
Пример #4
0
    def setUp(self):
        """Configure 'hardlink' as the cache type and re-open the repo."""
        super().setUp()
        self.assertEqual(main(["config", "cache.type", "hardlink"]), 0)

        # Close the repo that exists after the parent setUp, then replace it
        # with a fresh instance (presumably so the new config is re-read).
        self.dvc.close()
        self.dvc = DvcRepo(".")
Пример #5
0
def test_ssh_dir_out(dvc_repo):
    """Run and reproduce a stage whose output is a directory behind SSH.

    Skipped when ``_should_test_ssh()`` is false. Registers an SSH remote and
    an SSH cache, runs a stage that creates ``dir-out`` over ssh, then
    reproduces it twice (the second time forced).
    """
    if not _should_test_ssh():
        pytest.skip()

    # Set up remote and cache
    remote_url = get_ssh_url()
    assert main(["remote", "add", "upstream", remote_url]) == 0

    cache_url = get_ssh_url()
    assert main(["remote", "add", "sshcache", cache_url]) == 0
    assert main(["config", "cache.ssh", "sshcache"]) == 0

    # Recreating to reread configs
    fresh_repo = DvcRepo(dvc_repo.root_dir)

    remote_info = URLInfo(remote_url)
    make_dir_out = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt"
    ssh_cmd = "ssh {netloc} 'cd {path};{cmd}'".format(
        netloc=remote_info.netloc,
        path=remote_info.path,
        cmd=make_dir_out,
    )
    dir_out_url = (remote_info / "dir-out").url

    fresh_repo.run(
        cmd=ssh_cmd,
        outs=[dir_out_url],
        deps=["foo"],  # add a fake dep to not consider this a callback
    )

    fresh_repo.reproduce("dir-out.dvc")
    fresh_repo.reproduce("dir-out.dvc", force=True)
Пример #6
0
    def test(self):
        """Forced checkout must fail on a corrupted directory cache entry.

        Adds ``DATA_DIR``, overwrites the cache file of the first entry listed
        in the directory cache and expects ``checkout(force=True)`` to raise
        ``CheckoutError``, after which that cache file no longer exists.
        """
        # NOTE: using 'copy' so that cache and link don't have same inode
        self.assertEqual(main(["config", "cache.type", "copy"]), 0)

        # Presumably re-created so the config change above is picked up.
        self.dvc = DvcRepo(".")
        added = self.dvc.add(self.DATA_DIR)
        self.assertEqual(len(added), 1)
        stage = added[0]
        self.assertEqual(len(stage.outs), 1)
        dir_out = stage.outs[0]

        # NOTE: modifying cache file for one of the files inside the directory
        # to check if dvc will detect that the cache is corrupted.
        local_cache = self.dvc.cache.local
        first_entry = local_cache.load_dir_cache(dir_out.checksum)[0]
        entry_checksum = first_entry[local_cache.PARAM_CHECKSUM]
        cache_file = local_cache.get(entry_checksum)

        # Make the cache file writable before overwriting its contents.
        os.chmod(cache_file, 0o644)
        with open(cache_file, "w+") as corrupted:
            corrupted.write("1")

        with pytest.raises(CheckoutError):
            self.dvc.checkout(force=True)

        self.assertFalse(os.path.exists(cache_file))
Пример #7
0
 def setUp(self):
     """Configure 'hardlink' as the cache type and re-create the repo."""
     parent = super(
         TestShouldNotCheckoutUponCorruptedLocalHardlinkCache, self
     )
     parent.setUp()
     self.assertEqual(main(["config", "cache.type", "hardlink"]), 0)
     # Replace the repo object after the config change above.
     self.dvc = DvcRepo(".")
Пример #8
0
    def test(self):
        """
        Check that 'remote' syntax is handled properly for local outs.

        The current working directory is registered as a remote; one file is
        added through a ``remote://`` URL and another through its absolute
        path, and the ``path`` recorded in each stage file is verified.
        """
        workdir = os.getcwd()
        remote_name = "myremote"

        self.assertEqual(main(["remote", "add", remote_name, workdir]), 0)

        self.dvc = DvcRepo()

        # Added via the remote URL: the stage file must keep the URL as is.
        foo_url = "remote://{}/{}".format(remote_name, self.FOO)
        self.assertEqual(main(["add", foo_url]), 0)

        with open("foo.dvc", "r") as stage_file:
            foo_stage = yaml.safe_load(stage_file)
        self.assertEqual(foo_stage["outs"][0]["path"], foo_url)

        # Added via an absolute path: the stage file must record that path.
        bar_abs = os.path.join(workdir, self.BAR)
        self.assertEqual(main(["add", bar_abs]), 0)

        with open("bar.dvc", "r") as stage_file:
            bar_stage = yaml.safe_load(stage_file)
        self.assertEqual(bar_stage["outs"][0]["path"], bar_abs)
Пример #9
0
    def _test_metrics(self, func):
        """Verify ``metrics.show`` across all branches and the working tree.

        Args:
            func: callable invoked once per branch name ("master", "one",
                "two"); presumably it prepares ``metrics.json`` on that
                branch -- confirm against the callers.
        """
        scm = self.dvc.scm
        scm.commit("init")
        scm.branch("one")
        scm.branch("two")

        for branch in ("master", "one", "two"):
            func(branch)

        # TestDvc currently is based on TestGit, so it is safe to use
        # scm.git for now
        self.dvc.scm.repo.git.clean("-fd")

        self.dvc = DvcRepo(".")

        # The same result is expected with and without an explicit target.
        expected = {
            "master": {"metrics.json": ["master"]},
            "one": {"metrics.json": ["one"]},
            "two": {"metrics.json": ["two"]},
            "working tree": {"metrics.json": ["two"]},
        }

        by_target = self.dvc.metrics.show(
            ["metrics.json"],
            all_branches=True,
            typ="json",
            xpath="metrics",
        )
        self.assertEqual(by_target, expected)

        without_target = self.dvc.metrics.show(
            all_branches=True,
            typ="json",
            xpath="metrics",
        )
        self.assertEqual(without_target, expected)
Пример #10
0
    def test(self):
        """Moving ``self.file1`` must fail through both the API and the CLI.

        The API call is expected to raise ``MoveNotDataSourceError`` and the
        ``move`` command to return a non-zero exit code.
        """
        from dvc.repo import Repo as DvcRepo

        self.dvc = DvcRepo(self._root_dir)

        # API level
        with self.assertRaises(MoveNotDataSourceError):
            self.dvc.move(self.file1, "dst")

        # CLI level
        self.assertNotEqual(main(["move", self.file1, "dst"]), 0)
Пример #11
0
def test_allow_init_dvc_subdir(tmp_dir, scm, monkeypatch):
    """``init --subdir`` run inside a subdirectory must succeed.

    The resulting repo is rooted at ``subdir`` while its SCM root is still
    ``tmp_dir``.
    """
    tmp_dir.gen({"subdir": {}})

    # Only the init command runs from inside the subdirectory.
    with monkeypatch.context() as patcher:
        patcher.chdir("subdir")
        exit_code = main(["init", "--subdir"])
        assert exit_code == 0

    subrepo = DvcRepo("subdir")
    expected_root = os.fspath(tmp_dir / "subdir")
    assert subrepo.root_dir == expected_root
    assert subrepo.scm.root_dir == os.fspath(tmp_dir)
Пример #12
0
    def _test_metrics(self, func):
        """Verify ``metrics.show`` across all branches.

        Args:
            func: callable invoked once per branch name ("master", "one",
                "two"); presumably it prepares ``metrics.json`` on that
                branch -- confirm against the callers.
        """
        scm = self.dvc.scm
        scm.commit("init")
        scm.branch("one")
        scm.branch("two")

        for branch in ("master", "one", "two"):
            func(branch)

        self.dvc = DvcRepo(".")

        # The same result is expected for an explicit and an empty target.
        expected = {
            "master": {"metrics.json": ["master"]},
            "one": {"metrics.json": ["one"]},
            "two": {"metrics.json": ["two"]},
        }

        by_target = self.dvc.metrics.show(
            "metrics.json",
            all_branches=True,
            typ="json",
            xpath="metrics",
        )
        self.assertEqual(by_target, expected)

        empty_target = self.dvc.metrics.show(
            "",
            all_branches=True,
            typ="json",
            xpath="metrics",
        )
        self.assertEqual(empty_target, expected)
Пример #13
0
    def setUp(self):
        """Create a second git+dvc repo whose cache dir is the main cache.

        The additional repo gets a ``.dvc/config.local`` whose ``cache.dir``
        is the ``.dvc/cache`` directory under ``self._root_dir``.
        """
        super().setUp()
        self.additional_path = TestDir.mkdtemp()
        self.additional_git = Repo.init(self.additional_path)
        self.additional_dvc = DvcRepo.init(self.additional_path)

        shared_cache = os.path.join(self._root_dir, ".dvc", "cache")

        local_config = configobj.ConfigObj()
        local_config.filename = os.path.join(
            self.additional_path, ".dvc", "config.local"
        )
        local_config["cache"] = {"dir": shared_cache}
        local_config.write()

        # Re-open the additional repo after its local config was written.
        self.additional_dvc = DvcRepo(self.additional_path)
Пример #14
0
def test_ssh_dir_out(tmp_dir, dvc, ssh_server):
    """Run and reproduce a stage with a directory output on an SSH remote.

    Registers the ``ssh_server`` fixture as both the remote and the SSH
    cache, runs a script that creates ``dir-out`` with two files under the
    remote's path, then reproduces the stage twice (the second time forced).
    """
    tmp_dir.gen({"foo": "foo content"})

    # Set up remote and cache
    creds = ssh_server.test_creds
    user = creds["username"]
    port = ssh_server.port
    keyfile = creds["key_filename"]

    remote_url = SSHMocked.get_url(user, port)
    assert main(["remote", "add", "upstream", remote_url]) == 0
    assert main(["remote", "modify", "upstream", "keyfile", keyfile]) == 0

    cache_url = SSHMocked.get_url(user, port)
    assert main(["remote", "add", "sshcache", cache_url]) == 0
    assert main(["config", "cache.ssh", "sshcache"]) == 0
    assert main(["remote", "modify", "sshcache", "keyfile", keyfile]) == 0

    # Recreating to reread configs
    fresh_repo = DvcRepo(dvc.root_dir)

    # To avoid "WARNING: UNPROTECTED PRIVATE KEY FILE" from ssh
    os.chmod(keyfile, 0o600)

    # Helper script: creates dir-out with two files under the given path.
    script_path = tmp_dir / "script.py"
    script_path.write_text(
        "import sys, pathlib\n"
        "path = pathlib.Path(sys.argv[1])\n"
        "dir_out = path / 'dir-out'\n"
        "dir_out.mkdir()\n"
        "(dir_out / '1.txt').write_text('1')\n"
        "(dir_out / '2.txt').write_text('2')\n"
    )

    remote_info = URLInfo(remote_url)
    stage_cmd = "python {} {}".format(script_path, remote_info.path)
    fresh_repo.run(
        cmd=stage_cmd,
        outs=["remote://upstream/dir-out"],
        deps=["foo"],  # add a fake dep to not consider this a callback
    )

    fresh_repo.reproduce("dir-out.dvc")
    fresh_repo.reproduce("dir-out.dvc", force=True)
Пример #15
0
    def test(self, mock_prompt):
        """Run an import/run/update/repro/gc/checkout cycle on a remote.

        The remote location is assembled from ``self.scheme``,
        ``self.scheme_sep``, ``self.bucket`` and ``self.sep``. ``mock_prompt``
        is not used in the body; presumably it is injected by a patch
        decorator outside this view -- TODO confirm.
        """
        # Returning here makes the test pass without running anything when
        # should_test() is false.
        if not self.should_test():
            return

        # External cache location: a fresh uuid-named key under the bucket.
        cache = (self.scheme + self.scheme_sep + self.bucket + self.sep +
                 str(uuid.uuid4()))

        # Register "myrepo" as the cache for this scheme.
        ret = main(["config", "cache." + self.cache_scheme, "myrepo"])
        self.assertEqual(ret, 0)
        ret = main(["remote", "add", "myrepo", cache])
        self.assertEqual(ret, 0)
        ret = main(["remote", "modify", "myrepo", "type", self.cache_type])
        self.assertEqual(ret, 0)

        # A second, separately keyed remote that holds the data files.
        remote_name = "myremote"
        remote_key = str(uuid.uuid4())
        remote = (self.scheme + self.scheme_sep + self.bucket + self.sep +
                  remote_key)

        ret = main(["remote", "add", remote_name, remote])
        self.assertEqual(ret, 0)
        ret = main(["remote", "modify", remote_name, "type", self.cache_type])
        self.assertEqual(ret, 0)

        # Re-created after the config commands above (presumably to re-read
        # the configuration -- confirm).
        self.dvc = DvcRepo(".")

        foo_key = remote_key + self.sep + self.FOO
        bar_key = remote_key + self.sep + self.BAR

        foo_path = (self.scheme + self.scheme_sep + self.bucket + self.sep +
                    foo_key)
        bar_path = (self.scheme + self.scheme_sep + self.bucket + self.sep +
                    bar_key)

        # Using both plain and remote notation
        out_foo_path = "remote://" + remote_name + "/" + self.FOO
        out_bar_path = bar_path

        # Presumably stores FOO_CONTENTS under foo_key in the bucket; write()
        # is defined outside this view.
        self.write(self.bucket, foo_key, self.FOO_CONTENTS)

        # Import into the workspace and compare with the local FOO file.
        import_stage = self.dvc.imp_url(out_foo_path, "import")

        self.assertTrue(os.path.exists("import"))
        self.assertTrue(filecmp.cmp("import", self.FOO, shallow=False))
        self.assertEqual(self.dvc.status([import_stage.path]), {})
        self.check_already_cached(import_stage)

        # Import where the output is itself addressed with remote notation.
        import_remote_stage = self.dvc.imp_url(out_foo_path,
                                               out_foo_path + "_imported")
        self.assertEqual(self.dvc.status([import_remote_stage.path]), {})

        # Stage whose dependency and output both live outside the workspace.
        cmd_stage = self.dvc.run(
            outs=[out_bar_path],
            deps=[out_foo_path],
            cmd=self.cmd(foo_path, bar_path),
        )

        self.assertEqual(self.dvc.status([cmd_stage.path]), {})
        self.assertEqual(self.dvc.status(), {})
        self.check_already_cached(cmd_stage)

        # Change the source under foo_key; status must now report changes.
        self.write(self.bucket, foo_key, self.BAR_CONTENTS)

        self.assertNotEqual(self.dvc.status(), {})

        # After update the workspace import must match the local BAR file.
        self.dvc.update(import_stage.path)
        self.assertTrue(os.path.exists("import"))
        self.assertTrue(filecmp.cmp("import", self.BAR, shallow=False))
        self.assertEqual(self.dvc.status([import_stage.path]), {})

        self.dvc.update(import_remote_stage.path)
        self.assertEqual(self.dvc.status([import_remote_stage.path]), {})

        # Reproducing the command stage must yield exactly one stage.
        stages = self.dvc.reproduce(cmd_stage.path)
        self.assertEqual(len(stages), 1)
        self.assertEqual(self.dvc.status([cmd_stage.path]), {})

        # gc must leave the status empty.
        self.assertEqual(self.dvc.status(), {})
        self.dvc.gc()
        self.assertEqual(self.dvc.status(), {})

        # Removing only the outputs makes the stage report changes ...
        self.dvc.remove(cmd_stage.path, outs_only=True)
        self.assertNotEqual(self.dvc.status([cmd_stage.path]), {})

        # ... and a forced checkout brings the status back to empty.
        self.dvc.checkout([cmd_stage.path], force=True)
        self.assertEqual(self.dvc.status([cmd_stage.path]), {})
Пример #16
0
 def setUp(self):
     """Create a temp dir, set 'hardlink' cache type, re-create the repo."""
     super().setUp()
     self.tmpdir = TestDvc.mkdtemp()
     self.assertEqual(main(["config", "cache.type", "hardlink"]), 0)
     # Replace the repo object after the config change above.
     self.dvc = DvcRepo(".")