def exists_with_progress(path_info):
    """Check whether *path_info* exists, ticking the progress callback."""
    found = self.exists(path_info)
    progress_callback.update(str(path_info))
    return found
def __eq__(self, other):
    """Equal when string representations match; plain strings are coerced
    to this class first (py2-compat ``basestring`` check)."""
    rhs = self.__class__(other) if isinstance(other, basestring) else other
    return str(self) == str(rhs)
def __init__(self, fname, e):
    """Format a DVC-file parse error for *fname* from the cause *e*."""
    super(StageFileFormatError, self).__init__(
        "DVC-file '{}' format error: {}".format(fname, str(e))
    )
def check_missing_outputs(self):
    """Raise MissingDataSource if any of this stage's outputs is absent."""
    missing = []
    for out in self.outs:
        if not out.exists:
            missing.append(str(out))
    if missing:
        raise MissingDataSource(missing)
def _unprotect_dir(self, path):
    """Unprotect every file beneath *path*, walking it recursively."""
    for root, _, files in os.walk(str(path)):
        for fname in files:
            self._unprotect_file(os.path.join(root, fname))
def test(self, mock_prompt):
    """End-to-end test of an external cache plus external remote workflow.

    Configures an external cache and remote on the same bucket, imports a
    file both by plain URL and ``remote://`` notation, runs a command with
    external deps/outs, then verifies reproduce/gc/remove/checkout keep
    status clean.  Skipped when the backend under test is unavailable.
    """
    if not self.should_test():
        return

    # Unique external cache location on the bucket.
    cache = (
        self.scheme + self.scheme_sep + self.bucket + self.sep + str(uuid.uuid4())
    )

    # Point the repo's cache for this scheme at the "myrepo" remote.
    ret = main(["config", "cache." + self.cache_scheme, "myrepo"])
    self.assertEqual(ret, 0)
    ret = main(["remote", "add", "myrepo", cache])
    self.assertEqual(ret, 0)
    ret = main(["remote", "modify", "myrepo", "type", "hardlink"])
    self.assertEqual(ret, 0)

    # Separate data remote, also on the bucket.
    remote_name = "myremote"
    remote_key = str(uuid.uuid4())
    remote = (
        self.scheme + self.scheme_sep + self.bucket + self.sep + remote_key
    )
    ret = main(["remote", "add", remote_name, remote])
    self.assertEqual(ret, 0)
    ret = main(["remote", "modify", remote_name, "type", "hardlink"])
    self.assertEqual(ret, 0)

    self.dvc = DvcRepo(".")

    foo_key = remote_key + self.sep + self.FOO
    bar_key = remote_key + self.sep + self.BAR

    foo_path = (
        self.scheme + self.scheme_sep + self.bucket + self.sep + foo_key
    )
    bar_path = (
        self.scheme + self.scheme_sep + self.bucket + self.sep + bar_key
    )

    # Using both plain and remote notation
    out_foo_path = "remote://" + remote_name + "/" + self.FOO
    out_bar_path = bar_path

    self.write(self.bucket, foo_key, self.FOO_CONTENTS)

    # Import by remote:// URL; the result must land in the workspace
    # already cached and clean.
    import_stage = self.dvc.imp(out_foo_path, "import")
    self.assertTrue(os.path.exists("import"))
    self.assertTrue(filecmp.cmp("import", self.FOO, shallow=False))
    self.assertEqual(self.dvc.status(import_stage.path), {})
    self.check_already_cached(import_stage)

    # Import again, this time to an external (remote) destination.
    import_remote_stage = self.dvc.imp(
        out_foo_path, out_foo_path + "_imported"
    )
    self.assertEqual(self.dvc.status(import_remote_stage.path), {})

    # Run a command with an external dependency and an external output.
    cmd_stage = self.dvc.run(
        outs=[out_bar_path],
        deps=[out_foo_path],
        cmd=self.cmd(foo_path, bar_path),
    )
    self.assertEqual(self.dvc.status(cmd_stage.path), {})
    self.assertEqual(self.dvc.status(), {})
    self.check_already_cached(cmd_stage)

    # Overwrite the source data; everything should now be out of date.
    self.write(self.bucket, foo_key, self.BAR_CONTENTS)
    self.assertNotEqual(self.dvc.status(), {})

    # Reproduce each stage and verify the workspace converges to clean.
    stages = self.dvc.reproduce(import_stage.path)
    self.assertEqual(len(stages), 1)
    self.assertTrue(os.path.exists("import"))
    self.assertTrue(filecmp.cmp("import", self.BAR, shallow=False))
    self.assertEqual(self.dvc.status(import_stage.path), {})

    stages = self.dvc.reproduce(import_remote_stage.path)
    self.assertEqual(len(stages), 1)
    self.assertEqual(self.dvc.status(import_remote_stage.path), {})

    stages = self.dvc.reproduce(cmd_stage.path)
    self.assertEqual(len(stages), 1)
    self.assertEqual(self.dvc.status(cmd_stage.path), {})

    self.assertEqual(self.dvc.status(), {})

    # Garbage collection must not disturb a clean workspace.
    self.dvc.gc()
    self.assertEqual(self.dvc.status(), {})

    # Removing only the outputs dirties the stage; checkout restores it.
    self.dvc.remove(cmd_stage.path, outs_only=True)
    self.assertNotEqual(self.dvc.status(cmd_stage.path), {})

    self.dvc.checkout(cmd_stage.path, force=True)
    self.assertEqual(self.dvc.status(cmd_stage.path), {})
def _test_cloud(self):
    """Exercise remote status/push/pull for a single file and a directory.

    Walks the cloud through the full cycle NEW -> push -> OK -> delete
    local cache -> DELETED -> pull -> OK, checking the reported status for
    both the file checksum and the directory checksum at every step.
    """
    self._setup_cloud()

    # Track a single file and capture its cache entry and checksum.
    stages = self.dvc.add(self.FOO)
    self.assertEqual(len(stages), 1)
    stage = stages[0]
    self.assertTrue(stage is not None)
    out = stage.outs[0]
    cache = out.cache_path
    md5 = out.checksum
    info = out.get_used_cache()

    # Track a directory the same way.
    stages = self.dvc.add(self.DATA_DIR)
    self.assertEqual(len(stages), 1)
    stage_dir = stages[0]
    self.assertTrue(stage_dir is not None)
    out_dir = stage_dir.outs[0]
    cache_dir = out_dir.cache_path
    name_dir = str(out_dir)
    md5_dir = out_dir.checksum
    info_dir = NamedCache.make(out_dir.scheme, md5_dir, name_dir)

    with self.cloud.repo.state:
        # Check status: nothing pushed yet, both entries are NEW.
        status = self.cloud.status(info, show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_NEW}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status(info_dir, show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_NEW}}
        self.assertEqual(status_dir, expected)

        # Push and check status.
        self.cloud.push(info)
        self.assertTrue(os.path.exists(cache))
        self.assertTrue(os.path.isfile(cache))

        self.cloud.push(info_dir)
        self.assertTrue(os.path.isfile(cache_dir))

        status = self.cloud.status(info, show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_OK}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status(info_dir, show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_OK}}
        self.assertEqual(status_dir, expected)

        # Remove the local cache and check status: entries become DELETED.
        shutil.rmtree(self.dvc.cache.local.cache_dir)

        status = self.cloud.status(info, show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_DELETED}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status(info_dir, show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_DELETED}}
        self.assertEqual(status_dir, expected)

        # Pull and check status: cache is restored and entries are OK.
        self.cloud.pull(info)
        self.assertTrue(os.path.exists(cache))
        self.assertTrue(os.path.isfile(cache))
        with open(cache, "r") as fd:
            self.assertEqual(fd.read(), self.FOO_CONTENTS)

        self.cloud.pull(info_dir)
        self.assertTrue(os.path.isfile(cache_dir))

        status = self.cloud.status(info, show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_OK}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status(info_dir, show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_OK}}
        # BUG FIX: was ``assertTrue(status_dir, expected)`` — assertTrue's
        # second argument is only a failure message, so the comparison was
        # never actually performed.
        self.assertEqual(status_dir, expected)
def get_gcp_storagepath():
    """Build a unique storage path inside the test GCP bucket."""
    suffix = str(uuid.uuid4())
    return TEST_GCP_REPO_BUCKET + "/" + suffix
def __init__(self, record):
    """Wrap a failure to log *record* in a descriptive message."""
    message = "failed to log {}".format(str(record))
    super(LoggingException, self).__init__(message)
def tmp_file(fname):
    """Return a unique temporary name for a partial download of *fname*."""
    import uuid

    return "{}.{}".format(fname, uuid.uuid4())
def _test_cloud(self):
    """Exercise remote status/push/pull for a single file and a directory.

    Walks the cloud through the full cycle NEW -> push -> OK -> delete
    local cache -> DELETED -> pull -> OK, checking the reported status for
    both the file checksum and the directory checksum at every step.
    """
    self._setup_cloud()

    # Track a single file and capture its cache entry and checksum.
    stages = self.dvc.add(self.FOO)
    self.assertEqual(len(stages), 1)
    stage = stages[0]
    self.assertTrue(stage is not None)
    out = stage.outs[0]
    cache = out.cache_path
    name = str(out)
    md5 = out.checksum
    info = {"name": name, out.remote.PARAM_CHECKSUM: md5}

    # Track a directory the same way.
    stages = self.dvc.add(self.DATA_DIR)
    self.assertEqual(len(stages), 1)
    stage_dir = stages[0]
    self.assertTrue(stage_dir is not None)
    out_dir = stage_dir.outs[0]
    # BUG FIX: the three lines below previously read from ``out`` (the
    # file output) instead of ``out_dir``, so the directory's cache path,
    # name, and checksum were never actually exercised.
    cache_dir = out_dir.cache_path
    name_dir = str(out_dir)
    md5_dir = out_dir.checksum
    info_dir = {"name": name_dir, out_dir.remote.PARAM_CHECKSUM: md5_dir}

    with self.cloud.repo.state:
        # Check status: nothing pushed yet, both entries are NEW.
        status = self.cloud.status([info], show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_NEW}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status([info_dir], show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_NEW}}
        self.assertEqual(status_dir, expected)

        # Push and check status.
        self.cloud.push([info])
        self.assertTrue(os.path.exists(cache))
        self.assertTrue(os.path.isfile(cache))

        self.cloud.push([info_dir])
        self.assertTrue(os.path.isfile(cache_dir))

        status = self.cloud.status([info], show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_OK}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status([info_dir], show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_OK}}
        self.assertEqual(status_dir, expected)

        # Remove the local cache and check status: entries become DELETED.
        shutil.rmtree(self.dvc.cache.local.cache_dir)

        status = self.cloud.status([info], show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_DELETED}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status([info_dir], show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_DELETED}}
        self.assertEqual(status_dir, expected)

        # Pull and check status: cache is restored and entries are OK.
        self.cloud.pull([info])
        self.assertTrue(os.path.exists(cache))
        self.assertTrue(os.path.isfile(cache))
        with open(cache, "r") as fd:
            self.assertEqual(fd.read(), self.FOO_CONTENTS)

        self.cloud.pull([info_dir])
        self.assertTrue(os.path.isfile(cache_dir))

        status = self.cloud.status([info], show_checksums=True)
        expected = {md5: {"name": md5, "status": STATUS_OK}}
        self.assertEqual(status, expected)

        status_dir = self.cloud.status([info_dir], show_checksums=True)
        expected = {md5_dir: {"name": md5_dir, "status": STATUS_OK}}
        # BUG FIX: was ``assertTrue(status_dir, expected)`` — assertTrue's
        # second argument is only a failure message, so the comparison was
        # never actually performed.
        self.assertEqual(status_dir, expected)
def walk_files(directory):
    """Yield the full path of every file under *directory*."""
    for base, _, filenames in os.walk(str(directory)):
        for name in filenames:
            yield os.path.join(base, name)
def get_gdrive_url():
    """Return a unique gdrive:// URL for testing."""
    return "gdrive://root/{}".format(uuid.uuid4())
def as_posix(self):
    """Return this path rendered with forward slashes as separators."""
    return str(self).replace(self._flavour.sep, "/")
def changed(self):
    """Report whether this object's status shows any changes."""
    current_status = self.status()
    logger.debug(str(current_status))
    return bool(current_status)
def get_aws_storagepath():
    """Build a unique storage path inside the test AWS bucket."""
    suffix = str(uuid.uuid4())
    return TEST_AWS_REPO_BUCKET + "/" + suffix
def exists_with_progress(path_info):
    """Check whether *path_info* exists, updating the progress bar label."""
    found = self.exists(path_info)
    pbar.update_desc(str(path_info))
    return found
def get_azure_url():
    """Return a unique azure:// URL inside the configured container.

    Requires AZURE_STORAGE_CONTAINER_NAME to be set in the environment.
    """
    container = os.getenv("AZURE_STORAGE_CONTAINER_NAME")
    assert container is not None
    return "azure://{}/{}".format(container, uuid.uuid4())
def tmp_fname(fname):
    """Return a unique temporary name for a partial download of *fname*."""
    from uuid import uuid4

    suffix = str(uuid4())
    return fname + "." + suffix + ".tmp"
def external_cache_id(self):
    """Lazily create, cache, and return a unique external-cache id."""
    cached = self._external_cache_id
    if not cached:
        cached = str(uuid.uuid4())
        self._external_cache_id = cached
    return cached
def walk_files(directory, ignore_file_handler=None):
    """Yield every file path under *directory*, honoring ignore rules."""
    walker = dvc_walk(
        str(directory), ignore_file_handler=ignore_file_handler
    )
    for base, _, filenames in walker:
        for name in filenames:
            yield os.path.join(base, name)
def fix_key(k):
    """Normalize native-string keys to ``str``; pass other keys through."""
    if isinstance(k, builtin_str):
        return str(k)
    return k
def __init__(self, target_infos):
    """Build a checkout-failure message listing all failed targets."""
    names = [str(t) for t in target_infos]
    m = (
        "Checkout failed for following targets:\n {}\nDid you "
        "forget to fetch?".format("\n".join(names))
    )
    super(CheckoutError, self).__init__(m)
def _changed_entries(entries):
    """Return the names of entries whose saved checksum no longer matches."""
    changed = []
    for entry in entries:
        if entry.checksum and entry.changed_checksum():
            changed.append(str(entry))
    return changed
def status(self):
    """Return ``{str(self): "changed"}`` when changed, else an empty dict."""
    # FIXME better msgs
    if not self.changed():
        return {}
    return {str(self): "changed"}
def tmp_fname(fname):
    """Return a unique temporary name for a partial download of *fname*."""
    base = fspath(fname)
    return base + "." + str(uuid()) + ".tmp"
def __str__(self):
    """Render this object as the string form of its wrapped path info."""
    info = self.path_info
    return str(info)