def __init__(self, path=None, is_tmp=False, **kwargs):
    """Set up the target for an explicit *path* or for a fresh temporary path.

    When *path* is given, its scheme is removed, "~" and environment variables
    are expanded, and the result is made absolute via ``self.fs.abspath``.
    When *path* is empty, *is_tmp* must be truthy: a random, not yet existing
    path inside the configured ``target.tmp_dir`` is chosen. If *is_tmp* is a
    string, it is appended to that path as a file extension (a leading "." is
    added when missing). Remaining *kwargs* are forwarded to
    ``FileSystemTarget.__init__``.

    Raises:
        Exception: if neither *path* nor *is_tmp* is set.
    """
    if path:
        # explicit path: strip the scheme, expand user and variables, make absolute
        stripped = remove_scheme(path)
        expanded = os.path.expandvars(os.path.expanduser(stripped))
        path = self.fs.abspath(expanded)
    elif not is_tmp:
        raise Exception("either path or is_tmp must be set")
    else:
        # tmp paths are handled manually since luigi uses the env tmp dir;
        # read the tmp dir from the config and create it when missing
        tmp_dir = os.path.realpath(Config.instance().get_expanded("target", "tmp_dir"))
        if not self.fs.exists(tmp_dir):
            perm = Config.instance().get("target", "tmp_dir_permission")
            self.fs.mkdir(tmp_dir, perm=int(perm) if perm else perm)

        def random_candidate():
            # build one candidate path with a zero-padded, 9-digit random number
            return os.path.join(tmp_dir, "luigi-tmp-%09d" % random.randint(0, 999999999))

        # draw candidates until one does not exist yet
        path = random_candidate()
        while self.fs.exists(path):
            path = random_candidate()

        # a string-valued is_tmp is interpreted as a file extension
        if isinstance(is_tmp, six.string_types):
            if not is_tmp.startswith("."):
                is_tmp = "." + is_tmp
            path += is_tmp

    luigi.LocalTarget.__init__(self, path=path, is_tmp=is_tmp)
    FileSystemTarget.__init__(self, self.path, **kwargs)
def uri(b):
    # Build the uri of the enclosing scope's ``path`` relative to the base *b*.
    # Leading slashes are stripped from the sanitized path before joining
    # (os.path.join would otherwise drop the base), and trailing slashes are
    # stripped from the joined result.
    relative = self.sanitize_path(path).lstrip("/")
    joined = os.path.join(b, relative).rstrip("/")
    # the scheme is only kept when the enclosing ``scheme`` flag is truthy
    return joined if scheme else remove_scheme(joined)
def move_to_local(self, dst=None, perm=None, dir_perm=None, **kwargs):
    """Move this target via ``FileSystemFileTarget.move_to`` to a local destination.

    A truthy *dst* is resolved with ``get_path``, made absolute on the local
    file system and prefixed with the "file" scheme before the move. *perm*,
    *dir_perm* and all *kwargs* are forwarded to ``move_to``. The value
    returned by ``move_to`` is returned with its scheme removed.
    """
    if dst:
        local_abs = self.fs.local_fs.abspath(get_path(dst))
        dst = add_scheme(local_abs, "file")

    moved = FileSystemFileTarget.move_to(self, dst, perm=perm, dir_perm=dir_perm, **kwargs)
    return remove_scheme(moved)
def open(self, path, mode, cache=None, **kwargs):
    """Generator that yields a local file object (or local path) for a remote *path*.

    In mode "r" the remote file is first copied to the local cache (when
    caching is active) or to a temporary local target, and the local copy is
    yielded. In mode "w" a temporary local target is yielded and, once the
    consumer resumes the generator without an error, copied to *path* if the
    temporary file exists.

    Args:
        path: Path of the file, made absolute via ``self.abspath``.
        mode: Either "r" or "w".
        cache: Whether to use the cache. ``None`` means "use it if
            ``self.cache`` is set"; a truthy value is ignored when
            ``self.cache`` is ``None``.
        **kwargs: Forwarded to ``self._cached_copy``. The special key
            ``_yield_path`` is removed first; when truthy, the local path is
            yielded instead of an opened file object.

    Raises:
        Exception: if *mode* is neither "r" nor "w".

    NOTE(review): a single-yield generator like this is presumably wrapped by
    a context manager decorator outside this block -- confirm.
    """
    if cache is None:
        cache = self.cache is not None
    elif cache and self.cache is None:
        cache = False

    path = self.abspath(path)
    yield_path = kwargs.pop("_yield_path", False)

    if mode == "r":
        if cache:
            lpath = self._cached_copy(path, None, cache=True, **kwargs)
            lpath = remove_scheme(lpath)
        else:
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path
            self._cached_copy(path, add_scheme(lpath, "file"), cache=False, **kwargs)

        try:
            if yield_path:
                yield lpath
            else:
                # the with block closes the file even when the consumer raises
                # or the generator is closed early (previously it leaked then)
                with open(lpath, "r") as f:
                    yield f
        finally:
            if not cache:
                # drop the reference to the temporary target; presumably its
                # destructor removes the temporary file -- TODO confirm
                del tmp

    elif mode == "w":
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        try:
            if yield_path:
                yield lpath
            else:
                # closing (and thereby flushing) happens before the copy below,
                # and also on errors raised by the consumer
                with open(lpath, "w") as f:
                    yield f

            # only reached when the consumer finished without an error
            if tmp.exists():
                self._cached_copy(add_scheme(lpath, "file"), path, cache=cache, **kwargs)
        finally:
            # drop the reference to the temporary target; presumably its
            # destructor removes the temporary file -- TODO confirm
            del tmp

    else:
        raise Exception("unknown mode {}, use r or w".format(mode))
def open(self, path, mode, cache=None, **kwargs):
    """Return a ``RemoteFileProxy`` around a local stand-in for the remote *path*.

    Mode "r": the file is copied into the cache (caching active) or into a
    temporary local target, and the proxy wraps the local copy. Its success
    and failure callbacks both remove the temporary target when caching is
    off. Mode "w": the proxy wraps a temporary local target; on success the
    temporary file, if it exists, is copied to *path* and then removed, on
    failure it is only removed.

    *cache* set to ``None`` means "use the cache if ``self.cache`` is set"; a
    truthy *cache* is ignored when ``self.cache`` is ``None``. The ``_yield_path``
    key is removed from *kwargs*; when truthy, the proxy wraps the local path
    instead of an opened file object. Other *kwargs* go to ``self._cached_copy``.

    Raises:
        Exception: if *mode* is neither "r" nor "w".
    """
    # resolve the effective caching flag
    cache_available = self.cache is not None
    if cache is None:
        cache = cache_available
    elif cache and not cache_available:
        cache = False

    yield_path = kwargs.pop("_yield_path", False)
    path = self.abspath(path)
    tmp_target = None

    if mode == "r":
        if cache:
            cached = self._cached_copy(path, None, cache=True, **kwargs)
            local_path = remove_scheme(cached)
        else:
            tmp_target = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            local_path = tmp_target.path
            self._cached_copy(path, add_scheme(local_path, "file"), cache=False, **kwargs)

        def discard_tmp():
            # nothing to remove when the cached copy was used
            if cache:
                return
            if tmp_target.exists():
                tmp_target.remove()

        if yield_path:
            handle = local_path
        else:
            handle = open(local_path, "r")
        return RemoteFileProxy(handle, success_fn=discard_tmp, failure_fn=discard_tmp)

    if mode != "w":
        raise Exception("unknown mode {}, use r or w".format(mode))

    tmp_target = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
    local_path = tmp_target.path

    def discard_tmp():
        if tmp_target.exists():
            tmp_target.remove()

    def upload_then_discard():
        # the temporary file is removed even when the copy fails
        try:
            if tmp_target.exists():
                self._cached_copy(add_scheme(local_path, "file"), path, cache=cache, **kwargs)
        finally:
            discard_tmp()

    if yield_path:
        handle = local_path
    else:
        handle = open(local_path, "w")
    return RemoteFileProxy(handle, success_fn=upload_then_discard, failure_fn=discard_tmp)
def open(self, path, mode, perm=None, dir_perm=None, cache=None, **kwargs):
    """Return a ``RemoteFileProxy`` around a local stand-in for the remote *path*.

    Modes starting with "r" are treated as reads: the file is fetched via
    ``self._cached_copy`` (caching active) or copied into a temporary local
    target via ``self.copy``, and the proxy wraps the local copy opened with
    *mode*. All other modes are treated as write or update: the proxy wraps a
    temporary local target that, on success, is copied to *path* with *perm*
    and *dir_perm* if it exists, and then removed.

    Caching is off when ``self.cache`` is ``None``; otherwise *cache* set to
    ``None`` falls back to ``self.use_cache`` and any other value is converted
    to bool. The ``_yield_path`` key is removed from *kwargs*; when truthy, the
    proxy wraps the local path instead of an opened file object. Other
    *kwargs* are forwarded to the copy calls.
    """
    # resolve the effective caching flag
    if self.cache is None:
        cache = False
    elif cache is None:
        cache = self.use_cache
    else:
        cache = bool(cache)

    yield_path = kwargs.pop("_yield_path", False)
    path = self.abspath(path)
    tmp_target = None

    if mode.startswith("r"):
        if cache:
            fetched = self._cached_copy(path, None, cache=cache, **kwargs)
        else:
            tmp_target = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            fetched = self.copy(path, tmp_target.uri(), cache=cache, **kwargs)
        local_path = remove_scheme(fetched)

        def discard_tmp():
            # only a temporary copy that was actually created gets removed
            if cache or not tmp_target:
                return
            if tmp_target.exists():
                tmp_target.remove()

        if yield_path:
            handle = local_path
        else:
            handle = open(local_path, mode)
        return RemoteFileProxy(handle, success_fn=discard_tmp, failure_fn=discard_tmp)

    # write or update
    tmp_target = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
    local_path = tmp_target.path

    def discard_tmp():
        tmp_target.remove(silent=True)

    def upload_then_discard():
        # start out assuming existence so that the removal in the finally
        # clause is still attempted when the existence check itself raises
        present = True
        try:
            present = tmp_target.exists()
            if present:
                self.copy(tmp_target.uri(), path, perm=perm, dir_perm=dir_perm, cache=cache,
                    **kwargs)
        finally:
            if present:
                tmp_target.remove(silent=True)

    if yield_path:
        handle = local_path
    else:
        handle = open(local_path, mode)
    return RemoteFileProxy(handle, success_fn=upload_then_discard, failure_fn=discard_tmp)
def __init__(self, path=None, fs=LocalFileSystem.default_instance, is_tmp=False, tmp_dir=None, **kwargs):
    """Set up the target for an explicit *path* or for a fresh temporary path.

    A string-valued *fs* is converted with ``LocalFileSystem(fs)``. When *path*
    is given, it is resolved with ``get_path``, its scheme is removed, "~" and
    environment variables are expanded, and the result is made absolute via
    ``fs.abspath``. When *path* is empty, *is_tmp* must be truthy: a random,
    not yet existing path is chosen inside *tmp_dir* or, when that is not set,
    inside the configured ``target.tmp_dir``; the directory is created when
    missing. If *is_tmp* is a string, it is appended to that path as a file
    extension (a leading "." is added when missing). Remaining *kwargs* are
    forwarded to ``FileSystemTarget.__init__``.

    Raises:
        Exception: if neither *path* nor *is_tmp* is set.
    """
    if isinstance(fs, six.string_types):
        fs = LocalFileSystem(fs)

    if path:
        # ensure path is not a target and does not contain a scheme, then normalize
        plain = remove_scheme(get_path(path))
        expanded = os.path.expandvars(os.path.expanduser(plain))
        path = fs.abspath(expanded)
    elif not is_tmp:
        raise Exception("either path or is_tmp must be set")
    else:
        # tmp paths are handled manually since luigi uses the env tmp dir
        cfg = Config.instance()

        # an explicit tmp_dir wins, otherwise fall back to the configured one
        if tmp_dir:
            tmp_dir = get_path(tmp_dir)
        else:
            configured = cfg.get_expanded("target", "tmp_dir")
            tmp_dir = os.path.realpath(configured)

        # make sure the directory exists
        if not fs.exists(tmp_dir):
            fs.mkdir(tmp_dir, perm=cfg.get_expanded_int("target", "tmp_dir_perm"))

        def random_candidate():
            # build one candidate path with a zero-padded, 9-digit random number
            number = random.randint(0, 999999999)
            return os.path.join(tmp_dir, "luigi-tmp-{:09d}".format(number))

        # draw candidates until one does not exist yet
        path = random_candidate()
        while fs.exists(path):
            path = random_candidate()

        # a string-valued is_tmp is interpreted as a file extension
        if isinstance(is_tmp, six.string_types):
            if not is_tmp.startswith("."):
                is_tmp = "." + is_tmp
            path += is_tmp

    luigi.LocalTarget.__init__(self, path=path, is_tmp=is_tmp)
    FileSystemTarget.__init__(self, self.path, fs=fs, **kwargs)
def uri(b):
    # Build the uri of the enclosing scope's ``path`` relative to the base *b*:
    # the sanitized path loses its leading slashes before being joined to the
    # base, and the joined result loses its trailing slashes.
    tail = self.sanitize_path(path).lstrip("/")
    full = os.path.join(b, tail).rstrip("/")

    # without the enclosing ``scheme`` flag, the scheme is removed again
    if not scheme:
        return remove_scheme(full)
    return full
def _unscheme(self, path):
    """Return *path* without its scheme when that scheme is "file", otherwise unchanged."""
    if get_scheme(path) != "file":
        return path
    return remove_scheme(path)
def _cached_copy(self, src, dst, cache=None, prefer_cache=False, validate=None, **kwargs):
    """Copy *src* to *dst*, optionally going through the local cache.

    Both paths are made absolute and classified as local ("l") or remote
    ("r"); an empty *dst* stands for "copy into the cache only" ("c"). The
    resulting two-letter mode selects the strategy:

    - "lr" with caching: copy to the remote location, then mirror the file
      into the cache and store the remote mtime with the cache entry.
    - "rl" / "rc" with caching: refresh the cache entry when it is missing or
      its mtime differs from the remote one by more than 1, then copy from
      the cache to *dst* ("rl") or hand out the cache path ("rc").
    - everything else: a plain ``self._atomic_copy`` without caching.

    Args:
        src: Source path.
        dst: Destination path; may be empty only when caching is enabled.
        cache: Caching request, resolved via ``self._use_cache``. Forced to
            ``False`` for the modes "ll", "lc" and "rr".
        prefer_cache: When truthy and *src* is already in the cache, the
            cached file is used without checking the remote stat.
        validate: Forwarded to ``self._atomic_copy``.
        **kwargs: Forwarded to ``self._atomic_copy`` and ``self.stat``.

    Returns:
        With caching: *dst* for "lr" and "rl", the "file"-scheme cache path
        for "rc". Without caching: the full destination path when the
        destination is local, otherwise *dst*.

    Raises:
        Exception: if *dst* is empty while caching is disabled.
    """
    cache = self._use_cache(cache)

    # ensure absolute paths
    src = self.abspath(src)
    dst = self.abspath(dst) if dst else None

    # determine the copy mode for code readability
    # (remote-remote: "rr", remote-local: "rl", remote-cache: "rc", ...)
    # note: "rl"[flag] picks "r" for a falsy and "l" for a truthy bool flag
    src_local = self.is_local(src)
    dst_local = dst and self.is_local(dst)
    mode = "rl"[src_local] + ("rl"[dst_local] if dst is not None else "c")

    # disable caching when the mode is local-local, local-cache or remote-remote
    if mode in ("ll", "lc", "rr"):
        cache = False

    # dst can be None, but in this case, caching should be enabled
    if dst is None and not cache:
        raise Exception(
            "copy destination must not be empty when caching is disabled")

    # paths including scheme and base
    full_src = src if has_scheme(src) else self.gfal.url(src, cmd="filecopy")
    full_dst = None
    if dst:
        full_dst = dst if has_scheme(dst) else self.gfal.url(
            dst, cmd="filecopy")

    if cache:
        # NOTE(review): the last assignment below replaces the copy that has
        # retries set to 0, so stat calls currently receive the unmodified
        # kwargs -- confirm whether disabling retries was meant to be active
        kwargs_no_retries = kwargs.copy()
        kwargs_no_retries["retries"] = 0
        kwargs_no_retries = kwargs

        # handle 3 cases: lr, rl, rc
        if mode == "lr":
            # strategy: copy to remote, copy to cache, sync stats

            # copy to remote, no need to validate as we need the stat anyway
            self._atomic_copy(src, full_dst, validate=False, **kwargs)
            rstat = self.stat(dst, **kwargs_no_retries)

            # remove the cache entry
            if dst in self.cache:
                self.cache.remove(dst)

            # allocate cache space and copy to cache
            lstat = _local_fs.stat(src)
            self.cache.allocate(lstat.st_size)
            full_cdst = add_scheme(self.cache.cache_path(dst), "file")
            with self.cache.lock(dst):
                self._atomic_copy(src, full_cdst, validate=False)
                # store the current time together with the remote mtime
                self.cache.touch(dst, (int(time.time()), rstat.st_mtime))

            return dst

        else:  # rl, rc
            # strategy: copy to cache when not up to date, sync stats, opt. copy to local

            # build the full cache path of the src file
            full_csrc = add_scheme(self.cache.cache_path(src), "file")

            # if the file is cached and prefer_cache is true,
            # return the cache path, no questions asked
            # otherwise, check if the file is there and up to date
            if not prefer_cache or src not in self.cache:
                with self.cache.lock(src):
                    # in cache and outdated?
                    rstat = self.stat(src, **kwargs_no_retries)
                    if src in self.cache and abs(
                            self.cache.mtime(src) - rstat.st_mtime) > 1:
                        self.cache.remove(src, lock=False)

                    # in cache at all?
                    if src not in self.cache:
                        self.cache.allocate(rstat.st_size)
                        self._atomic_copy(full_src, full_csrc, validate=validate, **kwargs)
                        # store the current time together with the remote mtime
                        self.cache.touch(
                            src, (int(time.time()), rstat.st_mtime))

            if mode == "rl":
                # copy to local without permission bits
                copy_no_perm(remove_scheme(full_csrc), remove_scheme(full_dst))
                return dst
            else:  # rc
                return full_csrc

    else:
        # simply copy and return the dst path
        self._atomic_copy(full_src, full_dst, validate=validate, **kwargs)
        return full_dst if dst_local else dst
def copy_to_local(self, dst=None, **kwargs):
    """Copy this target via ``FileSystemFileTarget.copy_to`` to a local destination.

    A truthy *dst* is resolved with ``get_path``, made absolute on the local
    file system and prefixed with the "file" scheme before the copy. All
    *kwargs* are forwarded to ``copy_to``. The value returned by ``copy_to`` is
    returned with its scheme removed.
    """
    if dst:
        local_abs = self.fs.local_fs.abspath(get_path(dst))
        dst = add_scheme(local_abs, "file")

    copied = FileSystemFileTarget.copy_to(self, dst, **kwargs)
    return remove_scheme(copied)
def move_from_local(self, *args, **kwargs):
    """Call ``self.move_from`` with all arguments and return its result without the scheme."""
    moved = self.move_from(*args, **kwargs)
    return remove_scheme(moved)
def move_to_local(self, *args, **kwargs):
    """Call ``self.move_to`` with all arguments and return its result without the scheme."""
    moved = self.move_to(*args, **kwargs)
    return remove_scheme(moved)
def move_to_local(self, dst=None, **kwargs):
    """Move this target via ``self.move_to`` to a local destination.

    A truthy *dst* is resolved with ``get_path``, made absolute on the local
    file system and prefixed with the "file" scheme before the move. All
    *kwargs* are forwarded to ``move_to``. The value returned by ``move_to`` is
    returned with its scheme removed.
    """
    if dst:
        local_abs = self.fs.local_fs.abspath(get_path(dst))
        dst = add_scheme(local_abs, "file")

    moved = self.move_to(dst, **kwargs)
    return remove_scheme(moved)