def open(self, path, mode, cache=None, **kwargs):
    if cache is None:
        cache = self.cache is not None
    elif cache and self.cache is None:
        cache = False

    path = self.abspath(path)
    yield_path = kwargs.pop("_yield_path", False)

    if mode == "r":
        if cache:
            lpath = self._cached_copy(path, None, cache=True, **kwargs)
            lpath = remove_scheme(lpath)
        else:
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path
            self._cached_copy(path, add_scheme(lpath, "file"), cache=False, **kwargs)

        try:
            if yield_path:
                yield lpath
            else:
                f = open(lpath, "r")
                yield f
                if not f.closed:
                    f.close()
        finally:
            if not cache:
                del tmp

    elif mode == "w":
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        try:
            if yield_path:
                yield lpath
            else:
                f = open(lpath, "w")
                yield f
                if not f.closed:
                    f.close()

            if tmp.exists():
                self._cached_copy(add_scheme(lpath, "file"), path, cache=cache, **kwargs)
        finally:
            del tmp

    else:
        raise Exception("unknown mode {}, use r or w".format(mode))
def open(self, path, mode, cache=None, **kwargs):
    if cache is None:
        cache = self.cache is not None
    elif cache and self.cache is None:
        cache = False

    yield_path = kwargs.pop("_yield_path", False)
    path = self.abspath(path)
    tmp = None

    if mode == "r":
        if cache:
            lpath = self._cached_copy(path, None, cache=True, **kwargs)
            lpath = remove_scheme(lpath)
        else:
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path
            self._cached_copy(path, add_scheme(lpath, "file"), cache=False, **kwargs)

        def cleanup():
            if not cache and tmp.exists():
                tmp.remove()

        f = lpath if yield_path else open(lpath, "r")
        return RemoteFileProxy(f, success_fn=cleanup, failure_fn=cleanup)

    elif mode == "w":
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        def cleanup():
            if tmp.exists():
                tmp.remove()

        def copy_and_cleanup():
            try:
                if tmp.exists():
                    self._cached_copy(add_scheme(lpath, "file"), path, cache=cache, **kwargs)
            finally:
                cleanup()

        f = lpath if yield_path else open(lpath, "w")
        return RemoteFileProxy(f, success_fn=copy_and_cleanup, failure_fn=cleanup)

    else:
        raise Exception("unknown mode {}, use r or w".format(mode))
def open(self, path, mode, perm=None, dir_perm=None, cache=None, **kwargs):
    if self.cache is None:
        cache = False
    elif cache is None:
        cache = self.use_cache
    else:
        cache = bool(cache)

    yield_path = kwargs.pop("_yield_path", False)
    path = self.abspath(path)
    tmp = None

    read_mode = mode.startswith("r")
    if read_mode:
        if cache:
            lpath = self._cached_copy(path, None, cache=cache, **kwargs)
        else:
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = self.copy(path, tmp.uri(), cache=cache, **kwargs)
        lpath = remove_scheme(lpath)

        def cleanup():
            if not cache and tmp and tmp.exists():
                tmp.remove()

        f = lpath if yield_path else open(lpath, mode)
        return RemoteFileProxy(f, success_fn=cleanup, failure_fn=cleanup)

    else:  # write or update
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        lpath = tmp.path

        def cleanup():
            tmp.remove(silent=True)

        def copy_and_cleanup():
            exists = True
            try:
                exists = tmp.exists()
                if exists:
                    self.copy(tmp.uri(), path, perm=perm, dir_perm=dir_perm, cache=cache, **kwargs)
            finally:
                if exists:
                    tmp.remove(silent=True)

        f = lpath if yield_path else open(lpath, mode)
        return RemoteFileProxy(f, success_fn=copy_and_cleanup, failure_fn=cleanup)
def env(self):
    # strategy: create a tempfile, forward it to a container, let python dump its full env,
    # close the container and load the env file
    if self.image not in self._envs:
        tmp = LocalFileTarget(is_tmp=".env")
        tmp.touch()

        env_file = os.path.join("/tmp", tmp.unique_basename)

        # get the docker run command
        docker_run_cmd = self._docker_run_cmd()

        # mount the env file
        docker_run_cmd.extend(["-v", "{}:{}".format(tmp.path, env_file)])

        # build commands to setup the environment
        setup_cmds = self._build_setup_cmds(self._get_env())

        # build the python command that dumps the environment
        py_cmd = "import os,pickle;" \
            + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_file)

        # build the full command
        cmd = quote_cmd(docker_run_cmd + [
            self.image,
            "bash", "-l", "-c",
            "; ".join(flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd]))),
        ])

        # run it
        code, out, _ = interruptable_popen(cmd, shell=True, executable="/bin/bash",
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if code != 0:
            raise Exception("docker sandbox env loading failed:\n{}".format(out))

        # load the environment from the tmp file
        env = tmp.load(formatter="pickle")

        # cache
        self._envs[self.image] = env

    return self._envs[self.image]
def log(fn, opts, task, *args, **kwargs):
    """ log()
    Wraps a bound method of a task and redirects the output of both stdout and stderr to the
    file defined by the task's *log* parameter. If its value is ``"-"`` or *None*, the output
    is not redirected.
    """
    _task = get_task(task)
    log = get_param(_task.log_file, _task.default_log_file)

    if log == "-" or not log:
        return fn(task, *args, **kwargs)
    else:
        # use the local target functionality to create the parent directory
        LocalFileTarget(log).parent.touch()
        with open(log, "a", 1) as f:
            tee = TeeStream(f, sys.__stdout__)
            sys.stdout = tee
            sys.stderr = tee
            try:
                ret = fn(task, *args, **kwargs)
            except Exception as e:
                traceback.print_exc(file=tee)
                raise e
            finally:
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__
                tee.flush()
        return ret
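# A minimal usage sketch for the log() wrapper above (not part of the original code).
# MyTask and the log file path are hypothetical; it only assumes the task exposes the
# log_file / default_log_file parameters that log() reads via get_param().
task = MyTask(log_file="/tmp/my_task.log")
result = log(MyTask.run, {}, task)
# while run() executes, stdout and stderr are tee'd to both the console and the log file;
# a log_file value of "-" or an empty value skips the redirection entirely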
def get_source_target(self):
    # when self.source_path is set, return a target around it
    # otherwise assume self.requires() returns a task with a single local target
    if self.source_path not in (NO_STR, None):
        return LocalFileTarget(self.source_path)
    else:
        return self.input()
def localize(self, mode="r", perm=None, parent_perm=None, **kwargs): if mode not in ("r", "w"): raise Exception("unknown mode '{}', use r or w".format(mode)) if mode == "r": with self.fs.open(self.path, "r", _yield_path=True, **kwargs) as lpath: yield LocalFileTarget(lpath) else: # w tmp = LocalFileTarget(is_tmp=self.ext() or True) try: yield tmp if tmp.exists(): self.copy_from_local(tmp, dir_perm=parent_perm, **kwargs) self.chmod(perm) else: logger.warning("cannot move non-existing localized file target {!r}".format( self)) finally: del tmp
def localize(self, path=None, is_tmp=True, skip_parent=False, mode=0o0660, cache=True, retry=None):
    tmp = LocalFileTarget(path, is_tmp=is_tmp)
    try:
        yield tmp
        if not skip_parent:
            self.parent.touch()
        self.put(tmp, cache=cache, retry=retry)
        self.chmod(mode, retry=retry)
    finally:
        del tmp
def localize(self, mode="r", perm=None, dir_perm=None, tmp_dir=None, **kwargs): if mode not in ["r", "w", "a"]: raise Exception( "unknown mode '{}', use 'r', 'w' or 'a'".format(mode)) logger.debug("localizing file target {!r} with mode '{}'".format( self, mode)) if mode == "r": with self.fs.open(self.path, "r", _yield_path=True, perm=perm, **kwargs) as lpath: yield LocalFileTarget(lpath) else: # mode "w" or "a" tmp = LocalFileTarget(is_tmp=self.ext(n=1) or True, tmp_dir=tmp_dir) # copy to local in append mode if mode == "a" and self.exists(): self.copy_to_local(tmp) try: yield tmp if tmp.exists(): self.copy_from_local(tmp, perm=perm, dir_perm=dir_perm, **kwargs) else: logger.warning( "cannot move non-existing localized file target {!r}". format(self)) finally: tmp.remove()
def output(self): return LocalFileTarget("gridpacks/{}_{}_{}.tgz".format(os.path.basename(self.repo_path), datetime.strftime(datetime.now(), '%y-%m-%d'),self.checksum))
def output(self):
    repo_base = os.path.basename(self.get_repo_path())
    return LocalFileTarget("{}_{}.tgz".format(repo_base, self.checksum))
def merge_parquet_task(task, inputs, output, local=False, cwd=None, force=True, writer_opts=None):
    """
    This method is intended to be used by tasks that are supposed to merge parquet files, e.g.
    when inheriting from :py:class:`law.contrib.tasks.MergeCascade`. *inputs* should be a
    sequence of targets that represent the files to merge into *output*. When *local* is *False*
    and files need to be copied from remote first, *cwd* can be set as the download directory.
    When empty, a temporary directory is used. The *task* itself is used to print and publish
    messages via its :py:meth:`law.Task.publish_message` and :py:meth:`law.Task.publish_step`
    methods. When *force* is *True*, any existing output file is overwritten. *writer_opts* is
    forwarded to :py:func:`merge_parquet_files` which is used internally for the actual merging.
    """
    # ensure inputs are targets
    inputs = [
        LocalFileTarget(inp) if isinstance(inp, six.string_types) else inp
        for inp in inputs
    ]

    # ensure output is a target
    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    def merge(inputs, output):
        with task.publish_step("merging {} parquet files ...".format(len(inputs)), runtime=True):
            # clear the output if necessary
            if output.exists() and force:
                output.remove()

            if len(inputs) == 1:
                output.copy_from_local(inputs[0])
            else:
                # merge
                merge_parquet_files([inp.path for inp in inputs], output.path,
                    writer_opts=writer_opts)

        # print the size
        output_size = human_bytes(output.stat().st_size, fmt=True)
        task.publish_message(f"merged file size: {output_size}")

    if local:
        # everything is local, just merge
        merge(inputs, output)
    else:
        # when not local, we need to fetch files first into the cwd
        if not cwd:
            cwd = LocalDirectoryTarget(is_tmp=True)
        elif isinstance(cwd, str):
            cwd = LocalDirectoryTarget(cwd)
        cwd.touch()

        # fetch
        with task.publish_step("fetching inputs ...", runtime=True):
            def fetch(inp):
                local_inp = cwd.child(inp.unique_basename, type="f")
                inp.copy_to_local(local_inp, cache=False)
                return local_inp

            def callback(i):
                task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

            local_inputs = map_verbose(fetch, inputs, every=5, callback=callback)

        # merge into a localized output
        with output.localize("w", cache=False) as local_output:
            merge(local_inputs, local_output)
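# Illustrative sketch (not from the original source): a task invoking merge_parquet_task()
# from its run() method with purely local files. The class name, file paths and the
# availability of merge_parquet_task in the current namespace are assumptions; the call
# itself follows the docstring above.
import law


class MergeLocalParquet(law.Task):

    def output(self):
        return law.LocalFileTarget("data/merged.parquet")

    def run(self):
        inputs = [law.LocalFileTarget(p) for p in ("data/part0.parquet", "data/part1.parquet")]
        merge_parquet_task(self, inputs, self.output(), local=True)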
def hadd_task(task, inputs, output, cwd=None, local=False, force=True):
    """
    This method is intended to be used by tasks that are supposed to merge root files, e.g. when
    inheriting from :py:class:`law.contrib.tasks.MergeCascade`. *inputs* should be a sequence of
    targets that represent the files to merge into *output*. *cwd* is the working directory in
    which hadd is invoked. When empty, a temporary directory is used. The *task* itself is used
    to print and publish messages via its :py:meth:`law.Task.publish_message` and
    :py:meth:`law.Task.publish_step` methods. When *local* is *True*, the input and output
    targets are assumed to be local and the merging is based on their local paths. Otherwise,
    the targets are fetched first and the output target is localized. When *force* is *True*,
    any existing output file is overwritten (by adding the ``-f`` flag to ``hadd``).
    """
    # ensure inputs are targets
    inputs = [
        LocalFileTarget(inp) if isinstance(inp, six.string_types) else inp
        for inp in inputs
    ]

    # ensure output is a target
    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    # default cwd
    if not cwd:
        cwd = LocalDirectoryTarget(is_tmp=True)
    elif isinstance(cwd, six.string_types):
        cwd = LocalDirectoryTarget(cwd)
    cwd.touch()

    # helper to create the hadd cmd
    def hadd_cmd(input_paths, output_path):
        cmd = ["hadd", "-n", "0"]
        if force:
            cmd.append("-f")
        cmd.extend(["-d", cwd.path])
        cmd.append(output_path)
        cmd.extend(input_paths)
        return quote_cmd(cmd)

    if local:
        # when local, there is no need to download inputs
        input_paths = [inp.path for inp in inputs]
        with task.publish_step("merging ...", runtime=True):
            if len(inputs) == 1:
                output.copy_from_local(inputs[0])
            else:
                # merge using hadd
                cmd = hadd_cmd(input_paths, output.path)
                code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
                if code != 0:
                    raise Exception("hadd failed")

        task.publish_message("merged file size: {}".format(
            human_bytes(output.stat.st_size, fmt=True)))
    else:
        # when not local, we need to fetch files first into the cwd
        with task.publish_step("fetching inputs ...", runtime=True):
            def fetch(inp):
                inp.copy_to_local(cwd.child(inp.unique_basename, type="f"), cache=False)
                return inp.unique_basename

            def callback(i):
                task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

            bases = map_verbose(fetch, inputs, every=5, callback=callback)

        # start merging into the localized output
        with output.localize("w", cache=False) as tmp_out:
            with task.publish_step("merging ...", runtime=True):
                if len(bases) == 1:
                    tmp_out.path = cwd.child(bases[0]).path
                else:
                    # merge using hadd
                    cmd = hadd_cmd(bases, tmp_out.path)
                    code = interruptable_popen(cmd, shell=True, executable="/bin/bash",
                        cwd=cwd.path)[0]
                    if code != 0:
                        raise Exception("hadd failed")

            task.publish_message("merged file size: {}".format(
                human_bytes(tmp_out.stat.st_size, fmt=True)))
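# Illustrative sketch (not from the original source) of a run() method body calling hadd_task()
# above; "self" refers to the running task and all file names are hypothetical. With local=True
# the targets are merged via their local paths, without any fetching or output localization.
inputs = [LocalFileTarget(p) for p in ("part0.root", "part1.root")]
output = LocalFileTarget("merged.root")
hadd_task(self, inputs, output, local=True, force=True)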
def output(self):
    base = os.path.basename(self.get_cmssw_path())
    if self.checksum:
        base += ".{}".format(self.checksum)
    return LocalFileTarget("{}.tgz".format(base))
def output(self):
    return LocalFileTarget(self.output_file)