Пример #1
0
    def open(self, path, mode, cache=None, **kwargs):
        """
        Generator that gives access to the remote file at *path* through a local copy.
        NOTE(review): presumably wrapped into a context manager by a decorator that is
        not part of this block - confirm at the definition site.

        When *cache* is *None*, caching is used if ``self.cache`` is set. A truthy *cache*
        is ignored when ``self.cache`` is *None*.

        In *mode* ``"r"``, the remote file is copied to a local path (the cached copy, or a
        temporary local target when caching is off) and a file object opened for reading is
        yielded. In *mode* ``"w"``, a file object opened for writing on a temporary local
        target is yielded and, when that target exists after the consumer is done, it is
        copied to *path*. When the keyword argument *_yield_path* is true, the local path is
        yielded instead of a file object. All remaining *kwargs* are forwarded to
        ``_cached_copy``.

        The yielded file object is closed in all cases, also when the consumer raises.
        An *Exception* is raised for any *mode* other than ``"r"`` or ``"w"``.
        """
        if cache is None:
            cache = self.cache is not None
        elif cache and self.cache is None:
            cache = False

        path = self.abspath(path)

        yield_path = kwargs.pop("_yield_path", False)

        if mode == "r":
            if cache:
                lpath = self._cached_copy(path, None, cache=True, **kwargs)
                lpath = remove_scheme(lpath)
            else:
                tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
                lpath = tmp.path

                self._cached_copy(path,
                                  add_scheme(lpath, "file"),
                                  cache=False,
                                  **kwargs)
            try:
                if yield_path:
                    yield lpath
                else:
                    # the with block closes the file even when the consumer raises
                    with open(lpath, "r") as f:
                        yield f
            finally:
                if not cache:
                    # drop the reference to the temporary target
                    del tmp

        elif mode == "w":
            tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
            lpath = tmp.path

            try:
                if yield_path:
                    yield lpath
                else:
                    # the with block closes the file even when the consumer raises, in
                    # which case the copy below is skipped as the exception propagates
                    with open(lpath, "w") as f:
                        yield f

                if tmp.exists():
                    self._cached_copy(add_scheme(lpath, "file"),
                                      path,
                                      cache=cache,
                                      **kwargs)
            finally:
                del tmp

        else:
            raise Exception("unknown mode {}, use r or w".format(mode))
Пример #2
0
    def open(self, path, mode, cache=None, **kwargs):
        """
        Provide access to the remote file at *path* through a local copy, wrapped in a
        ``RemoteFileProxy``.

        When *cache* is *None*, caching is used if ``self.cache`` is set. A truthy *cache*
        is ignored when ``self.cache`` is *None*. In *mode* ``"r"``, the remote file is
        copied to a local path first (the cached copy, or a temporary local target). In
        *mode* ``"w"``, a temporary local target is created and the proxy's success callback
        copies it to *path* when it exists. The temporary target is removed by the callbacks
        handed to the proxy. When the keyword argument *_yield_path* is true, the proxy
        wraps the local path instead of an opened file. Remaining *kwargs* are forwarded to
        ``_cached_copy``. Any other *mode* raises an *Exception*.
        """
        if cache is None:
            cache = self.cache is not None
        elif cache and self.cache is None:
            cache = False

        yield_path = kwargs.pop("_yield_path", False)

        path = self.abspath(path)
        tmp = None

        if mode not in ("r", "w"):
            raise Exception("unknown mode {}, use r or w".format(mode))

        if mode == "r":
            if not cache:
                # no cache, download into a temporary local target
                tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
                local_path = tmp.path
                self._cached_copy(path, add_scheme(local_path, "file"), cache=False, **kwargs)
            else:
                cached_uri = self._cached_copy(path, None, cache=True, **kwargs)
                local_path = remove_scheme(cached_uri)

            def remove_tmp():
                # only the uncached case created a temporary target
                if cache:
                    return
                if tmp.exists():
                    tmp.remove()

            if yield_path:
                handle = local_path
            else:
                handle = open(local_path, "r")
            return RemoteFileProxy(handle, success_fn=remove_tmp, failure_fn=remove_tmp)

        # write mode
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        local_path = tmp.path

        def remove_tmp():
            if tmp.exists():
                tmp.remove()

        def upload_and_remove_tmp():
            try:
                if tmp.exists():
                    self._cached_copy(add_scheme(local_path, "file"), path, cache=cache, **kwargs)
            finally:
                remove_tmp()

        if yield_path:
            handle = local_path
        else:
            handle = open(local_path, "w")
        return RemoteFileProxy(handle, success_fn=upload_and_remove_tmp, failure_fn=remove_tmp)
Пример #3
0
    def open(self, path, mode, perm=None, dir_perm=None, cache=None, **kwargs):
        """
        Provide access to the remote file at *path* through a local copy, wrapped in a
        ``RemoteFileProxy``.

        Caching is disabled when ``self.cache`` is *None*. Otherwise *cache* defaults to
        ``self.use_cache`` when *None* and is converted to a bool when given. A *mode*
        starting with ``"r"`` copies the remote file to a local path first (the cached copy,
        or a temporary local target). Any other *mode* is treated as write or update: a
        temporary local target is opened and the proxy's success callback copies it to
        *path* with *perm* and *dir_perm* when it exists. When the keyword argument
        *_yield_path* is true, the proxy wraps the local path instead of an opened file.
        Remaining *kwargs* are forwarded to the copy calls.
        """
        if self.cache is None:
            cache = False
        elif cache is None:
            cache = self.use_cache
        else:
            cache = bool(cache)

        yield_path = kwargs.pop("_yield_path", False)
        path = self.abspath(path)
        tmp = None
        is_read = mode.startswith("r")

        if is_read:
            if cache:
                local_uri = self._cached_copy(path, None, cache=cache, **kwargs)
            else:
                tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
                local_uri = self.copy(path, tmp.uri(), cache=cache, **kwargs)
            local_path = remove_scheme(local_uri)

            def remove_tmp():
                # a temporary target only exists in the uncached case
                if not cache and tmp and tmp.exists():
                    tmp.remove()

            if yield_path:
                handle = local_path
            else:
                handle = open(local_path, mode)
            return RemoteFileProxy(handle, success_fn=remove_tmp, failure_fn=remove_tmp)

        # write or update
        tmp = LocalFileTarget(is_tmp=self.ext(path, n=0) or True)
        local_path = tmp.path

        def discard_tmp():
            tmp.remove(silent=True)

        def upload_and_discard_tmp():
            # stays true when the existence check itself fails, so removal is still tried
            tmp_exists = True
            try:
                tmp_exists = tmp.exists()
                if tmp_exists:
                    self.copy(tmp.uri(), path, perm=perm, dir_perm=dir_perm, cache=cache,
                              **kwargs)
            finally:
                if tmp_exists:
                    tmp.remove(silent=True)

        if yield_path:
            handle = local_path
        else:
            handle = open(local_path, mode)
        return RemoteFileProxy(handle, success_fn=upload_and_discard_tmp, failure_fn=discard_tmp)
Пример #4
0
    def env(self):
        """
        Return the environment variables of the docker image ``self.image`` as loaded from a
        pickle file written inside a container. The result is stored in ``self._envs`` per
        image, so the container is only started when no entry exists yet.

        Strategy: a temporary file is created and mounted into a container, python inside
        the container dumps its full environment into it, and the file is loaded afterwards.
        An *Exception* is raised when the docker command exits with a non-zero code.
        """
        image = self.image
        if image in self._envs:
            return self._envs[image]

        # temporary file on the host that receives the dumped environment
        tmp = LocalFileTarget(is_tmp=".env")
        tmp.touch()

        # location of that file inside the container
        env_file = os.path.join("/tmp", tmp.unique_basename)

        # docker run command with the env file mounted
        run_cmd = self._docker_run_cmd()
        run_cmd.extend(["-v", "{}:{}".format(tmp.path, env_file)])

        # commands that set up the environment
        setup_cmds = self._build_setup_cmds(self._get_env())

        # python command that dumps the environment
        dump_stmt = "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_file)
        py_cmd = "import os,pickle;" + dump_stmt

        # full command
        bash_script = "; ".join(flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd])))
        cmd = quote_cmd(run_cmd + [self.image, "bash", "-l", "-c", bash_script])

        # run it, stderr is merged into the captured stdout
        result = interruptable_popen(cmd,
                                     shell=True,
                                     executable="/bin/bash",
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT)
        code, out, _ = result
        if code != 0:
            raise Exception("docker sandbox env loading failed:\n{}".format(out))

        # load the environment from the tmp file and cache it
        self._envs[self.image] = tmp.load(formatter="pickle")

        return self._envs[self.image]
Пример #5
0
def log(fn, opts, task, *args, **kwargs):
    """ log()
    Wraps a bound method of a task and redirects output of both stdout and stderr to the log
    file of the task, obtained via ``get_param`` from its *log_file* and *default_log_file*
    attributes. If the value is ``"-"`` or empty, the output is not redirected and *fn* is
    called directly.

    While redirected, output is written to a ``TeeStream`` built from the log file (opened in
    append mode, line-buffered) and ``sys.__stdout__``. When *fn* raises, the traceback is
    written to that stream and the exception is re-raised. The streams that were active
    before the call are restored afterwards, so an outer redirection stays intact. Returns
    the return value of *fn*.
    """
    _task = get_task(task)
    log_file = get_param(_task.log_file, _task.default_log_file)

    if log_file == "-" or not log_file:
        return fn(task, *args, **kwargs)

    # use the local target functionality to create the parent directory
    LocalFileTarget(log_file).parent.touch()
    with open(log_file, "a", 1) as f:
        tee = TeeStream(f, sys.__stdout__)
        # remember the current streams, they might already be redirected by a caller
        prev_stdout, prev_stderr = sys.stdout, sys.stderr
        sys.stdout = tee
        sys.stderr = tee
        try:
            ret = fn(task, *args, **kwargs)
        except Exception:
            traceback.print_exc(file=tee)
            # bare raise keeps the original traceback
            raise
        finally:
            sys.stdout = prev_stdout
            sys.stderr = prev_stderr
            tee.flush()
    return ret
Пример #6
0
 def get_source_target(self):
     """
     Return the target to read the source from: the task's input when *source_path* is
     unset (``NO_STR`` or *None*), otherwise a local file target around *source_path*.
     """
     if self.source_path in (NO_STR, None):
         # no explicit path, fall back to whatever self.input() provides
         return self.input()
     return LocalFileTarget(self.source_path)
Пример #7
0
    def localize(self, mode="r", perm=None, parent_perm=None, **kwargs):
        """
        Generator that yields a local file target standing in for this target.

        In *mode* ``"r"``, the file is opened through ``self.fs.open`` with *_yield_path*
        set and a local target around the yielded path is provided. In *mode* ``"w"``, a
        temporary local target is yielded and, when it exists afterwards, it is copied back
        via ``copy_from_local`` (with *parent_perm* passed as *dir_perm*) followed by
        ``chmod(perm)``. A warning is logged when it does not exist. *kwargs* are forwarded
        to the open or copy call. Any other *mode* raises an *Exception*.
        """
        if mode not in ("r", "w"):
            raise Exception("unknown mode '{}', use r or w".format(mode))

        if mode == "r":
            with self.fs.open(self.path, "r", _yield_path=True, **kwargs) as local_path:
                yield LocalFileTarget(local_path)
            return

        # write mode
        local_target = LocalFileTarget(is_tmp=self.ext() or True)
        try:
            yield local_target

            if not local_target.exists():
                logger.warning("cannot move non-existing localized file target {!r}".format(
                    self))
            else:
                self.copy_from_local(local_target, dir_perm=parent_perm, **kwargs)
                self.chmod(perm)
        finally:
            del local_target
Пример #8
0
 def localize(self, path=None, is_tmp=True, skip_parent=False, mode=0o0660, cache=True,
              retry=None):
     """
     Generator that yields a local file target created from *path* and *is_tmp*. After the
     consumer is done, the parent of this target is touched unless *skip_parent* is set,
     the local target is transferred with ``put`` (using *cache* and *retry*), and *mode*
     is applied with ``chmod``.
     """
     local_target = LocalFileTarget(path, is_tmp=is_tmp)
     try:
         yield local_target

         # create the parent first unless explicitly skipped
         if not skip_parent:
             self.parent.touch()

         self.put(local_target, cache=cache, retry=retry)
         self.chmod(mode, retry=retry)
     finally:
         del local_target
Пример #9
0
    def localize(self, mode="r", perm=None, dir_perm=None, tmp_dir=None, **kwargs):
        """
        Generator that yields a local file target standing in for this target.

        In *mode* ``"r"``, the file is opened through ``self.fs.open`` with *_yield_path*
        set and a local target around the yielded path is provided. In *mode* ``"w"`` or
        ``"a"``, a temporary local target (optionally placed in *tmp_dir*) is yielded. In
        append mode, this target is copied into it first when it exists. When the temporary
        target exists afterwards, it is copied back via ``copy_from_local`` with *perm* and
        *dir_perm*, otherwise a warning is logged. The temporary target is removed in all
        cases. Any other *mode* raises an *Exception*.
        """
        if mode not in ("r", "w", "a"):
            raise Exception("unknown mode '{}', use 'r', 'w' or 'a'".format(mode))

        logger.debug("localizing file target {!r} with mode '{}'".format(self, mode))

        if mode == "r":
            with self.fs.open(self.path, "r", _yield_path=True, perm=perm,
                              **kwargs) as local_path:
                yield LocalFileTarget(local_path)
            return

        # mode "w" or "a"
        local_target = LocalFileTarget(is_tmp=self.ext(n=1) or True, tmp_dir=tmp_dir)

        # appending starts from the current content, so fetch it first
        if mode == "a" and self.exists():
            self.copy_to_local(local_target)

        try:
            yield local_target

            if local_target.exists():
                self.copy_from_local(local_target, perm=perm, dir_perm=dir_perm, **kwargs)
            else:
                logger.warning(
                    "cannot move non-existing localized file target {!r}".format(self))
        finally:
            local_target.remove()
Пример #10
0
 def output(self):
     """
     Return a local file target in the ``gridpacks`` directory whose name consists of the
     basename of *repo_path*, the current date (two-digit year, month, day) and *checksum*.
     """
     repo_name = os.path.basename(self.repo_path)
     date_tag = datetime.now().strftime('%y-%m-%d')
     file_name = "gridpacks/{}_{}_{}.tgz".format(repo_name, date_tag, self.checksum)
     return LocalFileTarget(file_name)
Пример #11
0
 def output(self):
     """
     Return a local file target named after the basename of the repository path and
     *checksum*, with a ``.tgz`` extension.
     """
     file_name = "{}_{}.tgz".format(os.path.basename(self.get_repo_path()), self.checksum)
     return LocalFileTarget(file_name)
Пример #12
0
def merge_parquet_task(task,
                       inputs,
                       output,
                       local=False,
                       cwd=None,
                       force=True,
                       writer_opts=None):
    """
    Helper for tasks that merge parquet files, e.g. when inheriting from
    :py:class:`law.contrib.tasks.MergeCascade`. *inputs* is a sequence of targets (or path
    strings, which are wrapped into local file targets) whose files are merged into *output*
    (a target or a path string).

    When *local* is *True*, the merge operates directly on the given targets. Otherwise, the
    inputs are copied into *cwd* first, which can be a directory target or a path. When
    empty, a temporary directory is used. The merge is then written into a localized version
    of *output*. The *task* is used to publish messages via its
    :py:meth:`law.Task.publish_message` and :py:meth:`law.Task.publish_step` methods.

    When *force* is *True*, an existing output file is removed before merging. A single input
    is copied to the output instead of being merged. *writer_opts* is forwarded to
    :py:func:`merge_parquet_files`, which performs the actual merging.
    """
    # wrap path strings into local file targets
    targets = []
    for inp in inputs:
        if isinstance(inp, six.string_types):
            inp = LocalFileTarget(inp)
        targets.append(inp)
    inputs = targets

    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    def run_merge(sources, destination):
        step_msg = "merging {} parquet files ...".format(len(sources))
        with task.publish_step(step_msg, runtime=True):
            # clear the destination if necessary
            if destination.exists() and force:
                destination.remove()

            if len(sources) == 1:
                # nothing to merge, a plain copy is sufficient
                destination.copy_from_local(sources[0])
            else:
                source_paths = [src.path for src in sources]
                merge_parquet_files(source_paths, destination.path, writer_opts=writer_opts)

        # print the size
        output_size = human_bytes(destination.stat().st_size, fmt=True)
        task.publish_message(f"merged file size: {output_size}")

    if local:
        # everything is local, just merge
        run_merge(inputs, output)
        return

    # when not local, files are fetched into the cwd first
    if not cwd:
        cwd = LocalDirectoryTarget(is_tmp=True)
    elif isinstance(cwd, str):
        cwd = LocalDirectoryTarget(cwd)
    cwd.touch()

    def fetch(inp):
        fetched = cwd.child(inp.unique_basename, type="f")
        inp.copy_to_local(fetched, cache=False)
        return fetched

    def callback(i):
        task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

    with task.publish_step("fetching inputs ...", runtime=True):
        local_inputs = map_verbose(fetch, inputs, every=5, callback=callback)

    # merge into a localized output
    with output.localize("w", cache=False) as local_output:
        run_merge(local_inputs, local_output)
Пример #13
0
def hadd_task(task, inputs, output, cwd=None, local=False, force=True):
    """
    This method is intended to be used by tasks that are supposed to merge root files, e.g. when
    inheriting from :py:class:`law.contrib.tasks.MergeCascade`. *inputs* should be a sequence of
    targets (or path strings, which are wrapped into local file targets) that represent the
    files to merge into *output*. *cwd* is the working directory in which hadd is invoked. When
    empty, a temporary directory is used. The *task* itself is used to print and publish
    messages via its :py:meth:`law.Task.publish_message` and :py:meth:`law.Task.publish_step`
    methods.

    When *local* is *True*, the input and output targets are assumed to be local and the merging is
    based on their local paths. Otherwise, the targets are fetched first and the output target is
    localized. In both cases, a single input is not passed to ``hadd`` but used as the output
    directly, and the size of the merged file is published afterwards.

    When *force* is *True*, any existing output file is overwritten (by adding the ``-f`` flag to
    ``hadd``). An *Exception* is raised when ``hadd`` exits with a non-zero code.
    """
    # ensure inputs are targets
    inputs = [
        LocalFileTarget(inp) if isinstance(inp, six.string_types) else inp
        for inp in inputs
    ]

    # ensure output is a target
    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    # default cwd
    if not cwd:
        cwd = LocalDirectoryTarget(is_tmp=True)
    elif isinstance(cwd, six.string_types):
        cwd = LocalDirectoryTarget(cwd)
    cwd.touch()

    # helper to create the hadd cmd
    def hadd_cmd(input_paths, output_path):
        cmd = ["hadd", "-n", "0"]
        if force:
            cmd.append("-f")
        cmd.extend(["-d", cwd.path])
        cmd.append(output_path)
        cmd.extend(input_paths)
        return quote_cmd(cmd)

    if local:
        # when local, there is no need to download inputs
        input_paths = [inp.path for inp in inputs]

        with task.publish_step("merging ...", runtime=True):
            if len(inputs) == 1:
                output.copy_from_local(inputs[0])
            else:
                # merge using hadd
                cmd = hadd_cmd(input_paths, output.path)
                code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
                if code != 0:
                    raise Exception("hadd failed")

        task.publish_message("merged file size: {}".format(human_bytes(
            output.stat.st_size, fmt=True)))

    else:
        # when not local, we need to fetch files first into the cwd
        with task.publish_step("fetching inputs ...", runtime=True):
            def fetch(inp):
                inp.copy_to_local(cwd.child(inp.unique_basename, type="f"), cache=False)
                return inp.unique_basename

            def callback(i):
                task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

            bases = map_verbose(fetch, inputs, every=5, callback=callback)

        # start merging into the localized output
        with output.localize("w", cache=False) as tmp_out:
            with task.publish_step("merging ...", runtime=True):
                if len(bases) == 1:
                    # a single fetched file is used as the localized output as is
                    tmp_out.path = cwd.child(bases[0]).path
                else:
                    # merge using hadd, input paths are relative to the cwd
                    cmd = hadd_cmd(bases, tmp_out.path)
                    code = interruptable_popen(cmd, shell=True, executable="/bin/bash",
                        cwd=cwd.path)[0]
                    if code != 0:
                        raise Exception("hadd failed")

            # report the size for single-file merges as well, as done in the local case;
            # this has to happen while the localized output is still available
            task.publish_message("merged file size: {}".format(human_bytes(
                tmp_out.stat.st_size, fmt=True)))
Пример #14
0
 def output(self):
     """
     Return a local file target named after the basename of the CMSSW path with a ``.tgz``
     extension. When *checksum* is set, it is inserted before the extension, separated by
     a dot, i.e. ``<basename>.<checksum>.tgz``.
     """
     base = os.path.basename(self.get_cmssw_path())
     if self.checksum:
         # the dot goes in front of the checksum, the extension below brings its own
         base += ".{}".format(self.checksum)
     return LocalFileTarget("{}.tgz".format(base))
Пример #15
0
 def output(self):
     """Return a local file target located at *output_file*."""
     target = LocalFileTarget(self.output_file)
     return target