示例#1
0
def get_source(path, basedir=".", wildcards=None, params=None):
    """Fetch the content of a source file and determine its language.

    ``path`` is handled according to its prefix:

    * ``http...`` is opened as-is with ``urlopen``.
    * ``git+file...`` is read through ``git_content`` and its trailing
      ``@<version>`` marker is removed from the returned path.
    * anything else has a leading ``file://`` / ``file:`` removed, is joined
      to ``basedir`` via ``smart_join`` when it is not absolute, and gets a
      ``file://`` prefix if ``is_local_file`` accepts it.

    Args:
        path: Location of the source (see the prefixes above).
        basedir: Base that non-absolute paths are joined to.
        wildcards: Passed to ``format`` together with ``params``; formatting
            only happens when both are not ``None``.
        params: See ``wildcards``.

    Returns:
        A tuple ``(path, source, language)`` where ``path`` is the normalized
        path, ``source`` is the encoded ``git_content`` result or the bytes
        read from the URL, and ``language`` is whatever ``get_language``
        returns for them.
    """
    # NOTE(review): ``format`` presumably resolves placeholders against this
    # frame's variables, so the names bound before that call are kept stable.
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = smart_join(basedir, path, abspath=True)
        if is_local_file(path):
            path = "file://" + path
    if wildcards is not None and params is not None:
        # Format path if wildcards are given.
        path = format(path, wildcards=wildcards, params=params)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        _root_path, _file_path, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat the
        # argument as a set of characters and could also strip trailing
        # characters that belong to the file name.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Only the non-git branches leave the content to be read from a URL.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = get_language(path, source)

    return path, source, language
示例#2
0
def get_source(path, basedir="."):
    """Fetch the content of a script and guess its language from the suffix.

    ``path`` is handled according to its prefix:

    * ``http...`` is opened as-is with ``urlopen``.
    * ``git+file...`` is read through ``git_content`` and its trailing
      ``@<version>`` marker is removed from the returned path.
    * anything else has a leading ``file://`` / ``file:`` removed, is made
      absolute relative to ``basedir`` and is prefixed with ``file://``.

    Args:
        path: Location of the script (see the prefixes above).
        basedir: Directory that relative local paths are resolved against.

    Returns:
        A tuple ``(path, source, language)``. ``language`` is ``"python"``,
        ``"r"``, ``"rmarkdown"`` or ``"julia"`` for the suffixes ``.py``,
        ``.R``, ``.Rmd`` and ``.jl``, otherwise ``None``.
    """
    # NOTE(review): ``format(..., stepout=1)`` presumably resolves
    # placeholders against this frame's variables, so the names bound before
    # that call are kept stable.
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        # NOTE(review): the git content is returned as-is, whereas the URL
        # branch returns whatever response.read() yields; confirm callers
        # accept both.
        source = git_content(path)
        _root_path, _file_path, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat the
        # argument as a set of characters and could also strip trailing
        # characters that belong to the file name.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    # The language is derived from the (version-free) path suffix only.
    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    if source is None:
        # Only the non-git branches leave the content to be read from a URL.
        with urlopen(sourceurl) as response:
            source = response.read()

    return path, source, language
示例#3
0
def get_source(path, basedir="."):
    """Fetch the content of a source file and determine its language.

    ``path`` is handled according to its prefix:

    * ``http...`` is opened as-is with ``urlopen``.
    * ``git+file...`` is read through ``git_content`` and its trailing
      ``@<version>`` marker is removed from the returned path.
    * anything else has a leading ``file://`` / ``file:`` removed, is made
      absolute relative to ``basedir`` and is prefixed with ``file://``.

    Args:
        path: Location of the source (see the prefixes above).
        basedir: Directory that relative local paths are resolved against.

    Returns:
        A tuple ``(path, source, language)`` where ``source`` is the encoded
        ``git_content`` result or the bytes read from the URL, and
        ``language`` is whatever ``get_language`` returns for them.
    """
    # NOTE(review): ``format(..., stepout=1)`` presumably resolves
    # placeholders against this frame's variables, so the names bound before
    # that call are kept stable.
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # TODO this should probably be removed again. It does not work for report and hash!
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        _root_path, _file_path, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat the
        # argument as a set of characters and could also strip trailing
        # characters that belong to the file name.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Only the non-git branches leave the content to be read from a URL.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = get_language(path, source)

    return path, source, language
示例#4
0
    def _open(self, path_or_uri, mode):
        """Open ``path_or_uri`` and return a file-like object.

        Locations whose string form starts with ``git+file:`` are read via
        ``git_content`` and returned as an in-memory bytes buffer (``mode`` is
        not consulted for them). Everything else is delegated to
        ``smart_open.open`` with the given ``mode``.

        Raises:
            WorkflowError: If ``smart_open.open`` raises any ``Exception``;
                the original exception is passed along as second argument.
        """
        from smart_open import open

        is_git_location = str(path_or_uri).startswith("git+file:")
        if is_git_location:
            encoded = git_content(path_or_uri).encode()
            return io.BytesIO(encoded)

        try:
            handle = open(path_or_uri, mode)
        except Exception as err:
            message = "Failed to open source file {}".format(path_or_uri)
            raise WorkflowError(message, err)
        else:
            return handle
示例#5
0
def content(env_file):
    """Return the content of an environment file as bytes.

    Args:
        env_file: One of
            * a ``git+file:`` location, read via ``git_content`` and encoded
              as UTF-8,
            * any string for which ``urlparse`` reports a scheme, read via
              ``urlopen``,
            * a local file path, read in binary mode.

    Returns:
        The file content as ``bytes``.

    Raises:
        WorkflowError: If opening the URL raises ``URLError`` or the local
            file does not exist.
    """
    if env_file.startswith("git+file:"):
        return git_content(env_file).encode('utf-8')
    elif urlparse(env_file).scheme:
        try:
            # Use a context manager so the response is closed after reading
            # instead of being left to the garbage collector.
            with urlopen(env_file) as response:
                return response.read()
        except URLError as e:
            raise WorkflowError("Failed to open environment file {}:".format(env_file), e)
    else:
        if not os.path.exists(env_file):
            raise WorkflowError("Conda env file does not "
                                "exist: {}".format(env_file))
        with open(env_file, 'rb') as f:
            return f.read()
示例#6
0
def get_source(path, basedir="."):
    """Fetch the content of a script or notebook and guess its language.

    ``path`` is handled according to its prefix:

    * ``http...`` is opened as-is with ``urlopen``.
    * ``git+file...`` is read through ``git_content`` and its trailing
      ``@<version>`` marker is removed from the returned path.
    * anything else has a leading ``file://`` / ``file:`` removed, is made
      absolute relative to ``basedir`` and is prefixed with ``file://``.

    Args:
        path: Location of the script (see the prefixes above).
        basedir: Directory that relative local paths are resolved against.

    Returns:
        A tuple ``(path, source, language)``. ``language`` is ``"python"``,
        ``"r"``, ``"rmarkdown"`` or ``"julia"`` for the suffixes ``.py``,
        ``.R``, ``.Rmd`` and ``.jl``; for ``.ipynb`` it is ``"jupyter_"``
        followed by the lower-cased kernel language name taken from the
        notebook metadata; otherwise ``None``.
    """
    import nbformat

    # NOTE(review): ``format(..., stepout=1)`` presumably resolves
    # placeholders against this frame's variables, so the names bound before
    # that call are kept stable.
    source = None
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        # NOTE(review): the git content is returned as-is, whereas the URL
        # branch returns whatever response.read() yields; confirm callers
        # accept both.
        source = git_content(path)
        _root_path, _file_path, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat the
        # argument as a set of characters and could also strip trailing
        # characters that belong to the file name.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Only the non-git branches leave the content to be read from a URL.
        with urlopen(sourceurl) as response:
            source = response.read()

    # The base language is derived from the (version-free) path suffix only.
    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".ipynb"):
        language = "jupyter"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    # detect kernel language for Jupyter Notebooks
    if language == "jupyter":
        nb = nbformat.reads(source, as_version=nbformat.NO_CONVERT)
        kernel_language = nb["metadata"]["language_info"]["name"]

        language += "_" + kernel_language.lower()

    return path, source, language
示例#7
0
def script(path, basedir, input, output, params, wildcards, threads, resources,
           log, config, rulename, conda_env, singularity_img, singularity_args,
           bench_record, jobid, bench_iteration, shadow_dir):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.

    The script source is fetched, a language-specific preamble that defines a
    ``snakemake`` object is put in front of it (for RMarkdown: inserted after
    the document header), the result is written to a temporary file below
    ``.snakemake/scripts`` and that file is executed through ``shell``. The
    temporary file is removed afterwards.

    Args:
        path: Script location; ``http...`` and ``git+file...`` are handled
            specially, everything else is treated as a local path (an optional
            ``file:`` / ``file://`` prefix is removed).
        basedir: Directory that relative local paths are resolved against.
        input, output, params, wildcards, threads, resources, log, config,
        rulename, bench_iteration: Job properties exposed to the script via
            the ``snakemake`` object.
        conda_env: Conda environment prefix or ``None``; for Python scripts
            its interpreter is used if it exists and is new enough.
        singularity_img: If not ``None``, ``python`` from the image is used
            and the module search path points to the container mount point.
        bench_record: Passed on to ``shell``.
        singularity_args, jobid, shadow_dir: Not used in this function body.

    Raises:
        ValueError: If the path does not end in ``.py``, ``.R`` or ``.Rmd``.
        WorkflowError: If fetching the source raises ``URLError`` or an
            RMarkdown script does not have exactly one output file.
    """
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        # Materialize the versioned file below .snakemake/wrappers so that it
        # can be opened through a file: URL like any local script.
        (_root_path, file_path, version) = split_git_path(path)
        wrapper_dir = ".snakemake/wrappers"
        os.makedirs(wrapper_dir, exist_ok=True)
        new_path = os.path.join(wrapper_dir,
                                version + "-" + "-".join(file_path.split("/")))
        with open(new_path, 'w') as wrapper:
            wrapper.write(git_content(path))
            sourceurl = "file:" + new_path
            # Remove exactly the "@<version>" suffix. str.rstrip() would treat
            # the argument as a set of characters and could also strip
            # trailing characters that belong to the file name.
            version_suffix = "@" + version
            if path.endswith(version_suffix):
                path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    # NOTE(review): the command templates handed to shell() below reference
    # the local names ``py_exec``, ``f`` and ``out``; keep them unchanged.
    f = None
    try:
        with urlopen(sourceurl) as source:
            if path.endswith(".py"):
                wrapper_path = path[7:] if path.startswith("file://") else path
                snakemake = Snakemake(input, output, params, wildcards,
                                      threads, resources, log, config,
                                      rulename, bench_iteration,
                                      os.path.dirname(wrapper_path))
                # NOTE: the pickled object is embedded in the generated script
                # and unpickled there.
                snakemake = pickle.dumps(snakemake)
                # Obtain search path for current snakemake module.
                # The module is needed for unpickling in the script.
                # We append it at the end (as a fallback).
                searchpath = SNAKEMAKE_SEARCHPATH
                if singularity_img is not None:
                    searchpath = singularity.SNAKEMAKE_MOUNTPOINT
                searchpath = '"{}"'.format(searchpath)
                # For local scripts, add their location to the path in case they use path-based imports
                if path.startswith("file://"):
                    searchpath += ', "{}"'.format(os.path.dirname(path[7:]))
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                import sys; sys.path.extend([{searchpath}]); import pickle; snakemake = pickle.loads({snakemake}); from snakemake.logging import logger; logger.printshellcmds = {printshellcmds}; __real_file__ = __file__; __file__ = {file_override};
                ######## Original script #########
                """).format(searchpath=escape_backslash(searchpath),
                            snakemake=snakemake,
                            printshellcmds=logger.printshellcmds,
                            file_override=repr(os.path.realpath(wrapper_path)))
            elif path.endswith(".R") or path.endswith(".Rmd"):
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                library(methods)
                Snakemake <- setClass(
                    "Snakemake",
                    slots = c(
                        input = "list",
                        output = "list",
                        params = "list",
                        wildcards = "list",
                        threads = "numeric",
                        log = "list",
                        resources = "list",
                        config = "list",
                        rule = "character",
                        bench_iteration = "numeric",
                        scriptdir = "character",
                        source = "function"
                    )
                )
                snakemake <- Snakemake(
                    input = {},
                    output = {},
                    params = {},
                    wildcards = {},
                    threads = {},
                    log = {},
                    resources = {},
                    config = {},
                    rule = {},
                    bench_iteration = {},
                    scriptdir = {},
                    source = function(...){{
                        wd <- getwd()
                        setwd(snakemake@scriptdir)
                        source(...)
                        setwd(wd)
                    }}
                )

                ######## Original script #########
                """).format(
                    REncoder.encode_namedlist(input),
                    REncoder.encode_namedlist(output),
                    REncoder.encode_namedlist(params),
                    REncoder.encode_namedlist(wildcards), threads,
                    REncoder.encode_namedlist(log),
                    REncoder.encode_namedlist({
                        name: value
                        for name, value in resources.items()
                        if name != "_cores" and name != "_nodes"
                    }), REncoder.encode_dict(config),
                    REncoder.encode_value(rulename),
                    REncoder.encode_numeric(bench_iteration),
                    REncoder.encode_value(
                        os.path.dirname(path[7:]) if path.
                        startswith("file://") else os.path.dirname(path)))
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py), R (.R) or RMarkdown (.Rmd) script."
                )

            scripts_dir = ".snakemake/scripts"
            os.makedirs(scripts_dir, exist_ok=True)

            # delete=False: the file has to outlive this ``with`` block because
            # it is executed below; it is removed in the ``finally`` clause.
            with tempfile.NamedTemporaryFile(suffix="." +
                                             os.path.basename(path),
                                             dir=scripts_dir,
                                             delete=False) as f:
                if not path.endswith(".Rmd"):
                    f.write(preamble.encode())
                    f.write(source.read())
                else:
                    # Insert Snakemake object after the RMarkdown header
                    code = source.read().decode()
                    # Position right behind the dashes of the second "---\n"
                    # occurrence in the document.
                    pos = next(islice(re.finditer(r"---\n", code), 1,
                                      2)).start() + 3
                    f.write(str.encode(code[:pos]))
                    preamble = textwrap.dedent("""
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}
                        %s
                        ```
                        """ % preamble)
                    f.write(preamble.encode())
                    f.write(str.encode(code[pos:]))

            if path.endswith(".py"):
                py_exec = sys.executable
                if conda_env is not None:
                    py = os.path.join(conda_env, "bin", "python")
                    if os.path.exists(py):
                        out = subprocess.check_output([py, "--version"],
                                                      stderr=subprocess.STDOUT,
                                                      universal_newlines=True)
                        ver = tuple(
                            map(
                                int,
                                PY_VER_RE.match(out).group("ver_min").split(
                                    ".")))
                        if ver >= MIN_PY_VERSION:
                            # Python version is new enough, make use of environment
                            # to execute script
                            py_exec = "python"
                        else:
                            logger.warning(
                                "Conda environment defines Python "
                                "version < {0}.{1}. Using Python of the "
                                "master process to execute "
                                "script. Note that this cannot be avoided, "
                                "because the script uses data structures from "
                                "Snakemake which are Python >={0}.{1} "
                                "only.".format(*MIN_PY_VERSION))
                if singularity_img is not None:
                    # use python from image
                    py_exec = "python"
                # use the same Python as the running process or the one from the environment
                shell("{py_exec} {f.name:q}", bench_record=bench_record)
            elif path.endswith(".R"):
                if conda_env is not None and "R_LIBS" in os.environ:
                    logger.warning("R script job uses conda environment but "
                                   "R_LIBS environment variable is set. This "
                                   "is likely not intended, as R_LIBS can "
                                   "interfere with R packages deployed via "
                                   "conda. Consider running `unset R_LIBS` or "
                                   "remove it entirely before executing "
                                   "Snakemake.")
                shell("Rscript --vanilla {f.name:q}",
                      bench_record=bench_record)
            elif path.endswith(".Rmd"):
                if len(output) != 1:
                    raise WorkflowError(
                        "RMarkdown scripts (.Rmd) may only have a single output file."
                    )
                out = os.path.abspath(output[0])
                shell(
                    "Rscript --vanilla -e 'rmarkdown::render(\"{f.name}\", output_file=\"{out}\", quiet=TRUE, knit_root_dir = \"{workdir}\", params = list(rmd=\"{f.name}\"))'",
                    bench_record=bench_record,
                    workdir=os.getcwd())

    except URLError as e:
        raise WorkflowError(e)
    finally:
        # Remove the generated script, whether or not execution succeeded.
        if f:
            os.remove(f.name)