def infer_source_file(path_or_uri, basedir: SourceFile = None):
    """Turn a path, URI or source file marker into a source file object.

    Args:
        path_or_uri: A ``str``, a ``pathlib.Path`` or a ``SourceFile``.
        basedir: Optional source file that relative local paths are joined
            onto via ``basedir.join``.

    Returns:
        * the given ``SourceFile`` unchanged if no ``basedir`` is given or
          if it is a ``HostingProviderFile``;
        * ``basedir.join(path)`` for a relative local path with a basedir;
        * a ``LocalSourceFile`` for any other local path;
        * a ``LocalGitFile`` for ``git+file:`` URIs;
        * a ``GenericSourceFile`` for everything else.

    Raises:
        SourceFileError: If the input is neither a string, a ``Path`` nor a
            ``SourceFile``.
        WorkflowError: If a ``git+file:`` URI cannot be split into
            repository root, file path and ref.
    """
    if isinstance(path_or_uri, SourceFile):
        keep_as_is = basedir is None or isinstance(
            path_or_uri, HostingProviderFile
        )
        if keep_as_is:
            return path_or_uri
        # Re-interpret the marker's underlying path relative to basedir.
        path_or_uri = path_or_uri.get_path_or_uri()

    if isinstance(path_or_uri, Path):
        path_or_uri = str(path_or_uri)

    if not isinstance(path_or_uri, str):
        raise SourceFileError(
            "must be given as Python string or one of the predefined source file marker types (see docs)"
        )

    if not is_local_file(path_or_uri):
        if not path_or_uri.startswith("git+file:"):
            # something else
            return GenericSourceFile(path_or_uri)
        try:
            root_path, file_path, ref = split_git_path(path_or_uri)
        except Exception as e:
            raise WorkflowError(
                f"Failed to read source {path_or_uri} from git repo.", e)
        return LocalGitFile(root_path, file_path, ref=ref)

    # Either a local file or a path relative to some (possibly remote)
    # basedir: drop an explicit file scheme first, longest prefix first.
    if path_or_uri.startswith("file://"):
        path_or_uri = path_or_uri[len("file://"):]
    elif path_or_uri.startswith("file:"):
        path_or_uri = path_or_uri[len("file:"):]

    if os.path.isabs(path_or_uri) or basedir is None:
        return LocalSourceFile(path_or_uri)
    return basedir.join(path_or_uri)
def get_source(path, basedir=".", wildcards=None, params=None):
    """Fetch the content of a script and determine its language.

    Args:
        path: Location of the script. Anything not starting with ``http``
            or ``git+file`` has a leading ``file://`` / ``file:`` stripped
            and, if not absolute, is joined onto ``basedir``.
        basedir: Base directory (or base location) for relative paths.
        wildcards: Passed to ``format`` together with ``params``; the path
            is only formatted when both are given.
        params: See ``wildcards``.

    Returns:
        Tuple ``(path, source, language)``: the normalized path (for
        ``git+file`` paths without the trailing ``@<version>``), the
        content read from it, and the result of
        ``get_language(path, source)``.
    """
    source = None
    if not path.startswith(("http", "git+file")):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = smart_join(basedir, path, abspath=True)
        if is_local_file(path):
            path = "file://" + path
    if wildcards is not None and params is not None:
        # Format path if wildcards are given.
        path = format(path, wildcards=wildcards, params=params)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        _, _, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat
        # the argument as a set of characters and could also eat trailing
        # characters of the file name itself.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Not obtained from git above, so sourceurl is set: read it.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = get_language(path, source)
    return path, source, language
def get_source(path, basedir="."):
    """Fetch the content of a script and guess its language from the suffix.

    Args:
        path: Location of the script. Anything not starting with ``http``
            or ``git+file`` has a leading ``file://`` / ``file:`` stripped,
            is made absolute relative to ``basedir`` and turned into a
            ``file://`` path.
        basedir: Directory that relative paths are resolved against.

    Returns:
        Tuple ``(path, source, language)`` where ``language`` is one of
        ``"python"``, ``"r"``, ``"rmarkdown"``, ``"julia"`` or ``None`` for
        an unknown suffix.
    """
    source = None
    if not path.startswith(("http", "git+file")):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # `format` is a project helper here (the builtin has no `stepout`).
    # NOTE(review): it presumably resolves placeholders from this frame, so
    # do not add or rename locals above this call - confirm.
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        # NOTE(review): returned as-is here, whereas the URL branch below
        # returns whatever urlopen(...).read() yields - confirm callers
        # cope with both.
        source = git_content(path)
        _, _, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat
        # the argument as a set of characters and could also eat trailing
        # characters of the file name itself.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    if source is None:
        # Not obtained from git above, so sourceurl is set: read it.
        with urlopen(sourceurl) as response:
            source = response.read()
    return path, source, language
def get_source(path, basedir="."):
    """Fetch the content of a script and determine its language.

    Args:
        path: Location of the script. Anything not starting with ``http``
            or ``git+file`` has a leading ``file://`` / ``file:`` stripped,
            is made absolute relative to ``basedir`` and turned into a
            ``file://`` path.
        basedir: Directory that relative paths are resolved against.

    Returns:
        Tuple ``(path, source, language)``: the normalized path (for
        ``git+file`` paths without the trailing ``@<version>``), the
        content read from it, and the result of
        ``get_language(path, source)``.
    """
    source = None
    if not path.startswith(("http", "git+file")):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # TODO this should probably be removed again. It does not work for report and hash!
    # `format` is a project helper here (the builtin has no `stepout`).
    # NOTE(review): it presumably resolves placeholders from this frame, so
    # do not add or rename locals above this call - confirm.
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        source = git_content(path).encode()
        _, _, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat
        # the argument as a set of characters and could also eat trailing
        # characters of the file name itself.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Not obtained from git above, so sourceurl is set: read it.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = get_language(path, source)
    return path, source, language
def get_source(path, basedir="."):
    """Fetch the content of a script or notebook and guess its language.

    Args:
        path: Location of the script. Anything not starting with ``http``
            or ``git+file`` has a leading ``file://`` / ``file:`` stripped,
            is made absolute relative to ``basedir`` and turned into a
            ``file://`` path.
        basedir: Directory that relative paths are resolved against.

    Returns:
        Tuple ``(path, source, language)``. ``language`` is ``"python"``,
        ``"r"``, ``"rmarkdown"``, ``"julia"``, ``"jupyter_<kernel>"`` for
        ``.ipynb`` files (kernel language name in lower case), or ``None``
        for an unknown suffix.
    """
    source = None
    if not path.startswith(("http", "git+file")):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # `format` is a project helper here (the builtin has no `stepout`).
    # NOTE(review): it presumably resolves placeholders from this frame, so
    # do not add or rename locals above this call - confirm.
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        # NOTE(review): returned as-is here, whereas the URL branch below
        # returns whatever urlopen(...).read() yields - confirm callers
        # cope with both.
        source = git_content(path)
        _, _, version = split_git_path(path)
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat
        # the argument as a set of characters and could also eat trailing
        # characters of the file name itself.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    if source is None:
        # Not obtained from git above, so sourceurl is set: read it.
        with urlopen(sourceurl) as response:
            source = response.read()

    language = None
    if path.endswith(".py"):
        language = "python"
    elif path.endswith(".ipynb"):
        language = "jupyter"
    elif path.endswith(".R"):
        language = "r"
    elif path.endswith(".Rmd"):
        language = "rmarkdown"
    elif path.endswith(".jl"):
        language = "julia"

    # detect kernel language for Jupyter Notebooks
    if language == "jupyter":
        # Imported lazily so that nbformat is only needed for notebooks.
        import nbformat

        nb = nbformat.reads(source, as_version=nbformat.NO_CONVERT)
        kernel_language = nb["metadata"]["language_info"]["name"]
        language += "_" + kernel_language.lower()

    return path, source, language
def script(path, basedir, input, output, params, wildcards, threads, resources,
           log, config, rulename, conda_env, singularity_img, singularity_args,
           bench_record, jobid, bench_iteration, shadow_dir):
    """
    Load a script from the given basedir + path and execute it.
    Supports Python 3 and R.

    The script source is fetched, prefixed with a generated preamble that
    defines a ``snakemake`` object, written to a temporary file below
    ``.snakemake/scripts`` and executed via ``shell``. The temporary file is
    removed afterwards.

    Args:
        path: Script location (local path, ``file:``, ``http...`` or
            ``git+file...``). Relative local paths are resolved against
            ``basedir``.
        basedir: Directory that relative paths are resolved against.
        input, output, params, wildcards, threads, resources, log, config,
        rulename, bench_iteration: Job properties exposed to the script via
            the ``snakemake`` object.
        conda_env: Path of a conda environment or ``None``; its Python is
            used for ``.py`` scripts if it is at least ``MIN_PY_VERSION``.
        singularity_img: If not ``None``, ``python`` from the image is used
            and the image mount point becomes the module search path.
        bench_record: Passed on to ``shell``.
        singularity_args, jobid, shadow_dir: Not used in this function body.

    Raises:
        ValueError: If the script is neither ``.py``, ``.R`` nor ``.Rmd``.
        WorkflowError: If the source cannot be opened (wraps ``URLError``)
            or if an ``.Rmd`` script does not have exactly one output.
    """
    if not path.startswith("http") and not path.startswith("git+file"):
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        if not os.path.isabs(path):
            path = os.path.abspath(os.path.join(basedir, path))
        path = "file://" + path
    # `format` is a project helper here (the builtin has no `stepout`).
    # NOTE(review): it presumably resolves placeholders such as
    # {wildcards...} from this frame - confirm before renaming parameters.
    path = format(path, stepout=1)
    if path.startswith("file://"):
        sourceurl = "file:" + pathname2url(path[7:])
    elif path.startswith("git+file"):
        (root_path, file_path, version) = split_git_path(path)
        wrapper_dir = ".snakemake/wrappers"
        os.makedirs(wrapper_dir, exist_ok=True)
        # Materialize the git content as a local file so it can be opened
        # through urlopen like any other source.
        new_path = os.path.join(
            wrapper_dir, version + "-" + "-".join(file_path.split("/")))
        with open(new_path, 'w') as wrapper:
            wrapper.write(git_content(path))
        sourceurl = "file:" + new_path
        # Remove exactly the "@<version>" suffix. str.rstrip() would treat
        # the argument as a set of characters and could also eat trailing
        # characters of the file name, breaking the suffix checks below.
        version_suffix = "@" + version
        if path.endswith(version_suffix):
            path = path[:-len(version_suffix)]
    else:
        sourceurl = path

    # Temporary script file; stays None until it has been created so the
    # cleanup in `finally` knows whether there is anything to remove.
    f = None
    try:
        with urlopen(sourceurl) as source:
            if path.endswith(".py"):
                wrapper_path = path[7:] if path.startswith("file://") else path
                snakemake = Snakemake(input, output, params, wildcards,
                                      threads, resources, log, config,
                                      rulename, bench_iteration,
                                      os.path.dirname(wrapper_path))
                snakemake = pickle.dumps(snakemake)
                # Obtain search path for current snakemake module.
                # The module is needed for unpickling in the script.
                # We append it at the end (as a fallback).
                searchpath = SNAKEMAKE_SEARCHPATH
                if singularity_img is not None:
                    searchpath = singularity.SNAKEMAKE_MOUNTPOINT
                searchpath = '"{}"'.format(searchpath)
                # For local scripts, add their location to the path in case they use path-based imports
                if path.startswith("file://"):
                    searchpath += ', "{}"'.format(os.path.dirname(path[7:]))
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                import sys; sys.path.extend([{searchpath}]); import pickle; snakemake = pickle.loads({snakemake}); from snakemake.logging import logger; logger.printshellcmds = {printshellcmds}; __real_file__ = __file__; __file__ = {file_override};
                ######## Original script #########
                """).format(searchpath=escape_backslash(searchpath),
                            snakemake=snakemake,
                            printshellcmds=logger.printshellcmds,
                            file_override=repr(os.path.realpath(wrapper_path)))
            elif path.endswith(".R") or path.endswith(".Rmd"):
                # Positional placeholders: the argument order of .format()
                # below must match the order of the slots in the template.
                preamble = textwrap.dedent("""
                ######## Snakemake header ########
                library(methods)
                Snakemake <- setClass(
                    "Snakemake",
                    slots = c(
                        input = "list",
                        output = "list",
                        params = "list",
                        wildcards = "list",
                        threads = "numeric",
                        log = "list",
                        resources = "list",
                        config = "list",
                        rule = "character",
                        bench_iteration = "numeric",
                        scriptdir = "character",
                        source = "function"
                    )
                )
                snakemake <- Snakemake(
                    input = {},
                    output = {},
                    params = {},
                    wildcards = {},
                    threads = {},
                    log = {},
                    resources = {},
                    config = {},
                    rule = {},
                    bench_iteration = {},
                    scriptdir = {},
                    source = function(...){{
                        wd <- getwd()
                        setwd(snakemake@scriptdir)
                        source(...)
                        setwd(wd)
                    }}
                )

                ######## Original script #########
                """).format(
                    REncoder.encode_namedlist(input),
                    REncoder.encode_namedlist(output),
                    REncoder.encode_namedlist(params),
                    REncoder.encode_namedlist(wildcards),
                    threads,
                    REncoder.encode_namedlist(log),
                    REncoder.encode_namedlist({
                        name: value
                        for name, value in resources.items()
                        if name != "_cores" and name != "_nodes"
                    }),
                    REncoder.encode_dict(config),
                    REncoder.encode_value(rulename),
                    REncoder.encode_numeric(bench_iteration),
                    REncoder.encode_value(
                        os.path.dirname(path[7:])
                        if path.startswith("file://")
                        else os.path.dirname(path)))
            else:
                raise ValueError(
                    "Unsupported script: Expecting either Python (.py), R (.R) or RMarkdown (.Rmd) script."
                )

            script_dir = ".snakemake/scripts"
            os.makedirs(script_dir, exist_ok=True)

            # delete=False: the file has to outlive this `with` so that it
            # can be executed below; it is removed in `finally`.
            with tempfile.NamedTemporaryFile(
                    suffix="." + os.path.basename(path),
                    dir=script_dir,
                    delete=False) as f:
                if not path.endswith(".Rmd"):
                    f.write(preamble.encode())
                    f.write(source.read())
                else:
                    # Insert Snakemake object after the RMarkdown header
                    code = source.read().decode()
                    # End of the second "---\n" marker (without the newline).
                    pos = next(islice(re.finditer(r"---\n", code), 1,
                                      2)).start() + 3
                    f.write(str.encode(code[:pos]))
                    preamble = textwrap.dedent("""
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}
                        %s
                        ```
                        """ % preamble)
                    f.write(preamble.encode())
                    f.write(str.encode(code[pos:]))

            # NOTE(review): the shell() command templates below reference
            # {py_exec}, {f.name} and {out} without passing them, so shell
            # presumably resolves them from this frame - keep these local
            # names unchanged.
            if path.endswith(".py"):
                py_exec = sys.executable
                if conda_env is not None:
                    py = os.path.join(conda_env, "bin", "python")
                    if os.path.exists(py):
                        out = subprocess.check_output(
                            [py, "--version"],
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
                        ver = tuple(
                            map(int,
                                PY_VER_RE.match(out).group("ver_min").split(
                                    ".")))
                        if ver >= MIN_PY_VERSION:
                            # Python version is new enough, make use of environment
                            # to execute script
                            py_exec = "python"
                        else:
                            logger.warning(
                                "Conda environment defines Python "
                                "version < {0}.{1}. Using Python of the "
                                "master process to execute "
                                "script. Note that this cannot be avoided, "
                                "because the script uses data structures from "
                                "Snakemake which are Python >={0}.{1} "
                                "only.".format(*MIN_PY_VERSION))
                if singularity_img is not None:
                    # use python from image
                    py_exec = "python"
                # use the same Python as the running process or the one from the environment
                shell("{py_exec} {f.name:q}", bench_record=bench_record)
            elif path.endswith(".R"):
                if conda_env is not None and "R_LIBS" in os.environ:
                    logger.warning("R script job uses conda environment but "
                                   "R_LIBS environment variable is set. This "
                                   "is likely not intended, as R_LIBS can "
                                   "interfere with R packages deployed via "
                                   "conda. Consider running `unset R_LIBS` or "
                                   "remove it entirely before executing "
                                   "Snakemake.")
                shell("Rscript --vanilla {f.name:q}", bench_record=bench_record)
            elif path.endswith(".Rmd"):
                if len(output) != 1:
                    raise WorkflowError(
                        "RMarkdown scripts (.Rmd) may only have a single output file."
                    )
                out = os.path.abspath(output[0])
                shell(
                    "Rscript --vanilla -e 'rmarkdown::render(\"{f.name}\", output_file=\"{out}\", quiet=TRUE, knit_root_dir = \"{workdir}\", params = list(rmd=\"{f.name}\"))'",
                    bench_record=bench_record,
                    workdir=os.getcwd())

    except URLError as e:
        raise WorkflowError(e)
    finally:
        if f:
            os.remove(f.name)