def format_wildcards(self, string, **variables):
    """ Format a string with variables from the job. """
    _variables = dict()
    _variables.update(self.rule.workflow.globals)
    _variables.update(
        dict(
            input=self.input,
            output=self.output,
            params=self.params,
            wildcards=self._format_wildcards,
            threads=self.threads,
            resources=self.resources,
            log=self.log,
            jobid=self.jobid,
            version=self.rule.version,
            name=self.name,
            rule=self.rule.name,
            rulename=self.rule.name,
            bench_iteration=None,
        ))
    _variables.update(variables)
    try:
        return format(string, **_variables)
    except NameError as ex:
        raise RuleException("NameError: " + str(ex), rule=self.rule)
    except IndexError as ex:
        raise RuleException("IndexError: " + str(ex), rule=self.rule)
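# Hedged sketch (not part of this codebase): the technique above is to
# collect a namespace of job attributes and render the template against it.
# `format_template` and `_Namespace` are hypothetical stand-ins for the
# `format` helper used above.
class _Namespace(dict):
    def __missing__(self, key):
        # mirror the NameError branch above: unknown names fail loudly
        raise NameError("undefined variable {!r} in template".format(key))

def format_template(template, **variables):
    # e.g. format_template("rule {rule} uses {threads} threads",
    #                      rule="map_reads", threads=4)
    return template.format_map(_Namespace(variables))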
def decorate(ruleinfo):
    if ruleinfo.input:
        rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
    if ruleinfo.output:
        rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
    if ruleinfo.params:
        rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
    if ruleinfo.threads:
        if not isinstance(ruleinfo.threads, int) and not callable(
                ruleinfo.threads):
            raise RuleException(
                "Threads value has to be an integer or a callable.",
                rule=rule)
        rule.resources["_cores"] = ruleinfo.threads
    if ruleinfo.shadow_depth:
        if ruleinfo.shadow_depth not in (True, "shallow", "full"):
            raise RuleException(
                "Shadow must either be 'shallow', 'full', "
                "or True (equivalent to 'full')",
                rule=rule)
        if ruleinfo.shadow_depth is True:
            rule.shadow_depth = 'full'
        else:
            rule.shadow_depth = ruleinfo.shadow_depth
    if ruleinfo.resources:
        args, resources = ruleinfo.resources
        if args:
            raise RuleException("Resources have to be named.")
        if not all(
                map(lambda r: isinstance(r, int) or callable(r),
                    resources.values())):
            raise RuleException(
                "Resources values have to be integers or callables",
                rule=rule)
        rule.resources.update(resources)
    if ruleinfo.priority:
        if (not isinstance(ruleinfo.priority, int) and
                not isinstance(ruleinfo.priority, float)):
            raise RuleException("Priority values have to be numeric.",
                                rule=rule)
        rule.priority = ruleinfo.priority
    if ruleinfo.version:
        rule.version = ruleinfo.version
    if ruleinfo.log:
        rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
    if ruleinfo.message:
        rule.message = ruleinfo.message
    if ruleinfo.benchmark:
        rule.benchmark = ruleinfo.benchmark
    rule.norun = ruleinfo.norun
    rule.docstring = ruleinfo.docstring
    rule.run_func = ruleinfo.func
    rule.shellcmd = ruleinfo.shellcmd
    ruleinfo.func.__name__ = "__{}".format(name)
    self.globals[ruleinfo.func.__name__] = ruleinfo.func
    setattr(rules, name, rule)
    return ruleinfo.func
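# Hedged sketch of the registration pattern used by `decorate` above: a
# decorator factory records per-rule settings and publishes the run function
# under a mangled name. `Registry` and `workflow` are illustrative names,
# not Snakemake API.
class Registry:
    def __init__(self):
        self.rules = {}

    def rule(self, name, **settings):
        def decorate(func):
            # mangle the name so rule functions cannot collide with globals
            func.__name__ = "__{}".format(name)
            self.rules[name] = (settings, func)
            return func
        return decorate

workflow = Registry()

@workflow.rule("map_reads", threads=4)
def run(input, output):
    pass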
def message(self):
    """ Return the message for this job. """
    try:
        return (self.format_wildcards(self.rule.message)
                if self.rule.message else None)
    except AttributeError as ex:
        raise RuleException(str(ex), rule=self.rule)
    except KeyError as ex:
        raise RuleException("Unknown variable in message "
                            "of shell command: {}".format(str(ex)),
                            rule=self.rule)
def shellcmd(self):
    """ Return the shell command. """
    try:
        return (self.format_wildcards(self.rule.shellcmd)
                if self.rule.shellcmd else None)
    except AttributeError as ex:
        raise RuleException(str(ex), rule=self.rule)
    except KeyError as ex:
        raise RuleException("Unknown variable when printing "
                            "shell command: {}".format(str(ex)),
                            rule=self.rule)
def check_caching(self):
    if self.name in self.workflow.cache_rules:
        if len(self.output) != 1:
            raise RuleException(
                "Only rules with exactly 1 output file may be cached.",
                rule=self)
        if self.dynamic_output:
            raise RuleException(
                "Rules with dynamic output files may not be cached.",
                rule=self)
def check_output(self, job, wait=3):
    """ Raise exception if output files of job are missing. """
    for f in job.expanded_output:
        if not f.exists:
            logger.warning(
                "Output file {} not present. Waiting {} "
                "seconds to ensure that this is not because of filesystem "
                "latency.".format(f, wait))
            while not f.exists and wait > 0:
                wait -= 1
                time.sleep(1)
            if not f.exists:
                raise MissingOutputException("Output file {} not "
                                             "produced by rule {}.".format(
                                                 f, job.rule.name),
                                             lineno=job.rule.lineno,
                                             snakefile=job.rule.snakefile)
    input_maxtime = job.input_maxtime
    if input_maxtime is not None:
        output_mintime = job.output_mintime
        if output_mintime is not None and output_mintime < input_maxtime:
            raise RuleException(
                "Output files {} are older than input "
                "files. Did you extract an archive? Make sure that output "
                "files have a more recent modification date than the "
                "archive, e.g. by using 'touch'.".format(", ".join(
                    job.expanded_output)),
                rule=job.rule)
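# Minimal stand-in for the latency wait above (hypothetical helper, not part
# of this codebase): poll once per second until the file shows up or the
# budget is spent, then report whether it exists.
import os
import time

def wait_for_file(path, wait=3):
    while not os.path.exists(path) and wait > 0:
        wait -= 1
        time.sleep(1)
    return os.path.exists(path)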
def _apply_wildcards(newitems, olditems, wildcards, wildcards_obj,
                     concretize=apply_wildcards,
                     ruleio=None):
    for name, item in olditems.allitems():
        start = len(newitems)
        if callable(item):
            item = item(wildcards_obj)
            if not_iterable(item):
                item = [item]
            for item_ in item:
                if not isinstance(item_, str):
                    raise RuleException(
                        "Input function did not return str or list of str.",
                        rule=self)
                concrete = concretize(item_, wildcards)
                newitems.append(concrete)
                if ruleio is not None:
                    ruleio[concrete] = item_
        else:
            if not_iterable(item):
                item = [item]
            for item_ in item:
                concrete = concretize(item_, wildcards)
                newitems.append(concrete)
                if ruleio is not None:
                    ruleio[concrete] = item_
        if name:
            newitems.set_name(name, start, end=len(newitems))
def run_wrapper(run, input, output, params, wildcards, threads, resources,
                log, linemaps):
    """
    Wrapper around the run method that handles directory creation and
    output file deletion on error.

    Arguments
    run       -- the run method
    input     -- list of input files
    output    -- list of output files
    wildcards -- so far processed wildcards
    threads   -- usable threads
    log       -- path to log file
    """
    if log is None:
        log = Unformattable(errormsg="log used but undefined")
    try:
        # execute the actual run method.
        run(input, output, params, wildcards, threads, resources, log)
    except (Exception, BaseException) as ex:
        # this ensures that exception can be re-raised in the parent thread
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(ex, lineno,
                                         linemaps=linemaps,
                                         snakefile=file,
                                         show_traceback=True))
def check_wildcards(self, wildcards):
    missing_wildcards = self.wildcard_names - set(wildcards.keys())
    if missing_wildcards:
        raise RuleException(
            "Could not resolve wildcards in rule {}:\n{}".format(
                self.name, "\n".join(missing_wildcards)),
            lineno=self.lineno,
            snakefile=self.snakefile)
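# The check above is plain set arithmetic; a self-contained example with
# illustrative values:
wildcard_names = {"sample", "replicate"}
wildcards = {"sample": "A"}
missing_wildcards = wildcard_names - set(wildcards.keys())
assert missing_wildcards == {"replicate"}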
def check_caching(self):
    if self.name in self.workflow.cache_rules:
        if len(self.output) == 0:
            raise RuleException(
                "Rules without output files cannot be cached.", rule=self)
        if len(self.output) > 1:
            prefixes = set(out.multiext_prefix for out in self.output)
            if None in prefixes or len(prefixes) > 1:
                raise RuleException(
                    "Rules with multiple output files must define them as a single multiext() "
                    '(e.g. multiext("path/to/index", ".bwt", ".ann")). '
                    "The rationale is that multiple output files can only be unambiguously resolved "
                    "if they can be distinguished by a fixed set of extensions (i.e. mime types).",
                    rule=self,
                )
        if self.dynamic_output:
            raise RuleException(
                "Rules with dynamic output files may not be cached.",
                rule=self)
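# Hedged sketch of the multiext prefix test above: caching a multi-output
# rule is allowed only if all outputs share one non-None prefix.
# `cacheable` is a hypothetical helper over (path, multiext_prefix) pairs.
def cacheable(outputs):
    prefixes = set(prefix for _, prefix in outputs)
    return None not in prefixes and len(prefixes) == 1

assert cacheable([("index.bwt", "index"), ("index.ann", "index")])
assert not cacheable([("a.txt", None), ("b.txt", None)])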
def decorate(ruleinfo):
    if ruleinfo.input:
        rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
    if ruleinfo.output:
        rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
    if ruleinfo.params:
        rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
    if ruleinfo.threads:
        if not isinstance(ruleinfo.threads, int):
            raise RuleException("Threads value has to be an integer.",
                                rule=rule)
        rule.resources["_cores"] = ruleinfo.threads
    if ruleinfo.resources:
        args, resources = ruleinfo.resources
        if args:
            raise RuleException("Resources have to be named.")
        if not all(map(lambda r: isinstance(r, int), resources.values())):
            raise RuleException("Resources values have to be integers.",
                                rule=rule)
        rule.resources.update(resources)
    if ruleinfo.priority:
        if (not isinstance(ruleinfo.priority, int) and
                not isinstance(ruleinfo.priority, float)):
            raise RuleException("Priority values have to be numeric.",
                                rule=rule)
        rule.priority = ruleinfo.priority
    if ruleinfo.version:
        rule.version = ruleinfo.version
    if ruleinfo.log:
        rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
    if ruleinfo.message:
        rule.message = ruleinfo.message
    if ruleinfo.benchmark:
        rule.benchmark = ruleinfo.benchmark
    rule.norun = ruleinfo.norun
    rule.docstring = ruleinfo.docstring
    rule.run_func = ruleinfo.func
    rule.shellcmd = ruleinfo.shellcmd
    ruleinfo.func.__name__ = "__{}".format(name)
    self.globals[ruleinfo.func.__name__] = ruleinfo.func
    setattr(rules, name, rule)
    return ruleinfo.func
def run_wrapper(run, input, output, params, wildcards, threads, resources,
                log, version, benchmark, benchmark_repeats, rule, conda_env,
                linemaps, debug=False, shadow_dir=None):
    """
    Wrapper around the run method that handles exceptions and benchmarking.

    Arguments
    run        -- the run method
    input      -- list of input files
    output     -- list of output files
    wildcards  -- so far processed wildcards
    threads    -- usable threads
    log        -- list of log files
    rule (str) -- rule name
    shadow_dir -- optional shadow directory root
    """
    if os.name == "posix" and debug:
        sys.stdin = open('/dev/stdin')
    try:
        runs = 1 if benchmark is None else benchmark_repeats
        wallclock = []
        for i in range(runs):
            w = time.time()
            # execute the actual run method.
            with change_working_directory(shadow_dir):
                run(input, output, params, wildcards, threads, resources,
                    log, version, rule, conda_env)
            w = time.time() - w
            wallclock.append(w)
    except (KeyboardInterrupt, SystemExit) as e:
        # re-raise the keyboard interrupt in order to record an error in the
        # scheduler but ignore it
        raise e
    except (Exception, BaseException) as ex:
        log_verbose_traceback(ex)
        # this ensures that exception can be re-raised in the parent thread
        lineno, file = get_exception_origin(ex, linemaps)
        raise RuleException(format_error(ex, lineno,
                                         linemaps=linemaps,
                                         snakefile=file,
                                         show_traceback=True))

    if benchmark is not None:
        try:
            with open(benchmark, "w") as f:
                print("s", "h:m:s", sep="\t", file=f)
                for t in wallclock:
                    print(t, str(datetime.timedelta(seconds=t)),
                          sep="\t", file=f)
        except (Exception, BaseException) as ex:
            raise WorkflowError(ex)
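# Hedged sketch of the benchmarking loop above: time repeated runs with
# time.time() and write the same two-column benchmark table (seconds and
# h:m:s). `benchmark_run` is a hypothetical helper.
import datetime
import time

def benchmark_run(run, repeats, path):
    wallclock = []
    for _ in range(repeats):
        start = time.time()
        run()
        wallclock.append(time.time() - start)
    with open(path, "w") as f:
        print("s", "h:m:s", sep="\t", file=f)
        for t in wallclock:
            print(t, str(datetime.timedelta(seconds=t)), sep="\t", file=f)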
def update(self, jobs, file=None, visited=None, skip_until_dynamic=False):
    """ Update the DAG by adding given jobs and their dependencies. """
    if visited is None:
        visited = set()
    producer = None
    exceptions = list()
    jobs = sorted(jobs, reverse=not self.ignore_ambiguity)
    cycles = list()
    for i, job in enumerate(jobs):
        if file in job.input:
            cycles.append(job)
            continue
        if job in visited:
            cycles.append(job)
            continue
        try:
            self.update_(job,
                         visited=set(visited),
                         skip_until_dynamic=skip_until_dynamic)
            # TODO this might fail if a rule discarded here is needed
            # elsewhere
            if i > 0:
                if job < jobs[i - 1] or self.ignore_ambiguity:
                    break
                elif producer is not None:
                    raise AmbiguousRuleException(file, job.rule,
                                                 jobs[i - 1].rule)
            producer = job
        except (MissingInputException, CyclicGraphException) as ex:
            exceptions.append(ex)
        except RuntimeError as ex:
            if str(ex).startswith("maximum recursion depth exceeded"):
                ex = RuleException(
                    "Maximum recursion depth exceeded. "
                    "Maybe you have a cyclic dependency due to infinitely "
                    "filled wildcards?\nProblematic "
                    "input file:\n{}".format(file),
                    rule=job.rule)
            raise ex
    if producer is None:
        if cycles:
            job = cycles[0]
            raise CyclicGraphException(job.rule, file, rule=job.rule)
        if exceptions:
            raise exceptions[0]
    return producer
def format_wildcards(self, string, **variables):
    """ Format a string with variables from the job. """
    _variables = dict()
    _variables.update(self.rule.workflow.globals)
    _variables.update(variables)
    try:
        return format(string,
                      input=self.input,
                      output=self.output,
                      params=self.params,
                      wildcards=self._format_wildcards,
                      threads=self.threads,
                      resources=self.resources,
                      log=self.log,
                      **_variables)
    except NameError as ex:
        raise RuleException("NameError: " + str(ex), rule=self.rule)
def init(self):
    """ Initialise the DAG. """
    for job in map(self.rule2job, self.targetrules):
        job = self.update([job])
        self.targetjobs.add(job)

    exceptions = defaultdict(list)
    for file in self.targetfiles:
        try:
            job = self.update(self.file2jobs(file), file=file)
            self.targetjobs.add(job)
        except MissingRuleException as ex:
            exceptions[file].append(ex)

    if exceptions:
        raise RuleException(include=chain(*exceptions.values()))
    self.update_needrun()
def check_output(self, job, wait=3):
    """ Raise exception if output files of job are missing. """
    try:
        wait_for_files(job.expanded_output, latency_wait=wait)
    except IOError as e:
        raise MissingOutputException(str(e), rule=job.rule)

    input_maxtime = job.input_maxtime
    if input_maxtime is not None:
        output_mintime = job.output_mintime
        if output_mintime is not None and output_mintime < input_maxtime:
            raise RuleException(
                "Output files {} are older than input "
                "files. Did you extract an archive? Make sure that output "
                "files have a more recent modification date than the "
                "archive, e.g. by using 'touch'.".format(", ".join(
                    job.expanded_output)),
                rule=job.rule)
def decorate(ruleinfo):
    if ruleinfo.wildcard_constraints:
        rule.set_wildcard_constraints(
            *ruleinfo.wildcard_constraints[0],
            **ruleinfo.wildcard_constraints[1])
    if ruleinfo.input:
        rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
    if ruleinfo.output:
        rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
    if ruleinfo.params:
        rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
    # handle default resources
    rule.resources = copy.deepcopy(self.default_resources)
    if ruleinfo.threads:
        if not isinstance(ruleinfo.threads, int) and not callable(
                ruleinfo.threads):
            raise RuleException(
                "Threads value has to be an integer or a callable.",
                rule=rule)
        rule.resources["_cores"] = ruleinfo.threads
    if ruleinfo.shadow_depth:
        if ruleinfo.shadow_depth not in (True, "shallow", "full", "minimal"):
            raise RuleException(
                "Shadow must either be 'minimal', 'shallow', 'full', "
                "or True (equivalent to 'full')",
                rule=rule)
        if ruleinfo.shadow_depth is True:
            rule.shadow_depth = 'full'
            logger.warning(
                "Shadow is set to True in rule {} (equivalent to 'full'). "
                "It's encouraged to use the more explicit options "
                "'minimal|shallow|full' instead.".format(rule))
        else:
            rule.shadow_depth = ruleinfo.shadow_depth
    if ruleinfo.resources:
        args, resources = ruleinfo.resources
        if args:
            raise RuleException("Resources have to be named.")
        if not all(
                map(lambda r: isinstance(r, int) or callable(r),
                    resources.values())):
            raise RuleException(
                "Resources values have to be integers or callables",
                rule=rule)
        rule.resources.update(resources)
    if ruleinfo.priority:
        if (not isinstance(ruleinfo.priority, int) and
                not isinstance(ruleinfo.priority, float)):
            raise RuleException("Priority values have to be numeric.",
                                rule=rule)
        rule.priority = ruleinfo.priority
    if ruleinfo.version:
        rule.version = ruleinfo.version
    if ruleinfo.log:
        rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
    if ruleinfo.message:
        rule.message = ruleinfo.message
    if ruleinfo.benchmark:
        rule.benchmark = ruleinfo.benchmark
    if not self.run_local and ruleinfo.group is not None:
        rule.group = ruleinfo.group
    if ruleinfo.wrapper:
        if self.use_conda:
            rule.conda_env = snakemake.wrapper.get_conda_env(
                ruleinfo.wrapper, prefix=self.wrapper_prefix)
        # TODO retrieve suitable singularity image
    if ruleinfo.conda_env and self.use_conda:
        if not (ruleinfo.script or ruleinfo.wrapper or ruleinfo.shellcmd):
            raise RuleException(
                "Conda environments are only allowed "
                "with shell, script, or wrapper directives "
                "(not with run).",
                rule=rule)
        if not (urllib.parse.urlparse(ruleinfo.conda_env).scheme or
                os.path.isabs(ruleinfo.conda_env)):
            ruleinfo.conda_env = os.path.join(self.current_basedir,
                                              ruleinfo.conda_env)
        rule.conda_env = ruleinfo.conda_env

    if self.use_singularity:
        invalid_rule = not (ruleinfo.script or ruleinfo.wrapper or
                            ruleinfo.shellcmd)
        if ruleinfo.singularity_img:
            if invalid_rule:
                raise RuleException(
                    "Singularity directive is only allowed "
                    "with shell, script or wrapper directives "
                    "(not with run).",
                    rule=rule)
            rule.singularity_img = ruleinfo.singularity_img
        elif self.global_singularity_img:
            if not invalid_rule:
                # skip rules with run directive
                rule.singularity_img = self.global_singularity_img

    if self.use_docker:
        # invalid_rule = not ruleinfo.shellcmd
        invalid_rule = not (ruleinfo.script or ruleinfo.shellcmd)
        if ruleinfo.docker_img:
            if invalid_rule:
                raise RuleException(
                    "docker_img directive is only allowed "
                    "with shell or script directives "
                    "(not with run/wrapper).",
                    rule=rule)
            rule.docker_img = ruleinfo.docker_img
        # elif self.global_singularity_img:
        #     if not invalid_rule:
        #         # skip rules with run directive
        #         rule.singularity_img = self.global_singularity_img

    rule.norun = ruleinfo.norun
    rule.docstring = ruleinfo.docstring
    rule.run_func = ruleinfo.func
    rule.shellcmd = ruleinfo.shellcmd
    rule.script = ruleinfo.script
    rule.wrapper = ruleinfo.wrapper
    rule.cwl = ruleinfo.cwl
    rule.restart_times = self.restart_times
    rule.basedir = self.current_basedir

    ruleinfo.func.__name__ = "__{}".format(rule.name)
    self.globals[ruleinfo.func.__name__] = ruleinfo.func
    setattr(rules, rule.name, RuleProxy(rule))
    if checkpoint:
        checkpoints.register(rule)
    return ruleinfo.func
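# Hedged sketch of the conda-env path handling above: URLs and absolute
# paths pass through untouched, while relative paths are anchored at the
# workflow's base directory. `resolve_env` is a hypothetical helper.
import os
import urllib.parse

def resolve_env(conda_env, basedir):
    if urllib.parse.urlparse(conda_env).scheme or os.path.isabs(conda_env):
        return conda_env
    return os.path.join(basedir, conda_env)

assert resolve_env("envs/map.yaml", "/wf") == "/wf/envs/map.yaml"
assert resolve_env("https://example.com/env.yaml", "/wf") == \
    "https://example.com/env.yaml"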
def expand(self, rule, ruleinfo):
    """Recursively expand wildcards within RuleInfo object"""
    fields = list(
        filter(None.__ne__, filter(self.expands_field, ruleinfo_fields)))
    # normalize field values and create namedlist dictionary
    args = {}
    for field in fields:
        attr = getattr(ruleinfo, field)
        if isinstance(attr, tuple):
            if len(attr) != 2:
                raise Exception("Internal Error")
            # flatten named lists
            for key in attr[1]:
                if is_container(attr[1][key]):
                    attr[1][key] = list(flatten(attr[1][key]))
            # flatten unnamed and overwrite tuples
            # also turn attr[0] into a list, making it mutable
            attr = (list(flatten(attr[0])), attr[1])
            setattr(ruleinfo, field, attr)
            args[field] = NamedList(fromtuple=attr)
        else:
            args[field] = NamedList()
            args[field].append(attr)

    # build graph of expansion dependencies
    deps = networkx().DiGraph()
    for field, nlist in args.items():
        for n, value in enumerate(nlist):
            if not isinstance(value, str):  # only strings can be expanded
                continue
            s = "{}[{}]".format(field, n)
            # create node for value itself
            deps.add_node(s, core=True, name=field, idx=n)
            # node depends on wildcards contained in value
            deps.add_edges_from((s, t) for t in get_names(value)
                                if t.split(".")[0].split("[")[0] in fields)
            # field node depends on all its value nodes
            deps.add_edge(field, s)
        # create edges field.name -> field[n]
        for name, (i, j) in nlist.get_names():
            s = "{}.{}".format(field, name)
            if j is None:
                j = i + 1
            deps.add_edges_from(
                (s, "{}[{}]".format(field, n)) for n in range(i, j))

    # sort variables so that they can be expanded in order
    try:
        nodes = list(
            reversed([
                node
                for node in networkx().algorithms.dag.topological_sort(deps)
                if deps.out_degree(node) > 0 and 'core' in deps.nodes[node]
            ]))
    except networkx().NetworkXUnfeasible:
        raise CircularReferenceException(deps, rule)

    # expand variables
    for node in nodes:
        name = deps.nodes[node]['name']
        idx = deps.nodes[node]['idx']
        value = args[name][idx]
        if isinstance(value, str):
            try:
                value2 = partial_format(value, **args)
            except FormattingError as e:
                raise RuleException(
                    "Unable to resolve wildcard '{{{}}}' in parameter {} "
                    "in rule {}".format(e.attr, node, rule.name))
            except IndexError as e:
                raise RuleException(
                    "Unable to format '{}' using '{}'".format(value, args))
            args[name][idx] = value2
            if ymp.print_rule == 1:
                log.error("{}::{}: {} => {}".format(
                    rule.name, node, value, value2))

    # update ruleinfo
    for name in fields:
        attr = getattr(ruleinfo, name)
        if isinstance(attr, tuple):
            if len(attr) != 2:
                raise Exception("Internal Error")
            args[name].update_tuple(attr)
        else:
            setattr(ruleinfo, name, args[name][0])
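# The expansion order above comes from a reverse topological sort over a
# dependency graph. A self-contained sketch with networkx (assumed
# installed); the node names are illustrative:
import networkx as nx

deps = nx.DiGraph()
deps.add_edge("output[0]", "input[0]")  # output[0] references {input[0]}
deps.add_edge("input[0]", "dir")        # input[0] references {dir}
order = list(reversed(list(nx.algorithms.dag.topological_sort(deps))))
# dependencies come first, so each value is fully expanded before anything
# that refers to it
assert order == ["dir", "input[0]", "output[0]"]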
def check_string_type(f):
    if not isinstance(f, str):
        raise RuleException(
            "Input function did not return str or list of str.",
            rule=self)
def prepare(self):
    """
    Prepare execution of job.
    This includes creation of directories and deletion of previously created
    dynamic files.
    Creates a shadow directory for the job if specified.
    """
    self.check_protected_output()

    unexpected_output = self.dag.reason(self).missing_output.intersection(
        self.existing_output)
    if unexpected_output:
        logger.warning(
            "Warning: the following output files of rule {} were not "
            "present when the DAG was created:\n{}".format(
                self.rule, unexpected_output))

    self.remove_existing_output()

    for f, f_ in zip(self.output, self.rule.output):
        f.prepare()

    self.download_remote_input()

    for f in self.log:
        f.prepare()
    if self.benchmark:
        self.benchmark.prepare()

    if not self.is_shadow:
        return

    # Create shadow directory structure
    self.shadow_dir = tempfile.mkdtemp(
        dir=self.rule.workflow.persistence.shadow_path)
    cwd = os.getcwd()

    if self.rule.shadow_depth == "minimal":
        # Re-create the directory structure in the shadow directory
        for (f, d) in set([(item, os.path.dirname(item))
                           for sublist in [self.input, self.output, self.log]
                           if sublist is not None for item in sublist]):
            if d and not os.path.isabs(d):
                rel_path = os.path.relpath(d)
                # Only create subdirectories
                if not rel_path.split(os.path.sep)[0] == "..":
                    os.makedirs(os.path.join(self.shadow_dir, rel_path),
                                exist_ok=True)
                else:
                    raise RuleException(
                        "The following file name references a parent "
                        "directory relative to your workdir.\n"
                        'This isn\'t supported for shadow: "minimal". '
                        "Consider using an absolute path instead.\n{}"
                        .format(f),
                        rule=self.rule,
                    )

        # Symlink the input files
        for rel_path in set([
                os.path.relpath(f) for f in self.input
                if not os.path.isabs(f)
        ]):
            link = os.path.join(self.shadow_dir, rel_path)
            original = os.path.relpath(rel_path, os.path.dirname(link))
            os.symlink(original, link)

    # Shallow simply symlinks everything in the working directory.
    elif self.rule.shadow_depth == "shallow":
        for source in os.listdir(cwd):
            link = os.path.join(self.shadow_dir, source)
            os.symlink(os.path.abspath(source), link)
    elif self.rule.shadow_depth == "full":
        snakemake_dir = os.path.join(cwd, ".snakemake")
        for dirpath, dirnames, filenames in os.walk(cwd):
            # Must exclude .snakemake and its children to avoid infinite
            # loop of symlinks.
            if os.path.commonprefix([snakemake_dir, dirpath]) == snakemake_dir:
                continue
            for dirname in dirnames:
                if dirname == ".snakemake":
                    continue
                relative_source = os.path.relpath(
                    os.path.join(dirpath, dirname))
                shadow = os.path.join(self.shadow_dir, relative_source)
                os.mkdir(shadow)
            for filename in filenames:
                source = os.path.join(dirpath, filename)
                relative_source = os.path.relpath(source)
                link = os.path.join(self.shadow_dir, relative_source)
                os.symlink(source, link)
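# Hedged sketch of the "shallow" shadow strategy above: make a temporary
# shadow directory and symlink every top-level entry of the working
# directory into it. `make_shallow_shadow` is a hypothetical helper.
import os
import tempfile

def make_shallow_shadow(workdir):
    shadow_dir = tempfile.mkdtemp()
    for entry in os.listdir(workdir):
        os.symlink(os.path.join(os.path.abspath(workdir), entry),
                   os.path.join(shadow_dir, entry))
    return shadow_dir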
def expand_wildcards(self, wildcards=None):
    """
    Expand wildcards depending on the requested output
    or given wildcards dict.
    """

    def concretize_iofile(f, wildcards):
        if not isinstance(f, _IOFile):
            return IOFile(f, rule=self)
        else:
            return f.apply_wildcards(wildcards,
                                     fill_missing=f in self.dynamic_input,
                                     fail_dynamic=self.dynamic_output)

    def concretize_param(p, wildcards):
        if isinstance(p, str):
            return apply_wildcards(p, wildcards)
        return p

    def check_string_type(f):
        if not isinstance(f, str):
            raise RuleException(
                "Input function did not return str or list of str.",
                rule=self)

    def _apply_wildcards(newitems, olditems, wildcards, wildcards_obj,
                         concretize=apply_wildcards,
                         check_return_type=check_string_type,
                         ruleio=None,
                         no_flattening=False):
        for name, item in olditems.allitems():
            start = len(newitems)
            is_iterable = True
            if callable(item):
                try:
                    item = item(wildcards_obj)
                except (Exception, BaseException) as e:
                    raise InputFunctionException(e, rule=self,
                                                 wildcards=wildcards)
            if not_iterable(item) or no_flattening:
                item = [item]
                is_iterable = False
            for item_ in item:
                check_return_type(item_)
                concrete = concretize(item_, wildcards)
                newitems.append(concrete)
                if ruleio is not None:
                    ruleio[concrete] = item_
            if name:
                newitems.set_name(
                    name, start,
                    end=len(newitems) if is_iterable else None)

    if wildcards is None:
        wildcards = dict()
    missing_wildcards = self.wildcard_names - set(wildcards.keys())
    if missing_wildcards:
        raise RuleException(
            "Could not resolve wildcards in rule {}:\n{}".format(
                self.name, "\n".join(missing_wildcards)),
            lineno=self.lineno,
            snakefile=self.snakefile)

    ruleio = dict()
    try:
        input = InputFiles()
        wildcards_obj = Wildcards(fromdict=wildcards)
        _apply_wildcards(input, self.input, wildcards, wildcards_obj,
                         concretize=concretize_iofile,
                         ruleio=ruleio)

        params = Params()
        # When applying wildcards to params, the return type need not be
        # a string, so the check is disabled.
        _apply_wildcards(params, self.params, wildcards, wildcards_obj,
                         concretize=concretize_param,
                         check_return_type=lambda x: None,
                         no_flattening=True)

        output = OutputFiles(
            o.apply_wildcards(wildcards) for o in self.output)
        output.take_names(self.output.get_names())

        dependencies = {
            None if f is None else f.apply_wildcards(wildcards): rule
            for f, rule in self.dependencies.items()
        }

        ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

        log = Log()
        _apply_wildcards(log, self.log, wildcards, wildcards_obj,
                         concretize=concretize_iofile)

        benchmark = self.benchmark.apply_wildcards(
            wildcards) if self.benchmark else None
        return input, output, params, log, benchmark, ruleio, dependencies
    except WildcardError as ex:
        # this can only happen if an input contains an unresolved wildcard.
        raise RuleException(
            "Wildcards in input, params, log or benchmark file of rule {} "
            "cannot be determined from output files:\n{}".format(
                self, str(ex)),
            lineno=self.lineno,
            snakefile=self.snakefile)
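# The concretization above ultimately substitutes a wildcards dict into
# templated paths; a minimal stand-in using str.format (hypothetical helper,
# not the _IOFile machinery):
def concretize(template, wildcards):
    return template.format(**wildcards)

assert concretize("mapped/{sample}.bam", {"sample": "A"}) == "mapped/A.bam"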
def decorate(ruleinfo):
    if ruleinfo.wildcard_constraints:
        rule.set_wildcard_constraints(
            *ruleinfo.wildcard_constraints[0],
            **ruleinfo.wildcard_constraints[1])
    if ruleinfo.input:
        rule.set_input(*ruleinfo.input[0], **ruleinfo.input[1])
    if ruleinfo.output:
        rule.set_output(*ruleinfo.output[0], **ruleinfo.output[1])
    if ruleinfo.params:
        rule.set_params(*ruleinfo.params[0], **ruleinfo.params[1])
    # handle default resources
    if self.default_resources is not None:
        rule.resources = copy.deepcopy(self.default_resources.parsed)
    if ruleinfo.threads is not None:
        if (not isinstance(ruleinfo.threads, int)
                and not isinstance(ruleinfo.threads, float)
                and not callable(ruleinfo.threads)):
            raise RuleException(
                "Threads value has to be an integer, float, or a callable.",
                rule=rule,
            )
        if name in self.overwrite_threads:
            rule.resources["_cores"] = self.overwrite_threads[name]
        else:
            if isinstance(ruleinfo.threads, float):
                ruleinfo.threads = int(ruleinfo.threads)
            rule.resources["_cores"] = ruleinfo.threads
    if ruleinfo.shadow_depth:
        if ruleinfo.shadow_depth not in (True, "shallow", "full", "minimal"):
            raise RuleException(
                "Shadow must either be 'minimal', 'shallow', 'full', "
                "or True (equivalent to 'full')",
                rule=rule,
            )
        if ruleinfo.shadow_depth is True:
            rule.shadow_depth = "full"
            logger.warning(
                "Shadow is set to True in rule {} (equivalent to 'full'). "
                "It's encouraged to use the more explicit options "
                "'minimal|shallow|full' instead.".format(rule))
        else:
            rule.shadow_depth = ruleinfo.shadow_depth
    if ruleinfo.resources:
        args, resources = ruleinfo.resources
        if args:
            raise RuleException("Resources have to be named.")
        if not all(
                map(lambda r: isinstance(r, int) or callable(r),
                    resources.values())):
            raise RuleException(
                "Resources values have to be integers or callables",
                rule=rule)
        rule.resources.update(resources)
    if ruleinfo.priority:
        if not isinstance(ruleinfo.priority, int) and not isinstance(
                ruleinfo.priority, float):
            raise RuleException("Priority values have to be numeric.",
                                rule=rule)
        rule.priority = ruleinfo.priority
    if ruleinfo.version:
        rule.version = ruleinfo.version
    if ruleinfo.log:
        rule.set_log(*ruleinfo.log[0], **ruleinfo.log[1])
    if ruleinfo.message:
        rule.message = ruleinfo.message
    if ruleinfo.benchmark:
        rule.benchmark = ruleinfo.benchmark
    if not self.run_local and ruleinfo.group is not None:
        rule.group = ruleinfo.group
    if ruleinfo.wrapper:
        rule.conda_env = snakemake.wrapper.get_conda_env(
            ruleinfo.wrapper, prefix=self.wrapper_prefix)
        # TODO retrieve suitable singularity image

    if ruleinfo.env_modules:
        # If using environment modules and they are defined for the rule,
        # ignore conda and singularity directive below.
        # The reason is that this is likely intended in order to use
        # a software stack specifically compiled for a particular
        # HPC cluster.
        invalid_rule = not (ruleinfo.script or ruleinfo.wrapper or
                            ruleinfo.shellcmd or ruleinfo.notebook)
        if invalid_rule:
            raise RuleException(
                "envmodules directive is only allowed with "
                "shell, script, notebook, or wrapper directives "
                "(not with run)",
                rule=rule,
            )
        from snakemake.deployment.env_modules import EnvModules
        rule.env_modules = EnvModules(*ruleinfo.env_modules)

    if ruleinfo.conda_env:
        if not (ruleinfo.script or ruleinfo.wrapper or ruleinfo.shellcmd
                or ruleinfo.notebook):
            raise RuleException(
                "Conda environments are only allowed "
                "with shell, script, notebook, or wrapper directives "
                "(not with run).",
                rule=rule,
            )
        if not (urllib.parse.urlparse(ruleinfo.conda_env).scheme
                or os.path.isabs(ruleinfo.conda_env)):
            ruleinfo.conda_env = os.path.join(self.current_basedir,
                                              ruleinfo.conda_env)
        rule.conda_env = ruleinfo.conda_env

    invalid_rule = not (ruleinfo.script or ruleinfo.wrapper or
                        ruleinfo.shellcmd or ruleinfo.notebook)
    if ruleinfo.container_img:
        if invalid_rule:
            raise RuleException(
                "Singularity directive is only allowed "
                "with shell, script, notebook or wrapper directives "
                "(not with run).",
                rule=rule,
            )
        rule.container_img = ruleinfo.container_img
    elif self.global_container_img:
        if not invalid_rule:
            # skip rules with run directive
            rule.container_img = self.global_container_img

    rule.norun = ruleinfo.norun
    rule.docstring = ruleinfo.docstring
    rule.run_func = ruleinfo.func
    rule.shellcmd = ruleinfo.shellcmd
    rule.script = ruleinfo.script
    rule.notebook = ruleinfo.notebook
    rule.wrapper = ruleinfo.wrapper
    rule.cwl = ruleinfo.cwl
    rule.restart_times = self.restart_times
    rule.basedir = self.current_basedir

    if ruleinfo.cache is True:
        if not self.enable_cache:
            logger.warning(
                "Workflow defines that rule {} is eligible for caching "
                "between workflows (use the --cache argument to enable "
                "this).".format(rule.name))
        else:
            self.cache_rules.add(rule.name)
    elif not (ruleinfo.cache is False):
        raise WorkflowError(
            "Invalid argument for 'cache:' directive. Only true allowed. "
            "To deactivate caching, remove directive.",
            rule=rule,
        )

    ruleinfo.func.__name__ = "__{}".format(rule.name)
    self.globals[ruleinfo.func.__name__] = ruleinfo.func
    setattr(rules, rule.name, RuleProxy(rule))
    if checkpoint:
        checkpoints.register(rule)
    return ruleinfo.func
def expand_wildcards(self, wildcards=None):
    """
    Expand wildcards depending on the requested output
    or given wildcards dict.
    """

    def concretize_iofile(f, wildcards):
        if not isinstance(f, _IOFile):
            return IOFile(f, rule=self)
        else:
            return f.apply_wildcards(wildcards,
                                     fill_missing=f in self.dynamic_input,
                                     fail_dynamic=self.dynamic_output)

    def _apply_wildcards(newitems, olditems, wildcards, wildcards_obj,
                         concretize=apply_wildcards,
                         ruleio=None):
        for name, item in olditems.allitems():
            start = len(newitems)
            if callable(item):
                item = item(wildcards_obj)
                if not_iterable(item):
                    item = [item]
                for item_ in item:
                    if not isinstance(item_, str):
                        raise RuleException(
                            "Input function did not return str or list of str.",
                            rule=self)
                    concrete = concretize(item_, wildcards)
                    newitems.append(concrete)
                    if ruleio is not None:
                        ruleio[concrete] = item_
            else:
                if not_iterable(item):
                    item = [item]
                for item_ in item:
                    concrete = concretize(item_, wildcards)
                    newitems.append(concrete)
                    if ruleio is not None:
                        ruleio[concrete] = item_
            if name:
                newitems.set_name(name, start, end=len(newitems))

    if wildcards is None:
        wildcards = dict()
    # TODO validate
    missing_wildcards = self.wildcard_names - set(wildcards.keys())
    if missing_wildcards:
        raise RuleException(
            "Could not resolve wildcards in rule {}:\n{}".format(
                self.name, "\n".join(missing_wildcards)),
            lineno=self.lineno,
            snakefile=self.snakefile)

    ruleio = dict()
    try:
        input = InputFiles()
        wildcards_obj = Wildcards(fromdict=wildcards)
        _apply_wildcards(input, self.input, wildcards, wildcards_obj,
                         concretize=concretize_iofile,
                         ruleio=ruleio)

        params = Params()
        _apply_wildcards(params, self.params, wildcards, wildcards_obj)

        output = OutputFiles(
            o.apply_wildcards(wildcards) for o in self.output)
        output.take_names(self.output.get_names())

        ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

        log = self.log.apply_wildcards(wildcards) if self.log else None
        return input, output, params, log, ruleio
    except WildcardError as ex:
        # this can only happen if an input contains an unresolved wildcard.
        raise RuleException(
            "Wildcards in input or log file of rule {} cannot be "
            "determined from output files:\n{}".format(self, str(ex)),
            lineno=self.lineno,
            snakefile=self.snakefile)