def code(self):
    try:
        from pygments.lexers import get_lexer_by_name
        from pygments.formatters import HtmlFormatter
        from pygments import highlight
        import pygments.util
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")
    source, language = None, None
    if self._rule.shellcmd is not None:
        source = self._rule.shellcmd
        language = "bash"
    elif self._rule.script is not None:
        logger.info("Loading script code for rule {}".format(self.name))
        _, source, language = script.get_source(self._rule.script,
                                                self._rule.basedir)
        source = source.decode()
    elif self._rule.wrapper is not None:
        logger.info("Loading wrapper code for rule {}".format(self.name))
        _, source, language = script.get_source(
            wrapper.get_script(self._rule.wrapper,
                               prefix=self._rule.workflow.wrapper_prefix))
        source = source.decode()
    try:
        lexer = get_lexer_by_name(language)
        return highlight(
            source, lexer,
            HtmlFormatter(linenos=True, cssclass="source", wrapcode=True))
    except pygments.util.ClassNotFound:
        return "<pre><code>source</code></pre>"

def code(self):
    try:
        from pygments.lexers import get_lexer_by_name
        from pygments.formatters import HtmlFormatter
        from pygments import highlight
        import pygments.util
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")
    sources, language = None, None
    if self._rule.shellcmd is not None:
        sources = [self._rule.shellcmd]
        language = "bash"
    elif self._rule.script is not None and not contains_wildcard(
            self._rule.script):
        logger.info("Loading script code for rule {}".format(self.name))
        _, source, language = script.get_source(self._rule.script,
                                                self._rule.basedir)
        sources = [source.decode()]
    elif self._rule.wrapper is not None and not contains_wildcard(
            self._rule.wrapper):
        logger.info("Loading wrapper code for rule {}".format(self.name))
        _, source, language = script.get_source(
            wrapper.get_script(self._rule.wrapper,
                               prefix=self._rule.workflow.wrapper_prefix))
        sources = [source.decode()]
    elif self._rule.notebook is not None and not contains_wildcard(
            self._rule.notebook):
        _, source, language = script.get_source(self._rule.notebook,
                                                self._rule.basedir)
        language = language.split("_")[1]
        sources = notebook.get_cell_sources(source)
    else:
        # A run directive. There is no easy way yet to obtain
        # the actual uncompiled source code.
        sources = []
        language = "python"
    try:
        lexer = get_lexer_by_name(language)
        highlighted = [
            highlight(
                source, lexer,
                HtmlFormatter(linenos=True, cssclass="source", wrapcode=True),
            )
            for source in sources
        ]
        return highlighted
    except pygments.util.ClassNotFound:
        return [
            '<pre class="source"><code>{}</code></pre>'.format(source)
            for source in sources
        ]

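For reference, a minimal standalone sketch of the Pygments calls used above. The example input and variable names are illustrative and not part of the workflow code; only the library calls (get_lexer_by_name, HtmlFormatter, highlight) mirror the method.

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_by_name
import pygments.util

# Illustrative input; any shell snippet would do.
example_source = "echo 'hello world'"

try:
    lexer = get_lexer_by_name("bash")
    # Produces an HTML <div class="source"> block with line numbers,
    # matching the formatter options used in the report code above.
    html = highlight(
        example_source, lexer,
        HtmlFormatter(linenos=True, cssclass="source", wrapcode=True))
except pygments.util.ClassNotFound:
    # Unknown language: fall back to a plain preformatted block.
    html = '<pre class="source"><code>{}</code></pre>'.format(example_source)

print(html)
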
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    singularity_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    path, source, language = get_source(path, basedir)

    ExecClass = {
        "jupyter_python": PythonJupyterNotebook,
        "jupyter_r": RJupyterNotebook,
    }.get(language, None)
    if ExecClass is None:
        raise ValueError(
            "Unsupported notebook: Expecting Jupyter Notebook (.ipynb).")

    executor = ExecClass(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        singularity_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
    )

    executor.evaluate()

def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit=None,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if urlparse(path).scheme == "":
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used.")
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path))

    if not draft:
        path, source, language = get_source(path, basedir)
    else:
        source = None

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
    )

    if draft:
        executor.draft(listen=edit)
    else:
        executor.evaluate(edit=edit)

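The edit-mode branching above boils down to two checks: the notebook must be a local path (resolved against the rule's basedir when relative), and its suffix decides the kernel language. A minimal sketch of that logic; the helper names are hypothetical and not Snakemake API.

import os
from urllib.parse import urlparse

def infer_notebook_language(path):
    # Hypothetical helper mirroring the suffix check in the function above.
    if path.endswith(".py.ipynb"):
        return "jupyter_python"
    if path.endswith(".r.ipynb"):
        return "jupyter_r"
    raise ValueError(
        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
        "to decide which programming language shall be used.")

def resolve_local_notebook(path, basedir):
    # Hypothetical helper: only URL-less (local) paths are editable,
    # and relative paths are resolved against the rule's basedir.
    if urlparse(path).scheme != "":
        raise ValueError("Only local notebooks can be edited: {}".format(path))
    return path if os.path.isabs(path) else os.path.join(basedir, path)
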
def _get_provenance_hash(self, job: Job):
    """
    Recursively calculate hash for the output of the given job
    and all upstream jobs in a blockchain fashion.

    This is based on an idea of Sven Nahnsen.
    Fails if job has more than one output file. The reason is that there
    is no way to generate a per-output file hash without generating the files.
    This hash, however, shall work without having to generate the files,
    just by describing all steps down to a given job.
    """
    if job in self._hashes:
        return self._hashes[job]

    workflow = job.dag.workflow
    h = hashlib.sha256()

    # Hash shell command or script.
    if job.is_shell:
        # We cannot use the formatted shell command, because it also contains threads,
        # resources, and filenames (which shall be irrelevant for the hash).
        h.update(job.rule.shellcmd.encode())
    elif job.is_script:
        _, source, _ = script.get_source(
            job.rule.script,
            basedir=job.rule.basedir,
            wildcards=job.wildcards,
            params=job.params,
        )
        h.update(source)
    elif job.is_notebook:
        _, source, _ = script.get_source(
            job.rule.notebook,
            basedir=job.rule.basedir,
            wildcards=job.wildcards,
            params=job.params,
        )
        h.update(source)
    elif job.is_wrapper:
        _, source, _ = script.get_source(
            wrapper.get_script(job.rule.wrapper, prefix=workflow.wrapper_prefix),
            basedir=job.rule.basedir,
            wildcards=job.wildcards,
            params=job.params,
        )
        h.update(source)

    # Hash params.
    for key, value in sorted(job.params._allitems()):
        if key is not None:
            h.update(key.encode())
        # If this raises a TypeError, we cannot calculate a reliable hash.
        try:
            h.update(json.dumps(value, sort_keys=True).encode())
        except TypeError as e:
            raise WorkflowError(
                "Rule {} cannot be cached, because params "
                "are not JSON serializable. "
                "Consider converting them into a suitable format "
                "if you are sure that caching is necessary. "
                "Otherwise, deactivate caching for this rule "
                "by removing it from the --cache command line argument "
                "or removing the cache: true directive from the rule itself."
                .format(job.rule.name),
                e,
            )

    # Hash input files that are not generated by other jobs (sorted by hash value).
    for file_hash in sorted(
            hash_file(f) for f in job.input
            if not any(f in depfiles
                       for depfiles in job.dag.dependencies[job].values())):
        h.update(file_hash.encode())

    # Hash used containers or conda environments.
    if workflow.use_conda and job.conda_env:
        if workflow.use_singularity and job.conda_env.container_img_url:
            h.update(job.conda_env.container_img_url.encode())
        h.update(job.conda_env.content)
    elif workflow.use_singularity and job.container_img_url:
        h.update(job.container_img_url.encode())

    # Generate hashes of dependencies, and add them in a blockchain fashion
    # (as input to the current hash, sorted by hash value).
    for dep_hash in sorted(
            self._get_provenance_hash(dep)
            for dep in set(job.dag.dependencies[job].keys())):
        h.update(dep_hash.encode())

    provenance_hash = h.hexdigest()

    # Store for re-use.
    self._hashes[job] = provenance_hash

    return provenance_hash

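The hash_file helper used above is not shown in this section. A plausible block-wise implementation, analogous to the inline 4K-block reading in the older variant further below, might look like this; it is a sketch only, and the actual Snakemake helper may differ.

import hashlib

def hash_file(path, blocksize=4096):
    # Sketch: hash file contents in fixed-size blocks so large input
    # files never have to be read into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(blocksize), b""):
            h.update(block)
    return h.hexdigest()
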
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    conda_base_path,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit,
    runtime_sourcecache_path,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if is_local_file(path):
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used.")
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path))

    if not draft:
        path, source, language, is_local = get_source(
            path, SourceCache(runtime_sourcecache_path), basedir, wildcards, params)
    else:
        source = None
        is_local = True
        path = infer_source_file(path)

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        conda_base_path,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
        is_local,
    )

    if edit is None:
        executor.evaluate(edit=edit)
    elif edit.draft_only:
        executor.draft()
        msg = "Generated skeleton notebook:\n{} ".format(path)
        if conda_env and not container_img:
            msg += (
                "\n\nEditing with VSCode:\nOpen notebook, run command "
                "'Select notebook kernel' (Ctrl+Shift+P or Cmd+Shift+P), and choose:"
                "\n{}\n".format(
                    str(Path(conda_env) / "bin" / executor.get_interpreter_exec())))
            msg += ("\nEditing with Jupyter CLI:"
                    "\nconda activate {}\njupyter notebook {}\n".format(
                        conda_env, path))
        logger.info(msg)
    elif draft:
        executor.draft_and_edit(listen=edit)
    else:
        executor.evaluate(edit=edit)

def _get_provenance_hash(self, job: Job):
    """
    Recursively calculate hash for the output of the given job
    and all upstream jobs in a blockchain fashion.

    This is based on an idea of Sven Nahnsen.
    Fails if job has more than one output file. The reason is that there
    is no way to generate a per-output file hash without generating the files.
    This hash, however, shall work without having to generate the files,
    just by describing all steps down to a given job.
    """
    if job in self._hashes:
        return self._hashes[job]

    workflow = job.dag.workflow
    h = hashlib.sha256()

    # Hash shell command or script.
    if job.is_shell:
        # We cannot use the formatted shell command, because it also contains threads,
        # resources, and filenames (which shall be irrelevant for the hash).
        h.update(job.rule.shellcmd.encode())
    elif job.is_script:
        _, source, _ = script.get_source(job.rule.script)
        h.update(source)
    elif job.is_wrapper:
        _, source, _ = script.get_source(
            wrapper.get_script(job.rule.wrapper, prefix=workflow.wrapper_prefix))
        h.update(source)

    # Hash params.
    for key, value in sorted(job.params._allitems()):
        h.update(key.encode())
        # If this raises a TypeError, we cannot calculate a reliable hash.
        h.update(json.dumps(value, sort_keys=True).encode())

    # Hash input files that are not generated by other jobs.
    for f in job.input:
        if not any(f in depfiles
                   for depfiles in job.dag.dependencies[job].values()):
            with open(f, "rb") as infile:
                # Read and update hash string value in blocks of 4K
                for byte_block in iter(lambda: infile.read(4096), b""):
                    h.update(byte_block)

    # Hash used containers or conda environments.
    if workflow.use_conda and job.conda_env:
        if workflow.use_singularity and job.conda_env.singularity_img_url:
            h.update(job.conda_env.singularity_img_url.encode())
        h.update(job.conda_env.content)
    elif workflow.use_singularity and job.singularity_img_url:
        h.update(job.singularity_img_url.encode())

    # Generate hashes of dependencies, and add them in a blockchain fashion
    # (as input to the current hash).
    for dep_hash in sorted(
            self._get_provenance_hash(dep)
            for dep in set(job.dag.dependencies[job].keys())):
        h.update(dep_hash.encode())

    provenance_hash = h.hexdigest()

    # Store for re-use.
    self._hashes[job] = provenance_hash

    return provenance_hash

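To illustrate the blockchain-style chaining both variants implement: each node's hash covers its own payload plus the sorted hashes of its dependencies, and results are memoized exactly as in self._hashes above. This is a self-contained toy version; the node names, payloads, and DAG structure are made up for the example.

import hashlib

# Toy DAG: node -> (payload, dependencies). Purely illustrative.
DAG = {
    "trim": ("trim_galore {input}", []),
    "map": ("bwa mem {input}", ["trim"]),
    "call": ("bcftools call {input}", ["map"]),
}

_hashes = {}

def provenance_hash(node):
    # Memoize so shared ancestors are hashed only once.
    if node in _hashes:
        return _hashes[node]
    payload, deps = DAG[node]
    h = hashlib.sha256()
    h.update(payload.encode())
    # Chain in dependency hashes, sorted so the result is order-independent.
    for dep_hash in sorted(provenance_hash(dep) for dep in deps):
        h.update(dep_hash.encode())
    _hashes[node] = h.hexdigest()
    return _hashes[node]

print(provenance_hash("call"))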