class LSFLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for LSF."""

    # Fixed copy-paste error: these are the LSF commands, not PBS.
    submit_command = List(['bsub'], config=True,
        help="The LSF submit command ['bsub']")
    delete_command = List(['bkill'], config=True,
        help="The LSF delete command ['bkill']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expression for identifying the job ID [r'\d+']")

    batch_file = Unicode('')
    # raw strings so regex escapes (\w, \d) are not parsed as string escapes
    job_array_regexp = CRegExp(r'#BSUB[ \t]-J+\w+\[\d+-\d+\]')
    job_array_template = Unicode('#BSUB -J ipengine[1-{n}]')
    queue_regexp = CRegExp(r'#BSUB[ \t]+-q[ \t]+\w+')
    queue_template = Unicode('#BSUB -q {queue}')

    def start(self, n):
        """Start n copies of the process using LSF batch system.

        This can't inherit from the base class because bsub expects
        to be piped a shell script in order to honor the #BSUB directives:
        ``bsub < script``
        """
        # Here we save profile_dir in the context so they
        # can be used in the batch script template as {profile_dir}
        self.write_batch_script(n)
        piped_cmd = self.args[0] + '<"' + self.args[1] + '"'
        self.log.debug("Starting %s: %s", self.__class__.__name__, piped_cmd)
        # bsub reads the script from stdin, hence shell=True with redirection
        p = Popen(piped_cmd, shell=True, env=os.environ, stdout=PIPE)
        output, err = p.communicate()
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id
class AutocallChecker(PrefilterChecker):
    """Decide whether a line should be dispatched to the 'auto' handler."""

    priority = Integer(1000, config=True)

    function_name_regexp = CRegExp(re_fun_name, config=True,
        help="RegExp to identify potential function names.")
    exclude_regexp = CRegExp(re_exclude_auto, config=True,
        help="RegExp to exclude strings with this start from autocalling.")

    def check(self, line_info):
        "Check if the initial word/function is callable and autocall is on."
        # Guard-clause style: bail out on the first disqualifying condition.
        if not self.shell.autocall:
            return None

        obj_info = line_info.ofind(self.shell)  # This can mutate state via getattr
        if not obj_info['found']:
            return None

        if not callable(obj_info['obj']):
            return None
        if self.exclude_regexp.match(line_info.the_rest):
            return None
        if not self.function_name_regexp.match(line_info.ifun):
            return None

        return self.prefilter_manager.get_handler_by_name('auto')
class SLURMLauncher(launcher.BatchSystemLauncher):
    """A BatchSystemLauncher subclass for SLURM"""

    submit_command = List(['sbatch'], config=True,
        help="The SLURM submit command ['sbatch']")
    # Send SIGKILL instead of term, otherwise the job is "CANCELLED", not
    # "FINISHED"
    delete_command = List(['scancel', '--signal=KILL'], config=True,
        help="The SLURM delete command ['scancel']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help="A regular expression used to get the job id from the output of 'sbatch'")

    # Fixed copy-pasted help: this trait holds the path of the written
    # script, not the template (cf. BatchSystemLauncher.batch_file).
    batch_file = Unicode(u'', config=True,
        help="The full path to the instantiated batch script.")

    # raw string so regex escapes (\W, \w) are not parsed as string escapes
    queue_regexp = CRegExp(r'#SBATCH\W+-p\W+\w')
    queue_template = Unicode('#SBATCH -p {queue}')
class PBSLauncher(BatchSystemLauncher):
    """A BatchSystemLauncher subclass for PBS."""

    submit_command = List(['qsub'], config=True,
        help="The PBS submit command ['qsub']")
    # Fixed copy-paste error in help: the delete command is qdel, not qsub.
    delete_command = List(['qdel'], config=True,
        help="The PBS delete command ['qdel']")
    job_id_regexp = CRegExp(r'\d+', config=True,
        help=r"Regular expression for identifying the job ID [r'\d+']")

    batch_file = Unicode('')
    # raw strings so regex escapes (\W, \w, \d, \$) are not parsed as string escapes
    job_array_regexp = CRegExp(r'#PBS\W+-t\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -t 1-{n}')
    queue_regexp = CRegExp(r'#PBS\W+-q\W+\$?\w+')
    queue_template = Unicode('#PBS -q {queue}')
class BatchSystemLauncher(BaseLauncher): """Launch an external process using a batch system. This class is designed to work with UNIX batch systems like PBS, LSF, GridEngine, etc. The overall model is that there are different commands like qsub, qdel, etc. that handle the starting and stopping of the process. This class also has the notion of a batch script. The ``batch_template`` attribute can be set to a string that is a template for the batch script. This template is instantiated using string formatting. Thus the template can use {n} fot the number of instances. Subclasses can add additional variables to the template dict. """ # Subclasses must fill these in. See PBSEngineSet submit_command = List([''], config=True, help="The name of the command line program used to submit jobs.") delete_command = List([''], config=True, help="The name of the command line program used to delete jobs.") job_id_regexp = CRegExp('', config=True, help="""A regular expression used to get the job id from the output of the submit_command.""") batch_template = Unicode('', config=True, help="The string that is the batch script template itself.") batch_template_file = Unicode('', config=True, help="The file that contains the batch template.") batch_file_name = Unicode('batch_script', config=True, help="The filename of the instantiated batch script.") queue = Unicode('', config=True, help="The PBS Queue.") def _queue_changed(self, name, old, new): self.context[name] = new n = Integer(1) _n_changed = _queue_changed # not configurable, override in subclasses # PBS Job Array regex job_array_regexp = CRegExp('') job_array_template = Unicode('') # PBS Queue regex queue_regexp = CRegExp('') queue_template = Unicode('') # The default batch template, override in subclasses default_template = Unicode('') # The full path to the instantiated batch script. 
batch_file = Unicode('') # the format dict used with batch_template: context = Dict() def _context_default(self): """load the default context with the default values for the basic keys because the _trait_changed methods only load the context if they are set to something other than the default value. """ return dict(n=1, queue='', profile_dir='', cluster_id='') # the Formatter instance for rendering the templates: formatter = Instance(EvalFormatter, (), {}) def find_args(self): return self.submit_command + [self.batch_file] def __init__(self, work_dir='.', config=None, **kwargs): super(BatchSystemLauncher, self).__init__( work_dir=work_dir, config=config, **kwargs ) self.batch_file = os.path.join(self.work_dir, self.batch_file_name) def parse_job_id(self, output): """Take the output of the submit command and return the job id.""" m = self.job_id_regexp.search(output) if m is not None: job_id = m.group() else: raise LauncherError("Job id couldn't be determined: %s" % output) self.job_id = job_id self.log.info('Job submitted with job id: %r', job_id) return job_id def write_batch_script(self, n): """Instantiate and write the batch script to the work_dir.""" self.n = n # first priority is batch_template if set if self.batch_template_file and not self.batch_template: # second priority is batch_template_file with open(self.batch_template_file) as f: self.batch_template = f.read() if not self.batch_template: # third (last) priority is default_template self.batch_template = self.default_template # add jobarray or queue lines to user-specified template # note that this is *only* when user did not specify a template. 
# print self.job_array_regexp.search(self.batch_template) if not self.job_array_regexp.search(self.batch_template): self.log.debug("adding job array settings to batch script") firstline, rest = self.batch_template.split('\n',1) self.batch_template = '\n'.join([firstline, self.job_array_template, rest]) # print self.queue_regexp.search(self.batch_template) if self.queue and not self.queue_regexp.search(self.batch_template): self.log.debug("adding PBS queue settings to batch script") firstline, rest = self.batch_template.split('\n',1) self.batch_template = '\n'.join([firstline, self.queue_template, rest]) script_as_string = self.formatter.format(self.batch_template, **self.context) self.log.debug('Writing batch script: %s', self.batch_file) with open(self.batch_file, 'w') as f: f.write(script_as_string) os.chmod(self.batch_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) def start(self, n): """Start n copies of the process using a batch system.""" self.log.debug("Starting %s: %r", self.__class__.__name__, self.args) # Here we save profile_dir in the context so they # can be used in the batch script template as {profile_dir} self.write_batch_script(n) output = check_output(self.args, env=os.environ) job_id = self.parse_job_id(output) self.notify_start(job_id) return job_id def stop(self): output = check_output(self.delete_command+[self.job_id], env=os.environ) self.notify_stop(dict(job_id=self.job_id, output=output)) # Pass the output of the kill cmd return output
class WindowsHPCLauncher(BaseLauncher):
    """Launch jobs via the Windows HPC scheduler's ``job`` command."""

    job_id_regexp = CRegExp(r'\d+', config=True,
        help="""A regular expression used to get the job id from the output of the submit_command. """)
    job_file_name = Unicode('ipython_job.xml', config=True,
        help="The filename of the instantiated job script.")
    # The full path to the instantiated job script. This gets made dynamically
    # by combining the work_dir with the job_file_name.
    # NOTE(review): this trait is shadowed by the `job_file` property defined
    # below (the property wins at class creation); kept for compatibility.
    job_file = Unicode('')
    scheduler = Unicode('', config=True,
        help="The hostname of the scheduler to submit the job to.")
    job_cmd = Unicode(find_job_cmd(), config=True,
        help="The command for submitting jobs.")

    def __init__(self, work_dir='.', config=None, **kwargs):
        super(WindowsHPCLauncher, self).__init__(
            work_dir=work_dir, config=config, **kwargs
        )

    @property
    def job_file(self):
        """Full path of the job XML file (work_dir/job_file_name)."""
        return os.path.join(self.work_dir, self.job_file_name)

    def write_job_file(self, n):
        """Subclasses must write the job description XML to self.job_file."""
        raise NotImplementedError("Implement write_job_file in a subclass.")

    def find_args(self):
        return ['job.exe']

    def parse_job_id(self, output):
        """Take the output of the submit command and return the job id."""
        m = self.job_id_regexp.search(output)
        if m is not None:
            job_id = m.group()
        else:
            raise LauncherError("Job id couldn't be determined: %s" % output)
        self.job_id = job_id
        self.log.info('Job started with id: %r', job_id)
        return job_id

    def start(self, n):
        """Start n copies of the process using the Win HPC job scheduler."""
        self.write_job_file(n)
        args = [
            'submit',
            '/jobfile:%s' % self.job_file,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.debug("Starting Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))

        output = check_output([self.job_cmd] + args,
            env=os.environ,
            cwd=self.work_dir,
            stderr=STDOUT
        )
        job_id = self.parse_job_id(output)
        self.notify_start(job_id)
        return job_id

    def stop(self):
        """Cancel the job; best-effort (never raises if the job is gone)."""
        args = [
            'cancel',
            self.job_id,
            '/scheduler:%s' % self.scheduler
        ]
        self.log.info("Stopping Win HPC Job: %s" % (self.job_cmd + ' ' + ' '.join(args),))
        try:
            output = check_output([self.job_cmd] + args,
                env=os.environ,
                cwd=self.work_dir,
                stderr=STDOUT
            )
        except Exception:
            # Narrowed from a bare `except:`; deliberate best-effort — the job
            # may already have finished or been cancelled.
            output = 'The job already appears to be stopped: %r' % self.job_id
        # Pass the output of the kill cmd
        self.notify_stop(dict(job_id=self.job_id, output=output))
        return output
class SGELauncher(PBSLauncher):
    """Sun GridEngine is a PBS clone with slightly different syntax"""

    # raw strings so regex escapes (\$, \W, \w) are not parsed as string escapes
    job_array_regexp = CRegExp(r'#\$\W+\-t')
    job_array_template = Unicode('#$ -t 1-{n}')
    queue_regexp = CRegExp(r'#\$\W+-q\W+\$?\w+')
    queue_template = Unicode('#$ -q {queue}')
class CRegExpTrait(HasTraits):
    """Minimal HasTraits holder for a CRegExp trait."""
    # presumably used to exercise CRegExp validation/coercion in tests — TODO confirm
    value = CRegExp(r'')
class Knitpy(LoggingConfigurable):
    """Engine used to convert from python markdown (``*.pymd``) to html/latex/..."""

    keep_md = Bool(False, config=True,
        help="""Whether to keep the temporary md files""")
    log_to_file = Bool(False, config=True,
        help="""Whether to send the log to a file""")
    extra_document_configs = List(default_value=[], config=True,
        help="Additional configurations for FinalOutputDocuments")
    default_export_format = CaselessStrEnum(VALID_OUTPUT_FORMAT_NAMES,
        default_value=DEFAULT_OUTPUT_FORMAT_NAME, config=True,
        help="""The export format to be used (can't by from extra_document_configs!).""")
    kernel_debug = Bool(False, config=True,
        help="""Whether to output kernel messages to the (debug) log""")
    timeout = Integer(10, config=True, help="timeout for individual code executions")

    # Things for the parser...
    chunk_begin = CRegExpMultiline(r'^\s*```+\s*{[.]?(?P<engine>[a-z]+)\s*(?P<args>.*)}\s*$',
        config=True, help="chunk begin regex (must include the named "
                          "group 'engine' and 'args'")
    chunk_end = CRegExpMultiline(r'^\s*```+\s*$', config=True, help="chunk end regex")
    inline_code = CRegExpMultiline(r'`(?P<engine>[a-z]+) +([^`]+)\s*`',
        config=True, help="inline code regex (must include a named group 'engine')")
    comment_line = CRegExp(r'^\s*#', config=True, help="comment line regex")
    yaml_separator = CRegExpMultiline(r"^---\s*$", config=True,
        help="separator for the yaml metadata")

    def __init__(self, **kwargs):
        super(Knitpy, self).__init__(**kwargs)
        self.init_kernel_manager()
        self.init_engines()
        self.init_output_configurations()

    def init_kernel_manager(self):
        """Set up the (multi-)kernel manager and the kernel-client cache."""
        self._km = MultiKernelManager(log=self.log, parent=self)
        self._ksm = KernelSpecManager(log=self.log, parent=self)
        self._kernels = {}
        #ksm.find_kernel_specs()

    def init_engines(self):
        """Register the available language engines (currently only python)."""
        self._engines = {}
        self._engines["python"] = PythonKnitpyEngine(parent=self)
        # TODO: check that every kernel_name is in ksm.find_kernel_specs()

    def init_output_configurations(self):
        """Build the output-format registry, keyed by both name and alias."""
        self._outputs = {}
        for config in DEFAULT_FINAL_OUTPUT_FORMATS:
            fod = FinalOutputConfiguration(parent=self, **config)
            self._outputs[config["name"]] = fod
            self._outputs[config["alias"]] = fod
        for config in self.extra_document_configs:
            fod = FinalOutputConfiguration(parent=self, **config)
            self._outputs[config["name"]] = fod
            self._outputs[config["alias"]] = fod

    def parse_document(self, input):
        """Parse a file path or raw text; return (parsed blocks, metadata dict)."""
        if os.path.exists(input):
            filename = input
            f = codecs.open(filename, 'r', 'UTF-8')
            doc = f.read()
        else:
            doc = input
            filename = "anonymous_input"

        # the yaml can stay in the doc, pandoc will remove '---' blocks
        # pandoc will also do it's own interpretation and use title/author and so on...
        # ToDo: not sure of that should stay or if we should start with clean metadata
        # title, author, date
        # title: "A first try"
        # author: "Jan Schulz"
        # date: "Monday, February 23, 2015"
        # default values
        metadata = {"title": filename, "author": getpass.getuser(),
                    "date": datetime.datetime.now().strftime("%A, %B %d, %Y")}

        pos = 0
        start = self.yaml_separator.search(doc, pos)
        if not start is None:
            end = self.yaml_separator.search(doc, start.end())
            if end is None:
                raise ParseException("Found no metadata end separator.")
            try:
                # NOTE(review): yaml.load without SafeLoader executes arbitrary
                # constructors on untrusted documents — consider yaml.safe_load.
                res = yaml.load(doc[start.end():end.start()])
                self.log.debug("Metadata: %s", res)
                metadata.update(res)
            except Exception as e:
                raise ParseException("Malformed metadata: %s" % str(e))

        parsed_doc = self._parse_blocks(doc)
        return parsed_doc, metadata

    def _parse_blocks(self, doc):
        """Split the document into (TTEXT|TBLOCK|TINLINE, payload) tuples."""
        result = []
        doc_pos = 0
        blocks = self.chunk_begin.finditer(doc)
        for block_start in blocks:
            # process the text before the match
            text = doc[doc_pos:block_start.start()]
            self._parse_inline(text, result)
            # TODO: somehow a empty line before a codeblock vanishes, so add one here
            result.append((TTEXT, "\n"))
            # now the block itself
            # find the end of the block
            block_end = self.chunk_end.search(doc, block_start.end())
            if block_end is None:
                raise ParseException("Found no end for the block starting at pos %s" %
                                     block_start.end())
            result.append((TBLOCK, (doc[block_start.end():block_end.start()],
                                    block_start.groupdict())))
            doc_pos = block_end.end()
        # text after the last block
        self._parse_inline(doc[doc_pos:], result)
        return result

    def _parse_inline(self, text, result):
        """Append TTEXT/TINLINE tuples for a text span containing inline code."""
        text_pos = 0
        for inline in self.inline_code.finditer(text):
            # text before inline code
            result.append((TTEXT, text[text_pos:inline.start()]))
            # inline code: strip the leading "`<engine> " and the trailing backtick
            engine_offset = len(inline.group('engine')) + 1
            result.append((TINLINE, (text[inline.start() + engine_offset + 1:inline.end() - 1],
                                     inline.groupdict())))
            text_pos = inline.end()
        # text after the last inline code
        result.append((TTEXT, text[text_pos:]))

    def _all_lines_comments(self, lines):
        """Return True if every line of *lines* matches the comment regex."""
        for line in lines.split("\n"):
            if not self.comment_line.match(line):
                return False
        return True

    def convert(self, parsed, output):
        """Execute all parsed entries and write the result into *output*."""
        context = ExecutionContext(output=output)
        for entry in parsed:
            if entry[0] == TBLOCK:
                context.mode = "block"
                self._process_code(entry[1], context=context)
            elif entry[0] == TINLINE:
                context.mode = "inline"
                self._process_code(entry[1], context=context)
            elif entry[0] == TTEXT:
                output.add_text(entry[1])
            else:
                raise ParseException("Found something unexpected: %s" % entry)
        # process_code opened kernels, so close them here
        self._km.shutdown_all()
        # workaround for https://github.com/ipython/ipython/issues/8007
        # FIXME: remove if IPython >3.0 is in require
        self._km._kernels.clear()
        self._kernels = {}
        return output

    def _process_code(self, input, context):
        """Execute one code chunk/inline snippet: parse its args, split the code
        into complete statements via the kernel's is_complete, and run them."""
        context.execution_started()

        # setup the execution context
        code = input[0]
        intro = input[1]
        engine_name = intro["engine"]
        raw_args = intro.get("args", "")
        args = self._parse_args(raw_args)
        # for compatibility with knitr, where python is specified via "{r engine='python'}"
        if "engine" in args:
            engine_name = args.pop("engine")
        self.log.debug("Running on engine: %s", engine_name)
        try:
            engine = self._engines[engine_name]
        except:
            raise ParseException("Unknown codeblock type: %s" % engine_name)
        assert not engine is None, "Engine is None"
        context.engine = engine

        # Enable the plotting formats in the kernel once per engine/document.
        if not engine.name in context.enabled_documents:
            plotting_formats = context.output.export_config.accepted_image_formats
            plot_code = engine.get_plotting_format_code(plotting_formats)
            self._run_silently(context.engine.kernel, plot_code)
            context.enabled_documents.append(engine.name)
            self.log.info("Enabled image formats '%s' in engine '%s'.",
                          plotting_formats, engine.name)

        # configure the context
        if "echo" in args:
            context.echo = args.pop("echo")

        # eval=False means that we don't execute the block at all
        if "eval" in args:
            _eval = args.pop("eval")
            if _eval is False:
                if context.echo:
                    code = code.replace(os.linesep, "\n").lstrip("\n")
                    context.output.add_code(code, language=engine.language)
                return

        if "results" in args:
            context.results = args.pop("results")

        if "include" in args:
            context.include = args.pop("include")

        if "chunk_label" in args:
            context.chunk_label = args.pop("chunk_label")
        else:
            context.chunk_label = u"unnamed-chunk-%s" % context.chunk_number

        if "comment" in args:
            context.comment = args.pop("comment")

        if args:
            self.log.debug("Found unhandled args: %s", args)

        lines = ''
        code_lines = code.split('\n')
        space_re = re.compile(r'^([\s]+)')
        # stack of leading-whitespace strings for currently-open indented blocks
        spaces = []

        # TODO: this whole "merge with the next line" should be rewritten as a generator
        def loop_continues(line_no):
            # Return True if a later code line still belongs to the open block.
            if len(code_lines) <= line_no:
                return False
            candidate = code_lines[line_no]
            # comments should be swallowed if a line further down has code in it with the
            # right number of spaces in front
            while candidate.strip() == "" or self._all_lines_comments(candidate):
                line_no += 1
                if len(code_lines) <= line_no:
                    return False
                candidate = code_lines[line_no]

            # the next code line must have either the same number of spaces (still in a loop),
            # or less spaces as in 'spaces' (nested loop) or none (end of loop). If more spaces
            # are found or different types of spaces, this will result in an error which will be
            # shown when the code is executed...
            while spaces:
                possible_space = spaces[-1]
                if candidate[:len(possible_space)] == possible_space:
                    # ok, we are at the "right" level of space
                    return True
                # not our "space", so remove it and try the one one nesting above
                spaces.pop()
            return False

        for line_no in range(len(code_lines)):
            cur_line = code_lines[line_no]
            lines = lines + cur_line

            # check if we are in a loop and if so, if the next line also belongs to this loop
            # this only catches the case where we are *in* a loop and not the loop start (the line
            # with a ':' in it. That line is catched by the is_complete call below. nested loops
            # are also catched due to the space in front of it
            m = space_re.match(cur_line)
            if m:
                cur_space = m.group(0)
                spaces.append(cur_space)
                if loop_continues(line_no + 1):
                    lines += "\n"
                    continue
            if spaces:
                # we are in a loop, as spaces has some spaces in it, but the code above didn't find
                # any spaces in front of the line -> this is the case when loop_continues found a
                # new codeline from this loop after a comment with different spaces in front of
                # it or an empty line. This could be such an empty/comment line and we have to
                # look at the next line as well!
                if cur_line.strip() == "" or self._all_lines_comments(cur_line):
                    lines += "\n"
                    continue

            # we have a block of code, including all lines of a loop
            msg = engine.kernel.is_complete(lines + "\n\n")
            reply = engine.kernel.get_shell_msg(timeout=self.timeout)
            assert reply['msg_type'] == 'is_complete_reply', str(reply)
            if self.kernel_debug:
                self.log.debug("completion_request: %s", msg)
            if reply['content']['status'] == 'complete':
                if lines.strip() == "":
                    # No requests for "no code"
                    lines = ""
                    continue
                elif self._all_lines_comments(lines):
                    # comments should go to to the next code block
                    lines += "\n"
                    continue
                # run the lines
                self._run_lines(lines + "\n", context)
                lines = ""
            elif reply['content']['status'] == 'invalid':
                # TODO: not sure how this should be handled
                # Either abort execution of the whole file or just retry with the next line?
                # However this should be handled via a user message
                self.log.info("Code invalid:\n%s", lines)
                context.output.add_code(lines, language=engine.language)
                context.output.add_execution_error("Code invalid")
                lines = ""
            else:
                # the "incomplete" case: don't run anything wait for the next line
                lines += "\n"

        # This can only happen if the last line is incomplete
        # This will always result in an error!
        if lines.strip() != "":
            self._run_lines(lines, context)

        context.execution_finished()

    def _parse_args(self, raw_args):
        """Parse knitr-style chunk options ("label, key=value, ...") into a dict."""
        # Todo: knitr interprets all values, so code references are possible
        # This also means that we have to do args parsing at interpretation time, so that
        # variable from other code can be taken into account..
        args = {}
        if raw_args.strip() == "":
            return args

        converter = {
            "True": True,
            "False": False,
            "None": None,
            "T": True,  # Rs True/False
            "F": False,
            "TRUE": True,
            "FALSE": False,
            # treat Rs NA as None, probably overkill to look for float("NA")
            "NA": None,
            "NULL": None,
        }

        # The first is special as that can be the name of the chunk
        first = True
        for arg in raw_args.split(","):
            arg = arg.strip()
            if not "=" in arg:
                if not first:
                    raise ParseException("Malformed options for code chunk: "
                                         "'%s' in '%s'" % (arg, raw_args))
                args["chunk_label"] = arg
                continue
            first = False
            label, value = arg.split("=")
            v = value.strip()
            # convert to real types.
            # TODO: Should be done by submitting the whole thing to the kernel, like knitr does
            # -> variables form one codecell can be used in the args of the next one ...
            if (v[0] == '"' and v[-1] == '"'):
                v = v[1:-1]
            elif (v[0] == "'" and v[-1] == "'"):
                v = v[1:-1]
            elif v in converter:
                v = converter[v]
            else:
                try:
                    v = int(v)
                except:
                    self.log.error("Could not decode option value: "
                                   "'%s=%s'. Discarded...", label, v)
                    continue
            args[label.strip()] = v
        return args

    def _run_lines(self, lines, context):
        """Execute *lines* on the context's kernel and route all resulting
        IOPub messages through _handle_return_message."""
        kernel = context.engine.kernel
        msg_id = kernel.execute(lines)
        if self.kernel_debug:
            self.log.debug("Executing lines (msg_id=%s):\n%s", msg_id, lines)
        # wait for finish, with timeout
        # At first we have to wait until the kernel tells us it is finished with running the code
        while True:
            try:
                msg = kernel.shell_channel.get_msg(timeout=self.timeout)
                if self.kernel_debug:
                    self.log.debug("shell msg: %s", msg)
            except Empty:
                # This indicates that something bad happened, as AFAIK this should return...
                self.log.error("Timeout waiting for execute reply")
                raise KnitpyException("Timeout waiting for execute reply.")
            if msg['parent_header'].get('msg_id') == msg_id:
                # It's finished, and we got our reply, so next look at the results
                break
            else:
                # not our reply
                self.log.debug("Discarding message from a different client: %s" % msg)
                continue

        # Now look at the results of our code execution and earlier completion requests
        # We handle messages until the kernel indicates it's ide again
        status_idle_again = False
        while True:
            try:
                msg = kernel.get_iopub_msg(timeout=self.timeout)
            except Empty:
                # There should be at least some messages: we just executed code!
                # The only valid time could be when the timeout happened too early (aka long
                # running code in the document) -> we handle that below
                self.log.warn("Timeout waiting for expected IOPub output")
                break
            if msg['parent_header'].get('msg_id') != msg_id:
                if msg['parent_header'].get(u'msg_type') != u'is_complete_request':
                    # not an output from our execution and not one of the complete_requests
                    self.log.debug("Discarding output from a different client: %s" % msg)
                else:
                    # complete_requests are ok
                    pass
                continue

            # Here we have some message which corresponds to our code execution
            msg_type = msg['msg_type']
            content = msg['content']

            # The kernel indicates some status: executing -> idle
            if msg_type == 'status':
                if content['execution_state'] == 'idle':
                    # When idle, the kernel has executed all input
                    status_idle_again = True
                    break
                else:
                    # the "starting execution" messages
                    continue
            elif msg_type == 'clear_output':
                # we don't handle that!?
                self.log.debug("Discarding unexpected 'clear_output' message: %s" % msg)
                continue
            ## So, from here on we have a messages with real content
            if self.kernel_debug:
                self.log.debug("iopub msg (%s): %s", msg_type, msg)
            if context.include:
                self._handle_return_message(msg, context)

        if not status_idle_again:
            self.log.error("Code lines didn't execute in time. Don't use long-running code in "
                           "documents or increase the timeout!")
            self.log.error("line(s): %s" % lines)

    def _handle_return_message(self, msg, context):
        """Render one IOPub message (stream/result/display_data/error) into the
        output document, honoring the context's results/echo settings."""
        if context.mode == "inline":
            #self.log.debug("inline: %s" % msg)
            if msg["msg_type"] == "execute_result":
                context.output.add_text(_plain_text(msg["content"]))
        elif context.mode == "block":
            #self.log.debug("block: %s" % msg)
            type = msg["msg_type"]
            if type == "execute_input":
                if context.echo:
                    context.output.add_code(_code(msg[u'content']),
                                            language=context.engine.language)
            elif type == "stream":
                # {u'text': u'a\nb\nc\n', u'name': u'stdout'}
                # TODO: format stdout and stderr differently?
                txt = msg["content"].get("text", "")
                if txt.strip() == "":
                    return
                if context.results == 'markup':
                    context.output.add_output(txt)
                elif context.results == 'asis':
                    context.output.add_asis(txt)
                elif context.results == 'hide':
                    return
                else:
                    # TODO: implement a caching system... again...
                    self.log.warn("Can't handle results='hold' yet, falling back to 'markup'.")
                    context.output.add_output(txt)
            elif (type == "execute_result") or (type == "display_data"):
                if context.results == 'hide':
                    return
                if context.results == 'hold':
                    self.log.warn("Can't handle results='hold' yet, falling back to 'markup'.")

                # Here we handle the output from the IPython display framework.
                # 1. If a object has a _display_ipython(), that will be called. This method should
                #    publish (one) display_data message and return -> the content ends up in
                #    "display_data" msg and the "executive_result" has no data
                # 2. else try different IPython.core.formatters for the object, which basically
                #    call the right _repr_<whatever>_ method to get a formated string in that
                #    mimetype. This is added as alternatives under content.data of the
                #    "executive_result".

                # data has/can have multiple types of the same message
                data = msg[u"content"][u'data']
                #self.log.debug(str(data))

                # handle plots
                #self.log.debug("Accepted image mimetypes: %s",
                #               context.output.export_config.accepted_image_mimetypes)
                for mime_type in context.output.export_config.accepted_image_mimetypes:
                    mime_data = data.get(mime_type, None)
                    if mime_data is None:
                        self.log.debug("No image found: %s", mime_type)
                        continue
                    try:
                        self.log.debug("Trying to include image...")
                        context.output.add_image(mime_type, mime_data, title="")
                    except KnitpyOutputException as e:
                        self.log.info("Couldn't include image: %s", e)
                        continue
                    return

                # now try some marked up text formats
                for mime_type in context.output.markup_mimetypes:
                    mime_data = data.get(mime_type, None)
                    if mime_data is None:
                        continue
                    try:
                        self.log.debug("Trying to include markup text...")
                        context.output.add_markup_text(mime_type, mime_data)
                    except KnitpyOutputException as e:
                        self.log.info("Couldn't include markup text: %s", e)
                        continue
                    return

                # as a last resort, try plain text...
                if u'text/plain' in data:
                    txt = data.get(u"text/plain", "")
                    if txt != "":
                        if context.results == 'markup':
                            context.output.add_output(txt)
                            if txt[-1] != "\n":
                                context.output.add_output("\n")
                        elif context.results == 'asis':
                            context.output.add_asis(txt)
                            if txt[-1] != "\n":
                                context.output.add_asis("\n")
                        return

                # If we are here, we couldn't handle any of the more specific data types
                # and didn't find any output text
                excuse = "\n(Found data of type '{}', but couldn't handle it)\n"
                context.output.add_output(excuse.format(data.keys()))
            elif (type == "error"):
                ename = msg["content"].get("ename", "unknown exception")
                evalue = msg["content"].get("evalue", "unknown exception value")
                tb = msg["content"].get("traceback", "<not available>")
                if not is_string(tb):
                    # remove the first line...
                    tb = "\n".join(tb[1:])
                self.log.info(tb)
                #there are ansi escape sequences in the traceback, which kills pandoc :-(
                if u"\x1b[1;32m" in tb:
                    tb = "!! traceback unavailable due to included color sequences;\n" \
                         "!! execute `%colors NoColor` once before this line to remove them!"
                context.output.add_execution_error("%s: %s" % (ename, evalue), tb)
            else:
                self.log.debug("Ignored msg of type %s" % type)

    def _run_silently(self, kc, lines):
        """Execute *lines* without producing document output; drain IOPub."""
        try:
            msg_id = kc.execute(lines + "\n\n", silent=self.kernel_debug,
                                store_history=False)
            self.log.debug("Executed silent code: %s", lines)
            reply = kc.get_shell_msg(timeout=self.timeout)
            assert reply['parent_header'].get('msg_id') == msg_id, "Wrong reply! " + str(reply)
            if self.kernel_debug:
                self.log.debug("Silent code shell reply: %s", reply)
        except Empty:
            self.log.error("Code took too long:\n %s", lines)

        # now empty the iopub channel (there is at least a "starting" message)
        while True:
            try:
                msg = kc.get_iopub_msg(timeout=0.1)
                if self.kernel_debug:
                    self.log.debug("Silent code iopub msg: %s", msg)
            except Empty:
                break

    def _get_kernel(self, engine):
        """Return (starting if needed) the cached kernel client for *engine*."""
        kernel_name = engine.kernel_name
        kernel_startup_lines = engine.startup_lines

        if not kernel_name in self._kernels:
            self.log.info("Starting a new kernel: %s" % kernel_name)
            kernelid = self._km.start_kernel(kernel_name=kernel_name)
            #km.list_kernel_ids()
            kn = self._km.get_kernel(kernelid)
            kc = kn.client()
            self._kernels[kernel_name] = kc
            # now initalize the channels
            kc.start_channels()
            kc.wait_for_ready()
            self._run_silently(kc, kernel_startup_lines)
            self.log.info("Executed kernel startup lines for engine '%s'.", engine.name)
        return self._kernels[kernel_name]

    def get_output_format(self, fmt_name, config=None):
        """Return a copy of the named output format, updated with *config*."""
        self._ensure_valid_output(fmt_name)
        fod = self._outputs.get(fmt_name).copy()
        # self.log.info("%s: %s", fmt_name, config)
        if not config:
            pass
        elif isinstance(config, dict):
            fod.update(**config)
        elif config == "default":
            # html_document: default
            pass
        else:
            self.log.error("Unknown config for document '%s': '%s'. Ignored...",
                           fmt_name, config)
        return fod

    def _knit(self, input, outputdir_name, final_format="html", config=None):
        """Internal function to aid testing"""
        parsed, metadata = self.parse_document(input)  # sets kpydoc.parsed and
        final_format = self.get_output_format(final_format, config=config)
        md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                          export_config=final_format,
                                          log=self.log, parent=self)
        # get the temporary md file
        self.convert(parsed, md_temp)
        return md_temp.content

    def render(self, filename, output=None):
        """
        Convert the filename to the given output format(s)
        """
        # Export each documents
        conversion_success = 0
        converted_docs = []

        # save here to change back after the conversation.
        orig_cwd = os.getcwd()
        needs_chdir = False

        # expand $HOME and so on...
        filename = expand_path(filename)
        filename = os.path.abspath(filename)
        self.log.info("Converting %s..." % filename)

        basedir = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]
        # It's easier if we just change wd to the dir of the file
        if unicode_type(basedir) != py3compat.getcwd():
            os.chdir(basedir)
            needs_chdir = True
            self.log.info("Changing to working dir: %s" % basedir)
            filename = os.path.basename(filename)

        outputdir_name = os.path.splitext(basename)[0] + "_files"

        # parse the input document
        parsed, metadata = self.parse_document(filename)

        # get the output formats
        # order: kwarg overwrites default overwrites document
        output_formats = [self._outputs[self.default_export_format]]
        if output is None:
            self.log.debug("Converting to default output format [%s]!" %
                           self.default_export_format)
        elif output == "all":
            outputs = metadata.get("output", None)
            # if nothing is specified, we keep the default
            if outputs is None:
                self.log.debug("Did not find any specified output formats: using only default!")
            else:
                output_formats = []
                for fmt_name, config in iteritems(outputs):
                    fod = self.get_output_format(fmt_name, config)
                    output_formats.append(fod)
                self.log.debug("Converting to all specified output formats: %s" %
                               [fmt.name for fmt in output_formats])
        else:
            self._ensure_valid_output(output)
            output_formats = [self._outputs[output]]

        for final_format in output_formats:
            self.log.info("Converting document %s to %s", filename, final_format.name)

            # TODO: build a proper way to specify final output...
            md_temp = TemporaryOutputDocument(fileoutputs=outputdir_name,
                                              export_config=final_format,
                                              log=self.log, parent=self)

            # get the temporary md file
            self.convert(parsed, md_temp)

            if final_format.keep_md or self.keep_md:
                mdfilename = basename + "." + final_format.name + ".md"
                self.log.info("Saving the temporary markdown as '%s'." % mdfilename)
                # TODO: remove the first yaml metadata block and
                # put "#<title>\n<author>\n<date>" before the rest
                with codecs.open(mdfilename, 'w+b', 'UTF-8') as f:
                    f.write(md_temp.content)

            # convert the md file to the final filetype
            input_format = "markdown" \
                           "+autolink_bare_uris" \
                           "+ascii_identifiers" \
                           "+tex_math_single_backslash-implicit_figures" \
                           "+fenced_code_attributes"

            extra = ["--smart",  # typographically correct output (curly quotes, etc)
                     "--email-obfuscation", "none",  # do not obfuscation email names with javascript
                     "--self-contained",  # include img/scripts as data urls
                     "--standalone",  # html with header + footer
                     "--section-divs",
                     ]

            outfilename = basename + "." + final_format.file_extension

            # exported is irrelevant, as we pass in a filename
            exported = pandoc(source=md_temp.content,
                              to=final_format.pandoc_export_format,
                              format=input_format,
                              extra_args=extra,
                              outputfile=outfilename)
            self.log.info("Written final output: %s" % outfilename)
            converted_docs.append(os.path.join(basedir, outfilename))

        if needs_chdir:
            os.chdir(orig_cwd)
        return converted_docs

    def _ensure_valid_output(self, fmt_name):
        """Raise KnitpyException if *fmt_name* is not a registered output format."""
        if fmt_name in self._outputs:
            return
        raise KnitpyException("Format '%s' is not a valid output format!" % fmt_name)
class PBSPROLauncher(launcher.PBSLauncher):
    """A BatchSystemLauncher subclass for PBSPro."""

    # raw string so regex escapes (\W, \w, \d, \$) are not parsed as string escapes
    job_array_regexp = CRegExp(r'#PBS\W+-J\W+[\w\d\-\$]+')
    job_array_template = Unicode('#PBS -J 1-{n}')