def read(cls, lines):
    # first line
    start_line = lines.lineno + 1
    line = cls.convert_leading_tabs(next(lines).lstrip()).split(">", 1)[1]
    if len(line) > 0 and line[0] == " ":
        line = line[1:]
    line_buffer = [line]

    # set booleans
    in_code_fence = CodeFence.start(line)
    in_block_code = BlockCode.start(line)
    blank_line = line.strip() == ""

    # loop
    next_line = lines.peek()
    while not cls.transition(next_line):
        stripped = cls.convert_leading_tabs(next_line.lstrip())
        prepend = 0
        if stripped[0] == ">":
            # has leader, not lazy continuation
            prepend += 1
            if stripped[1] == " ":
                prepend += 1
            stripped = stripped[prepend:]
            in_code_fence = CodeFence.start(stripped)
            in_block_code = BlockCode.start(stripped)
            blank_line = stripped.strip() == ""
            line_buffer.append(stripped)
        elif in_code_fence or in_block_code or blank_line:
            # not paragraph continuation text
            break
        else:
            # lazy continuation, preserve whitespace
            line_buffer.append(next_line)
        next(lines)
        next_line = lines.peek()

    # block level tokens are parsed here, so that link_definitions
    # in quotes can be recognized before span-level tokenizing.
    Paragraph.parse_setext = False
    try:
        child_tokens = tokenizer.tokenize_block(
            SourceLines(line_buffer, start_line=start_line)
        )
    finally:
        Paragraph.parse_setext = True

    return cls(
        children=child_tokens,
        position=Position.from_source_lines(lines, start_line=start_line),
    )
def read(cls, lines):
    # first line
    line = cls.convert_leading_tabs(next(lines).lstrip()).split('>', 1)[1]
    if len(line) > 0 and line[0] == ' ':
        line = line[1:]
    line_buffer = [line]

    # set booleans
    in_code_fence = CodeFence.start(line)
    in_block_code = BlockCode.start(line)
    blank_line = line.strip() == ''

    # loop
    next_line = lines.peek()
    while (next_line is not None
            and next_line.strip() != ''
            and not Heading.start(next_line)
            and not CodeFence.start(next_line)
            and not ThematicBreak.start(next_line)
            and not List.start(next_line)
            and not Setting.start(next_line)):
        stripped = cls.convert_leading_tabs(next_line.lstrip())
        prepend = 0
        if stripped[0] == '>':
            # has leader, not lazy continuation
            prepend += 1
            if stripped[1] == ' ':
                prepend += 1
            stripped = stripped[prepend:]
            in_code_fence = CodeFence.start(stripped)
            in_block_code = BlockCode.start(stripped)
            blank_line = stripped.strip() == ''
            line_buffer.append(stripped)
        elif in_code_fence or in_block_code or blank_line:
            # not paragraph continuation text
            break
        else:
            # lazy continuation, preserve whitespace
            line_buffer.append(next_line)
        next(lines)
        next_line = lines.peek()

    # block level tokens are parsed here, so that footnotes
    # in quotes can be recognized before span-level tokenizing.
    Paragraph.parse_setext = False
    parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
    Paragraph.parse_setext = True
    return parse_buffer
def read(cls, lines, prev_marker=None):
    next_marker = None
    lines.anchor()
    prepend = -1
    leader = None
    line_buffer = []

    # first line
    line = next(lines)
    prepend, leader = prev_marker if prev_marker else cls.parse_marker(line)
    line = line.replace(leader + '\t', leader + '   ', 1).replace('\t', '    ')
    empty_first_line = line[prepend:].strip() == ''
    if not empty_first_line:
        line_buffer.append(line[prepend:])
    next_line = lines.peek()
    if empty_first_line and next_line is not None and next_line.strip() == '':
        parse_buffer = tokenizer.tokenize_block([next(lines)], _token_types)
        next_line = lines.peek()
        if next_line is not None:
            marker_info = cls.parse_marker(next_line)
            if marker_info is not None:
                next_marker = marker_info
        return (parse_buffer, prepend, leader), next_marker

    # loop
    newline = 0
    while True:
        # no more lines
        if next_line is None:
            # strip off newlines
            if newline:
                lines.backstep()
                del line_buffer[-newline:]
            break
        next_line = next_line.replace('\t', '    ')
        # not in continuation
        if not cls.in_continuation(next_line, prepend):
            # directly followed by another token
            if cls.other_token(next_line):
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                break
            # next_line is a new list item
            marker_info = cls.parse_marker(next_line)
            if marker_info is not None:
                next_marker = marker_info
                break
            # not another item, has newlines -> not continuation
            if newline:
                lines.backstep()
                del line_buffer[-newline:]
                break
        next(lines)
        line = next_line
        stripped = line.lstrip(' ')
        diff = len(line) - len(stripped)
        if diff > prepend:
            stripped = ' ' * (diff - prepend) + stripped
        line_buffer.append(stripped)
        newline = newline + 1 if next_line.strip() == '' else 0
        next_line = lines.peek()

    # block-level tokens are parsed here, so that footnotes can be
    # recognized before span-level parsing.
    parse_buffer = tokenizer.tokenize_block(line_buffer, _token_types)
    return (parse_buffer, prepend, leader), next_marker
def read(cls, lines, prev_marker=None):
    next_marker = None
    lines.anchor()
    prepend = -1
    leader = None
    start_line = lines.lineno
    line_buffer = []

    # first line
    line = next(lines)
    prepend, leader = prev_marker if prev_marker else cls.parse_marker(line)
    line = line.replace(leader + "\t", leader + "   ", 1).replace("\t", "    ")
    empty_first_line = line[prepend:].strip() == ""
    if not empty_first_line:
        line_buffer.append(line[prepend:])
    next_line = lines.peek()
    if empty_first_line and next_line is not None and next_line.strip() == "":
        child_tokens = tokenizer.tokenize_block(
            SourceLines([next(lines)], start_line=lines.lineno)
        )
        next_line = lines.peek()
        if next_line is not None:
            marker_info = cls.parse_marker(next_line)
            if marker_info is not None:
                next_marker = marker_info
        return cls(
            children=child_tokens,
            loose=child_tokens.loose,
            prepend=prepend,
            leader=leader,
            next_marker=next_marker,
            position=Position.from_source_lines(lines, start_line=start_line),
        )

    # loop
    newline = 0
    while True:
        # no more lines
        if next_line is None:
            # strip off newlines
            if newline:
                lines.backstep()
                del line_buffer[-newline:]
            break
        next_line = next_line.replace("\t", "    ")
        # not in continuation
        if not cls.in_continuation(next_line, prepend):
            # directly followed by another token
            if cls.transition(next_line):
                if newline:
                    lines.backstep()
                    del line_buffer[-newline:]
                break
            # next_line is a new list item
            marker_info = cls.parse_marker(next_line)
            if marker_info is not None:
                next_marker = marker_info
                break
            # not another item, has newlines -> not continuation
            if newline:
                lines.backstep()
                del line_buffer[-newline:]
                break
        next(lines)
        line = next_line
        stripped = line.lstrip(" ")
        diff = len(line) - len(stripped)
        if diff > prepend:
            stripped = " " * (diff - prepend) + stripped
        line_buffer.append(stripped)
        newline = newline + 1 if next_line.strip() == "" else 0
        next_line = lines.peek()

    child_tokens = tokenizer.tokenize_block(
        SourceLines(line_buffer, start_line=start_line)
    )

    return cls(
        children=child_tokens,
        loose=child_tokens.loose,
        prepend=prepend,
        leader=leader,
        next_marker=next_marker,
        position=Position.from_source_lines(lines, start_line=start_line),
    )
def run_block(self):
    with self.set_state():
        self._blocks = block_tokenizer.tokenize_block(
            self._lines, block_token._token_types
        )
    return self._blocks
def parse(self, inputstring, document):
    # de-serialize the notebook
    ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)

    # This is a container for top level markdown tokens
    # which we will add to as we walk the document
    mkdown_tokens = []  # type: list[BlockToken]

    # First we ensure that we are using a 'clean' global context
    # for parsing, which is setup with the MyST parsing tokens
    # the logger will report on duplicate link/footnote definitions, etc
    parse_context = ParseContext(
        find_blocks=SphinxNBRenderer.default_block_tokens,
        find_spans=SphinxNBRenderer.default_span_tokens,
        logger=SPHINX_LOGGER,
    )
    set_parse_context(parse_context)

    for cell_index, nb_cell in enumerate(ntbk.cells):

        # Skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        tags = nb_cell.metadata.get("tags", [])
        if "remove_cell" in tags:
            continue

        if nb_cell["cell_type"] == "markdown":

            # we add the document path and cell index
            # to the source lines, so they can be included in the error logging
            # NOTE: currently the logic to report metadata is not written
            # into SphinxRenderer, but this will be introduced in a later update
            lines = SourceLines(
                nb_cell["source"],
                uri=document["source"],
                metadata={"cell_index": cell_index},
                standardize_ends=True,
            )

            # parse the source markdown text;
            # at this point span/inline level tokens are not yet processed, but
            # link/footnote definitions are collected/stored in the global context
            mkdown_tokens.extend(tokenize_block(lines))

            # TODO for md cells, think of a way to implement the previous
            # `if "hide_input" in tags:` logic

        elif nb_cell["cell_type"] == "code":
            # here we do nothing but store the cell as a custom token
            mkdown_tokens.append(
                NbCodeCell(
                    cell=nb_cell,
                    position=Position(
                        line_start=0,
                        uri=document["source"],
                        data={"cell_index": cell_index},
                    ),
                )
            )

    # Now all definitions have been gathered, we walk the tokens and
    # process any inline text
    for token in mkdown_tokens + list(
        get_parse_context().foot_definitions.values()
    ):
        token.expand_spans()

    # If there are widgets, this will embed the state of all widgets in a script
    if contains_widgets(ntbk):
        mkdown_tokens.insert(0, JupyterWidgetState(state=get_widgets(ntbk)))

    # create the front matter token
    front_matter = FrontMatter(content=ntbk.metadata, position=None)

    # Finally, we create the top-level markdown document
    markdown_doc = Document(
        children=mkdown_tokens,
        front_matter=front_matter,
        link_definitions=parse_context.link_definitions,
        footnotes=parse_context.foot_definitions,
        footref_order=parse_context.foot_references,
    )

    self.reporter = document.reporter
    self.config = self.default_config.copy()
    try:
        new_cfg = document.settings.env.config.myst_config
        self.config.update(new_cfg)
    except AttributeError:
        pass

    # Remove all the mime prefixes from "glue" step.
    # This way, writing properly captures the glued images
    replace_mime = []
    for cell in ntbk.cells:
        if hasattr(cell, "outputs"):
            for out in cell.outputs:
                if "data" in out:
                    # Only do the mimebundle replacing for the scrapbook outputs
                    mime_prefix = (
                        out.get("metadata", {}).get("scrapbook", {}).get("mime_prefix")
                    )
                    if mime_prefix:
                        out["data"] = {
                            key.replace(mime_prefix, ""): val
                            for key, val in out["data"].items()
                        }
                        replace_mime.append(out)

    # Write the notebook's output to disk. This changes metadata in notebook cells
    path_doc = Path(document.settings.env.docname)
    doc_relpath = path_doc.parent
    doc_filename = path_doc.name
    build_dir = Path(document.settings.env.app.outdir).parent
    output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
    write_notebook_output(ntbk, str(output_dir), doc_filename)

    # Now add back the mime prefixes to the right outputs so they aren't rendered
    # until called from the role/directive
    for out in replace_mime:
        out["data"] = {
            f"{GLUE_PREFIX}{key}": val for key, val in out["data"].items()
        }

    # Update our glue key list with new ones defined in this page
    glue_domain = NbGlueDomain.from_env(document.settings.env)
    glue_domain.add_notebook(ntbk, path_doc)

    # render the Markdown AST to docutils AST
    renderer = SphinxNBRenderer(
        parse_context=parse_context, document=document, current_node=None
    )
    renderer.render(markdown_doc)