def is_html_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    token_stack: List[StackToken],
) -> Tuple[Optional[str], Optional[str]]:
    """
    Check whether the text at start_index would begin an html block element.

    Returns the (html_block_type, remaining_html_tag) pair produced by the
    block-type lookup, or (None, None) when either the leading whitespace
    fails the length-of-3 check or the character at start_index is not the
    html block start character.
    """
    assert extracted_whitespace is not None

    # Both preconditions must hold before the block type is worth computing.
    if not ParserHelper.is_length_less_than_or_equal_to(
        extracted_whitespace, 3
    ) or not ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        HtmlHelper.__html_block_start_character,
    ):
        return None, None

    block_kind, tag_remainder = HtmlHelper.__determine_html_block_type(
        token_stack,
        line_to_parse,
        start_index,
    )
    return block_kind, tag_remainder
def parse_html_block(parser_state, position_marker, extracted_whitespace):
    """
    Start an HTML block if the current position meets the criteria for one.

    Returns the list of new tokens: empty when no HTML block starts here,
    otherwise the tokens from closing any open paragraph followed by the
    new HTML block token.
    """
    if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
        return []
    if not ParserHelper.is_character_at_index(
        position_marker.text_to_parse,
        position_marker.index_number,
        HtmlHelper.__html_block_start_character,
    ):
        return []

    block_kind, tag_remainder = HtmlHelper.__determine_html_block_type(
        parser_state,
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    if not block_kind:
        return []

    # Only paragraph blocks are closed before the HTML block is opened.
    produced_tokens, _, _ = parser_state.close_open_blocks_fn(
        parser_state,
        only_these_blocks=[ParagraphStackToken],
    )
    parser_state.token_stack.append(HtmlBlockStackToken(block_kind, tag_remainder))
    produced_tokens.append(
        HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
    )
    return produced_tokens
def parse_setext_headings(
    parser_state,
    position_marker,
    extracted_whitespace,
    this_bq_count,
    stack_bq_count,
):
    """
    Turn the open paragraph into a setext heading when this line underlines it.

    Returns the list of new tokens: a single end token for the setext
    heading when the line qualifies, otherwise an empty list.
    """
    heading_tokens = []

    could_be_underline = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        and parser_state.token_stack[-1].is_paragraph
        and (this_bq_count == stack_bq_count)
    )
    if not could_be_underline:
        return heading_tokens

    underline_character = position_marker.text_to_parse[position_marker.index_number]
    _, collected_to_index = ParserHelper.collect_while_character(
        position_marker.text_to_parse,
        position_marker.index_number,
        underline_character,
    )
    after_whitespace_index, trailing_whitespace = ParserHelper.extract_whitespace(
        position_marker.text_to_parse, collected_to_index
    )
    # Anything other than whitespace after the underline run disqualifies it.
    if after_whitespace_index != len(position_marker.text_to_parse):
        return heading_tokens

    # This is unusual.  Normally, close_open_blocks is used to close off
    # blocks based on the stack token.  However, since the setext takes
    # the last paragraph of text (see case 61) and translates it
    # into a heading, this has to be done separately, as there is no
    # stack token to close.
    heading_tokens.append(
        EndMarkdownToken(
            MarkdownToken.token_setext_heading,
            extracted_whitespace,
            trailing_whitespace,
            None,
        )
    )

    # Walk backwards through the document to the most recent paragraph token.
    paragraph_index = len(parser_state.token_document) - 1
    while not parser_state.token_document[paragraph_index].is_paragraph:
        paragraph_index -= 1

    paragraph_token = parser_state.token_document[paragraph_index]
    parser_state.token_document[paragraph_index] = SetextHeadingMarkdownToken(
        underline_character,
        collected_to_index - position_marker.index_number,
        paragraph_token.extra_data,
        position_marker,
        paragraph_token,
    )
    del parser_state.token_stack[-1]
    return heading_tokens
def is_olist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Determine if we have the start of an numbered or ordered list.

    Returns a tuple (is_start, index, my_count, end_whitespace_index):
    is_start is True only when an ordered list starts here; index is the
    position just past the run of digits (None if no digit run was
    examined); my_count is the number of digits in that run (None likewise);
    end_whitespace_index is the index reported by
    ParserHelper.extract_whitespace for the text following the character
    after the digits (-1 if no digit run was examined).
    """
    is_start = False
    end_whitespace_index = -1
    index = None
    my_count = None
    if adj_ws is None:
        adj_ws = extracted_whitespace

    if (
        ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
        or skip_whitespace_check
    ) and ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, string.digits
    ):
        # Advance past the leading run of digits.
        index = start_index
        while ParserHelper.is_character_at_index_one_of(
            line_to_parse, index, string.digits
        ):
            index += 1
        my_count = index - start_index
        olist_index_number = line_to_parse[start_index:index]
        line_length = len(line_to_parse)

        LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number, my_count)
        # The digits may run to the very end of the line, in which case
        # `line_to_parse[index]` would raise IndexError.  A one-character
        # slice logs the same text when in range and "" when at the end.
        LOGGER.debug("olist>>%s", line_to_parse[index : index + 1])
        LOGGER.debug("index+1>>%s>>len>>%s", index + 1, line_length)

        end_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, index + 1
        )
        LOGGER.debug(
            "end_whitespace_index>>%s>>len>>%s>>%s",
            end_whitespace_index,
            line_length,
            olist_index_number,
        )

        if (
            my_count <= 9
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse, index, ListBlockProcessor.__olist_start_characters
            )
            # When a paragraph is open outside of a list, the list start is
            # rejected if nothing but whitespace follows the marker or the
            # number is anything other than "1".
            and not (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (
                    (end_whitespace_index == line_length)
                    or olist_index_number != "1"
                )
            )
            # The marker must be followed by whitespace or the end of line.
            and (
                ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, index + 1
                )
                or ((index + 1) == line_length)
            )
        ):
            is_start = True

    LOGGER.debug("is_olist_start>>result>>%s", is_start)
    return is_start, index, my_count, end_whitespace_index
def parse_setext_headings(
    parser_state: ParserState,
    position_marker: PositionMarker,
    extracted_whitespace: Optional[str],
    block_quote_data: BlockQuoteData,
) -> List[MarkdownToken]:
    """
    Handle a possible setext heading underline on the current line.

    Returns the list of new tokens; it is only populated (by
    __create_setext_token) when the line qualifies as an underline.
    """
    new_tokens: List[MarkdownToken] = []
    assert extracted_whitespace is not None

    is_candidate = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        and parser_state.token_stack[-1].is_paragraph
        and (block_quote_data.current_count == block_quote_data.stack_count)
    )
    if not is_candidate:
        return new_tokens

    is_paragraph_continuation = (
        LeafBlockProcessor.__adjust_continuation_for_active_list(
            parser_state, position_marker
        )
    )

    underline_character = position_marker.text_to_parse[position_marker.index_number]
    _, collected_to_index = ParserHelper.collect_while_character(
        position_marker.text_to_parse,
        position_marker.index_number,
        underline_character,
    )
    assert collected_to_index is not None

    after_whitespace_index, trailing_whitespace = ParserHelper.extract_whitespace(
        position_marker.text_to_parse, collected_to_index
    )

    # Only whitespace may follow the underline run, and the line must not
    # have been judged a paragraph continuation.
    if is_paragraph_continuation:
        return new_tokens
    if after_whitespace_index != len(position_marker.text_to_parse):
        return new_tokens

    LeafBlockProcessor.__create_setext_token(
        parser_state,
        position_marker,
        collected_to_index,
        new_tokens,
        extracted_whitespace,
        trailing_whitespace,
    )
    return new_tokens
def is_thematic_break(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
    whitespace_allowed_between_characters: bool = True,
) -> Tuple[Optional[str], Optional[int]]:
    """
    Decide whether the line, from start_index on, is a thematic break.

    Returns (break_character, end_index) when the rest of the line holds at
    least three of the starting character (optionally interleaved with
    whitespace) and nothing else; otherwise (None, None).
    """
    assert extracted_whitespace is not None

    starts_with_break_character = ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
    )
    POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
    POGGER.debug("is_thematic_character>>$", starts_with_break_character)

    indent_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not (indent_acceptable and starts_with_break_character):
        return None, None

    break_character = line_to_parse[start_index]
    line_length = len(line_to_parse)
    scan_index = start_index
    matched_count = 0
    while scan_index < line_length:
        if (
            whitespace_allowed_between_characters
            and ParserHelper.is_character_at_index_whitespace(
                line_to_parse, scan_index
            )
        ):
            scan_index += 1
            continue
        if line_to_parse[scan_index] != break_character:
            break  # pragma: no cover
        scan_index += 1
        matched_count += 1

    POGGER.debug("char_count>>$", matched_count)
    POGGER.debug("index>>$", scan_index)
    POGGER.debug("line_to_parse_size>>$", line_length)

    # The scan must have consumed the whole line and seen enough characters.
    if matched_count >= 3 and scan_index == line_length:
        return break_character, scan_index
    return None, None
def __close_indented_block_if_indent_not_there(parser_state, extracted_whitespace):
    """
    Close the indented code block on top of the stack if the indent is gone.

    Returns the close token followed by whatever
    extract_markdown_tokens_back_to_blank_line yields, or an empty list when
    the top of the stack is not an indented code block or the whitespace
    fails the length-of-3 check.
    """
    top_of_stack = parser_state.token_stack[-1]
    if not (
        top_of_stack.is_indented_code_block
        and ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
    ):
        return []

    pre_tokens = [top_of_stack.generate_close_token()]
    # The stack entry is removed before the trailing tokens are pulled back.
    del parser_state.token_stack[-1]
    pre_tokens.extend(
        ContainerBlockProcessor.extract_markdown_tokens_back_to_blank_line(
            parser_state
        )
    )
    return pre_tokens
def is_block_quote_start(line_to_parse, start_index, extracted_whitespace, adj_ws=None):
    """
    Report whether a block quote section starts at start_index.

    adj_ws, when supplied, is used for the whitespace check in place of
    extracted_whitespace.
    """
    whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws
    return bool(
        ParserHelper.is_length_less_than_or_equal_to(whitespace_to_check, 3)
        and ParserHelper.is_character_at_index(
            line_to_parse,
            start_index,
            BlockQuoteProcessor.__block_quote_character,
        )
    )
def is_atx_heading(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
    """
    Decide whether an ATX heading starts at start_index.

    Returns (True, non_whitespace_index, hash_count, whitespace_after_hashes)
    for a heading, otherwise (False, None, None, None).
    """
    not_a_heading = (False, None, None, None)
    assert extracted_whitespace is not None

    indent_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not indent_acceptable or not ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__atx_character,
    ):
        return not_a_heading

    hash_count, after_hashes_index = ParserHelper.collect_while_character(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__atx_character,
    )
    assert after_hashes_index is not None

    _, non_whitespace_index = ParserHelper.collect_while_character(
        line_to_parse, after_hashes_index, " "
    )
    spaces_after_hashes = line_to_parse[after_hashes_index:non_whitespace_index]
    assert hash_count is not None

    # At most six hashes, followed by a space or by the end of the line.
    if hash_count > 6:
        return not_a_heading
    if not spaces_after_hashes and non_whitespace_index != len(line_to_parse):
        return not_a_heading

    return (
        True,
        non_whitespace_index,
        hash_count,
        spaces_after_hashes,
    )
def is_ulist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Decide whether an un-numbered (bullet) list starts at start_index.

    Returns (is_start, after_all_whitespace_index); the index stays at -1
    unless the marker checks passed and the whitespace after the marker was
    extracted.
    """
    LOGGER.debug("is_ulist_start>>pre>>")
    is_start = False
    after_all_whitespace_index = -1
    whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws

    indent_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(whitespace_to_check, 3)
        or skip_whitespace_check
    )
    if (
        indent_acceptable
        and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, ListBlockProcessor.__ulist_start_characters
        )
        and (
            ParserHelper.is_character_at_index_whitespace(
                line_to_parse, start_index + 1
            )
            or ((start_index + 1) == len(line_to_parse))
        )
    ):
        LOGGER.debug("is_ulist_start>>mid>>")
        after_all_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, start_index + 1
        )
        LOGGER.debug(
            "after_all_whitespace_index>>%s>>len>>%s",
            str(after_all_whitespace_index),
            str(len(line_to_parse)),
        )

        # A line that is a thematic break is never treated as a list start.
        is_break, _ = LeafBlockProcessor.is_thematic_break(
            line_to_parse, start_index, extracted_whitespace
        )
        if not is_break:
            # With a paragraph open outside of a list, a marker followed
            # only by whitespace does not start a list.
            blocked_by_paragraph = (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (after_all_whitespace_index == len(line_to_parse))
            )
            if not blocked_by_paragraph:
                is_start = True

    LOGGER.debug("is_ulist_start>>result>>%s", str(is_start))
    return is_start, after_all_whitespace_index
def __is_link_reference_definition(
    parser_state: ParserState,
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
) -> bool:
    """
    Determine whether or not we have the start of a link reference definition.

    Returns False when a paragraph is on top of the token stack, when the
    whitespace fails the length-of-3 check, or when the character at
    start_index is not the definition start character.  Otherwise returns
    True unless the text after the start character ends with a backslash
    that is not itself preceded by an unpaired backslash.
    """
    if parser_state.token_stack[-1].is_paragraph:
        return False

    assert extracted_whitespace is not None
    if (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
    ) and ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LinkReferenceDefinitionHelper.__lrd_start_character,
    ):
        remaining_line, continue_with_lrd = line_to_parse[start_index + 1 :], True
        if remaining_line and remaining_line[-1] == InlineHelper.backslash_character:
            remaining_line_size = len(remaining_line)
            # The scan must begin at the start of `remaining_line`.  The
            # caller's `start_index` is an offset into `line_to_parse`, so
            # using it here would skip leading characters of
            # `remaining_line` and make the result depend on indentation.
            search_index = 0
            found_index = remaining_line.find(
                InlineHelper.backslash_character, search_index
            )
            POGGER.debug(">>$<<$", remaining_line, remaining_line_size)
            POGGER.debug(">>$<<$", remaining_line, search_index)
            POGGER.debug(">>$<<", found_index)

            # Each backslash found consumes the character after it, so the
            # search resumes two positions later.
            while found_index != -1 and found_index < (remaining_line_size - 1):
                search_index = found_index + 2
                POGGER.debug(">>$<<$", remaining_line, search_index)
                found_index = remaining_line.find(
                    InlineHelper.backslash_character, search_index
                )
                POGGER.debug(">>$<<", found_index)
            POGGER.debug(">>>>>>>$<<", found_index)

            # Landing exactly on the final character means the trailing
            # backslash was not consumed by an earlier one.
            continue_with_lrd = found_index != remaining_line_size - 1
        return continue_with_lrd
    return False
def __is_link_reference_definition(position_marker, line_to_parse, start_index, extracted_whitespace):
    """
    Report whether a link reference definition could start at start_index.

    Always False while a paragraph is on top of the token stack.
    """
    if position_marker.token_stack[-1].is_paragraph:
        return False

    return bool(
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            line_to_parse,
            start_index,
            LinkReferenceDefinitionHelper.__lrd_start_character,
        )
    )
def is_fenced_code_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[str], Optional[int]]:
    """
    Decide whether a fenced code block starts at start_index.

    Returns (True, non_whitespace_index, whitespace_before_info_string,
    fence_length) when a run of at least three fence characters is found,
    otherwise (False, None, None, None).
    """
    assert extracted_whitespace is not None

    indent_acceptable = (
        skip_whitespace_check
        or ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
    )
    if not indent_acceptable or not ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__fenced_code_block_start_characters,
    ):
        return False, None, None, None

    POGGER.debug("ifcb:collected_count>>$<<$<<", line_to_parse, start_index)
    fence_character = line_to_parse[start_index]
    collected_count, after_fence_index = ParserHelper.collect_while_character(
        line_to_parse, start_index, fence_character
    )
    POGGER.debug("ifcb:collected_count:$", collected_count)
    assert collected_count is not None
    assert after_fence_index is not None

    non_whitespace_index, whitespace_before_info = ParserHelper.extract_whitespace(
        line_to_parse, after_fence_index
    )

    if collected_count < 3:
        return False, None, None, None

    POGGER.debug("ifcb:True")
    return (
        True,
        non_whitespace_index,
        whitespace_before_info,
        collected_count,
    )
def is_thematic_break(
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
):
    """
    Decide whether the line, from start_index on, is a thematic break.

    Returns (break_character, end_index) when the rest of the line holds at
    least three of the starting character, interleaved only with
    whitespace; otherwise (None, None).
    """
    indent_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not indent_acceptable or not ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
    ):
        return None, None

    break_character = line_to_parse[start_index]
    scan_index = start_index
    matched_count = 0
    while scan_index < len(line_to_parse):
        if ParserHelper.is_character_at_index_whitespace(line_to_parse, scan_index):
            scan_index += 1
            continue
        if line_to_parse[scan_index] != break_character:
            break
        scan_index += 1
        matched_count += 1

    # The scan must have consumed the whole line and seen enough characters.
    if matched_count >= 3 and scan_index == len(line_to_parse):
        return break_character, scan_index
    return None, None
def is_fenced_code_block(
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
):
    """
    Decide whether a fenced code block starts at start_index.

    Returns (True, non_whitespace_index, whitespace_before_info_string,
    fence_length) when a run of at least three fence characters is found,
    otherwise (False, None, None, None).
    """
    no_fence = (False, None, None, None)

    indent_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not indent_acceptable or not ParserHelper.is_character_at_index_one_of(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__fenced_code_block_start_characters,
    ):
        return no_fence

    LOGGER.debug("ifcb:collected_count>>%s<<%s<<", line_to_parse, str(start_index))
    fence_character = line_to_parse[start_index]
    collected_count, after_fence_index = ParserHelper.collect_while_character(
        line_to_parse, start_index, fence_character
    )
    LOGGER.debug("ifcb:collected_count:%s", str(collected_count))

    non_whitespace_index, whitespace_before_info = ParserHelper.extract_whitespace(
        line_to_parse, after_fence_index
    )

    if collected_count < 3:
        return no_fence

    LOGGER.debug("ifcb:True")
    return (
        True,
        non_whitespace_index,
        whitespace_before_info,
        collected_count,
    )
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Parse an atx heading at the current position, if there is one.

    Returns the list of new tokens: empty when no heading starts here,
    otherwise the tokens from closing open blocks followed by the heading
    start token, its text token and its end token.
    """
    new_tokens = []

    if not (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
    ):
        return new_tokens

    hash_count, after_hashes_index = ParserHelper.collect_while_character(
        position_marker.text_to_parse,
        position_marker.index_number,
        LeafBlockProcessor.__atx_character,
    )
    non_whitespace_index, whitespace_at_start = ParserHelper.extract_whitespace(
        position_marker.text_to_parse, after_hashes_index
    )

    # At most six hashes, followed by whitespace or by the end of the line.
    if hash_count > 6:
        return new_tokens
    if not whitespace_at_start and non_whitespace_index != len(
        position_marker.text_to_parse
    ):
        return new_tokens

    new_tokens, _, _ = parser_state.close_open_blocks_fn(parser_state, new_tokens)

    heading_text = position_marker.text_to_parse[non_whitespace_index:]
    end_index, whitespace_at_end = ParserHelper.extract_whitespace_from_end(
        heading_text
    )

    # Count the run of atx characters sitting just before the trailing
    # whitespace.
    trailing_hash_count = 0
    while (
        end_index > 0
        and heading_text[end_index - 1] == LeafBlockProcessor.__atx_character
    ):
        end_index -= 1
        trailing_hash_count += 1

    whitespace_before_end = ""
    if not trailing_hash_count:
        # No closing run: just split the trailing whitespace off the text.
        whitespace_at_end = heading_text[end_index:]
        heading_text = heading_text[0:end_index]
    elif end_index <= 0:
        # The remainder consisted of nothing but the closing run.
        heading_text = ""
    elif ParserHelper.is_character_at_index_whitespace(heading_text, end_index - 1):
        # Closing run separated from the text by whitespace: drop the run,
        # then peel off the whitespace that preceded it.
        heading_text = heading_text[:end_index]
        end_index, whitespace_before_end = ParserHelper.extract_whitespace_from_end(
            heading_text
        )
        heading_text = heading_text[:end_index]
    else:
        # The run touches the text directly, so it stays part of the text.
        whitespace_at_end = ""
        trailing_hash_count = 0

    start_token = AtxHeadingMarkdownToken(
        hash_count,
        trailing_hash_count,
        extracted_whitespace,
        position_marker,
    )
    end_token = EndMarkdownToken(
        "atx",
        whitespace_at_end,
        whitespace_before_end,
        None,
    )
    end_token.start_markdown_token = start_token

    new_tokens.append(start_token)
    new_tokens.append(TextMarkdownToken(heading_text, whitespace_at_start))
    new_tokens.append(end_token)
    return new_tokens