def __parse_angle_link_destination(source_text, new_index):
    """
    Parse a link destination that is included in angle brackets.

    Returns a tuple of (new_index, collected_destination); on a missing
    closing angle bracket, the pair is (-1, "").
    """
    destination_parts = []
    new_index += 1
    while True:
        new_index, next_segment = ParserHelper.collect_until_one_of_characters(
            source_text, new_index, LinkHelper.__angle_link_destination_breaks)
        destination_parts.append(next_segment)
        if not ParserHelper.is_character_at_index(
                source_text, new_index, InlineHelper.backslash_character):
            break
        # A backslash escape interrupts the collection: consume it and keep
        # its raw text, then continue collecting.
        before_backslash_index = new_index
        inline_response = InlineHelper.handle_inline_backslash(
            InlineRequest(source_text, new_index))
        new_index = inline_response.new_index
        destination_parts.append(source_text[before_backslash_index:new_index])

    collected_destination = "".join(destination_parts)
    if ParserHelper.is_character_at_index(source_text, new_index,
                                          LinkHelper.__angle_link_end):
        new_index += 1
    else:
        # Without the closing bracket, the destination is not valid.
        new_index = -1
        collected_destination = ""
    return new_index, collected_destination
def __process_inline_link_body(source_text, new_index):
    """
    Given that an inline link has been identified, process its body.

    Returns a tuple of (inline_link, pre_inline_link, inline_title,
    pre_inline_title, new_index), where new_index is -1 on any parse failure.
    """
    LOGGER.debug("process_inline_link_body>>%s<<", source_text[new_index:])
    inline_link = ""
    pre_inline_link = ""
    inline_title = ""
    pre_inline_title = ""
    # Skip any whitespace between the opening marker and the destination.
    new_index, _ = ParserHelper.extract_any_whitespace(
        source_text, new_index)
    LOGGER.debug("new_index>>%s>>source_text[]>>%s>", str(new_index),
                 source_text[new_index:])
    if not ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_format_inline_end):
        # A non-empty body: parse the destination first.
        (
            inline_link,
            pre_inline_link,
            new_index,
            _,
        ) = LinkHelper.__parse_link_destination(source_text, new_index)
        if new_index != -1:
            LOGGER.debug("before ws>>%s<", source_text[new_index:])
            new_index, _ = ParserHelper.extract_any_whitespace(
                source_text, new_index)
            LOGGER.debug("after ws>>%s>", source_text[new_index:])
            # If the body does not end here, an optional title follows.
            if ParserHelper.is_character_at_index_not(
                    source_text, new_index, LinkHelper.__link_format_inline_end):
                (
                    inline_title,
                    pre_inline_title,
                    new_index,
                ) = LinkHelper.__parse_link_title(source_text, new_index)
            if new_index != -1:
                new_index, _ = ParserHelper.extract_any_whitespace(
                    source_text, new_index)
    LOGGER.debug(
        "inline_link>>%s>>inline_title>>%s>new_index>%s>",
        str(inline_link),
        str(inline_title),
        str(new_index),
    )
    # The body must terminate with the inline-end character to be valid.
    if new_index != -1:
        if ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__link_format_inline_end):
            new_index += 1
        else:
            new_index = -1
    LOGGER.debug(
        "process_inline_link_body>>inline_link>>%s>>inline_title>>%s>new_index>%s>",
        str(inline_link),
        str(inline_title),
        str(new_index),
    )
    return inline_link, pre_inline_link, inline_title, pre_inline_title, new_index
def extract_bounded_string(source_text, new_index, close_character,
                           start_character):
    """
    Extract a string that is bounded by some manner of characters.

    If start_character is supplied, occurrences of it raise a nesting level
    that a matching close_character must lower before the bound is considered
    closed.  Returns (index_after_close, data) on success, or
    (index, None) if the text runs out before the bound is closed.
    """
    break_characters = InlineHelper.backslash_character + close_character
    if start_character:
        break_characters = break_characters + start_character
    nesting_level = 0
    LOGGER.debug(
        "extract_bounded_string>>new_index>>%s>>data>>%s>>",
        str(new_index),
        source_text[new_index:],
    )
    next_index, data = ParserHelper.collect_until_one_of_characters(
        source_text, new_index, break_characters)
    LOGGER.debug(">>next_index1>>%s>>data>>%s>>", str(next_index), data)
    # Keep consuming until the close character is found at nesting level 0
    # or the source text is exhausted.
    while next_index < len(source_text) and not (
            source_text[next_index] == close_character and nesting_level == 0):
        if ParserHelper.is_character_at_index(
                source_text, next_index, InlineHelper.backslash_character):
            # Backslash escape: consume it and keep the raw escaped text.
            LOGGER.debug("pre-back>>next_index>>%s>>", str(next_index))
            old_index = next_index
            inline_request = InlineRequest(source_text, next_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            next_index = inline_response.new_index
            data = data + source_text[old_index:next_index]
        elif start_character is not None and ParserHelper.is_character_at_index(
                source_text, next_index, start_character):
            # A nested opener: bump the nesting level.
            LOGGER.debug("pre-start>>next_index>>%s>>", str(next_index))
            data = data + start_character
            next_index += 1
            nesting_level += 1
        else:
            # Must be a close character at a non-zero nesting level.
            assert ParserHelper.is_character_at_index(
                source_text, next_index, close_character)
            LOGGER.debug("pre-close>>next_index>>%s>>", str(next_index))
            data = data + close_character
            next_index += 1
            nesting_level -= 1
        next_index, new_data = ParserHelper.collect_until_one_of_characters(
            source_text, next_index, break_characters)
        LOGGER.debug("back>>next_index>>%s>>data>>%s>>", str(next_index),
                     data)
        data = data + new_data
    LOGGER.debug(">>next_index2>>%s>>data>>%s>>", str(next_index), data)
    if (ParserHelper.is_character_at_index(source_text, next_index,
                                           close_character)
            and nesting_level == 0):
        LOGGER.debug("extract_bounded_string>>found-close")
        return next_index + 1, data
    # Ran out of text before the bound was closed.
    LOGGER.debug(
        "extract_bounded_string>>ran out of string>>next_index>>%s",
        str(next_index))
    return next_index, None
def __parse_non_angle_link_destination(source_text, new_index):
    """
    Parse a link destination that is not included in angle brackets.

    Tracks balanced nesting of the nest/unnest characters; returns
    (new_index, destination), or (-1, None) if the nesting is unbalanced.
    """
    collected_destination = ""
    nesting_level = 0
    keep_collecting = True
    while keep_collecting:
        LOGGER.debug(
            "collected_destination>>%s<<source_text<<%s>>nesting_level>>%s>>",
            str(collected_destination),
            source_text[new_index:],
            str(nesting_level),
        )
        keep_collecting = False
        new_index, before_part = ParserHelper.collect_until_one_of_characters(
            source_text, new_index, LinkHelper.__non_angle_link_breaks)
        collected_destination = collected_destination + before_part
        LOGGER.debug(">>>>>>%s<<<<<", source_text[new_index:])
        if ParserHelper.is_character_at_index(
                source_text, new_index, InlineHelper.backslash_character):
            # Backslash escape: consume it, keep its raw text, keep going.
            LOGGER.debug("backslash")
            old_new_index = new_index
            inline_request = InlineRequest(source_text, new_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            new_index = inline_response.new_index
            collected_destination = (collected_destination +
                                     source_text[old_new_index:new_index])
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__non_angle_link_nest):
            # Opening nest character: one level deeper.
            LOGGER.debug("+1")
            nesting_level += 1
            collected_destination += LinkHelper.__non_angle_link_nest
            new_index += 1
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__non_angle_link_unnest):
            # Closing nest character only continues collection while nested;
            # at level 0 it terminates the destination.
            LOGGER.debug("-1")
            if nesting_level != 0:
                collected_destination += LinkHelper.__non_angle_link_unnest
                new_index += 1
                nesting_level -= 1
                keep_collecting = True
    ex_link = collected_destination
    LOGGER.debug("collected_destination>>%s", str(collected_destination))
    if nesting_level != 0:
        # Unbalanced nesting means this was not a valid destination.
        return -1, None
    return new_index, ex_link
def extract_link_label(line_to_parse, new_index, include_reference_colon=True):
    """
    Extract the link reference definition's link label.

    Returns a tuple of (did_extract, new_index, label); on failure the first
    element is False and the label is None.  When include_reference_colon is
    True, the label must also be followed by the definition character.
    """
    collected_destination = ""
    keep_collecting = True
    while keep_collecting:
        keep_collecting = False
        new_index, ert_new = ParserHelper.collect_until_one_of_characters(
            line_to_parse, new_index, LinkHelper.__link_label_breaks)
        collected_destination = collected_destination + ert_new
        if ParserHelper.is_character_at_index(
                line_to_parse, new_index, InlineHelper.backslash_character):
            # Backslash escape: consume it, keep its raw text, keep going.
            old_new_index = new_index
            inline_request = InlineRequest(line_to_parse, new_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            new_index = inline_response.new_index
            collected_destination = (
                collected_destination + line_to_parse[old_new_index:new_index])
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                line_to_parse, new_index, LinkHelper.link_label_start):
            # An unescaped label-start character inside a label is invalid.
            LOGGER.debug(">> unescaped [, bailing")
            return False, -1, None
    LOGGER.debug("look for ]>>%s<<", line_to_parse[new_index:])
    if not ParserHelper.is_character_at_index(line_to_parse, new_index,
                                              LinkHelper.link_label_end):
        LOGGER.debug(">> no end ], bailing")
        return False, new_index, None
    new_index += 1
    if include_reference_colon:
        # Link reference definitions require the label to be followed by ':'.
        LOGGER.debug("look for :>>%s<<", line_to_parse[new_index:])
        if not ParserHelper.is_character_at_index(
                line_to_parse,
                new_index,
                LinkHelper.__link_label_is_definition_character,
        ):
            LOGGER.debug(">> no :, bailing")
            return False, -1, None
        new_index += 1
    return True, new_index, collected_destination
def is_html_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    token_stack: List[StackToken],
) -> Tuple[Optional[str], Optional[str]]:
    """
    Determine if the current sequence of characters would start a html block element.
    """
    assert extracted_whitespace is not None
    # HTML blocks require at most 3 leading spaces and the start character.
    looks_like_html_start = ParserHelper.is_length_less_than_or_equal_to(
        extracted_whitespace, 3
    ) and ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        HtmlHelper.__html_block_start_character,
    )
    if not looks_like_html_start:
        return None, None
    return HtmlHelper.__determine_html_block_type(
        token_stack,
        line_to_parse,
        start_index,
    )
def parse_html_block(parser_state, position_marker, extracted_whitespace):
    """
    Determine if we have the criteria that we need to start an HTML block.
    """
    # Bail out early unless the line can start an HTML block at all.
    if not (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index(
            position_marker.text_to_parse,
            position_marker.index_number,
            HtmlHelper.__html_block_start_character,
        )
    ):
        return []

    html_block_type, remaining_html_tag = HtmlHelper.__determine_html_block_type(
        parser_state,
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    if not html_block_type:
        return []

    # Close any open paragraph, then open the HTML block.
    new_tokens, _, _ = parser_state.close_open_blocks_fn(
        parser_state,
        only_these_blocks=[ParagraphStackToken],
    )
    parser_state.token_stack.append(
        HtmlBlockStackToken(html_block_type, remaining_html_tag)
    )
    new_tokens.append(
        HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
    )
    return new_tokens
def __parse_link_title(source_text, new_index):
    """
    Parse an inline link's link title.

    A title may be delimited by single quotes, double quotes, or
    parentheses (the only form that allows balanced nesting).
    """
    LOGGER.debug("parse_link_title>>new_index>>%s>>", source_text[new_index:])
    ex_title = ""
    # Pick the (close, open) delimiter pair based on the leading character.
    if ParserHelper.is_character_at_index(source_text, new_index,
                                          LinkHelper.__link_title_single):
        title_bounds = (LinkHelper.__link_title_single, None)
    elif ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_title_double):
        title_bounds = (LinkHelper.__link_title_double, None)
    elif ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_title_parenthesis_open):
        title_bounds = (
            LinkHelper.__link_title_parenthesis_close,
            LinkHelper.__link_title_parenthesis_open,
        )
    else:
        title_bounds = None

    if title_bounds is None:
        new_index = -1
    else:
        new_index, ex_title = InlineHelper.extract_bounded_string(
            source_text, new_index + 1, title_bounds[0], title_bounds[1])

    LOGGER.debug(
        "parse_link_title>>new_index>>%s>>ex_link>>%s>>",
        str(new_index),
        str(ex_title),
    )
    pre_ex_title = ex_title
    if ex_title is not None:
        # Resolve any backslash escapes in the extracted title.
        ex_title = InlineHelper.append_text(
            "",
            InlineHelper.handle_backslashes(ex_title,
                                            add_text_signature=False),
            add_text_signature=False,
        )
    LOGGER.debug("parse_link_title>>pre>>%s>>", str(pre_ex_title))
    LOGGER.debug("parse_link_title>>after>>%s>>", str(ex_title))
    return ex_title, pre_ex_title, new_index
def __handle_next_extract_bounded_string_item(
    source_text: str,
    next_index: int,
    extracted_parts: List[str],
    start_character: Optional[str],
    nesting_level: int,
    close_character: str,
    break_characters: str,
) -> Tuple[int, int]:
    """
    Handle one break character during bounded-string extraction, appending
    the consumed text to extracted_parts (mutated in place) and returning
    the updated (next_index, nesting_level) pair.
    """
    if ParserHelper.is_character_at_index(
            source_text, next_index, InlineHelper.backslash_character):
        # Backslash escape: consume it and keep the raw escaped text.
        POGGER.debug("pre-back>>next_index>>$>>", next_index)
        old_index = next_index
        inline_request = InlineRequest(source_text, next_index)
        inline_response = InlineHelper.handle_inline_backslash(
            inline_request)
        assert inline_response.new_index is not None
        next_index = inline_response.new_index
        extracted_parts.append(source_text[old_index:next_index])
    elif start_character is not None and ParserHelper.is_character_at_index(
            source_text, next_index, start_character):
        # A nested opener: bump the nesting level.
        POGGER.debug("pre-start>>next_index>>$>>", next_index)
        extracted_parts.append(start_character)
        next_index += 1
        nesting_level += 1
    else:
        # Must be a close character at a non-zero nesting level.
        assert ParserHelper.is_character_at_index(source_text, next_index,
                                                  close_character)
        POGGER.debug("pre-close>>next_index>>$>>", next_index)
        extracted_parts.append(close_character)
        next_index += 1
        nesting_level -= 1
    # Collect the plain text up to the next break character.
    nexter_index, new_data = ParserHelper.collect_until_one_of_characters(
        source_text, next_index, break_characters)
    assert new_data is not None
    assert nexter_index is not None
    extracted_parts.append(new_data)
    return nexter_index, nesting_level
def __parse_raw_open_tag(text_to_parse):
    """
    Parse the current line as if it is an open tag, and determine if it is valid.

    Args:
        text_to_parse: Text that may form a raw HTML open tag.

    Returns:
        A tuple of (valid_raw_html, end_parse_index).  On success the first
        element is the validated tag text and the second is the index just
        past the tag-end character; on failure the pair is (None, -1).
    """
    end_parse_index = -1
    valid_raw_html = None

    tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 0)
    if tag_name:
        parse_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, len(tag_name)
        )
        if extracted_whitespace:
            # Attributes must each be preceded by whitespace; consume as
            # many attribute specifications as are present.
            while (
                extracted_whitespace
                and ParserHelper.is_character_at_index_one_of(
                    text_to_parse,
                    parse_index,
                    HtmlHelper.__tag_attribute_name_start,
                )
            ):
                (
                    parse_index,
                    extracted_whitespace,
                ) = HtmlHelper.__parse_tag_attributes(text_to_parse, parse_index)
                if parse_index is None:
                    # BUG FIX: previously returned (None, extracted_whitespace),
                    # leaking whitespace where callers expect an index.  Use
                    # the (None, -1) failure pair, consistent with the typed
                    # variant of this function.
                    return None, -1

        if ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_start
        ):
            # Optional extra character before the tag end — presumably the
            # "/" of a self-closing tag; TODO confirm constant value.
            parse_index += 1

        if ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_end
        ):
            valid_raw_html = text_to_parse[:parse_index]
            end_parse_index = parse_index + 1

    return valid_raw_html, end_parse_index
def __parse_raw_open_tag(text_to_parse: str) -> Tuple[Optional[str], int]:
    """
    Parse the current line as if it is an open tag, and determine if it is valid.

    Returns a tuple of (valid_raw_html, end_parse_index); on failure the
    pair is (None, -1).
    """
    end_parse_index, valid_raw_html, tag_name = (
        -1,
        None,
        HtmlHelper.__parse_raw_tag_name(text_to_parse, 0),
    )
    if tag_name:
        parse_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, len(tag_name)
        )
        assert parse_index is not None
        # Attributes must each be preceded by whitespace; consume as many
        # attribute specifications as are present.
        while extracted_whitespace and ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            parse_index,
            HtmlHelper.__tag_attribute_name_start,
        ):
            (
                parse_index,
                extracted_whitespace,
            ) = HtmlHelper.__parse_tag_attributes(text_to_parse, parse_index)
            if parse_index is None:
                # Malformed attribute: the whole tag is invalid.
                return None, -1

        if ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_start
        ):
            # Optional extra character before the tag end — presumably the
            # "/" of a self-closing tag; TODO confirm constant value.
            parse_index += 1

        if ParserHelper.is_character_at_index(
            text_to_parse, parse_index, HtmlHelper.__html_tag_end
        ):
            valid_raw_html = text_to_parse[:parse_index]
            end_parse_index = parse_index + 1
    return valid_raw_html, end_parse_index
def __check_for_special_html_blocks(
    line_to_parse: str, character_index: int
) -> Optional[str]:
    """
    Check for the easy to spot special blocks: 2-5.
    """
    if character_index >= len(line_to_parse):
        return None

    if ParserHelper.is_character_at_index(
        line_to_parse, character_index, HtmlHelper.__html_block_2_to_5_start
    ):
        # Blocks 2, 4, and 5 share a common start character and are
        # distinguished by what follows it.
        follow_index = character_index + 1
        if ParserHelper.are_characters_at_index(
            line_to_parse, follow_index, HtmlHelper.__html_block_2_continued_start
        ):
            return HtmlHelper.html_block_2
        if ParserHelper.is_character_at_index_one_of(
            line_to_parse, follow_index, HtmlHelper.__html_block_4_continued_start
        ):
            return HtmlHelper.html_block_4
        if ParserHelper.are_characters_at_index(
            line_to_parse, follow_index, HtmlHelper.__html_block_5_continued_start
        ):
            return HtmlHelper.html_block_5
        return None

    if ParserHelper.is_character_at_index(
        line_to_parse, character_index, HtmlHelper.__html_block_3_continued_start
    ):
        return HtmlHelper.html_block_3
    return None
def is_block_quote_start(line_to_parse, start_index, extracted_whitespace,
                         adj_ws=None):
    """
    Determine if we have the start of a block quote section.
    """
    # Prefer the adjusted whitespace when the caller supplies one.
    whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws
    return bool(
        ParserHelper.is_length_less_than_or_equal_to(whitespace_to_check, 3)
        and ParserHelper.is_character_at_index(
            line_to_parse,
            start_index,
            BlockQuoteProcessor.__block_quote_character,
        )
    )
def is_atx_heading(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
) -> Tuple[bool, Optional[int], Optional[int], Optional[str]]:
    """
    Determine whether or not an ATX Heading is about to start.

    Returns (is_heading, non_whitespace_index, hash_count,
    extracted_whitespace_at_start); the last three are None when the line
    is not an ATX heading.
    """
    assert extracted_whitespace is not None
    if (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    ) and ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        LeafBlockProcessor.__atx_character,
    ):
        # Count the run of heading characters at the start.
        hash_count, new_index = ParserHelper.collect_while_character(
            line_to_parse,
            start_index,
            LeafBlockProcessor.__atx_character,
        )
        assert new_index is not None
        # Capture any spaces between the hashes and the heading text.
        _, non_whitespace_index = ParserHelper.collect_while_character(
            line_to_parse, new_index, " "
        )
        extracted_whitespace_at_start = line_to_parse[
            new_index:non_whitespace_index
        ]

        assert hash_count is not None
        # A valid heading has at most 6 hashes, followed by whitespace or
        # the end of the line.
        if hash_count <= 6 and (
            extracted_whitespace_at_start
            or non_whitespace_index == len(line_to_parse)
        ):
            return (
                True,
                non_whitespace_index,
                hash_count,
                extracted_whitespace_at_start,
            )
    return False, None, None, None
def test_is_character_at_index_with_whitespace():
    """
    Verify that is_character_at_index reports True when the character at
    the given index matches the character being looked for.
    """
    # Arrange
    source = "a"
    index_to_check = 0
    character_to_find = "a"

    # Act
    was_found = ParserHelper.is_character_at_index(
        source, index_to_check, character_to_find)

    # Assert
    assert was_found
def __parse_raw_close_tag(text_to_parse):
    """
    Parse the current line as if it is a close tag, and determine if it is valid.
    """
    if not ParserHelper.is_character_at_index(
        text_to_parse, 0, HtmlHelper.__html_tag_start
    ):
        return None
    tag_name = HtmlHelper.__parse_raw_tag_name(text_to_parse, 1)
    if not tag_name:
        return None

    # After the tag name, only whitespace may remain for the tag to be valid.
    text_length = len(text_to_parse)
    scan_index = len(tag_name)
    if scan_index != text_length:
        scan_index, _ = ParserHelper.extract_whitespace(
            text_to_parse, scan_index
        )
    return text_to_parse if scan_index == text_length else None
def test_is_character_at_index_without_whitespace():
    """
    Verify that is_character_at_index reports False when the character at
    the given index does not match the character being looked for.
    """
    # Arrange
    source = "this is a test"
    index_to_check = 0
    character_to_find = "b"

    # Act
    was_found = ParserHelper.is_character_at_index(
        source, index_to_check, character_to_find)

    # Assert
    assert not was_found
def test_is_character_at_index_with_character_at_end():
    """
    Verify that is_character_at_index matches a character sitting at the
    very last index of the string.
    """
    # Arrange
    source = "this is a test!"
    index_to_check = len(source) - 1
    character_to_find = "!"

    # Act
    was_found = ParserHelper.is_character_at_index(
        source, index_to_check, character_to_find)

    # Assert
    assert was_found
def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
    """
    Parse the current line as if it is a close tag, and determine if it is valid.

    Args:
        text_to_parse: Text that may form a raw HTML close tag.

    Returns:
        The validated close-tag text, or None if it is not a valid close tag.
    """
    valid_raw_html = None
    if ParserHelper.is_character_at_index(
        text_to_parse, 0, HtmlHelper.__html_tag_start
    ):
        if tag_name := HtmlHelper.__parse_raw_tag_name(text_to_parse, 1):
            parse_index: Optional[int] = len(tag_name)
            assert parse_index is not None
            text_to_parse_size = len(text_to_parse)
            if parse_index != text_to_parse_size:
                # After the tag name, only whitespace may remain.
                parse_index, _ = ParserHelper.extract_whitespace(
                    text_to_parse, parse_index
                )
            if parse_index == text_to_parse_size:
                valid_raw_html = text_to_parse
    # BUG FIX: the body previously fell off the end without a return, so the
    # function always yielded None despite its Optional[str] signature; the
    # untyped sibling version of this function returns the validated text.
    return valid_raw_html
def test_is_character_at_index_with_high_index():
    """
    Verify that an index just past the end of the string safely reports
    False instead of raising.
    """
    # Arrange
    source = "this is a test"
    index_to_check = len(source)
    character_to_find = "a"

    # Act
    was_found = ParserHelper.is_character_at_index(
        source, index_to_check, character_to_find)

    # Assert
    assert not was_found
def test_is_character_at_index_with_empty_string():
    """
    Verify that searching inside an empty string safely reports False.
    """
    # Arrange
    source = ""
    index_to_check = 0
    character_to_find = "a"

    # Act
    was_found = ParserHelper.is_character_at_index(
        source, index_to_check, character_to_find)

    # Assert
    assert not was_found
def extract_bounded_string(
    source_text: str,
    new_index: int,
    close_character: str,
    start_character: Optional[str],
) -> Tuple[Optional[int], Optional[str]]:
    """
    Extract a string that is bounded by some manner of characters.

    If start_character is supplied, occurrences of it raise a nesting level
    that a matching close_character must lower before the bound is considered
    closed.  Returns (index_after_close, data) on success, or (index, None)
    if the text runs out before the bound is closed.
    """
    break_characters = (
        f"{InlineHelper.backslash_character}{close_character}{start_character}"
        if start_character
        else f"{InlineHelper.backslash_character}{close_character}")
    nesting_level: int = 0
    POGGER.debug(
        "extract_bounded_string>>new_index>>$>>data>>$>>",
        new_index,
        source_text[new_index:],
    )
    next_index, data = ParserHelper.collect_until_one_of_characters(
        source_text, new_index, break_characters)
    assert data is not None
    # Pieces are accumulated in a list and joined once at the end.
    extracted_parts: List[str] = [data]
    POGGER.debug(
        ">>next_index1>>$>>data>>$>>",
        next_index,
        data,
    )
    assert next_index is not None
    # Keep consuming until the close character is found at nesting level 0
    # or the source text is exhausted.
    while next_index < len(source_text) and not (
            source_text[next_index] == close_character and nesting_level == 0):
        (
            next_index,
            nesting_level,
        ) = InlineHelper.__handle_next_extract_bounded_string_item(
            source_text,
            next_index,
            extracted_parts,
            start_character,
            nesting_level,
            close_character,
            break_characters,
        )
        assert next_index is not None
        POGGER.debug(
            "back>>next_index>>$>>data>>$>>",
            next_index,
            data,
        )
    POGGER.debug(
        ">>next_index2>>$>>data>>$>>",
        next_index,
        data,
    )
    assert next_index is not None
    if (ParserHelper.is_character_at_index(source_text, next_index,
                                           close_character)
            and nesting_level == 0):
        POGGER.debug("extract_bounded_string>>found-close")
        return next_index + 1, "".join(extracted_parts)
    # Ran out of text before the bound was closed.
    POGGER.debug(
        "extract_bounded_string>>ran out of string>>next_index>>$",
        next_index)
    return next_index, None
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Handle the parsing of an atx heading.

    Returns the list of new tokens produced, which is empty when the line
    is not an ATX heading.
    """
    new_tokens = []
    if ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3) and ParserHelper.is_character_at_index(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__atx_character,
            ):
        # Count the run of heading characters at the start.
        hash_count, new_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__atx_character,
        )
        (
            non_whitespace_index,
            extracted_whitespace_at_start,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            new_index)
        # A valid heading has at most 6 hashes, followed by whitespace or
        # the end of the line.
        if hash_count <= 6 and (extracted_whitespace_at_start
                                or non_whitespace_index == len(
                                    position_marker.text_to_parse)):
            new_tokens, _, _ = parser_state.close_open_blocks_fn(
                parser_state, new_tokens)
            remaining_line = position_marker.text_to_parse[
                non_whitespace_index:]
            (
                end_index,
                extracted_whitespace_at_end,
            ) = ParserHelper.extract_whitespace_from_end(remaining_line)
            # Strip an optional run of trailing heading characters.
            remove_trailing_count = 0
            while (end_index > 0 and remaining_line[end_index - 1]
                   == LeafBlockProcessor.__atx_character):
                end_index -= 1
                remove_trailing_count += 1
            extracted_whitespace_before_end = ""
            if remove_trailing_count:
                if end_index > 0:
                    # Trailing hashes only count when preceded by whitespace.
                    if ParserHelper.is_character_at_index_whitespace(
                            remaining_line, end_index - 1):
                        remaining_line = remaining_line[:end_index]
                        (
                            end_index,
                            extracted_whitespace_before_end,
                        ) = ParserHelper.extract_whitespace_from_end(
                            remaining_line)
                        remaining_line = remaining_line[:end_index]
                    else:
                        # Hashes glued to the text are part of the text.
                        extracted_whitespace_at_end = ""
                        remove_trailing_count = 0
                else:
                    # The line was nothing but hashes.
                    remaining_line = ""
            else:
                extracted_whitespace_at_end = remaining_line[end_index:]
                remaining_line = remaining_line[0:end_index]
            # Emit start token, heading text, and the matching end token.
            start_token = AtxHeadingMarkdownToken(
                hash_count,
                remove_trailing_count,
                extracted_whitespace,
                position_marker,
            )
            new_tokens.append(start_token)
            new_tokens.append(
                TextMarkdownToken(remaining_line,
                                  extracted_whitespace_at_start))
            end_token = EndMarkdownToken(
                "atx",
                extracted_whitespace_at_end,
                extracted_whitespace_before_end,
                None,
            )
            end_token.start_markdown_token = start_token
            new_tokens.append(end_token)
    return new_tokens
def __parse_link_destination(source_text, new_index):
    """
    Parse an inline link's link destination.

    Returns a tuple of (ex_link, pre_handle_link, new_index, raw_text);
    on failure the tuple is (None, None, -1, None).
    """
    LOGGER.debug("parse_link_destination>>new_index>>%s>>",
                 source_text[new_index:])
    start_index = new_index
    # Angle-bracketed and bare destinations are parsed differently.
    if ParserHelper.is_character_at_index(source_text, new_index,
                                          LinkHelper.__angle_link_start):
        LOGGER.debug(
            ">parse_angle_link_destination>new_index>%s>%s",
            str(new_index),
            str(source_text[new_index:]),
        )
        new_index, ex_link = LinkHelper.__parse_angle_link_destination(
            source_text, new_index)
        LOGGER.debug(
            ">parse_angle_link_destination>new_index>%s>ex_link>%s>",
            str(new_index),
            ex_link,
        )
    else:
        LOGGER.debug(
            ">parse_non_angle_link_destination>new_index>%s>%s",
            str(new_index),
            str(source_text[new_index:]),
        )
        new_index, ex_link = LinkHelper.__parse_non_angle_link_destination(
            source_text, new_index)
        LOGGER.debug(
            ">parse_non_angle_link_destination>new_index>%s>ex_link>%s>",
            str(new_index),
            str(ex_link),
        )
    if not ex_link:
        return None, None, -1, None
    # A destination spanning a newline is not valid.
    if new_index != -1 and "\n" in ex_link:
        return None, None, -1, None
    LOGGER.debug(
        "handle_backslashes>>new_index>>%s>>ex_link>>%s>>",
        str(new_index),
        str(ex_link),
    )
    # Keep the raw form before escapes/encoding are applied.
    pre_handle_link = ex_link
    if new_index != -1 and ex_link:
        ex_link = InlineHelper.handle_backslashes(ex_link,
                                                  add_text_signature=False)
        LOGGER.debug(
            "urllib.parse.quote>>ex_link>>%s>>",
            str(ex_link).replace(InlineHelper.backspace_character, "\\b"),
        )
        ex_link = LinkHelper.__encode_link_destination(ex_link)
        LOGGER.debug(
            "parse_link_destination>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_link),
        )
    return ex_link, pre_handle_link, new_index, source_text[
        start_index:new_index]
def __parse_tag_attributes(text_to_parse, start_index):
    """
    Handle the parsing of the attributes for an open tag.

    Returns a tuple of (end_index, extracted_whitespace), or (None, -1)
    when a quoted attribute value is not properly terminated.
    """
    # Consume the attribute name.
    parse_index, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, parse_index
    )
    if ParserHelper.is_character_at_index(
        text_to_parse,
        end_name_index,
        HtmlHelper.__html_attribute_name_value_separator,
    ):
        # name=value form: skip whitespace after the separator.
        (
            value_start_index,
            extracted_whitespace,
        ) = ParserHelper.extract_any_whitespace(text_to_parse, end_name_index + 1)
        if ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_single,
        ):
            # Single-quoted value: must find the closing quote.
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse,
                value_start_index + 1,
                HtmlHelper.__html_attribute_value_single,
            )
            if not ParserHelper.is_character_at_index(
                text_to_parse,
                value_end_index,
                HtmlHelper.__html_attribute_value_single,
            ):
                return None, -1
            value_end_index += 1
        elif ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_double,
        ):
            # Double-quoted value: must find the closing quote.
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse,
                value_start_index + 1,
                HtmlHelper.__html_attribute_value_double,
            )
            if not ParserHelper.is_character_at_index(
                text_to_parse,
                value_end_index,
                HtmlHelper.__html_attribute_value_double,
            ):
                return None, -1
            value_end_index += 1
        else:
            # Unquoted value: runs until a stop character.
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )
        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse, value_end_index
        )
    return end_name_index, extracted_whitespace
def __look_for_link_formats(source_text, new_index, text_from_blocks):
    """
    Look for links in the various formats.

    Tries the inline form first, then the collapsed-reference and
    full-reference forms.  Returns (inline_link, pre_inline_link,
    inline_title, pre_inline_title, update_index,
    tried_full_reference_form, ex_label, label_type), where update_index
    is -1 when no format matched.
    """
    inline_link = ""
    pre_inline_link = ""
    inline_title = ""
    pre_inline_title = ""
    update_index = -1
    ex_label = ""
    label_type = ""
    tried_full_reference_form = False
    if ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_format_inline_start):
        # Inline form: destination and optional title follow directly.
        LOGGER.debug("inline reference?")
        (
            inline_link,
            pre_inline_link,
            inline_title,
            pre_inline_title,
            update_index,
        ) = LinkHelper.__process_inline_link_body(source_text, new_index + 1)
        label_type = "inline"
    elif ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_format_reference_start):
        LOGGER.debug("collapsed reference?")
        after_open_index = new_index + 1
        if ParserHelper.is_character_at_index(
                source_text, after_open_index,
                LinkHelper.__link_format_reference_end):
            # Collapsed reference: the preceding text is the label.
            LOGGER.debug("collapsed reference")
            LOGGER.debug(">>%s>>", text_from_blocks)
            update_index, inline_link, inline_title = LinkHelper.__look_up_link(
                text_from_blocks,
                after_open_index + 1,
                "collapsed reference",
            )
            tried_full_reference_form = True
            label_type = "collapsed"
        else:
            # Full reference: the label is spelled out explicitly.
            LOGGER.debug("full reference?")
            LOGGER.debug(">>did_extract>>%s>",
                         source_text[after_open_index:])
            (
                did_extract,
                after_label_index,
                ex_label,
            ) = LinkHelper.extract_link_label(
                source_text, after_open_index, include_reference_colon=False)
            LOGGER.debug(
                ">>did_extract>>%s>after_label_index>%s>ex_label>%s>",
                str(did_extract),
                str(after_label_index),
                str(ex_label),
            )
            if did_extract:
                tried_full_reference_form = True
                label_type = "full"
                update_index, inline_link, inline_title = LinkHelper.__look_up_link(
                    ex_label, after_label_index, "full reference")
    return (
        inline_link,
        pre_inline_link,
        inline_title,
        pre_inline_title,
        update_index,
        tried_full_reference_form,
        ex_label,
        label_type,
    )
def __prepare_for_create_atx_heading(
    parser_state: ParserState,
    position_marker: PositionMarker,
    new_tokens: List[MarkdownToken],
    non_whitespace_index: int,
) -> Tuple[StackToken, str, int, str, str, List[MarkdownToken]]:
    """
    Prepare the pieces needed to create an ATX heading token: strip any
    trailing run of heading characters (and its surrounding whitespace)
    from the remaining line.
    """
    (
        old_top_of_stack,
        remaining_line,
        remove_trailing_count,
        extracted_whitespace_before_end,
    ) = (
        parser_state.token_stack[-1],
        position_marker.text_to_parse[non_whitespace_index:],
        0,
        "",
    )
    # NOTE(review): close_open_blocks_fn is unpacked into 2 values here but
    # into 3 values elsewhere in this file — confirm which arity this
    # version of the callback has.
    new_tokens, _ = parser_state.close_open_blocks_fn(parser_state)
    (
        end_index,
        extracted_whitespace_at_end,
    ) = ParserHelper.extract_whitespace_from_end(remaining_line)
    # Count the optional trailing run of heading characters.
    while (
        end_index > 0
        and remaining_line[end_index - 1] == LeafBlockProcessor.__atx_character
    ):
        end_index -= 1
        remove_trailing_count += 1
    if remove_trailing_count:
        if end_index > 0:
            # Trailing hashes only count when preceded by a space.
            if ParserHelper.is_character_at_index(
                remaining_line, end_index - 1, " "
            ):
                remaining_line = remaining_line[:end_index]
                (
                    _,
                    new_non_whitespace_index,
                ) = ParserHelper.collect_backwards_while_character(
                    remaining_line, len(remaining_line) - 1, " "
                )
                assert new_non_whitespace_index is not None
                end_index = new_non_whitespace_index
                extracted_whitespace_before_end = remaining_line[end_index:]
                remaining_line = remaining_line[:end_index]
            else:
                # Hashes glued to the text are part of the text.
                extracted_whitespace_at_end, remove_trailing_count = "", 0
        else:
            # The line was nothing but hashes.
            remaining_line = ""
    else:
        extracted_whitespace_at_end = remaining_line[end_index:]
        remaining_line = remaining_line[:end_index]
    return (
        old_top_of_stack,
        remaining_line,
        remove_trailing_count,
        extracted_whitespace_before_end,
        extracted_whitespace_at_end,
        new_tokens,
    )
def __parse_tag_attributes(
    text_to_parse: str, start_index: int
) -> Tuple[Optional[int], Optional[str]]:
    """
    Handle the parsing of the attributes for an open tag.

    Returns a tuple of (end_index, extracted_whitespace), or (None, None)
    when a quoted attribute value is not properly terminated.
    """
    # Consume the attribute name.
    parse_index, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
    )
    assert parse_index is not None
    end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
        text_to_parse, parse_index
    )
    assert end_name_index is not None
    if ParserHelper.is_character_at_index(
        text_to_parse,
        end_name_index,
        HtmlHelper.__html_attribute_name_value_separator,
    ):
        # name=value form: skip whitespace after the separator.
        (
            value_start_index,
            extracted_whitespace,
        ) = ParserHelper.extract_any_whitespace(text_to_parse, end_name_index + 1)
        assert value_start_index is not None
        value_end_index: Optional[int] = None
        if ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_single,
        ):
            # Single-quoted value: must find the closing quote.
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse,
                value_start_index + 1,
                HtmlHelper.__html_attribute_value_single,
            )
            assert value_end_index is not None
            if not ParserHelper.is_character_at_index(
                text_to_parse,
                value_end_index,
                HtmlHelper.__html_attribute_value_single,
            ):
                return None, None
            value_end_index += 1
        elif ParserHelper.is_character_at_index_one_of(
            text_to_parse,
            value_start_index,
            HtmlHelper.__html_attribute_value_double,
        ):
            # Double-quoted value: must find the closing quote.
            value_end_index, _ = ParserHelper.collect_until_character(
                text_to_parse,
                value_start_index + 1,
                HtmlHelper.__html_attribute_value_double,
            )
            assert value_end_index is not None
            if not ParserHelper.is_character_at_index(
                text_to_parse,
                value_end_index,
                HtmlHelper.__html_attribute_value_double,
            ):
                return None, None
            value_end_index += 1
        else:
            # Unquoted value: runs until a stop character.
            value_end_index, _ = ParserHelper.collect_until_one_of_characters(
                text_to_parse,
                value_start_index,
                HtmlHelper.__unquoted_attribute_value_stop,
            )
        assert value_end_index is not None
        end_name_index, extracted_whitespace = ParserHelper.extract_any_whitespace(
            text_to_parse,
            value_end_index
        )
    return end_name_index, extracted_whitespace