Example #1
    def handle_uri_autolink(cls, output_html, next_token, transform_state):
        """
        Handle the uri autolink token.
        """
        assert transform_state
        in_tag_pretext = InlineHelper.append_text(
            "",
            next_token.autolink_text,
            alternate_escape_map=TransformToGfm.uri_autolink_html_character_escape_map,
            add_text_signature=False,
        )
        in_tag_text = ""
        for next_character in in_tag_pretext:
            if next_character in TransformToGfm.raw_html_percent_escape_ascii_chars:
                in_tag_text = in_tag_text + "%" + (hex(
                    ord(next_character))[2:]).upper()
            elif ord(next_character) >= 128:
                encoded_data = next_character.encode("utf8")
                for encoded_byte in encoded_data:
                    in_tag_text = in_tag_text + "%" + (
                        hex(encoded_byte)[2:]).upper()
            else:
                in_tag_text = in_tag_text + next_character

        in_anchor_text = InlineHelper.append_text("",
                                                  next_token.autolink_text,
                                                  add_text_signature=False)

        output_html = (output_html + '<a href="' + in_tag_text + '">' +
                       in_anchor_text + "</a>")
        return output_html

    def __init__(self):
        """
        Initializes a new instance of the TransformToMarkdown class.
        """
        self.block_stack = []

        # TODO do I still need this?
        resource_path = None
        if not resource_path:
            resource_path = os.path.join(
                os.path.split(__file__)[0], "../pymarkdown/resources")
        InlineHelper.initialize(resource_path)

    def __init__(self, resource_path: Optional[str] = None) -> None:
        """
        Initializes a new instance of the TokenizedMarkdown class.
        """

        self.__tokenized_document: List[MarkdownToken] = []
        self.__token_stack: List[StackToken] = []
        self.__parse_properties: Optional[ParseBlockPassProperties] = None
        self.__source_provider: Optional[SourceProvider] = None

        if not resource_path:
            resource_path = os.path.join(
                os.path.split(__file__)[0], "resources")
        InlineHelper.initialize(resource_path)
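
The escaping loop in handle_uri_autolink is easier to see in isolation. Below is a minimal standalone sketch of the same technique; ASSUMED_ESCAPE_CHARS stands in for TransformToGfm.raw_html_percent_escape_ascii_chars, whose exact contents are not shown in this excerpt, so the set here is an assumption.

    # Minimal standalone sketch of the percent-escaping applied to
    # autolink hrefs above.  The escape set is assumed, not taken from
    # the real TransformToGfm tables.
    ASSUMED_ESCAPE_CHARS = ' "<>\\^`{|}'

    def percent_escape_href(text):
        parts = []
        for next_character in text:
            if next_character in ASSUMED_ESCAPE_CHARS:
                # Characters in the escape set become a single %XX triple.
                parts.append("%{0:02X}".format(ord(next_character)))
            elif ord(next_character) >= 128:
                # Non-ASCII characters are UTF-8 encoded, one %XX per byte.
                parts.extend(
                    "%{0:02X}".format(encoded_byte)
                    for encoded_byte in next_character.encode("utf8"))
            else:
                parts.append(next_character)
        return "".join(parts)

    print(percent_escape_href("http://example.com/ö ü"))
    # -> http://example.com/%C3%B6%20%C3%BC

Example #6 below rebuilds this same loop with a parts list and a single join, which avoids repeated string reallocation.
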
Example #4
    def __parse_angle_link_destination(source_text, new_index):
        """
        Parse a link destination that is included in angle brackets.
        """

        collected_destination = ""
        new_index += 1
        keep_collecting = True
        while keep_collecting:
            keep_collecting = False
            new_index, ert_new = ParserHelper.collect_until_one_of_characters(
                source_text, new_index,
                LinkHelper.__angle_link_destination_breaks)
            collected_destination = collected_destination + ert_new
            if ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                old_new_index = new_index
                inline_request = InlineRequest(source_text, new_index)
                inline_response = InlineHelper.handle_inline_backslash(
                    inline_request)
                new_index = inline_response.new_index
                collected_destination = (collected_destination +
                                         source_text[old_new_index:new_index])
                keep_collecting = True

        if ParserHelper.is_character_at_index(source_text, new_index,
                                              LinkHelper.__angle_link_end):
            new_index += 1
        else:
            new_index = -1
            collected_destination = ""
        return new_index, collected_destination
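
The loop above is a collect-until-break scan: gather characters up to the next interesting one, and when that character is a backslash, consume the two-character escape and resume collecting. A simplified standalone sketch, with the ParserHelper/InlineHelper calls replaced by direct character tests and the break set assumed from the code above:

    def parse_angle_destination(text, index):
        collected = []
        index += 1  # skip the opening "<"
        while index < len(text):
            next_character = text[index]
            if next_character == ">":
                return index + 1, "".join(collected)
            if next_character == "\n":
                break  # newlines may not appear inside the destination
            if next_character == "\\" and index + 1 < len(text):
                # A backslash escape consumes the next character verbatim.
                collected.append(text[index:index + 2])
                index += 2
            else:
                collected.append(next_character)
                index += 1
        return -1, ""  # no closing ">" found

    print(parse_angle_destination("</my\\>url> tail", 0))
    # -> (10, '/my\\>url')
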
Example #5
    def __parse_link_title(source_text, new_index):
        """
        Parse an inline link's link title.
        """

        LOGGER.debug("parse_link_title>>new_index>>%s>>",
                     source_text[new_index:])
        ex_title = ""
        pre_ex_title = ""
        if ParserHelper.is_character_at_index(source_text, new_index,
                                              LinkHelper.__link_title_single):
            new_index, ex_title = InlineHelper.extract_bounded_string(
                source_text, new_index + 1, LinkHelper.__link_title_single,
                None)
        elif ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__link_title_double):
            new_index, ex_title = InlineHelper.extract_bounded_string(
                source_text, new_index + 1, LinkHelper.__link_title_double,
                None)
        elif ParserHelper.is_character_at_index(
                source_text, new_index,
                LinkHelper.__link_title_parenthesis_open):
            new_index, ex_title = InlineHelper.extract_bounded_string(
                source_text,
                new_index + 1,
                LinkHelper.__link_title_parenthesis_close,
                LinkHelper.__link_title_parenthesis_open,
            )
        else:
            new_index = -1
        LOGGER.debug(
            "parse_link_title>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_title),
        )
        pre_ex_title = ex_title
        if ex_title is not None:
            ex_title = InlineHelper.append_text(
                "",
                InlineHelper.handle_backslashes(ex_title,
                                                add_text_signature=False),
                add_text_signature=False,
            )
        LOGGER.debug("parse_link_title>>pre>>%s>>", str(pre_ex_title))
        LOGGER.debug("parse_link_title>>after>>%s>>", str(ex_title))

        return ex_title, pre_ex_title, new_index
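
The three branches map one-to-one onto the title forms CommonMark allows: 'title', "title", and (title). A condensed sketch of that dispatch, with extract_bounded_string reduced to a plain find (the real helper also honors backslash escapes and, for parentheses, nesting):

    def sketch_parse_title(text, index):
        delimiters = {"'": "'", '"': '"', "(": ")"}
        if index >= len(text) or text[index] not in delimiters:
            return -1, None  # mirrors the new_index = -1 failure branch
        closer = delimiters[text[index]]
        end = text.find(closer, index + 1)
        if end == -1:
            return -1, None
        return end + 1, text[index + 1:end]

    for sample in ["'one'", '"two"', "(three)", "unquoted"]:
        print(sketch_parse_title(sample, 0))
    # -> (5, 'one'), (5, 'two'), (7, 'three'), (-1, None)
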
Example #6
    def __handle_uri_autolink(
        cls,
        output_html: str,
        next_token: MarkdownToken,
        transform_state: TransformState,
    ) -> str:
        """
        Handle the uri autolink token.
        """
        _ = transform_state

        autolink_token = cast(UriAutolinkMarkdownToken, next_token)
        in_tag_pretext = InlineHelper.append_text(
            "",
            autolink_token.autolink_text,
            alternate_escape_map=TransformToGfm.uri_autolink_html_character_escape_map,
            add_text_signature=False,
        )

        tag_text_parts = []
        for next_character in in_tag_pretext:
            if next_character in TransformToGfm.raw_html_percent_escape_ascii_chars:
                tag_text_parts.extend(
                    ["%", (hex(ord(next_character))[2:]).upper()])
            elif ord(next_character) >= 128:
                encoded_data = next_character.encode("utf8")
                for encoded_byte in encoded_data:
                    tag_text_parts.extend(
                        ["%", (hex(encoded_byte)[2:]).upper()])
            else:
                tag_text_parts.append(next_character)

        return "".join([
            output_html,
            '<a href="',
            "".join(tag_text_parts),
            '">',
            InlineHelper.append_text("",
                                     autolink_token.autolink_text,
                                     add_text_signature=False),
            "</a>",
        ])
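
This version produces the same output as Example #1 but accumulates parts in a list and joins once, avoiding repeated string concatenation. For orientation only: the standard library's urllib.parse.quote performs the same UTF-8, byte-wise %XX escaping, though its safe set differs from the hand-rolled table above, so treat this as an approximation rather than a drop-in replacement.

    from urllib.parse import quote

    # quote() always keeps unreserved characters, which does not
    # exactly match TransformToGfm.raw_html_percent_escape_ascii_chars.
    print(quote("http://example.com/ö ü", safe=":/"))
    # -> http://example.com/%C3%B6%20%C3%BC
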
Example #7
    def __complete_inline_block_processing(
        inline_blocks,
        source_text,
        start_index,
        current_string,
        end_string,
        starting_whitespace,
        is_setext,
    ):
        have_processed_once = len(inline_blocks) != 0 or start_index != 0

        LOGGER.debug("__cibp>inline_blocks>%s<",
                     str(inline_blocks).replace("\n", "\\n"))
        LOGGER.debug("__cibp>source_text>%s<",
                     str(source_text).replace("\n", "\\n"))
        LOGGER.debug("__cibp>start_index>%s<",
                     str(start_index).replace("\n", "\\n"))
        LOGGER.debug("__cibp>current_string>%s<",
                     str(current_string).replace("\n", "\\n"))
        LOGGER.debug("__cibp>end_string>%s<",
                     str(end_string).replace("\n", "\\n"))
        LOGGER.debug(
            "__cibp>starting_whitespace>%s<",
            str(starting_whitespace).replace("\n", "\\n"),
        )
        LOGGER.debug("__cibp>is_setext>%s<",
                     str(is_setext).replace("\n", "\\n"))

        if (inline_blocks and inline_blocks[-1].token_name
                == MarkdownToken.token_inline_hard_break):
            start_index, extracted_whitespace = ParserHelper.extract_whitespace(
                source_text, start_index)
            if end_string is None:
                end_string = extracted_whitespace
            else:
                end_string += extracted_whitespace

        if start_index < len(source_text):
            current_string = InlineHelper.append_text(
                current_string, source_text[start_index:])

        if end_string is not None:
            LOGGER.debug("xx-end-lf>%s<", end_string.replace("\n", "\\n"))
        if current_string or not have_processed_once:
            inline_blocks.append(
                TextMarkdownToken(current_string,
                                  starting_whitespace,
                                  end_whitespace=end_string))
        LOGGER.debug(
            ">>%s<<",
            str(inline_blocks).replace("\n", "\\n").replace("\x02", "\\x02"))

        return EmphasisHelper.resolve_inline_emphasis(inline_blocks, None)
Example #8
    def __parse_non_angle_link_destination(source_text, new_index):
        """
        Parse a link destination that is not included in angle brackets.
        """

        collected_destination = ""
        nesting_level = 0
        keep_collecting = True
        while keep_collecting:
            LOGGER.debug(
                "collected_destination>>%s<<source_text<<%s>>nesting_level>>%s>>",
                str(collected_destination),
                source_text[new_index:],
                str(nesting_level),
            )
            keep_collecting = False
            new_index, before_part = ParserHelper.collect_until_one_of_characters(
                source_text, new_index, LinkHelper.__non_angle_link_breaks)
            collected_destination = collected_destination + before_part
            LOGGER.debug(">>>>>>%s<<<<<", source_text[new_index:])
            if ParserHelper.is_character_at_index(
                    source_text, new_index, InlineHelper.backslash_character):
                LOGGER.debug("backslash")
                old_new_index = new_index
                inline_request = InlineRequest(source_text, new_index)
                inline_response = InlineHelper.handle_inline_backslash(
                    inline_request)
                new_index = inline_response.new_index
                collected_destination = (collected_destination +
                                         source_text[old_new_index:new_index])
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    source_text, new_index, LinkHelper.__non_angle_link_nest):
                LOGGER.debug("+1")
                nesting_level += 1
                collected_destination += LinkHelper.__non_angle_link_nest
                new_index += 1
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    source_text, new_index,
                    LinkHelper.__non_angle_link_unnest):
                LOGGER.debug("-1")
                if nesting_level != 0:
                    collected_destination += LinkHelper.__non_angle_link_unnest
                    new_index += 1
                    nesting_level -= 1
                    keep_collecting = True
        ex_link = collected_destination
        LOGGER.debug("collected_destination>>%s", str(collected_destination))
        if nesting_level != 0:
            return -1, None
        return new_index, ex_link
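
The nesting_level counter implements the CommonMark rule that a bare destination may contain balanced parentheses: "(" increments, ")" decrements, an unmatched ")" ends the destination, and an unmatched "(" invalidates it. A standalone sketch with the backslash branch omitted:

    def parse_bare_destination(text, index):
        nesting_level = 0
        start = index
        while index < len(text):
            next_character = text[index]
            if next_character in " \t\n":
                break
            if next_character == "(":
                nesting_level += 1
            elif next_character == ")":
                if nesting_level == 0:
                    break  # unmatched ")" belongs to the enclosing syntax
                nesting_level -= 1
            index += 1
        if nesting_level != 0:
            return -1, None  # unbalanced "(" means no valid destination
        return index, text[start:index]

    print(parse_bare_destination("/url(a(b)c) 'title'", 0))
    # -> (11, '/url(a(b)c)')
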
Example #9
    def extract_link_label(line_to_parse,
                           new_index,
                           include_reference_colon=True):
        """
        Extract the link reference definition's link label.
        """
        collected_destination = ""
        keep_collecting = True
        while keep_collecting:
            keep_collecting = False
            new_index, ert_new = ParserHelper.collect_until_one_of_characters(
                line_to_parse, new_index, LinkHelper.__link_label_breaks)
            collected_destination = collected_destination + ert_new
            if ParserHelper.is_character_at_index(
                    line_to_parse, new_index,
                    InlineHelper.backslash_character):
                old_new_index = new_index
                inline_request = InlineRequest(line_to_parse, new_index)
                inline_response = InlineHelper.handle_inline_backslash(
                    inline_request)
                new_index = inline_response.new_index
                collected_destination = (
                    collected_destination +
                    line_to_parse[old_new_index:new_index])
                keep_collecting = True
            elif ParserHelper.is_character_at_index(
                    line_to_parse, new_index, LinkHelper.link_label_start):
                LOGGER.debug(">> unescaped [, bailing")
                return False, -1, None

        LOGGER.debug("look for ]>>%s<<", line_to_parse[new_index:])
        if not ParserHelper.is_character_at_index(line_to_parse, new_index,
                                                  LinkHelper.link_label_end):
            LOGGER.debug(">> no end ], bailing")
            return False, new_index, None
        new_index += 1

        if include_reference_colon:
            LOGGER.debug("look for :>>%s<<", line_to_parse[new_index:])
            if not ParserHelper.is_character_at_index(
                    line_to_parse,
                    new_index,
                    LinkHelper.__link_label_is_definition_character,
            ):
                LOGGER.debug(">> no :, bailing")
                return False, -1, None
            new_index += 1

        return True, new_index, collected_destination
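
A condensed sketch of the label rules this loop enforces, with the helper calls replaced by direct character tests: backslash escapes are skipped over, an unescaped "[" aborts, and the label must end with "]" (optionally followed by ":" when parsing a link reference definition):

    def sketch_extract_label(line, index, need_colon=True):
        label = []
        while index < len(line):
            next_character = line[index]
            if next_character == "\\" and index + 1 < len(line):
                label.append(line[index:index + 2])
                index += 2
                continue
            if next_character == "[":
                return False, -1, None  # unescaped "[" is never a label
            if next_character == "]":
                index += 1
                if need_colon:
                    if index >= len(line) or line[index] != ":":
                        return False, -1, None
                    index += 1
                return True, index, "".join(label)
            label.append(next_character)
            index += 1
        return False, index, None  # ran out of input before "]"

    print(sketch_extract_label("foo\\] bar]: /url", 0))
    # -> (True, 11, 'foo\\] bar')
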
Example #10
    def parse_inline(coalesced_results):
        """
        Parse and resolve any inline elements.
        """
        LOGGER.info("coalesced_results")
        LOGGER.info("-----")
        for next_token in coalesced_results:
            LOGGER.info(
                ">>%s<<",
                str(next_token).replace("\t", "\\t").replace("\n", "\\n"))
        LOGGER.info("-----")

        coalesced_list = []
        coalesced_list.extend(coalesced_results[0:1])
        for coalesce_index in range(1, len(coalesced_results)):
            if coalesced_results[coalesce_index].is_text and (
                    coalesced_list[-1].is_paragraph
                    or coalesced_list[-1].is_setext
                    or coalesced_list[-1].is_atx_heading
                    or coalesced_list[-1].is_code_block):
                if coalesced_list[-1].is_code_block:
                    encoded_text = InlineHelper.append_text(
                        "", coalesced_results[coalesce_index].token_text)
                    processed_tokens = [
                        TextMarkdownToken(
                            encoded_text,
                            coalesced_results[coalesce_index].extracted_whitespace,
                        )
                    ]
                elif coalesced_list[-1].is_setext:
                    combined_text = coalesced_results[
                        coalesce_index].token_text
                    LOGGER.debug("combined_text>>%s",
                                 combined_text.replace("\n", "\\n"))
                    processed_tokens = InlineProcessor.__process_inline_text_block(
                        coalesced_results[coalesce_index].token_text.replace(
                            "\t", "    "),
                        whitespace_to_recombine=coalesced_results[
                            coalesce_index].extracted_whitespace.replace(
                                "\t", "    "),
                        is_setext=True,
                    )
                    LOGGER.debug(
                        "processed_tokens>>%s",
                        str(processed_tokens).replace("\n", "\\n"),
                    )
                elif coalesced_list[-1].is_atx_heading:
                    processed_tokens = InlineProcessor.__process_inline_text_block(
                        coalesced_results[coalesce_index].token_text.replace(
                            "\t", "    "),
                        coalesced_results[coalesce_index]
                        .extracted_whitespace.replace("\t", "    "),
                    )
                else:
                    assert coalesced_list[-1].is_paragraph
                    LOGGER.debug(
                        ">>before_add_ws>>%s>>add>>%s>>",
                        str(coalesced_list[-1]),
                        str(coalesced_results[coalesce_index]
                            .extracted_whitespace),
                    )
                    coalesced_list[-1].add_whitespace(
                        coalesced_results[coalesce_index]
                        .extracted_whitespace.replace("\t", "    "))
                    LOGGER.debug(">>after_add_ws>>%s", str(coalesced_list[-1]))
                    processed_tokens = InlineProcessor.__process_inline_text_block(
                        coalesced_results[coalesce_index].token_text.replace(
                            "\t", "    "))
                coalesced_list.extend(processed_tokens)
            else:
                coalesced_list.append(coalesced_results[coalesce_index])
        return coalesced_list
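
One detail worth flagging: the replace("\t", "    ") calls above treat every tab as exactly four spaces, whereas CommonMark defines tabs in terms of four-column tab stops. A sketch of the difference (expand_tabs is a hypothetical helper, not part of pymarkdown):

    def expand_tabs(text, tab_stop=4):
        out = []
        column = 0
        for next_character in text:
            if next_character == "\t":
                pad = tab_stop - (column % tab_stop)
                out.append(" " * pad)  # pad to the next tab stop
                column += pad
            else:
                out.append(next_character)
                column = 0 if next_character == "\n" else column + 1
        return "".join(out)

    print(repr("ab\tc".replace("\t", "    ")))  # -> 'ab    c' (7 chars)
    print(repr(expand_tabs("ab\tc")))           # -> 'ab  c' (5 chars)
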
Example #11
    def __complete_inline_loop(
        source_text,
        new_index,
        end_string,
        whitespace_to_add,
        current_string,
        current_string_unresolved,
        new_string_unresolved,
        new_string,
        original_string,
    ):
        LOGGER.debug(
            "__complete_inline_loop--current_string>>%s>>",
            current_string.replace("\n", "\\n"),
        )
        LOGGER.debug("__complete_inline_loop--new_string>>%s>>",
                     new_string.replace("\n", "\\n"))
        LOGGER.debug(
            "__complete_inline_loop--new_string_unresolved>>%s>>",
            str(new_string_unresolved).replace("\n", "\\n"),
        )
        LOGGER.debug(
            "__complete_inline_loop--original_string>>%s>>",
            str(original_string).replace("\b", "\\b").replace("\a",
                                                              "\\a").replace(
                                                                  "\n", "\\n"),
        )

        if original_string is not None:
            assert not new_string_unresolved or new_string_unresolved == original_string
            current_string += "\a" + original_string + "\a"

        LOGGER.debug(
            "__complete_inline_loop--current_string>>%s>>",
            str(current_string).replace("\b", "\\b").replace("\a",
                                                             "\\a").replace(
                                                                 "\n", "\\n"),
        )
        current_string = InlineHelper.append_text(current_string, new_string)
        LOGGER.debug(
            "__complete_inline_loop--current_string>>%s>>",
            str(current_string).replace("\b", "\\b").replace("\a",
                                                             "\\a").replace(
                                                                 "\n", "\\n"),
        )

        if original_string is not None:
            current_string += "\a"

        LOGGER.debug(
            "__complete_inline_loop--current_string>>%s>>",
            str(current_string).replace("\b", "\\b").replace("\a",
                                                             "\\a").replace(
                                                                 "\n", "\\n"),
        )
        if new_string_unresolved:
            current_string_unresolved += new_string_unresolved
        else:
            current_string_unresolved = InlineHelper.append_text(
                current_string_unresolved, new_string)

        LOGGER.debug(
            "__complete_inline_loop--current_string_unresolved>>%s>>",
            str(current_string_unresolved).replace("\b", "\\b").replace(
                "\a", "\\a").replace("\n", "\\n"),
        )

        if whitespace_to_add is not None:
            end_string = InlineHelper.modify_end_string(
                end_string, whitespace_to_add)

        start_index = new_index
        next_index = ParserHelper.index_any_of(
            source_text,
            InlineProcessor.__valid_inline_text_block_sequence_starts,
            start_index,
        )
        return (
            start_index,
            next_index,
            end_string,
            current_string,
            current_string_unresolved,
        )
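
The "\a" writes above build what the surrounding code treats as a replacement-marker triple, "\a<original>\a<resolved>\a", keeping a token's source text and its resolved form in one string. Below is a sketch of decoding such a string back to either view; decode_replacements is hypothetical, written only to illustrate the convention inferred from the code above:

    import re

    MARKER = re.compile("\a([^\a]*)\a([^\a]*)\a")

    def decode_replacements(text, keep_original):
        group = 1 if keep_original else 2
        return MARKER.sub(lambda match: match.group(group), text)

    encoded = "a\a&amp;\a&\ab"  # "&amp;" in the source resolved to "&"
    print(decode_replacements(encoded, keep_original=True))   # -> a&amp;b
    print(decode_replacements(encoded, keep_original=False))  # -> a&b
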
Example #12
    def __process_inline_text_block(
        source_text,
        starting_whitespace="",
        whitespace_to_recombine=None,
        is_setext=False,
    ):
        """
        Process a text block for any inline items.
        """

        inline_blocks = []
        start_index = 0
        if whitespace_to_recombine and " " in whitespace_to_recombine:
            source_text = InlineProcessor.__recombine_with_whitespace(
                source_text, whitespace_to_recombine)
        else:
            whitespace_to_recombine = None

        current_string = ""
        current_string_unresolved = ""
        end_string = ""

        inline_response = InlineResponse()

        next_index = ParserHelper.index_any_of(
            source_text,
            InlineProcessor.__valid_inline_text_block_sequence_starts,
            start_index,
        )
        LOGGER.debug("__process_inline_text_block>>is_setext>>%s",
                     str(is_setext))
        LOGGER.debug(
            "__process_inline_text_block>>%s>>%s",
            source_text.replace("\n", "\\n"),
            str(start_index),
        )
        while next_index != -1:

            inline_response.clear_fields()
            reset_current_string = False
            whitespace_to_add = None

            LOGGER.debug("__process_inline_text_block>>%s>>%s",
                         str(start_index), str(next_index))
            remaining_line = source_text[start_index:next_index]

            inline_request = InlineRequest(
                source_text,
                next_index,
                inline_blocks,
                remaining_line,
                current_string_unresolved,
            )
            if source_text[
                    next_index] in InlineProcessor.__inline_character_handlers:
                LOGGER.debug("handler(before)>>%s<<", source_text[next_index])
                proc_fn = InlineProcessor.__inline_character_handlers[
                    source_text[next_index]]
                inline_response = proc_fn(inline_request)
                LOGGER.debug("handler(after)>>%s<<", source_text[next_index])
            else:
                assert source_text[next_index] == "\n"
                LOGGER.debug(
                    "end_string(before)>>%s<<",
                    str(end_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
                )
                (
                    inline_response.new_string,
                    whitespace_to_add,
                    inline_response.new_index,
                    inline_response.new_tokens,
                    remaining_line,
                    end_string,
                    current_string,
                ) = InlineHelper.handle_line_end(next_index, remaining_line,
                                                 end_string, current_string)
                LOGGER.debug(
                    "handle_line_end>>new_tokens>>%s<<",
                    str(inline_response.new_tokens).replace(
                        "\n", "\\n").replace("\x02", "\\x02"),
                )
                if not inline_response.new_tokens:
                    end_string = InlineProcessor.__add_recombined_whitespace(
                        bool(whitespace_to_recombine),
                        source_text,
                        inline_response,
                        end_string,
                        is_setext,
                    )
                LOGGER.debug(
                    "handle_line_end>>%s<<",
                    source_text[inline_response.new_index:].replace(
                        "\n", "\\n").replace("\x02", "\\x02"),
                )
                LOGGER.debug(
                    "end_string(after)>>%s<<",
                    str(end_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
                )

            LOGGER.debug(
                "new_string-->%s<--",
                str(inline_response.new_string).replace("\n", "\\n"),
            )
            LOGGER.debug("new_index-->%s<--", str(inline_response.new_index))
            LOGGER.debug(
                "new_tokens-->%s<--",
                str(inline_response.new_tokens).replace("\n", "\\n"),
            )
            LOGGER.debug(
                "new_string_unresolved-->%s<--",
                str(inline_response.new_string_unresolved).replace(
                    "\n", "\\n"),
            )
            LOGGER.debug(
                "consume_rest_of_line-->%s<--",
                str(inline_response.consume_rest_of_line),
            )
            LOGGER.debug(
                "original_string-->%s<--",
                str(inline_response.original_string).replace("\n", "\\n"),
            )

            if inline_response.consume_rest_of_line:
                inline_response.new_string = ""
                reset_current_string = True
                inline_response.new_tokens = None
            else:
                current_string = InlineHelper.append_text(
                    current_string, remaining_line)
                current_string_unresolved = InlineHelper.append_text(
                    current_string_unresolved, remaining_line)

            LOGGER.debug(
                "current_string>>%s<<",
                str(current_string).replace("\n",
                                            "\\n").replace("\x02", "\\x02"),
            )
            LOGGER.debug(
                "current_string_unresolved>>%s<<",
                str(current_string_unresolved).replace("\n", "\\n").replace(
                    "\x02", "\\x02"),
            )
            if inline_response.new_tokens:
                if current_string:
                    # assert end_string is None
                    inline_blocks.append(
                        TextMarkdownToken(
                            current_string,
                            starting_whitespace,
                            end_whitespace=end_string,
                        ))
                    reset_current_string = True
                    starting_whitespace = ""
                    end_string = None

                inline_blocks.extend(inline_response.new_tokens)

            if reset_current_string:
                current_string = ""
                current_string_unresolved = ""

            (
                start_index,
                next_index,
                end_string,
                current_string,
                current_string_unresolved,
            ) = InlineProcessor.__complete_inline_loop(
                source_text,
                inline_response.new_index,
                end_string,
                whitespace_to_add,
                current_string,
                current_string_unresolved,
                inline_response.new_string_unresolved,
                inline_response.new_string,
                inline_response.original_string,
            )
            LOGGER.debug(
                "<<current_string<<%s<<%s<<",
                str(len(current_string)),
                current_string.replace("\b", "\\b").replace("\a",
                                                            "\\a").replace(
                                                                "\n", "\\n"),
            )
            LOGGER.debug(
                "<<current_string_unresolved<<%s<<%s<<",
                str(len(current_string_unresolved)),
                current_string_unresolved.replace("\b", "\\b").replace(
                    "\a", "\\a").replace("\n", "\\n"),
            )

        LOGGER.debug("<<__complete_inline_block_processing<<")
        return InlineProcessor.__complete_inline_block_processing(
            inline_blocks,
            source_text,
            start_index,
            current_string,
            end_string,
            starting_whitespace,
            is_setext,
        )
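
Stripped of the logging and state threading, the driving shape of this loop is: jump to the next "interesting" character with index_any_of, dispatch through a per-character handler table, and carry the plain run in between. A minimal standalone sketch of that shape, with the handler signature simplified from InlineRequest/InlineResponse:

    def index_any_of(text, characters, start):
        for index in range(start, len(text)):
            if text[index] in characters:
                return index
        return -1

    def scan(text, handlers):
        out, start = [], 0
        next_index = index_any_of(text, "".join(handlers), 0)
        while next_index != -1:
            out.append(text[start:next_index])  # plain run before the marker
            new_string, new_index = handlers[text[next_index]](text, next_index)
            out.append(new_string)
            start = new_index
            next_index = index_any_of(text, "".join(handlers), start)
        out.append(text[start:])  # trailing plain run
        return "".join(out)

    # Toy handler: escape "&" and consume one character.
    print(scan("a & b", {"&": lambda text, index: ("&amp;", index + 1)}))
    # -> a &amp; b
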
Example #13
    def __process_fenced_start(
        parser_state: ParserState,
        position_marker: PositionMarker,
        non_whitespace_index: int,
        collected_count: int,
        extracted_whitespace: Optional[str],
        extracted_whitespace_before_info_string: Optional[str],
    ) -> List[MarkdownToken]:
        """
        Process the start of a fenced code block.
        """
        POGGER.debug("pfcb->check")
        new_tokens: List[MarkdownToken] = []
        if (
            position_marker.text_to_parse[position_marker.index_number]
            == LeafBlockProcessor.__fenced_start_tilde
            or LeafBlockProcessor.__fenced_start_backtick
            not in position_marker.text_to_parse[non_whitespace_index:]
        ):
            POGGER.debug("pfcb->start")
            (
                after_extracted_text_index,
                extracted_text,
            ) = ParserHelper.extract_until_whitespace(
                position_marker.text_to_parse, non_whitespace_index
            )
            assert extracted_text is not None
            text_after_extracted_text = position_marker.text_to_parse[
                after_extracted_text_index:
            ]

            old_top_of_stack = parser_state.token_stack[-1]
            new_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[ParagraphStackToken],
            )

            pre_extracted_text, pre_text_after_extracted_text = (
                extracted_text,
                text_after_extracted_text,
            )

            assert extracted_text is not None
            extracted_text = InlineHelper.handle_backslashes(extracted_text)
            text_after_extracted_text = InlineHelper.handle_backslashes(
                text_after_extracted_text
            )

            if pre_extracted_text == extracted_text:
                pre_extracted_text = ""
            if pre_text_after_extracted_text == text_after_extracted_text:
                pre_text_after_extracted_text = ""

            assert extracted_whitespace is not None
            assert extracted_whitespace_before_info_string is not None
            new_token = FencedCodeBlockMarkdownToken(
                position_marker.text_to_parse[position_marker.index_number],
                collected_count,
                extracted_text,
                pre_extracted_text,
                text_after_extracted_text,
                pre_text_after_extracted_text,
                extracted_whitespace,
                extracted_whitespace_before_info_string,
                position_marker,
            )
            new_tokens.append(new_token)
            assert extracted_whitespace is not None
            parser_state.token_stack.append(
                FencedCodeBlockStackToken(
                    code_fence_character=position_marker.text_to_parse[
                        position_marker.index_number
                    ],
                    fence_character_count=collected_count,
                    whitespace_start_count=ParserHelper.calculate_length(
                        extracted_whitespace
                    ),
                    matching_markdown_token=new_token,
                )
            )
            POGGER.debug("StackToken-->$<<", parser_state.token_stack[-1])
            POGGER.debug(
                "StackToken>start_markdown_token-->$<<",
                parser_state.token_stack[-1].matching_markdown_token,
            )

            LeafBlockProcessor.correct_for_leaf_block_start_in_list(
                parser_state,
                position_marker.index_indent,
                old_top_of_stack,
                new_tokens,
            )
        return new_tokens
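
The opening condition encodes a CommonMark rule that is easy to miss: the info string of a backtick fence may not itself contain a backtick, while tilde fences carry no such restriction. As a bare predicate:

    def info_string_allowed(fence_character, rest_of_line):
        return fence_character == "~" or "`" not in rest_of_line

    print(info_string_allowed("`", "python `oops`"))  # -> False
    print(info_string_allowed("~", "python `fine`"))  # -> True
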
Example #14
    def parse_fenced_code_block(
        parser_state,
        position_marker,
        extracted_whitespace,
    ):
        """
        Handle the parsing of a fenced code block.
        """

        LOGGER.debug(
            "line>>%s>>index>>%s>>",
            position_marker.text_to_parse,
            position_marker.index_number,
        )
        new_tokens = []
        (
            is_fence_start,
            non_whitespace_index,
            extracted_whitespace_before_info_string,
            collected_count,
        ) = LeafBlockProcessor.is_fenced_code_block(
            position_marker.text_to_parse,
            position_marker.index_number,
            extracted_whitespace,
        )
        if is_fence_start and not parser_state.token_stack[-1].is_html_block:
            if parser_state.token_stack[-1].is_fenced_code_block:
                LOGGER.debug("pfcb->end")

                if (parser_state.token_stack[-1].code_fence_character
                        == position_marker.text_to_parse[
                            position_marker.index_number] and collected_count
                        >= parser_state.token_stack[-1].fence_character_count
                        and non_whitespace_index >= len(
                            position_marker.text_to_parse)):
                    new_end_token = parser_state.token_stack[
                        -1].generate_close_token(extracted_whitespace)
                    new_tokens.append(new_end_token)
                    new_end_token.start_markdown_token = parser_state.token_stack[
                        -1].start_markdown_token
                    new_end_token.extra_end_data = str(collected_count)
                    new_end_token.compose_data_field()
                    del parser_state.token_stack[-1]
            else:
                LOGGER.debug("pfcb->check")
                if (position_marker.text_to_parse[position_marker.index_number]
                        == LeafBlockProcessor.__fenced_start_tilde
                        or LeafBlockProcessor.__fenced_start_backtick not in
                        position_marker.text_to_parse[non_whitespace_index:]):
                    LOGGER.debug("pfcb->start")
                    (
                        after_extracted_text_index,
                        extracted_text,
                    ) = ParserHelper.extract_until_whitespace(
                        position_marker.text_to_parse, non_whitespace_index)
                    text_after_extracted_text = position_marker.text_to_parse[
                        after_extracted_text_index:]

                    new_tokens, _, _ = parser_state.close_open_blocks_fn(
                        parser_state,
                        only_these_blocks=[ParagraphStackToken],
                    )

                    pre_extracted_text = extracted_text
                    pre_text_after_extracted_text = text_after_extracted_text

                    extracted_text = InlineHelper.handle_backslashes(
                        extracted_text, add_text_signature=False)
                    text_after_extracted_text = InlineHelper.handle_backslashes(
                        text_after_extracted_text, add_text_signature=False)

                    if pre_extracted_text == extracted_text:
                        pre_extracted_text = ""
                    if pre_text_after_extracted_text == text_after_extracted_text:
                        pre_text_after_extracted_text = ""

                    new_token = FencedCodeBlockMarkdownToken(
                        position_marker.text_to_parse[
                            position_marker.index_number],
                        collected_count,
                        extracted_text,
                        pre_extracted_text,
                        text_after_extracted_text,
                        pre_text_after_extracted_text,
                        extracted_whitespace,
                        extracted_whitespace_before_info_string,
                        position_marker,
                    )
                    new_tokens.append(new_token)
                    parser_state.token_stack.append(
                        FencedCodeBlockStackToken(
                            code_fence_character=position_marker.text_to_parse[
                                position_marker.index_number],
                            fence_character_count=collected_count,
                            whitespace_start_count=ParserHelper.calculate_length(
                                extracted_whitespace),
                            start_markdown_token=new_token,
                        ))
                    LOGGER.debug("StackToken-->%s<<",
                                 str(parser_state.token_stack[-1]))
                    LOGGER.debug(
                        "StackToken>start_markdown_token-->%s<<",
                        str(parser_state.token_stack[-1].start_markdown_token),
                    )
        elif (parser_state.token_stack[-1].is_fenced_code_block
              and parser_state.token_stack[-1].whitespace_start_count
              and extracted_whitespace):

            current_whitespace_length = ParserHelper.calculate_length(
                extracted_whitespace)
            whitespace_left = max(
                0,
                current_whitespace_length -
                parser_state.token_stack[-1].whitespace_start_count,
            )
            LOGGER.debug("previous_ws>>%s", str(current_whitespace_length))
            LOGGER.debug("whitespace_left>>%s", str(whitespace_left))
            removed_whitespace = ("\a" + "".rjust(
                current_whitespace_length - whitespace_left, " ") + "\a\x03\a")
            extracted_whitespace = removed_whitespace + "".rjust(
                whitespace_left, " ")
        return new_tokens, extracted_whitespace
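
The close test in the fenced-code branch reduces to three conditions: the same fence character as the opener, at least as many of them, and nothing but whitespace afterward. A condensed predicate, with leading-indent handling simplified away:

    def closes_fence(open_character, open_count, line):
        stripped = line.strip()
        return (
            bool(stripped)
            and set(stripped) == {open_character}
            and len(stripped) >= open_count
        )

    print(closes_fence("`", 3, "````"))  # -> True
    print(closes_fence("`", 3, "``"))    # -> False (too short)
    print(closes_fence("`", 3, "~~~"))   # -> False (wrong character)
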
Example #15
    def __parse_link_destination(source_text, new_index):
        """
        Parse an inline link's link destination.
        """

        LOGGER.debug("parse_link_destination>>new_index>>%s>>",
                     source_text[new_index:])
        start_index = new_index
        if ParserHelper.is_character_at_index(source_text, new_index,
                                              LinkHelper.__angle_link_start):
            LOGGER.debug(
                ">parse_angle_link_destination>new_index>%s>%s",
                str(new_index),
                str(source_text[new_index:]),
            )
            new_index, ex_link = LinkHelper.__parse_angle_link_destination(
                source_text, new_index)
            LOGGER.debug(
                ">parse_angle_link_destination>new_index>%s>ex_link>%s>",
                str(new_index),
                ex_link,
            )
        else:
            LOGGER.debug(
                ">parse_non_angle_link_destination>new_index>%s>%s",
                str(new_index),
                str(source_text[new_index:]),
            )
            new_index, ex_link = LinkHelper.__parse_non_angle_link_destination(
                source_text, new_index)
            LOGGER.debug(
                ">parse_non_angle_link_destination>new_index>%s>ex_link>%s>",
                str(new_index),
                str(ex_link),
            )
            if not ex_link:
                return None, None, -1, None

        if new_index != -1 and "\n" in ex_link:
            return None, None, -1, None
        LOGGER.debug(
            "handle_backslashes>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_link),
        )

        pre_handle_link = ex_link
        if new_index != -1 and ex_link:
            ex_link = InlineHelper.handle_backslashes(ex_link,
                                                      add_text_signature=False)
        LOGGER.debug(
            "urllib.parse.quote>>ex_link>>%s>>",
            str(ex_link).replace(InlineHelper.backspace_character, "\\b"),
        )

        ex_link = LinkHelper.__encode_link_destination(ex_link)
        LOGGER.debug(
            "parse_link_destination>>new_index>>%s>>ex_link>>%s>>",
            str(new_index),
            str(ex_link),
        )
        return ex_link, pre_handle_link, new_index, source_text[
            start_index:new_index]
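
The dispatch itself is a single character test: a leading "<" selects the angle form, anything else the bare form. A self-contained reduction; the escape and parenthesis handling shown in the sketches after Examples #4 and #8 is omitted here:

    def parse_destination(text, index):
        if index < len(text) and text[index] == "<":
            end = text.find(">", index + 1)  # angle form (escapes omitted)
            return (-1, None) if end == -1 else (end + 1, text[index + 1:end])
        end = index
        while end < len(text) and not text[end].isspace():
            end += 1  # bare form (paren balancing omitted)
        return end, text[index:end]

    print(parse_destination("</my url>", 0))  # -> (9, '/my url')
    print(parse_destination("/url rest", 0))  # -> (4, '/url')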