Example #1
    def parse_html_block(
        parser_state: ParserState,
        position_marker: PositionMarker,
        extracted_whitespace: Optional[str],
    ) -> List[MarkdownToken]:
        """
        Determine whether the criteria needed to start an HTML block have been met.
        """

        html_block_type, _ = HtmlHelper.is_html_block(
            position_marker.text_to_parse,
            position_marker.index_number,
            extracted_whitespace,
            parser_state.token_stack,
        )
        if html_block_type:
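            # Close any open paragraph before starting the new HTML block,
            # then track the open block on the token stack.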
            new_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[ParagraphStackToken],
            )
            assert extracted_whitespace is not None
            new_token = HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
            new_tokens.append(new_token)
            parser_state.token_stack.append(
                HtmlBlockStackToken(html_block_type, new_token)
            )
        else:
            new_tokens = []
        return new_tokens
Example #2
    def __handle_blank_line_token_stack(
        parser_state: ParserState,
    ) -> Tuple[Optional[List[MarkdownToken]], bool, Optional[RequeueLineInfo]]:
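        """
        Determine how a blank line interacts with the block on top of the
        token stack (link reference definition, code block, HTML block,
        or list).
        """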
        is_processing_list, in_index = LeafBlockProcessor.check_for_list_in_process(
            parser_state)
        POGGER.debug(
            "hbl>>is_processing_list>>$>>in_index>>$>>last_stack>>$",
            is_processing_list,
            in_index,
            parser_state.token_stack[-1],
        )

        requeue_line_info: Optional[RequeueLineInfo] = None
        new_tokens: Optional[List[MarkdownToken]] = None
        force_default_handling: bool = parser_state.token_stack[
            -1
        ].was_link_definition_started
        if force_default_handling:
            POGGER.debug(
                "hbl>>process_link_reference_definition>>stopping link definition"
            )
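            # A blank line cannot be part of a link reference definition, so
            # force the partially collected definition to be processed now.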
            empty_position_marker = PositionMarker(-1, 0, "")
            (
                _,
                _,
                did_pause_lrd,
                requeue_line_info,
                new_tokens,
            ) = LinkReferenceDefinitionHelper.process_link_reference_definition(
                parser_state, empty_position_marker, "", "", "", 0, 0)
            assert not did_pause_lrd
            POGGER.debug(
                "hbl<<process_link_reference_definition>>stopping link definition"
            )
        elif parser_state.token_stack[-1].is_code_block:
            if parser_state.count_of_block_quotes_on_stack():
                POGGER.debug("hbl>>code block within block quote")
            else:
                POGGER.debug("hbl>>code block")
                new_tokens = []
        elif parser_state.token_stack[-1].is_html_block:
            POGGER.debug("hbl>>check_blank_html_block_end")
            new_tokens = HtmlHelper.check_blank_html_block_end(parser_state)
            POGGER.debug("hbl<<check_blank_html_block_end")
        elif (is_processing_list
              and parser_state.token_document[-1].is_blank_line
              and parser_state.token_document[-2].is_list_start):
            POGGER.debug("hbl>>double blank in list")
            new_tokens, _ = TokenizedMarkdown.__close_open_blocks(
                parser_state, until_this_index=in_index, include_lists=True)
            POGGER.debug("hbl<<double blank in list")
        return new_tokens, force_default_handling, requeue_line_info
Example #3
    def __handle_blank_line_in_block_quote(
            parser_state: ParserState,
            new_tokens: Optional[List[MarkdownToken]]) -> None:
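        """
        Handle a blank line that occurs while a block quote is open on the
        container stack.
        """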

        stack_index = parser_state.find_last_container_on_stack()
        POGGER.debug_with_visible_whitespace("new_tokens>>$", new_tokens)
        POGGER.debug("stack_index>>$", stack_index)
        if new_tokens and stack_index > 0:
            POGGER.debug_with_visible_whitespace("new_tokens[-1]>>$",
                                                 new_tokens[-1])
            assert new_tokens[-1].is_html_block_end and stack_index == (
                len(parser_state.token_stack) - 1)
            close_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[BlockQuoteStackToken],
                include_block_quotes=True,
            )
            POGGER.debug("close_tokens>>$", close_tokens)
            new_tokens.extend(close_tokens)

        stack_index = parser_state.find_last_container_on_stack()
        POGGER.debug(
            "blank>>bq_start>>$",
            parser_state.token_stack[stack_index],
        )
        if (
            stack_index > 0
            and parser_state.token_stack[stack_index].is_block_quote
        ):
            block_quote_token = cast(
                BlockQuoteMarkdownToken,
                parser_state.token_stack[stack_index].matching_markdown_token,
            )
            POGGER.debug("hblibq>>last_block_token>>$", block_quote_token)
            POGGER.debug("hblibq>>leading_text_index>>$",
                         block_quote_token.leading_text_index)
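            # Record an empty leading-spaces entry for the blank line so the
            # block quote's per-line whitespace data stays aligned with the
            # line count.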
            block_quote_token.add_leading_spaces("")
            POGGER.debug("hblibq>>last_block_token>>$", block_quote_token)
            POGGER.debug("hblibq>>leading_text_index>>$",
                         block_quote_token.leading_text_index)
Example #4
    def check_normal_html_block_end(
        parser_state: ParserState,
        line_to_parse: str,
        start_index: int,
        extracted_whitespace: str,
        position_marker: PositionMarker,
    ) -> List[MarkdownToken]:
        """
        Check to see if we have encountered the end of the current HTML block
        via text on a normal line.
        """

        new_tokens: List[MarkdownToken] = [
            TextMarkdownToken(
                line_to_parse[start_index:],
                extracted_whitespace,
                position_marker=position_marker,
            )
        ]

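        # HTML block types 1 through 5 are each closed by specific text
        # appearing anywhere on a line; test the line against the end
        # condition for the open block's type.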
        is_block_terminated, adj_line = False, line_to_parse[start_index:]
        assert parser_state.token_stack[-1].is_html_block
        html_token = cast(HtmlBlockStackToken, parser_state.token_stack[-1])
        if html_token.html_block_type == HtmlHelper.html_block_1:
            is_block_terminated = any(
                next_end_tag in adj_line
                for next_end_tag in HtmlHelper.__html_block_1_end_tags
            )
        elif html_token.html_block_type == HtmlHelper.html_block_2:
            is_block_terminated = HtmlHelper.__html_block_2_end in adj_line
        elif html_token.html_block_type == HtmlHelper.html_block_3:
            is_block_terminated = HtmlHelper.__html_block_3_end in adj_line
        elif html_token.html_block_type == HtmlHelper.html_block_4:
            is_block_terminated = HtmlHelper.__html_block_4_end in adj_line
        elif html_token.html_block_type == HtmlHelper.html_block_5:
            is_block_terminated = HtmlHelper.__html_block_5_end in adj_line

        if is_block_terminated:
            terminated_block_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[HtmlBlockStackToken],
            )
            assert terminated_block_tokens
            new_tokens.extend(terminated_block_tokens)
        return new_tokens
Example #5
    def check_blank_html_block_end(parser_state: ParserState) -> List[MarkdownToken]:
        """
        Check to see if we have encountered the end of the current HTML block
        via a blank line.
        """

        assert parser_state.token_stack[-1].is_html_block
        html_token = cast(HtmlBlockStackToken, parser_state.token_stack[-1])
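        # Only HTML blocks of types 6 and 7 are closed by a blank line; the
        # other types wait for their specific end text.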
        if html_token.html_block_type in [
            HtmlHelper.html_block_6,
            HtmlHelper.html_block_7,
        ]:
            new_tokens, _ = parser_state.close_open_blocks_fn(
                parser_state,
                only_these_blocks=[HtmlBlockStackToken],
            )
        else:
            new_tokens = []

        return new_tokens
Example #6
    def __handle_blank_line(
        parser_state: ParserState,
        input_line: str,
        from_main_transform: bool,
        position_marker: Optional[PositionMarker] = None,
    ) -> Tuple[List[MarkdownToken], Optional[RequeueLineInfo]]:
        """
        Handle the processing of a blank line.
        """

        (
            close_only_these_blocks,
            do_include_block_quotes,
            non_whitespace_index,
            extracted_whitespace,
        ) = TokenizedMarkdown.__handle_blank_line_init(from_main_transform,
                                                       input_line)

        (
            new_tokens,
            force_default_handling,
            requeue_line_info,
        ) = TokenizedMarkdown.__handle_blank_line_token_stack(parser_state)

        if from_main_transform:
            POGGER.debug("hbl>>__handle_blank_line_in_block_quote")
            TokenizedMarkdown.__handle_blank_line_in_block_quote(
                parser_state, new_tokens)

        if force_default_handling or new_tokens is None:
            POGGER.debug("hbl>>default blank handling-->cob")
            n_tokens, _ = TokenizedMarkdown.__close_open_blocks(
                parser_state,
                only_these_blocks=close_only_these_blocks,
                include_block_quotes=do_include_block_quotes,
                was_forced=True,
            )
            if new_tokens:
                new_tokens.extend(n_tokens)
            else:
                new_tokens = n_tokens

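        # If the innermost container is a list rather than a block quote,
        # record an empty leading-spaces entry so the list's per-line
        # whitespace data accounts for the blank line.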
        list_stack_index = parser_state.find_last_list_block_on_stack()
        block_stack_index = parser_state.find_last_block_quote_on_stack()
        POGGER.debug("list_stack_index>>$", list_stack_index)
        POGGER.debug("block_stack_index>>$", block_stack_index)
        if list_stack_index > 0 and list_stack_index > block_stack_index:
            list_token = cast(
                ListStartMarkdownToken,
                parser_state.token_stack[list_stack_index].matching_markdown_token,
            )
            POGGER.debug("hbl>>last_block_token>>$", list_token)
            list_token.add_leading_spaces("")
            POGGER.debug("hbl>>last_block_token>>$", list_token)

        POGGER.debug("hbl>>new_tokens>>$", new_tokens)
        assert non_whitespace_index == len(input_line)
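        # Do not emit a blank-line token if the requeue logic asks for this
        # line to be ignored as part of a link reference definition.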
        if not (requeue_line_info
                and requeue_line_info.force_ignore_first_as_lrd):
            new_tokens.append(
                BlankLineMarkdownToken(extracted_whitespace, position_marker))
        POGGER.debug("hbl>>new_tokens>>$", new_tokens)

        return new_tokens, requeue_line_info
Example #7
    def __parse_blocks_pass(self) -> List[MarkdownToken]:
        """
        The first pass over the document deals with block-level structure.
        """

        assert self.__parse_properties is not None
        assert self.__source_provider is not None
        self.__token_stack = [DocumentStackToken()]
        self.__tokenized_document = []
        self.__parse_properties.pragma_lines = {}

        POGGER.debug("---")
        try:
            first_line_in_document, line_number = (
                self.__source_provider.get_next_line(),
                1,
            )
            POGGER.debug("---$---", first_line_in_document)
            (
                first_line_in_document,
                line_number,
                requeue,
            ) = self.__process_front_matter_header_if_present(
                first_line_in_document, line_number, [])
            (
                did_start_close,
                did_started_close,
                keep_on_going,
                ignore_link_definition_start,
            ) = (first_line_in_document is None, False, True, False)
            next_line_in_document = first_line_in_document
            while keep_on_going:
                POGGER.debug("next-line>>$", next_line_in_document)
                POGGER.debug("stack>>$", self.__token_stack)
                POGGER.debug("current_block>>$", self.__token_stack[-1])
                POGGER.debug("line_number>>$", line_number)
                POGGER.debug("---")

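                # Each line is processed against a fresh ParserState facade
                # that exposes the shared token stack, document, and
                # callbacks.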
                parser_state = ParserState(
                    self.__token_stack,
                    self.__tokenized_document,
                    TokenizedMarkdown.__close_open_blocks,
                    self.__handle_blank_line,
                )
                if did_start_close:
                    (
                        did_started_close,
                        did_start_close,
                        tokens_from_line,
                        line_number,
                        keep_on_going,
                        requeue_line_info,
                    ) = self.__main_pass_did_start_close(
                        parser_state, line_number)
                else:
                    assert next_line_in_document is not None
                    position_marker = PositionMarker(line_number, 0,
                                                     next_line_in_document)
                    (
                        tokens_from_line,
                        requeue_line_info,
                    ) = self.__main_pass_did_not_start_close(
                        parser_state,
                        position_marker,
                        next_line_in_document,
                        ignore_link_definition_start,
                    )

                if keep_on_going:
                    (
                        line_number,
                        ignore_link_definition_start,
                        next_line_in_document,
                        did_start_close,
                        did_started_close,
                    ) = self.__main_pass_keep_on_going(
                        line_number,
                        requeue_line_info,
                        requeue,
                        tokens_from_line,
                        did_start_close,
                        did_started_close,
                    )
        except AssertionError as this_exception:
            error_message = f"A project assertion failed on line {line_number} of the current document."
            raise BadTokenizationError(error_message) from this_exception

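        # Emit any pragma lines collected during the pass as a single
        # trailing pragma token.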
        if self.__parse_properties.pragma_lines:
            self.__tokenized_document.append(
                PragmaToken(self.__parse_properties.pragma_lines))
        return self.__tokenized_document