def is_olist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Determine if we have the start of an numbered or ordered list.

    The line is only examined when the leading whitespace (``adj_ws`` if
    supplied, otherwise ``extracted_whitespace``) is at most 3 characters
    long, or when ``skip_whitespace_check`` is set, and the character at
    ``start_index`` is a digit.  A start is then reported when all of the
    following hold:

    - the run of digits is at most 9 characters long,
    - the character after the digits is one of
      ``ListBlockProcessor.__olist_start_characters``,
    - that character is followed by whitespace or by the end of the line,
    - it is NOT the case that the top of the token stack is a paragraph, the
      entry beneath it is not a list, and the item is either empty (only
      whitespace follows the marker) or numbered something other than "1".

    Returns a tuple of ``(is_start, index, my_count, end_whitespace_index)``:
    ``index`` is the position just past the digits and ``my_count`` the
    number of digits (both ``None`` when the line was not examined), and
    ``end_whitespace_index`` is the first value returned by
    ``ParserHelper.extract_whitespace`` for the text after the marker
    (``-1`` when the line was not examined).
    """
    is_start = False
    end_whitespace_index = -1
    index = None
    my_count = None

    if adj_ws is None:
        adj_ws = extracted_whitespace

    if (
        ParserHelper.is_length_less_than_or_equal_to(adj_ws, 3)
        or skip_whitespace_check
    ) and ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, string.digits
    ):
        index = start_index
        while ParserHelper.is_character_at_index_one_of(
            line_to_parse, index, string.digits
        ):
            index += 1
        my_count = index - start_index
        olist_index_number = line_to_parse[start_index:index]
        LOGGER.debug("olist?%s<<count>>%s<<", olist_index_number, my_count)
        # The digits may run to the very end of the line (e.g. "2020"), in
        # which case `index == len(line_to_parse)`.  Slice instead of
        # indexing so that this debug statement cannot raise IndexError.
        LOGGER.debug("olist>>%s", line_to_parse[index : index + 1])
        LOGGER.debug("index+1>>%s>>len>>%s", index + 1, len(line_to_parse))

        # NOTE(review): when the digits end the line, `index + 1` is past the
        # end of the string; this presumes `extract_whitespace` tolerates an
        # out-of-range start by returning a 2-tuple -- confirm.
        end_whitespace_index, _ = ParserHelper.extract_whitespace(
            line_to_parse, index + 1
        )
        LOGGER.debug(
            "end_whitespace_index>>%s>>len>>%s>>%s",
            end_whitespace_index,
            len(line_to_parse),
            olist_index_number,
        )

        if (
            my_count <= 9
            and ParserHelper.is_character_at_index_one_of(
                line_to_parse, index, ListBlockProcessor.__olist_start_characters
            )
            and not (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (
                    (end_whitespace_index == len(line_to_parse))
                    or olist_index_number != "1"
                )
            )
            and (
                ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, index + 1
                )
                or ((index + 1) == len(line_to_parse))
            )
        ):
            is_start = True

    LOGGER.debug("is_olist_start>>result>>%s", is_start)
    return is_start, index, my_count, end_whitespace_index
def is_thematic_break(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
    whitespace_allowed_between_characters: bool = True,
) -> Tuple[Optional[str], Optional[int]]:
    """
    Determine whether or not we have a thematic break.

    A break is recognised when the leading whitespace is at most 3
    characters long (or ``skip_whitespace_check`` is set), the character at
    ``start_index`` is one of the thematic break characters, and the rest of
    the line holds nothing but that same character (plus whitespace, when
    ``whitespace_allowed_between_characters`` is set) with at least three
    occurrences of it.

    Returns the break character and the index at which the scan ended (the
    length of the line), or ``(None, None)`` when the line is not a break.
    """
    assert extracted_whitespace is not None

    is_thematic_character = ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
    )
    POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
    POGGER.debug("is_thematic_character>>$", is_thematic_character)

    indent_is_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not (indent_is_acceptable and is_thematic_character):
        return None, None

    break_character = line_to_parse[start_index]
    line_length = len(line_to_parse)
    scan_index = start_index
    break_character_count = 0
    while scan_index < line_length:
        skippable_whitespace = (
            whitespace_allowed_between_characters
            and ParserHelper.is_character_at_index_whitespace(
                line_to_parse, scan_index
            )
        )
        if not skippable_whitespace:
            if line_to_parse[scan_index] != break_character:
                # Anything other than the break character disqualifies the line.
                break  # pragma: no cover
            break_character_count += 1
        scan_index += 1

    POGGER.debug("char_count>>$", break_character_count)
    POGGER.debug("index>>$", scan_index)
    POGGER.debug("line_to_parse_size>>$", line_length)

    # Only a scan that consumed the whole line counts as a break.
    if break_character_count >= 3 and scan_index == line_length:
        return break_character, scan_index
    return None, None
def test_is_character_at_index_whitespace_without_whitespace():
    """
    Make sure that a string with a non-whitespace character at the index is
    reported as not being whitespace.
    """

    # Arrange
    text_under_test = "a"
    index_under_test = 0
    expected_result = False

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_under_test, index_under_test
    )

    # Assert
    assert expected_result == actual_result
def test_is_character_at_index_whitespace_with_whitespace_at_end():
    """
    Make sure that whitespace in the final position of a string is detected
    when the index points at that final character.
    """

    # Arrange
    text_under_test = "this is a test "
    last_character_index = len(text_under_test) - 1
    expected_result = True

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_under_test, last_character_index
    )

    # Assert
    assert expected_result == actual_result
def test_is_character_at_index_whitespace_with_low_index():
    """
    Make sure that an index below the start of the string is handled
    properly and is reported as not being whitespace.
    """

    # Arrange
    text_under_test = "this is a test"
    index_before_start = -1
    expected_result = False

    # Act
    actual_result = ParserHelper.is_character_at_index_whitespace(
        text_under_test, index_before_start
    )

    # Assert
    assert expected_result == actual_result
def is_ulist_start(
    parser_state,
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
    adj_ws=None,
):
    """
    Determine if we have the start of an un-numbered list.

    Returns a tuple of ``(is_start, after_all_whitespace_index)``.  The
    second value is the first value returned by
    ``ParserHelper.extract_whitespace`` for the text after the list marker,
    or ``-1`` when the line does not begin with a candidate marker.
    """
    LOGGER.debug("is_ulist_start>>pre>>")

    found_start = False
    index_after_whitespace = -1
    whitespace_to_measure = extracted_whitespace if adj_ws is None else adj_ws
    after_marker_index = start_index + 1

    # A candidate marker needs: acceptable indentation, a list start
    # character, and then either whitespace or the end of the line.
    has_candidate_marker = (
        (
            ParserHelper.is_length_less_than_or_equal_to(whitespace_to_measure, 3)
            or skip_whitespace_check
        )
        and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, ListBlockProcessor.__ulist_start_characters
        )
        and (
            ParserHelper.is_character_at_index_whitespace(
                line_to_parse, after_marker_index
            )
            or after_marker_index == len(line_to_parse)
        )
    )

    if has_candidate_marker:
        LOGGER.debug("is_ulist_start>>mid>>")
        index_after_whitespace, _ = ParserHelper.extract_whitespace(
            line_to_parse, after_marker_index
        )
        LOGGER.debug(
            "after_all_whitespace_index>>%s>>len>>%s",
            str(index_after_whitespace),
            str(len(line_to_parse)),
        )

        # A thematic break on the same line wins over a list start.
        is_break, _ = LeafBlockProcessor.is_thematic_break(
            line_to_parse, start_index, extracted_whitespace
        )
        if not is_break:
            # An empty item is rejected when the top of the stack is a
            # paragraph and the entry beneath it is not a list.
            empty_item_after_paragraph = (
                parser_state.token_stack[-1].is_paragraph
                and not parser_state.token_stack[-2].is_list
                and (index_after_whitespace == len(line_to_parse))
            )
            if not empty_item_after_paragraph:
                found_start = True

    LOGGER.debug("is_ulist_start>>result>>%s", str(found_start))
    return found_start, index_after_whitespace
def __ensure_stack_at_level(
    parser_state,
    this_bq_count,
    stack_bq_count,
    extracted_whitespace,
    position_marker,
    original_start_index,
):
    """
    Ensure that the block quote stack is at the proper level on the stack.

    When the line asks for more block quote levels than the stack holds,
    open paragraphs and indented code blocks are closed, and one stack token
    plus one markdown token is added per missing level.

    Returns the tokens produced and the updated stack block quote count.
    """
    # Nothing to do when the stack is already deep enough.
    if this_bq_count <= stack_bq_count:
        return [], stack_bq_count

    produced_tokens, _, _ = parser_state.close_open_blocks_fn(
        parser_state,
        only_these_blocks=[ParagraphStackToken, IndentedCodeBlockStackToken],
    )

    marker_index = original_start_index
    for _ in range(this_bq_count - stack_bq_count):
        parser_state.token_stack.append(BlockQuoteStackToken())
        stack_bq_count += 1

        marker_position = PositionMarker(
            position_marker.line_number,
            marker_index,
            position_marker.text_to_parse,
        )
        produced_tokens.append(
            BlockQuoteMarkdownToken(extracted_whitespace, marker_position)
        )

        # Step over this level's block quote character and, if present, the
        # single whitespace character that follows it.
        assert (
            position_marker.text_to_parse[marker_index]
            == BlockQuoteProcessor.__block_quote_character
        )
        marker_index += 1
        if ParserHelper.is_character_at_index_whitespace(
            position_marker.text_to_parse, marker_index
        ):
            marker_index += 1

    return produced_tokens, stack_bq_count
def is_thematic_break(
    line_to_parse,
    start_index,
    extracted_whitespace,
    skip_whitespace_check=False,
):
    """
    Determine whether or not we have a thematic break.

    Returns the break character and the index at which the scan ended (the
    length of the line) when, from ``start_index`` to the end of the line,
    there is nothing but whitespace and at least three copies of the break
    character found at ``start_index``.  Returns ``(None, None)`` otherwise.
    """
    indent_is_acceptable = (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    )
    if not (
        indent_is_acceptable
        and ParserHelper.is_character_at_index_one_of(
            line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
        )
    ):
        return None, None

    break_character = line_to_parse[start_index]
    line_length = len(line_to_parse)
    occurrences = 0
    for scan_index in range(start_index, line_length):
        if ParserHelper.is_character_at_index_whitespace(line_to_parse, scan_index):
            continue
        if line_to_parse[scan_index] != break_character:
            # Something other than the break character or whitespace was
            # found before the end of the line.
            return None, None
        occurrences += 1

    # Reaching this point means the scan consumed the entire line.
    if occurrences >= 3:
        return break_character, line_length
    return None, None
def parse_atx_headings(parser_state, position_marker, extracted_whitespace):
    """
    Handle the parsing of an atx heading.

    Returns the list of tokens produced: empty when the line is not an atx
    heading, otherwise any tokens from closing the open blocks followed by
    the heading start token, its text token and its end token.
    """
    new_tokens = []

    if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
        return new_tokens

    atx_character = LeafBlockProcessor.__atx_character
    heading_line = position_marker.text_to_parse
    if not ParserHelper.is_character_at_index(
        heading_line, position_marker.index_number, atx_character
    ):
        return new_tokens

    hash_count, after_hashes_index = ParserHelper.collect_while_character(
        heading_line, position_marker.index_number, atx_character
    )
    text_start_index, whitespace_after_hashes = ParserHelper.extract_whitespace(
        heading_line, after_hashes_index
    )

    # The opening sequence is at most 6 characters and must be followed by
    # whitespace or by the end of the line.
    if hash_count > 6 or not (
        whitespace_after_hashes or text_start_index == len(heading_line)
    ):
        return new_tokens

    new_tokens, _, _ = parser_state.close_open_blocks_fn(parser_state, new_tokens)

    heading_text = heading_line[text_start_index:]
    text_end_index, whitespace_at_end = ParserHelper.extract_whitespace_from_end(
        heading_text
    )

    # Count the run of atx characters sitting just before the trailing
    # whitespace; it may turn out to be a closing sequence.
    closing_count = 0
    while text_end_index > 0 and heading_text[text_end_index - 1] == atx_character:
        text_end_index -= 1
        closing_count += 1

    whitespace_before_closing = ""
    if not closing_count:
        # No closing sequence: just split off the trailing whitespace.
        whitespace_at_end = heading_text[text_end_index:]
        heading_text = heading_text[0:text_end_index]
    elif text_end_index <= 0:
        # The remaining text was nothing but the closing sequence.
        heading_text = ""
    elif ParserHelper.is_character_at_index_whitespace(
        heading_text, text_end_index - 1
    ):
        # Proper closing sequence: drop it, then split off the whitespace
        # that separated it from the heading text.
        heading_text = heading_text[:text_end_index]
        (
            text_end_index,
            whitespace_before_closing,
        ) = ParserHelper.extract_whitespace_from_end(heading_text)
        heading_text = heading_text[:text_end_index]
    else:
        # The run is not preceded by whitespace, so it is not treated as a
        # closing sequence and the text is left untouched.
        whitespace_at_end = ""
        closing_count = 0

    start_token = AtxHeadingMarkdownToken(
        hash_count,
        closing_count,
        extracted_whitespace,
        position_marker,
    )
    end_token = EndMarkdownToken(
        "atx",
        whitespace_at_end,
        whitespace_before_closing,
        None,
    )
    end_token.start_markdown_token = start_token

    new_tokens.append(start_token)
    new_tokens.append(TextMarkdownToken(heading_text, whitespace_after_hashes))
    new_tokens.append(end_token)
    return new_tokens
def __handle_link_reference_definition(
    self,
    context: PluginScanContext,
    token: MarkdownToken,
    num_container_tokens: int,
) -> None:
    """
    Check a link reference definition inside the innermost container (treated
    as a block quote) and report errors for it.

    An error is reported when the definition has leading whitespace, and
    when the whitespace before the destination or before the title contains
    a newline that is immediately followed by more whitespace.  Afterwards
    the line index tracked for this container level is advanced by one plus
    the number of newlines found in the definition's parts.
    """
    newline = ParserHelper.newline_character

    block_quote_token = cast(BlockQuoteMarkdownToken, self.__container_tokens[-1])
    assert block_quote_token.leading_spaces is not None
    lrd_token = cast(LinkReferenceDefinitionMarkdownToken, token)

    # Leading whitespace in front of the definition itself.
    leading_whitespace = lrd_token.extracted_whitespace
    if leading_whitespace:
        self.report_next_token_error(
            context,
            token,
            column_number_delta=-(lrd_token.column_number - len(leading_whitespace)),
        )

    # Whitespace following a newline in front of the link destination.
    destination_whitespace = lrd_token.link_destination_whitespace
    assert destination_whitespace is not None
    newline_index = destination_whitespace.find(newline)
    if newline_index != -1 and ParserHelper.is_character_at_index_whitespace(
        destination_whitespace, newline_index + 1
    ):
        self.__report_lrd_error(
            lrd_token,
            num_container_tokens,
            context,
            token,
            block_quote_token,
        )

    # Whitespace following a newline in front of the link title.
    title_whitespace = lrd_token.link_title_whitespace
    assert title_whitespace is not None
    newline_index = title_whitespace.find(newline)
    if newline_index != -1 and ParserHelper.is_character_at_index_whitespace(
        title_whitespace, newline_index + 1
    ):
        assert lrd_token.link_name_debug is not None
        line_number_delta = (
            lrd_token.link_name_debug.count(newline)
            + title_whitespace.count(newline)
            + 1
        )
        # Find the container prefix recorded for the offending line so the
        # reported column can be placed relative to it.
        prefix_index = self.__bq_line_index[num_container_tokens] + line_number_delta
        line_prefixes = block_quote_token.leading_spaces.split(newline)
        self.report_next_token_error(
            context,
            token,
            line_number_delta=line_number_delta,
            column_number_delta=-(len(line_prefixes[prefix_index]) + 1),
        )

    # Account for every line that the definition occupied.
    link_name = lrd_token.link_name_debug
    assert link_name is not None
    title_raw = lrd_token.link_title_raw
    assert title_raw is not None
    lines_consumed = 1 + sum(
        part.count(newline)
        for part in (link_name, destination_whitespace, title_whitespace, title_raw)
    )
    self.__bq_line_index[num_container_tokens] += lines_consumed
def __count_block_quote_starts(
    line_to_parse,
    start_index,
    stack_bq_count,
    is_top_of_stack_fenced_code_block,
):
    """
    Having detected a block quote character (">") on a line, continue to consume
    and count while the block quote pattern is there.

    Returns a tuple of the number of block quote characters counted, the
    index at which scanning stopped, the line with any tab that followed a
    block quote character expanded into spaces, and the index just past the
    last block quote character counted (``-1`` if none was counted).
    """
    # With no block quotes on the stack and a fenced code block on top,
    # nothing is counted and the index is stepped back by one.
    if stack_bq_count == 0 and is_top_of_stack_fenced_code_block:
        return 0, start_index - 1, line_to_parse, -1

    # Count the block quote character that triggered this call.
    bq_count = 1
    scan_index = start_index + 1
    index_after_last_bq = scan_index
    expanded_line = line_to_parse

    LOGGER.debug(
        "stack_bq_count--%s--is_top_of_stack_fenced_code_block--%s",
        str(stack_bq_count),
        str(is_top_of_stack_fenced_code_block),
    )

    while True:
        # Consume the single whitespace character that may follow a block
        # quote character, expanding a tab into spaces first.
        if ParserHelper.is_character_at_index_whitespace(expanded_line, scan_index):
            if expanded_line[scan_index] == "\t":
                tab_width = ParserHelper.calculate_length(
                    "\t", start_index=scan_index
                )
                LOGGER.debug("adj--%s--", expanded_line.replace("\t", "\\t"))
                expanded_line = "".join(
                    (
                        expanded_line[:scan_index],
                        " " * tab_width,
                        expanded_line[scan_index + 1 :],
                    )
                )
                LOGGER.debug("--%s--", expanded_line.replace("\t", "\\t"))
            scan_index += 1

        # Stop once a fenced code block has all the levels the stack holds,
        # at the end of the line, or at anything that is not a block quote
        # character.
        if (
            (is_top_of_stack_fenced_code_block and (bq_count >= stack_bq_count))
            or scan_index == len(expanded_line)
            or ParserHelper.is_character_at_index_not(
                expanded_line,
                scan_index,
                BlockQuoteProcessor.__block_quote_character,
            )
        ):
            break

        bq_count += 1
        scan_index += 1
        index_after_last_bq = scan_index

    LOGGER.debug(
        "__count_block_quote_starts--%s--%s--",
        str(scan_index),
        expanded_line.replace("\t", "\\t"),
    )
    return bq_count, scan_index, expanded_line, index_after_last_bq