def is_html_block(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    token_stack: List[StackToken],
) -> Tuple[Optional[str], Optional[str]]:
    """
    Determine if the current sequence of characters would start a html block element.
    """
    assert extracted_whitespace is not None
    if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
        return None, None
    if not ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        HtmlHelper.__html_block_start_character,
    ):
        return None, None
    return HtmlHelper.__determine_html_block_type(
        token_stack,
        line_to_parse,
        start_index,
    )
def parse_html_block(parser_state, position_marker, extracted_whitespace):
    """
    Determine if we have the criteria that we need to start an HTML block.
    """
    if not ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3):
        return []
    if not ParserHelper.is_character_at_index(
        position_marker.text_to_parse,
        position_marker.index_number,
        HtmlHelper.__html_block_start_character,
    ):
        return []
    html_block_type, remaining_html_tag = HtmlHelper.__determine_html_block_type(
        parser_state,
        position_marker.text_to_parse,
        position_marker.index_number,
    )
    if not html_block_type:
        return []
    # Starting an html block implicitly closes any open paragraph.
    new_tokens, _, _ = parser_state.close_open_blocks_fn(
        parser_state,
        only_these_blocks=[ParagraphStackToken],
    )
    parser_state.token_stack.append(
        HtmlBlockStackToken(html_block_type, remaining_html_tag)
    )
    new_tokens.append(
        HtmlBlockMarkdownToken(position_marker, extracted_whitespace)
    )
    return new_tokens
def append_text(
    string_to_append_to: str,
    text_to_append: str,
    alternate_escape_map: Optional[Dict[str, str]] = None,
    add_text_signature: bool = True,
) -> str:
    """
    Append the text to the given string, doing any needed encoding as we go.
    """
    escape_map = (
        alternate_escape_map
        if alternate_escape_map
        else InlineHelper.__html_character_escape_map
    )
    searched_characters = "".join(escape_map.keys())
    collected_parts = [string_to_append_to]
    scan_index = 0
    hit_index = ParserHelper.index_any_of(
        text_to_append, searched_characters, scan_index
    )
    while hit_index != -1:
        replacement = escape_map[text_to_append[hit_index]]
        collected_parts.append(text_to_append[scan_index:hit_index])
        if add_text_signature:
            collected_parts.append(
                ParserHelper.create_replacement_markers(
                    text_to_append[hit_index], replacement
                )
            )
        else:
            collected_parts.append(replacement)
        scan_index = hit_index + 1
        hit_index = ParserHelper.index_any_of(
            text_to_append, searched_characters, scan_index
        )
    if scan_index < len(text_to_append):
        collected_parts.append(text_to_append[scan_index:])
    return "".join(collected_parts)
def __munge(cls, show_whitespace: bool, log_format: str, args: List[Any]) -> str:
    """
    Interleave the `$`-delimited pieces of the format string with the
    rendered arguments, one argument per `$` placeholder.

    Arguments are rendered with make_whitespace_visible when
    show_whitespace is set, otherwise with make_value_visible.

    Raises ValueError when the number of `$` placeholders does not match
    the number of arguments.
    """
    split_log_format = log_format.split("$")
    if len(split_log_format) != len(args) + 1:
        # ValueError instead of a bare Exception: still caught by any
        # existing `except Exception` callers.
        raise ValueError(
            "The number of $ substitution characters does not equal the number of arguments in the list."
        )

    render = (
        ParserHelper.make_whitespace_visible
        if show_whitespace
        else ParserHelper.make_value_visible
    )
    # format[0], arg[0], format[1], arg[1], ..., format[n] — same
    # interleaving the even/odd index arithmetic produced, stated directly.
    output_parts: List[str] = [split_log_format[0]]
    for argument_index, next_argument in enumerate(args):
        output_parts.append(render(next_argument))
        output_parts.append(split_log_format[argument_index + 1])
    return "".join(output_parts)
def __parse_angle_link_destination(source_text, new_index):
    """
    Parse a link destination that is included in angle brackets.

    Returns (index just past the closing angle bracket, collected
    destination), or (-1, "") when no closing bracket is found.
    """
    collected_destination = ""
    # Skip past the opening angle bracket.
    new_index += 1
    keep_collecting = True
    while keep_collecting:
        keep_collecting = False
        # Collect up to the next break character for angle destinations.
        new_index, ert_new = ParserHelper.collect_until_one_of_characters(
            source_text, new_index, LinkHelper.__angle_link_destination_breaks)
        collected_destination = collected_destination + ert_new
        if ParserHelper.is_character_at_index(
                source_text, new_index, InlineHelper.backslash_character):
            # Consume the backslash escape verbatim and keep scanning.
            old_new_index = new_index
            inline_request = InlineRequest(source_text, new_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            new_index = inline_response.new_index
            collected_destination = (collected_destination +
                                     source_text[old_new_index:new_index])
            keep_collecting = True
    if ParserHelper.is_character_at_index(source_text, new_index,
                                          LinkHelper.__angle_link_end):
        new_index += 1
    else:
        # No closing angle bracket: signal failure with -1 / empty text.
        new_index = -1
        collected_destination = ""
    return new_index, collected_destination
def __encode_link_destination(link_to_encode):
    """
    Percent-encode a link destination for inclusion in html output.

    Runs of non-special characters are url-quoted.  A `%` is preserved
    only when followed by two hex digits (otherwise it is re-encoded as
    `%25`), and `&` is emitted as the `&amp;` entity.
    """
    encoded_link = ""
    percent_index, before_data = ParserHelper.collect_until_one_of_characters(
        link_to_encode, 0, LinkHelper.__special_link_destination_characters)
    encoded_link += urllib.parse.quote(
        before_data, safe=LinkHelper.__link_safe_characters)
    while percent_index < len(link_to_encode):
        special_character = link_to_encode[percent_index]
        percent_index += 1
        if special_character == "%":
            hex_guess_characters = link_to_encode[
                percent_index:percent_index + 2]
            if len(hex_guess_characters) == 2:
                try:
                    # Valid two-digit hex sequence: keep the escape as-is.
                    int(hex_guess_characters, 16)
                    encoded_link += "%" + hex_guess_characters
                    percent_index += 2
                except ValueError:
                    encoded_link += "%25"
            else:
                encoded_link += "%25"
        else:
            assert special_character == "&"
            # Escape the ampersand as an html entity; appending a raw "&"
            # here would make this entire branch a no-op.
            encoded_link += "&amp;"
        percent_index, before_data = ParserHelper.collect_until_one_of_characters(
            link_to_encode,
            percent_index,
            LinkHelper.__special_link_destination_characters,
        )
        encoded_link += urllib.parse.quote(
            before_data, safe=LinkHelper.__link_safe_characters)
    return encoded_link
def extract_optional_attribute_value(line_to_parse, value_index):
    """
    Determine and extract an optional attribute value.

    Returns the index just past the attribute value, the index of the
    first non-whitespace character when no `=value` part is present, or
    -1 when the value is malformed (unterminated quote, empty unquoted
    value, or a dangling separator).
    """
    non_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, value_index
    )
    # No name/value separator follows: the value is absent, which is legal.
    if (
        non_whitespace_index < len(line_to_parse)
        and line_to_parse[non_whitespace_index]
        != HtmlHelper.__html_attribute_name_value_separator
    ) or non_whitespace_index >= len(line_to_parse):
        return non_whitespace_index

    # Skip the separator and any whitespace before the value itself.
    non_whitespace_index += 1
    non_whitespace_index, _ = ParserHelper.extract_whitespace(
        line_to_parse, non_whitespace_index
    )
    if non_whitespace_index < len(line_to_parse):
        first_character_of_value = line_to_parse[non_whitespace_index]
        if first_character_of_value == HtmlHelper.__html_attribute_value_double:
            # Double-quoted value: consume up to the closing quote character.
            (
                non_whitespace_index,
                extracted_text,
            ) = ParserHelper.collect_until_character(
                line_to_parse,
                non_whitespace_index + 1,
                HtmlHelper.__html_attribute_value_double,
            )
            if non_whitespace_index == len(line_to_parse):
                # Unterminated quoted value.
                return -1
            non_whitespace_index += 1
        elif first_character_of_value == HtmlHelper.__html_attribute_value_single:
            # Single-quoted value: consume up to the closing quote character.
            (
                non_whitespace_index,
                extracted_text,
            ) = ParserHelper.collect_until_character(
                line_to_parse,
                non_whitespace_index + 1,
                HtmlHelper.__html_attribute_value_single,
            )
            if non_whitespace_index == len(line_to_parse):
                # Unterminated quoted value.
                return -1
            non_whitespace_index += 1
        else:
            # Unquoted value: collect until a terminator character.
            (
                non_whitespace_index,
                extracted_text,
            ) = ParserHelper.collect_until_one_of_characters(
                line_to_parse,
                non_whitespace_index,
                HtmlHelper.__html_tag_attribute_value_terminators,
            )
            if not extracted_text:
                # An empty unquoted value is not allowed.
                non_whitespace_index = -1
    else:
        # Separator with nothing after it.
        non_whitespace_index = -1
    return non_whitespace_index
def __handle_text_token(
    cls,
    output_html: str,
    next_token: MarkdownToken,
    transform_state: TransformState,
) -> str:
    """
    Handle the text token.
    """
    text_token = cast(TextMarkdownToken, next_token)
    resolved_text = ParserHelper.resolve_all_from_text(text_token.token_text)

    if transform_state.is_in_code_block:
        combined = [
            output_html,
            ParserHelper.resolve_all_from_text(text_token.extracted_whitespace),
            resolved_text,
        ]
    elif transform_state.is_in_html_block:
        combined = [
            output_html,
            text_token.extracted_whitespace,
            resolved_text,
            ParserHelper.newline_character,
        ]
    else:
        combined = [output_html, resolved_text]
    return "".join(combined)
def __handle_numeric_character_reference(source_text, new_index):
    """
    Handle a character reference that is numeric in nature.

    Returns (replacement string, index after the consumed text, original
    reference text or None).  The replacement string is None when no
    digits could be parsed at all.
    """
    original_reference = None
    # Initialized up front: previously this was only assigned inside the
    # delta-range branches, so inputs like "&#;" (zero digits collected)
    # hit the final return with new_string unbound -> UnboundLocalError.
    new_string = None
    new_index += 1
    translated_reference = -1
    if new_index < len(source_text) and (
            source_text[new_index]
            in InlineHelper.__hex_character_reference_start_character):
        # Hexadecimal form: 1 to 6 hex digits after the x/X marker.
        hex_char = source_text[new_index]
        new_index += 1
        end_index, collected_string = ParserHelper.collect_while_one_of_characters(
            source_text, new_index, string.hexdigits)
        LOGGER.debug(
            "&#x>>a>>%s>>b>>%s>>%s",
            str(end_index),
            str(collected_string),
            str(len(source_text)),
        )
        delta = end_index - new_index
        LOGGER.debug("delta>>%s>>", str(delta))
        if 1 <= delta <= 6:
            translated_reference = int(collected_string, 16)
            new_string = (
                InlineHelper.character_reference_start_character +
                InlineHelper.__numeric_character_reference_start_character +
                hex_char + collected_string)
            new_index = end_index
    else:
        # Decimal form: 1 to 7 digits.
        end_index, collected_string = ParserHelper.collect_while_one_of_characters(
            source_text, new_index, string.digits)
        LOGGER.debug(
            "&#>>a>>%s>>b>>%s>>%s",
            str(end_index),
            str(collected_string),
            str(len(source_text)),
        )
        delta = end_index - new_index
        LOGGER.debug("delta>>%s>>", str(delta))
        if 1 <= delta <= 7:
            translated_reference = int(collected_string)
            new_string = (
                InlineHelper.character_reference_start_character +
                InlineHelper.__numeric_character_reference_start_character +
                collected_string)
            new_index = end_index

    if (translated_reference >= 0 and new_index < len(source_text)
            and source_text[new_index]
            == InlineHelper.__character_reference_end_character):
        new_index += 1
        original_reference = new_string + ";"
        # Code point 0 is not a valid character; substitute the
        # replacement character instead.
        if translated_reference == 0:
            new_string = InlineHelper.__invalid_reference_character_substitute
        else:
            new_string = chr(translated_reference)
    return new_string, new_index, original_reference
def parse_setext_headings(
    parser_state,
    position_marker,
    extracted_whitespace,
    this_bq_count,
    stack_bq_count,
):
    """
    Handle the parsing of an setext heading.

    A setext underline only applies when the indent is at most 3, the
    line starts with one of the setext characters, the currently open
    block is a paragraph, and the block-quote count of the line matches
    the count on the stack.
    """
    new_tokens = []
    if (ParserHelper.is_length_less_than_or_equal_to(
            extracted_whitespace, 3)
            and ParserHelper.is_character_at_index_one_of(
                position_marker.text_to_parse,
                position_marker.index_number,
                LeafBlockProcessor.__setext_characters,
            ) and parser_state.token_stack[-1].is_paragraph
            and (this_bq_count == stack_bq_count)):
        # Consume the full run of the underline character.
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(position_marker.text_to_parse,
                                            collected_to_index)
        # Only whitespace may follow the underline run.
        if after_whitespace_index == len(position_marker.text_to_parse):
            # This is unusual. Normally, close_open_blocks is used to close off
            # blocks based on the stack token. However, since the setext takes
            # the last paragraph of text (see case 61) and translates it
            # into a heading, this has to be done separately, as there is no
            # stack token to close.
            new_tokens.append(
                EndMarkdownToken(
                    MarkdownToken.token_setext_heading,
                    extracted_whitespace,
                    extra_whitespace_after_setext,
                    None,
                ))
            # Walk backwards to the paragraph token that the setext
            # heading replaces in the document.
            token_index = len(parser_state.token_document) - 1
            while not parser_state.token_document[token_index].is_paragraph:
                token_index -= 1

            replacement_token = SetextHeadingMarkdownToken(
                position_marker.text_to_parse[
                    position_marker.index_number],
                collected_to_index - position_marker.index_number,
                parser_state.token_document[token_index].extra_data,
                position_marker,
                parser_state.token_document[token_index],
            )
            parser_state.token_document[token_index] = replacement_token
            del parser_state.token_stack[-1]
    return new_tokens
def __calculate_backtick_between_text(
        inline_request: InlineRequest, new_index: int,
        end_backtick_start_index: int) -> Tuple[str, str, str, str]:
    """
    Extract and normalize the text between the opening and closing
    backtick runs of a code span.

    Returns (normalized text, text as originally written, leading
    whitespace stripped from the span, trailing whitespace stripped),
    with newlines in all three processed values encoded as
    replacement markers.
    """
    between_text = inline_request.source_text[
        new_index:end_backtick_start_index]
    original_between_text, leading_whitespace, trailing_whitespace = (
        between_text,
        "",
        "",
    )
    POGGER.debug(
        "after_collect>$>>$>>$<<",
        between_text,
        end_backtick_start_index,
        inline_request.source_text[end_backtick_start_index:],
    )
    # Strip one leading and one trailing space/newline, but only when both
    # are present and the remainder is not entirely whitespace.
    if (len(between_text) > 2 and between_text[0] in [
            ParserHelper.space_character, ParserHelper.newline_character
    ] and between_text[-1] in [
            ParserHelper.space_character, ParserHelper.newline_character
    ]):
        stripped_between_attempt = between_text[1:-1]
        if len(stripped_between_attempt.strip()) != 0:
            leading_whitespace, trailing_whitespace = (
                between_text[0],
                between_text[-1],
            )
            between_text = stripped_between_attempt

    # Newlines become spaces in the rendered span; record that with a
    # replacement marker so the original text stays recoverable.
    replaced_newline = ParserHelper.create_replacement_markers(
        ParserHelper.newline_character, ParserHelper.space_character)
    POGGER.debug("between_text>>$<<", between_text)
    between_text = ParserHelper.escape_special_characters(between_text)
    POGGER.debug("between_text>>$<<", between_text)
    POGGER.debug(
        "leading_whitespace>>$<<",
        leading_whitespace,
    )
    POGGER.debug(
        "trailing_whitespace>>$<<",
        trailing_whitespace,
    )
    between_text, leading_whitespace, trailing_whitespace = (
        between_text.replace(ParserHelper.newline_character, replaced_newline),
        leading_whitespace.replace(ParserHelper.newline_character,
                                   replaced_newline),
        trailing_whitespace.replace(ParserHelper.newline_character,
                                    replaced_newline),
    )
    return (
        between_text,
        original_between_text,
        leading_whitespace,
        trailing_whitespace,
    )
def parse_setext_headings(
    parser_state: ParserState,
    position_marker: PositionMarker,
    extracted_whitespace: Optional[str],
    block_quote_data: BlockQuoteData,
) -> List[MarkdownToken]:
    """
    Handle the parsing of an setext heading.

    A setext underline only applies when the indent is at most 3, the
    line starts with one of the setext characters, the currently open
    block is a paragraph, and the block-quote count of the line matches
    the count on the stack.
    """
    new_tokens: List[MarkdownToken] = []
    assert extracted_whitespace is not None
    if (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        and ParserHelper.is_character_at_index_one_of(
            position_marker.text_to_parse,
            position_marker.index_number,
            LeafBlockProcessor.__setext_characters,
        )
        and parser_state.token_stack[-1].is_paragraph
        and (block_quote_data.current_count == block_quote_data.stack_count)
    ):
        is_paragraph_continuation = (
            LeafBlockProcessor.__adjust_continuation_for_active_list(
                parser_state, position_marker
            )
        )

        # Consume the full run of the underline character.
        _, collected_to_index = ParserHelper.collect_while_character(
            position_marker.text_to_parse,
            position_marker.index_number,
            position_marker.text_to_parse[position_marker.index_number],
        )
        assert collected_to_index is not None
        (
            after_whitespace_index,
            extra_whitespace_after_setext,
        ) = ParserHelper.extract_whitespace(
            position_marker.text_to_parse, collected_to_index
        )
        # Only whitespace may follow the underline run, and the line must
        # not be a continuation of a paragraph inside an active list.
        if not is_paragraph_continuation and after_whitespace_index == len(
            position_marker.text_to_parse
        ):
            LeafBlockProcessor.__create_setext_token(
                parser_state,
                position_marker,
                collected_to_index,
                new_tokens,
                extracted_whitespace,
                extra_whitespace_after_setext,
            )
    return new_tokens
def __parse_raw_tag_name(text_to_parse: str, start_index: int) -> str:
    """
    Parse a HTML tag name from the string.
    """
    if not ParserHelper.is_character_at_index_one_of(
        text_to_parse, start_index, HtmlHelper.__valid_tag_name_start
    ):
        return ""
    end_of_name_index, _ = ParserHelper.collect_while_one_of_characters(
        text_to_parse, start_index + 1, HtmlHelper.__valid_tag_name_characters
    )
    return text_to_parse[:end_of_name_index]
def __parse_non_angle_link_destination(source_text, new_index):
    """
    Parse a link destination that is not included in angle brackets.

    Tracks the nesting level of the nest/unnest characters; returns
    (-1, None) when the destination ends with nesting still open,
    otherwise (index after the destination, collected destination).
    """
    collected_destination = ""
    nesting_level = 0
    keep_collecting = True
    while keep_collecting:
        LOGGER.debug(
            "collected_destination>>%s<<source_text<<%s>>nesting_level>>%s>>",
            str(collected_destination),
            source_text[new_index:],
            str(nesting_level),
        )
        keep_collecting = False
        new_index, before_part = ParserHelper.collect_until_one_of_characters(
            source_text, new_index, LinkHelper.__non_angle_link_breaks)
        collected_destination = collected_destination + before_part
        LOGGER.debug(">>>>>>%s<<<<<", source_text[new_index:])
        if ParserHelper.is_character_at_index(
                source_text, new_index, InlineHelper.backslash_character):
            # Consume the backslash escape verbatim and keep scanning.
            LOGGER.debug("backslash")
            old_new_index = new_index
            inline_request = InlineRequest(source_text, new_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            new_index = inline_response.new_index
            collected_destination = (collected_destination +
                                     source_text[old_new_index:new_index])
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__non_angle_link_nest):
            # Opening nest character: one level deeper.
            LOGGER.debug("+1")
            nesting_level += 1
            collected_destination += LinkHelper.__non_angle_link_nest
            new_index += 1
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                source_text, new_index, LinkHelper.__non_angle_link_unnest):
            # An unnest character only continues the destination while a
            # matching opener is still outstanding; otherwise it ends it.
            LOGGER.debug("-1")
            if nesting_level != 0:
                collected_destination += LinkHelper.__non_angle_link_unnest
                new_index += 1
                nesting_level -= 1
                keep_collecting = True
    ex_link = collected_destination
    LOGGER.debug("collected_destination>>%s", str(collected_destination))
    if nesting_level != 0:
        return -1, None
    return new_index, ex_link
def __is_front_matter_valid(
    collected_lines: List[str],
) -> Union[Dict[str, str], str]:
    """
    Validate the collected front-matter lines.

    Returns a map of keyword -> value on success, or an error string
    describing why the front matter is not valid.  Keywords are stored
    lower-cased so later lookups are case-insensitive.
    """
    ascii_letters_and_digits = f"{string.ascii_letters}{string.digits}_-"
    current_title = ""
    current_value = ""
    value_map: Dict[str, str] = {}
    for next_line in collected_lines:
        POGGER.debug("Next fm:>$s<", next_line)
        next_index, _ = ParserHelper.extract_whitespace(next_line, 0)
        assert next_index is not None
        if next_index >= 4:
            # Lines indented by 4+ spaces continue the current value.
            POGGER.debug("Indented line established.")
            if not current_title:
                return "Continuation line encountered before a keyword line."
            current_value += f"\n{next_line.strip()}"
            POGGER.debug("current_value>$<", current_value)
        else:
            if not next_line.strip():
                return "Blank line encountered before end of metadata."

            POGGER.debug("Non-indented line established.")
            if current_title:
                POGGER.debug("Adding '$' as '$'.", current_title, current_value)
                # Lower-cased for consistency with the final insertion
                # below; previously this in-loop insertion kept the
                # original case, making key lookups case-dependent.
                value_map[current_title.lower()] = current_value

            (
                next_index,
                collected_title,
            ) = ParserHelper.collect_while_one_of_characters(
                next_line, next_index, ascii_letters_and_digits)
            assert next_index is not None
            assert collected_title is not None
            current_title = collected_title
            if next_index < len(
                    next_line) and next_line[next_index] == ":":
                current_value = next_line[next_index + 1:].strip()
            else:
                return "Newline did not start with `keyword:`."
    if current_title:
        POGGER.debug("Adding final '$' as '$'.", current_title, current_value)
        value_map[current_title.lower()] = current_value

        # This is specifically to trigger test_front_matter_20.
        assert current_title != "test" or current_value != "assert"
    if not value_map:
        return "No valid metadata header lines were found."
    return value_map
def is_thematic_break(
    line_to_parse: str,
    start_index: int,
    extracted_whitespace: Optional[str],
    skip_whitespace_check: bool = False,
    whitespace_allowed_between_characters: bool = True,
) -> Tuple[Optional[str], Optional[int]]:
    """
    Determine whether or not we have a thematic break.

    Returns (break character, index after the break) on success and
    (None, None) otherwise.
    """
    assert extracted_whitespace is not None
    thematic_break_character, end_of_break_index = None, None
    is_thematic_character = ParserHelper.is_character_at_index_one_of(
        line_to_parse, start_index, LeafBlockProcessor.__thematic_break_characters
    )
    POGGER.debug("skip_whitespace_check>>$", skip_whitespace_check)
    POGGER.debug("is_thematic_character>>$", is_thematic_character)
    if (
        ParserHelper.is_length_less_than_or_equal_to(extracted_whitespace, 3)
        or skip_whitespace_check
    ) and is_thematic_character:
        start_char, index, char_count, line_to_parse_size = (
            line_to_parse[start_index],
            start_index,
            0,
            len(line_to_parse),
        )
        # Walk the rest of the line; only the break character itself and
        # (optionally) whitespace may appear before the end of the line.
        while index < line_to_parse_size:
            if (
                whitespace_allowed_between_characters
                and ParserHelper.is_character_at_index_whitespace(
                    line_to_parse, index
                )
            ):
                index += 1
            elif line_to_parse[index] == start_char:
                index += 1
                char_count += 1
            else:
                break  # pragma: no cover
        POGGER.debug("char_count>>$", char_count)
        POGGER.debug("index>>$", index)
        POGGER.debug("line_to_parse_size>>$", line_to_parse_size)
        # A break requires at least three characters and must consume the
        # entire line.
        if char_count >= 3 and index == line_to_parse_size:
            thematic_break_character, end_of_break_index = start_char, index
    return thematic_break_character, end_of_break_index
def compare_versus_expected( cls, stream_name, actual_stream, expected_text, additional_text=None, log_extra=None, ): """ Do a thorough comparison of the actual stream against the expected text. """ if additional_text: assert actual_stream.getvalue().strip().startswith(expected_text.strip()), ( f"Block\n---\n{expected_text}\n---\nwas not found at the start of" + "\n---\n{actual_stream.getvalue()}\nExtra:{log_extra}" ) for next_text_block in additional_text: was_found = next_text_block.strip() in actual_stream.getvalue().strip() diff = difflib.ndiff( next_text_block.strip().splitlines(), actual_stream.getvalue().strip().splitlines(), ) diff_values = ParserHelper.newline_character.join(list(diff)) print(diff_values, file=sys.stderr) if not was_found: raise AssertionError( f"Block\n---\n{next_text_block}\n---\nwas not found in\n---\n{actual_stream.getvalue()}" ) elif actual_stream.getvalue().strip() != expected_text.strip(): diff = difflib.ndiff( expected_text.splitlines(), actual_stream.getvalue().splitlines() ) diff_values = f"{ParserHelper.newline_character.join(list(diff))}\n---\n" LOGGER.warning( "actual>>%s", ParserHelper.make_value_visible(actual_stream.getvalue()), ) print( f"WARN>actual>>{ParserHelper.make_value_visible(actual_stream.getvalue())}" ) LOGGER.warning("expect>>%s", ParserHelper.make_value_visible(expected_text)) print(f"WARN>expect>>{ParserHelper.make_value_visible(expected_text)}") if log_extra: print(f"log_extra:{log_extra}") raise AssertionError(f"{stream_name} not as expected:\n{diff_values}")
def extract_link_label(line_to_parse, new_index, include_reference_colon=True):
    """
    Extract the link reference definition's link label.

    Returns (did-parse flag, new index, collected label).  The flag is
    False with index -1 on an unescaped `[` or a missing `:`, and False
    with the current index when the closing `]` is absent.
    """
    collected_destination = ""
    keep_collecting = True
    while keep_collecting:
        keep_collecting = False
        new_index, ert_new = ParserHelper.collect_until_one_of_characters(
            line_to_parse, new_index, LinkHelper.__link_label_breaks)
        collected_destination = collected_destination + ert_new
        if ParserHelper.is_character_at_index(
                line_to_parse, new_index, InlineHelper.backslash_character):
            # Consume the backslash escape verbatim and keep scanning.
            old_new_index = new_index
            inline_request = InlineRequest(line_to_parse, new_index)
            inline_response = InlineHelper.handle_inline_backslash(
                inline_request)
            new_index = inline_response.new_index
            collected_destination = (
                collected_destination + line_to_parse[old_new_index:new_index])
            keep_collecting = True
        elif ParserHelper.is_character_at_index(
                line_to_parse, new_index, LinkHelper.link_label_start):
            # A nested unescaped label-start character is not allowed.
            LOGGER.debug(">> unescaped [, bailing")
            return False, -1, None
    LOGGER.debug("look for ]>>%s<<", line_to_parse[new_index:])
    if not ParserHelper.is_character_at_index(line_to_parse, new_index,
                                              LinkHelper.link_label_end):
        LOGGER.debug(">> no end ], bailing")
        return False, new_index, None
    new_index += 1
    if include_reference_colon:
        # Link reference definitions additionally require a ":" right
        # after the closing "]".
        LOGGER.debug("look for :>>%s<<", line_to_parse[new_index:])
        if not ParserHelper.is_character_at_index(
                line_to_parse,
                new_index,
                LinkHelper.__link_label_is_definition_character,
        ):
            LOGGER.debug(">> no :, bailing")
            return False, -1, None
        new_index += 1

    return True, new_index, collected_destination
def __parse_raw_tag_name(text_to_parse, start_index):
    """
    Parse a HTML tag name from the string.

    Returns the text from the start of the string through the end of the
    tag name, or "" when no valid tag-name start character is present.
    """
    tag_name = ""
    if ParserHelper.is_character_at_index_one_of(
        text_to_parse, start_index, HtmlHelper.__valid_tag_name_start
    ):
        # Consistent with the typed variant of this helper: let
        # ParserHelper scan the run of tag-name characters instead of a
        # manual per-character loop of is_character_at_index_one_of calls.
        index, _ = ParserHelper.collect_while_one_of_characters(
            text_to_parse, start_index + 1, HtmlHelper.__valid_tag_name_characters
        )
        tag_name = text_to_parse[0:index]
    return tag_name
def __search_for_matches(
    self,
    string_to_check: str,
    context: PluginScanContext,
    token: MarkdownToken,
    same_line_offset: int = 0,
    start_x_offset: int = 0,
    start_y_offset: int = 0,
) -> None:
    """
    Scan the text for every configured proper name, case-insensitively,
    handing each hit off for closer inspection.
    """
    string_to_check = ParserHelper.remove_all_from_text(string_to_check)
    lowered_text = string_to_check.lower()
    for proper_name in self.__proper_name_list:
        lowered_name = proper_name.lower()
        scan_from = 0
        hit_at = lowered_text.find(lowered_name, scan_from)
        while hit_at != -1:
            self.__search_for_possible_matches(
                string_to_check,
                lowered_text,
                scan_from,
                hit_at,
                start_x_offset,
                start_y_offset,
                same_line_offset,
                proper_name,
                context,
                token,
            )
            # Resume scanning just past the hit.
            scan_from = hit_at + len(proper_name)
            hit_at = lowered_text.find(lowered_name, scan_from)
def __search_for_possible_matches(
    self,
    string_to_check: str,
    string_to_check_lower: str,
    search_start: int,
    found_index: int,
    start_x_offset: int,
    start_y_offset: int,
    same_line_offset: int,
    next_name: str,
    context: PluginScanContext,
    token: MarkdownToken,
) -> None:
    """
    Translate a raw find() hit into line/column deltas and check whether
    the hit is a proper-name match worth reporting.
    """
    # Convert the flat string offset of the hit into (column, line)
    # deltas relative to the start of the searched span.
    col_adjust, line_adjust = ParserHelper.adjust_for_newlines(
        string_to_check_lower, search_start, found_index)
    # Hits on the first searched line are shifted back by the
    # caller-supplied same-line offset.
    if line_adjust == 0 and start_y_offset == 0:
        col_adjust -= same_line_offset
    line_adjust += start_y_offset
    # NOTE(review): the sign flips below appear to encode how a negative
    # column delta is interpreted downstream (absolute vs. relative) —
    # confirm against __check_for_proper_match before changing.
    if col_adjust == 0 and start_x_offset:
        col_adjust += (-start_x_offset if start_x_offset > 0 else
                       -(-start_x_offset - 1))
        col_adjust = -col_adjust
    elif col_adjust > 0 and start_x_offset:
        col_adjust += -start_x_offset - 1
        col_adjust = -col_adjust
    self.__check_for_proper_match(
        string_to_check,
        found_index,
        next_name,
        context,
        token,
        line_adjust,
        col_adjust,
    )
def extract_link_destination(line_to_parse, new_index, is_blank_line):
    """
    Extract the link reference definition's link destination.

    Returns (did-parse flag, new index, destination, raw destination,
    whitespace before the destination, raw destination text).
    """
    new_index, prefix_whitespace = ParserHelper.collect_while_one_of_characters(
        line_to_parse, new_index, Constants.whitespace
    )
    if new_index == len(line_to_parse) and not is_blank_line:
        return False, new_index, None, None, None, None

    LOGGER.debug("LD>>%s<<", line_to_parse[new_index:])
    (
        inline_link,
        pre_inline_link,
        new_index,
        inline_raw_link,
    ) = LinkHelper.__parse_link_destination(line_to_parse, new_index)
    if new_index == -1:
        # The destination parse failed; propagate the failure.
        return False, -1, None, None, None, None
    return (
        True,
        new_index,
        inline_link,
        pre_inline_link,
        prefix_whitespace,
        inline_raw_link,
    )
def __parse_valid_uri_autolink(
        text_to_parse: str, line_number: int,
        column_number: int) -> Optional[UriAutolinkMarkdownToken]:
    """
    Parse a possible uri autolink and determine if it is valid.
    """
    if (InlineHelper.angle_bracket_start in text_to_parse
            or text_to_parse[0] not in string.ascii_letters):
        return None

    path_index, uri_scheme = ParserHelper.collect_while_one_of_characters(
        text_to_parse, 1, InlineHelper.__valid_scheme_characters)
    assert path_index is not None
    uri_scheme = f"{text_to_parse[0]}{uri_scheme}"
    text_to_parse_size = len(text_to_parse)

    # The scheme must be 2-32 characters and be followed by the scheme
    # end character.
    if not (2 <= len(uri_scheme) <= 32 and path_index < text_to_parse_size
            and text_to_parse[path_index]
            == InlineHelper.__scheme_end_character):
        return None

    # The remainder of the uri may not contain control or space
    # characters (code points <= 32).
    path_index += 1
    while (path_index < text_to_parse_size
           and ord(text_to_parse[path_index]) > 32):
        path_index += 1
    if path_index == text_to_parse_size:
        return UriAutolinkMarkdownToken(text_to_parse, line_number,
                                        column_number)
    return None
def look_for_pragmas(
    position_marker: PositionMarker,
    line_to_parse: str,
    container_depth: int,
    extracted_whitespace: Optional[str],
    parser_properties: ParseBlockPassProperties,
) -> bool:
    """
    Look for a pragma in the current line.

    Returns True when the line is a pragma and was recorded in
    parser_properties.pragma_lines for later processing.
    """
    # Pragmas are only recognized at the top level with no leading
    # whitespace, using either the normal or the alternate prefix.
    if (not container_depth and not extracted_whitespace
            and (line_to_parse.startswith(PragmaToken.pragma_prefix)
                 or line_to_parse.startswith(
                     PragmaToken.pragma_alternate_prefix))):
        was_extended_prefix = line_to_parse.startswith(
            PragmaToken.pragma_alternate_prefix)

        start_index, _ = ParserHelper.extract_whitespace(
            line_to_parse,
            len(PragmaToken.pragma_alternate_prefix
                if was_extended_prefix else PragmaToken.pragma_prefix),
        )
        remaining_line = line_to_parse[start_index:].rstrip().lower()
        if remaining_line.startswith(
                PragmaToken.pragma_title) and remaining_line.endswith(
                    PragmaToken.pragma_suffix):
            # The alternate prefix is recorded with a negated line number
            # so the two prefix forms can be told apart later.
            index_number = (-position_marker.line_number
                            if was_extended_prefix else
                            position_marker.line_number)
            parser_properties.pragma_lines[index_number] = line_to_parse
            return True
    return False
def extract_html_attribute_name(string_to_parse: str, string_index: int) -> int:
    """
    Attempt to extract the attribute name from the provided string.

    Returns the index just past the attribute name, or -1 when no valid
    attribute name is present at string_index.
    """
    parse_length = len(string_to_parse)
    if not (
        string_index < parse_length
        and string_to_parse[string_index] in HtmlHelper.__attribute_start_characters
    ):
        return -1

    end_of_name_index, _ = ParserHelper.collect_while_one_of_characters(
        string_to_parse, string_index + 1, HtmlHelper.__attribute_other_characters
    )
    assert end_of_name_index is not None
    # The name must be followed by a separator, a value separator, or the
    # start/end of a tag to be considered complete.
    if end_of_name_index < parse_length and string_to_parse[end_of_name_index] in (
        HtmlHelper.__html_attribute_name_value_separator,
        HtmlHelper.__html_attribute_separator,
        HtmlHelper.__html_tag_start,
        HtmlHelper.__html_tag_end,
    ):
        return end_of_name_index
    return -1
def __determine_html_block_type(parser_state, line_to_parse, start_index):
    """
    Determine the type of the html block that we are starting.
    """
    character_index = start_index + 1
    remaining_html_tag = ""

    # Special block types first; fall back to scanning the tag name.
    html_block_type = HtmlHelper.__check_for_special_html_blocks(
        line_to_parse, character_index
    )
    if not html_block_type:
        (
            character_index,
            remaining_html_tag,
        ) = ParserHelper.collect_until_one_of_characters(
            line_to_parse, character_index, HtmlHelper.__html_tag_name_end
        )
        remaining_html_tag = remaining_html_tag.lower()
        html_block_type = HtmlHelper.__check_for_normal_html_blocks(
            remaining_html_tag, line_to_parse, character_index
        )

    # Type 7 blocks may not interrupt a paragraph.
    if not html_block_type or (
        html_block_type == HtmlHelper.html_block_7
        and parser_state.token_stack[-1].is_paragraph
    ):
        return None, None
    return html_block_type, remaining_html_tag
def __evaluate_possible_url(
    self,
    source_text: str,
    url_prefix: str,
    found_index: int,
    context: PluginScanContext,
    token: MarkdownToken,
) -> None:
    """
    Check whether a found url prefix is a bare url (preceded by
    start-of-text, a space, or a newline, and followed by "//" plus a
    non-whitespace character) and report it if so.
    """
    is_preceded_ok = found_index == 0 or source_text[found_index - 1] in (
        " ",
        ParserHelper.newline_character,
    )
    if not is_preceded_ok:
        return

    after_prefix = source_text[found_index + len(url_prefix):]
    if (
        len(after_prefix) >= 3
        and after_prefix.startswith("//")
        and after_prefix[2] not in (" ", ParserHelper.newline_character)
    ):
        (
            column_number_delta,
            line_number_delta,
        ) = ParserHelper.adjust_for_newlines(source_text, 0, found_index)
        self.report_next_token_error(
            context,
            token,
            line_number_delta=line_number_delta,
            column_number_delta=column_number_delta,
        )
def extract_link_title(line_to_parse, new_index, is_blank_line):
    """
    Extract the link reference definition's optional link title.

    Returns (did-parse flag, new index, processed title, raw title,
    whitespace before the title, raw text of the title span).
    """
    inline_title = ""
    pre_inline_title = ""
    LOGGER.debug("before ws>>%s>", line_to_parse[new_index:])
    new_index, ex_ws = ParserHelper.extract_any_whitespace(
        line_to_parse, new_index)
    LOGGER.debug(
        "after ws>>%s>ex_ws>%s",
        line_to_parse[new_index:],
        ex_ws.replace("\n", "\\n"),
    )
    start_index = new_index
    # End of line with no blank line to continue onto: cannot finish here.
    if new_index == len(line_to_parse) and not is_blank_line:
        return False, new_index, None, None, None, None
    # A title must be separated from the destination by whitespace.
    if ex_ws and new_index < len(line_to_parse):
        inline_title, pre_inline_title, new_index = LinkHelper.__parse_link_title(
            line_to_parse, new_index)
        if new_index == -1:
            return False, -1, None, None, None, None
        if inline_title is None:
            return False, new_index, None, None, None, None
    return (
        True,
        new_index,
        inline_title,
        pre_inline_title,
        ex_ws,
        line_to_parse[start_index:new_index],
    )
def __parse_link_title(source_text, new_index):
    """
    Parse an inline link's link title.

    Returns (processed title, title as originally written, index after
    the title), with the index set to -1 when no valid title start
    character is found.
    """
    LOGGER.debug("parse_link_title>>new_index>>%s>>", source_text[new_index:])
    ex_title = ""
    pre_ex_title = ""
    # A title is bounded by single quotes, double quotes, or parentheses.
    if ParserHelper.is_character_at_index(source_text, new_index,
                                          LinkHelper.__link_title_single):
        new_index, ex_title = InlineHelper.extract_bounded_string(
            source_text, new_index + 1, LinkHelper.__link_title_single, None)
    elif ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_title_double):
        new_index, ex_title = InlineHelper.extract_bounded_string(
            source_text, new_index + 1, LinkHelper.__link_title_double, None)
    elif ParserHelper.is_character_at_index(
            source_text, new_index, LinkHelper.__link_title_parenthesis_open):
        new_index, ex_title = InlineHelper.extract_bounded_string(
            source_text,
            new_index + 1,
            LinkHelper.__link_title_parenthesis_close,
            LinkHelper.__link_title_parenthesis_open,
        )
    else:
        new_index = -1
    LOGGER.debug(
        "parse_link_title>>new_index>>%s>>ex_link>>%s>>",
        str(new_index),
        str(ex_title),
    )
    # Keep the raw form before backslash processing is applied.
    pre_ex_title = ex_title
    if ex_title is not None:
        ex_title = InlineHelper.append_text(
            "",
            InlineHelper.handle_backslashes(ex_title, add_text_signature=False),
            add_text_signature=False,
        )
    LOGGER.debug("parse_link_title>>pre>>%s>>", str(pre_ex_title))
    LOGGER.debug("parse_link_title>>after>>%s>>", str(ex_title))
    return ex_title, pre_ex_title, new_index
def is_block_quote_start(line_to_parse,
                         start_index,
                         extracted_whitespace,
                         adj_ws=None):
    """
    Determine if we have the start of a block quote section.
    """
    whitespace_to_check = extracted_whitespace if adj_ws is None else adj_ws
    return ParserHelper.is_length_less_than_or_equal_to(
        whitespace_to_check, 3
    ) and ParserHelper.is_character_at_index(
        line_to_parse,
        start_index,
        BlockQuoteProcessor.__block_quote_character,
    )