def educate_tokens(text_tokens: Iterable[Tuple[str, str]],
                   attr: str = smartquotes.default_smartypants_attr,
                   language: str = 'en') -> Generator[str, None, None]:
    """Return iterator that "educates" the items of `text_tokens`.

    This is modified to intercept the ``attr='2'`` as it was used by the
    Docutils 0.13.1 SmartQuotes transform in a hard coded way. Docutils 0.14
    uses ``'qDe'`` and is configurable, and its choice is backported here
    for use by Sphinx with earlier Docutils releases. Similarly ``'1'`` is
    replaced by ``'qde'``.

    Use ``attr='qDbe'``, resp. ``'qdbe'`` to recover Docutils effect of
    ``'2'``, resp. ``'1'``.

    refs: https://sourceforge.net/p/docutils/mailman/message/35869025/

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  Tokens of type
        ``'tag'`` and tokens with empty text are yielded unchanged and do not
        affect the quote context; tokens of type ``'literal'`` are yielded
        unchanged but their last character becomes the quote context.
    :param attr: option string selecting the conversions (see the table in
        the function body).
    :param language: language code handed to the quote/backtick helpers.
    :returns: generator yielding one output string per input token.
    """
    # Parse attributes:
    # 0 : do nothing
    # 1 : set all (but backticks)
    # 2 : set all (but backticks), using old school en- and em- dash shortcuts
    # 3 : set all, using inverted old school en and em- dash shortcuts
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "1":
        # Equivalent to 'qde': backticks are intentionally left off.
        do_quotes = True
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Equivalent to 'qDe' (old school dash shorthand): backticks are
        # intentionally left off.
        do_quotes = True
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":
        # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Letter flags; "0" (or any string without a known letter) leaves
        # every conversion switched off.
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for (ttype, text) in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        text = smartquotes.processEscapes(text)

        if convert_quot:
            # The entity (not a bare quote) must be the search pattern,
            # otherwise this step replaces '"' with itself and does nothing.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion and
            # stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text
def educate_tokens(text_tokens, attr=smartquotes.default_smartypants_attr, language='en'):
    # type: (Iterable[Tuple[str, str]], str, str) -> Generator[str, None, None]
    """Return iterator that "educates" the items of `text_tokens`.

    This is modified to intercept the ``attr='2'`` as it was used by the
    Docutils 0.13.1 SmartQuotes transform in a hard coded way. Docutils 0.14
    uses ``'qDe'`` and is configurable, and its choice is backported here
    for use by Sphinx with earlier Docutils releases. Similarly ``'1'`` is
    replaced by ``'qde'``.

    Use ``attr='qDbe'``, resp. ``'qdbe'`` to recover Docutils effect of
    ``'2'``, resp. ``'1'``.

    refs: https://sourceforge.net/p/docutils/mailman/message/35869025/

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  Tokens of type
        ``'tag'`` and tokens with empty text are yielded unchanged and do not
        affect the quote context; tokens of type ``'literal'`` are yielded
        unchanged but their last character becomes the quote context.
    :param attr: option string selecting the conversions (see the table in
        the function body).
    :param language: language code handed to the quote/backtick helpers.
    :returns: generator yielding one output string per input token.
    """
    # Parse attributes:
    # 0 : do nothing
    # 1 : set all (but backticks)
    # 2 : set all (but backticks), using old school en- and em- dash shortcuts
    # 3 : set all, using inverted old school en and em- dash shortcuts
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "1":
        # Equivalent to 'qde': backticks are intentionally left off.
        do_quotes = True
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Equivalent to 'qDe' (old school dash shorthand): backticks are
        # intentionally left off.
        do_quotes = True
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":
        # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Letter flags; "0" (or any string without a known letter) leaves
        # every conversion switched off.
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for (ttype, text) in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        text = smartquotes.processEscapes(text)

        if convert_quot:
            # The entity (not a bare quote) must be the search pattern,
            # otherwise this step replaces '"' with itself and does nothing.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion and
            # stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text
def educate_tokens(text_tokens, attr='1', language='en'):
    # type: (Iterable[Tuple[str, unicode]], unicode, unicode) -> Iterator
    """Return iterator that "educates" the items of `text_tokens`.

    :param text_tokens: iterable of ``(ttype, text)`` pairs.  Tokens of type
        ``'tag'`` and tokens with empty text are yielded unchanged and do not
        affect the quote context; tokens of type ``'literal'`` are yielded
        unchanged but their last character becomes the quote context.
    :param attr: option string selecting the conversions (see the table in
        the function body).
    :param language: language code handed to the quote/backtick helpers.
    :returns: iterator yielding one output string per input token.
    """
    # Parse attributes:
    # 0 : do nothing
    # 1 : set all
    # 2 : set all, using old school en- and em- dash shortcuts
    # 3 : set all, using inverted old school en and em- dash shortcuts
    #
    # q : quotes
    # b : backtick quotes (``double'' only)
    # B : backtick quotes (``double'' and `single')
    # d : dashes
    # D : old school dashes
    # i : inverted old school dashes
    # e : ellipses
    # w : convert &quot; entities to " for Dreamweaver users

    convert_quot = False  # translate &quot; entities into normal quotes?
    do_dashes = 0         # 0: off, 1: normal, 2: old school, 3: inverted
    do_backticks = 0      # 0: off, 1: double only, 2: double and single
    do_quotes = False
    do_ellipses = False
    do_stupefy = False

    if attr == "0":
        # Do nothing.
        pass
    elif attr == "1":
        # Do everything, turn all options on.
        do_quotes = True
        do_backticks = 1
        do_dashes = 1
        do_ellipses = True
    elif attr == "2":
        # Do everything, turn all options on, use old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 2
        do_ellipses = True
    elif attr == "3":
        # Do everything, use inverted old school dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = 3
        do_ellipses = True
    elif attr == "-1":
        # Special "stupefy" mode.
        do_stupefy = True
    else:
        # Letter flags; each recognised letter switches one conversion on.
        if "q" in attr:
            do_quotes = True
        if "b" in attr:
            do_backticks = 1
        if "B" in attr:
            do_backticks = 2
        if "d" in attr:
            do_dashes = 1
        if "D" in attr:
            do_dashes = 2
        if "i" in attr:
            do_dashes = 3
        if "e" in attr:
            do_ellipses = True
        if "w" in attr:
            convert_quot = True

    # Last character of the previous text token. Used as
    # context to curl leading quote characters correctly.
    prev_token_last_char = " "

    for (ttype, text) in text_tokens:

        # skip HTML and/or XML tags as well as empty text tokens
        # without updating the last character
        if ttype == 'tag' or not text:
            yield text
            continue

        # skip literal text (math, literal, raw, ...)
        if ttype == 'literal':
            prev_token_last_char = text[-1:]
            yield text
            continue

        last_char = text[-1:]  # Remember last char before processing.

        text = smartquotes.processEscapes(text)

        if convert_quot:
            # The entity (not a bare quote) must be the search pattern,
            # otherwise this step replaces '"' with itself and does nothing.
            text = text.replace('&quot;', '"')

        if do_dashes == 1:
            text = smartquotes.educateDashes(text)
        elif do_dashes == 2:
            text = smartquotes.educateDashesOldSchool(text)
        elif do_dashes == 3:
            text = smartquotes.educateDashesOldSchoolInverted(text)

        if do_ellipses:
            text = smartquotes.educateEllipses(text)

        # Note: backticks need to be processed before quotes.
        if do_backticks:
            text = smartquotes.educateBackticks(text, language)

        if do_backticks == 2:
            text = smartquotes.educateSingleBackticks(text, language)

        if do_quotes:
            # Replace plain quotes to prevent conversion to
            # 2-character sequence in French.
            context = prev_token_last_char.replace('"', ';').replace("'", ';')
            # The context character is prepended for the conversion and
            # stripped from the result again.
            text = educateQuotes(context + text, language)[1:]

        if do_stupefy:
            text = smartquotes.stupefyEntities(text, language)

        # Remember last char as context for the next token
        prev_token_last_char = last_char

        text = smartquotes.processEscapes(text, restore=True)

        yield text