def number_suffix(text):
    """Wrap ordinal suffixes in ``<span class='ord'>`` so they can be styled with CSS.

    Every run of digits immediately followed by ``st``, ``nd``, ``rd`` or
    ``th`` gets that suffix wrapped in a span (the suffix is not required to
    end at a word boundary):

    >>> print(number_suffix("10th"))
    10<span class='ord'>th</span>

    The input is split with the smartypants tokenizer so that only text
    nodes are rewritten.  Tag tokens are copied through untouched, and text
    between an opening and closing ``pre``, ``code``, ``kbd``, ``script`` or
    ``math`` tag is left alone:

    >>> print(number_suffix('<a href="/21st">the 21st</a>'))
    <a href="/21st">the 21<span class='ord'>st</span></a>

    >>> print(number_suffix("<code>1st</code>"))
    <code>1st</code>

    :param text: HTML fragment to process.
    :returns: the fragment with ordinal suffixes wrapped in spans.
    """
    suffix_finder = re.compile(r'(?P<number>[\d]+)(?P<ord>st|nd|rd|th)')
    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    def _suffix_process(match):
        """Rebuild a match as the number followed by its wrapped suffix."""
        return "%s<span class='ord'>%s</span>" % (match.group('number'), match.group('ord'))

    result = []
    # Nesting depth of skip tags; text is only rewritten while this is zero.
    skip_depth = 0
    for token in _smartypants._tokenize(text):
        token_type, token_text = token[0], token[1]
        if token_type == "tag":
            # Never rewrite markup itself: attributes may contain e.g. "1st".
            result.append(token_text)
            skip_match = tags_to_skip_regex.match(token_text)
            if skip_match:
                if skip_match.group(1) is not None:
                    # Closing skip tag; never let stray closers go negative.
                    skip_depth = max(0, skip_depth - 1)
                elif not token_text.endswith("/>"):
                    # Opening skip tag (self-closing ones open nothing).
                    skip_depth += 1
        elif skip_depth:
            result.append(token_text)
        else:
            result.append(suffix_finder.sub(_suffix_process, token_text))
    return "".join(result)
def caps(text):
    """Wrap runs of capital letters in ``<span class="caps">`` so they can be styled with CSS.

    >>> print(caps("A message from KU"))
    A message from <span class="caps">KU</span>

    Uses the smartypants tokenizer so that markup itself is never rewritten,
    and text inside ``pre``, ``code``, ``kbd``, ``script`` or ``math``
    elements is left alone, even when other tags are nested inside them.

    >>> print(caps("<PRE>CAPS</pre> more CAPS"))
    <PRE>CAPS</pre> more <span class="caps">CAPS</span>

    >>> print(caps("<pre><b>CAPS</b> MORE</pre>"))
    <pre><b>CAPS</b> MORE</pre>

    >>> print(caps("A message from 2KU2 with digits"))
    A message from <span class="caps">2KU2</span> with digits

    >>> print(caps("Dotted caps followed by spaces should never include them in the wrap D.O.T. like so."))
    Dotted caps followed by spaces should never include them in the wrap <span class="caps">D.O.T.</span> like so.

    All caps with apostrophes in them shouldn't break. Only handles dumb
    apostrophes though.

    >>> print(caps("JIMMY'S"))
    <span class="caps">JIMMY'S</span>

    >>> print(caps("<i>D.O.T.</i>HE34T<b>RFID</b>"))
    <i><span class="caps">D.O.T.</span></i><span class="caps">HE34T</span><b><span class="caps">RFID</span></b>

    :param text: HTML fragment to process.
    :returns: the fragment with capital-letter runs wrapped in spans.
    """
    cap_finder = re.compile(r"""(
        (\b[A-Z\d]*         # Group 2: Any amount of caps and digits
        [A-Z]\d*[A-Z]       # A cap string must at least include two caps (but they can have digits between them)
        [A-Z\d']*\b)        # Any amount of caps and digits or dumb apostrophes
        | (\b[A-Z]+\.\s?    # OR: Group 3: Some caps, followed by a '.' and an optional space
        (?:[A-Z]+\.\s?)+)   # Followed by the same thing at least once more
        (?:\s|\b|$))
        """, re.VERBOSE)

    def _cap_wrapper(matchobj):
        """Build the replacement, keeping a dotted run's trailing space outside the span."""
        plain = matchobj.group(2)
        if plain:
            return """<span class="caps">%s</span>""" % plain
        dotted = matchobj.group(3)
        # The pattern's final (?:\s|\b|$) may have consumed one more
        # whitespace character after group 3; re-emit it so no input
        # character is silently dropped.
        tail = matchobj.group(1)[len(dotted):]
        if dotted.endswith(" "):
            dotted = dotted[:-1]
            tail = " " + tail
        return """<span class="caps">%s</span>%s""" % (dotted, tail)

    tags_to_skip_regex = re.compile("<(/)?(?:pre|code|kbd|script|math)[^>]*>", re.IGNORECASE)

    result = []
    # Nesting depth of skip tags; text is only rewritten while this is zero.
    # Unrelated tags (e.g. <b> inside <pre>) must not end the skipped region.
    skip_depth = 0
    for token in _smartypants._tokenize(text):
        token_type, token_text = token[0], token[1]
        if token_type == "tag":
            # Don't mess with tags.
            result.append(token_text)
            skip_match = tags_to_skip_regex.match(token_text)
            if skip_match:
                if skip_match.group(1) is not None:
                    # Closing skip tag; never let stray closers go negative.
                    skip_depth = max(0, skip_depth - 1)
                elif not token_text.endswith("/>"):
                    # Opening skip tag (self-closing ones open nothing).
                    skip_depth += 1
        elif skip_depth:
            result.append(token_text)
        else:
            result.append(cap_finder.sub(_cap_wrapper, token_text))
    return "".join(result)