def _populate_class_variables(): lookup = {} reverse_lookup = {} characters_for_re = [] # &apos is an XHTML entity and an HTML 5, but not an HTML 4 # entity. We don't want to use it, but we want to recognize it on the way in. # # TODO: Ideally we would be able to recognize all HTML 5 named # entities, but that's a little tricky. extra = [(39, 'apos')] for codepoint, name in list(codepoint2name.items()) + extra: character = unichr(codepoint) if codepoint not in (34, 39): # There's no point in turning the quotation mark into # " or the single quote into ', unless it # happens within an attribute value, which is handled # elsewhere. characters_for_re.append(character) lookup[character] = name # But we do want to recognize those entities on the way in and # convert them to Unicode characters. reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters_for_re) return lookup, reverse_lookup, re.compile(re_definition)
def _populate_class_variables(): lookup = {} reverse_lookup = {} characters = [] for codepoint, name in codepoint2name.items(): if codepoint == 34: # There's no point in turning the quotation mark into # ", unless it happens within an attribute value, which # is handled elsewhere. continue character = unichr(codepoint) characters.append(character) lookup[character] = name reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters) return lookup, reverse_lookup, re.compile(re_definition)
def _populate_class_variables(): lookup = {} reverse_lookup = {} characters = [] for codepoint, name in list(codepoint2name.items()): if codepoint == 34: # There's no point in turning the quotation mark into # ", unless it happens within an attribute value, which # is handled elsewhere. continue character = unichr(codepoint) characters.append(character) lookup[character] = name reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters) return lookup, reverse_lookup, re.compile(re_definition)
def _populate_class_variables(): lookup = {} reverse_lookup = {} characters_for_re = [] for codepoint, name in list(codepoint2name.items()): character = unichr(codepoint) if codepoint != 34: # There's no point in turning the quotation mark into # ", unless it happens within an attribute value, which # is handled elsewhere. characters_for_re.append(character) lookup[character] = name # But we do want to turn " into the quotation mark. reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters_for_re) return lookup, reverse_lookup, re.compile(re_definition)
def _populate_class_variables(self): lookup = {} reverse_lookup = {} characters_for_re = [] for codepoint, name in list(codepoint2name.items()): character = unichr(codepoint) if codepoint != 34: # There's no point in turning the quotation mark into # ", unless it happens within an attribute value, which # is handled elsewhere. characters_for_re.append(character) lookup[character] = name # But we do want to turn " into the quotation mark. reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters_for_re) return lookup, reverse_lookup, re.compile(re_definition)
def _populate_class_variables(): lookup = {} reverse_lookup = {} characters_for_re = [] for codepoint, name in list(codepoint2name.items()): character = unichr(codepoint) if codepoint != 34: characters_for_re.append(character) lookup[character] = name reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters_for_re) return lookup, reverse_lookup, re.compile(re_definition)
def encode_htmlentities(string, encoding="utf-8"):
    """
    Encode the string with named HTML entities.

    Every character whose codepoint is a key of ``cp2n`` is replaced by
    ``&name;``. This includes the ampersand itself, which becomes
    ``&amp;``.

    :param string: text to encode. A unicode string is processed as-is;
        anything else is decoded with ``encoding`` first.
    :param encoding: codec used to decode non-unicode input and to
        encode the result back to the input's type.
    :return: the encoded text, unicode if the input was unicode,
        otherwise encoded with ``encoding``.
    """
    was_unicode = isinstance(string, unicode)
    if not was_unicode:
        string = string.decode(encoding)

    # Translate every character in a single pass. Because each input
    # character is visited exactly once, the "&" that starts an inserted
    # entity can never be escaped a second time, so no special ordering
    # for "amp" is needed.
    entity_table = dict(
        (codepoint, u"&%s;" % name) for codepoint, name in cp2n.items()
    )
    string = string.translate(entity_table)

    if not was_unicode:
        string = string.encode(encoding)
    return string