from xml.sax.saxutils import escape spaceCharacters = "".join(spaceCharacters) try: from codecs import register_error, xmlcharrefreplace_errors except ImportError: unicode_encode_errors = "strict" else: unicode_encode_errors = "htmlentityreplace" from html5lib.constants import entities encode_entity_map = {} for k, v in list(entities.items()): if v != "&" and encode_entity_map.get(v) != k.lower(): # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc. encode_entity_map[v] = k def htmlentityreplace_errors(exc): if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): res = [] for c in exc.object[exc.start:exc.end]: e = encode_entity_map.get(c) if e: res.append("&") res.append(e) if not e.endswith(";"): res.append(";") else:
from xml.sax.saxutils import escape spaceCharacters = u"".join(spaceCharacters) try: from codecs import register_error, xmlcharrefreplace_errors except ImportError: unicode_encode_errors = "strict" else: unicode_encode_errors = "htmlentityreplace" from html5lib.constants import entities encode_entity_map = {} for k, v in entities.items(): if v != "&" and encode_entity_map.get(v) != k.lower(): # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc. encode_entity_map[v] = k def htmlentityreplace_errors(exc): if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): res = [] for c in exc.object[exc.start:exc.end]: e = encode_entity_map.get(c) if e: res.append("&") res.append(e) if not e.endswith(";"): res.append(";") else:
from html5lib import utils from xml.sax.saxutils import escape spaceCharacters = u"".join(spaceCharacters) try: from codecs import register_error, xmlcharrefreplace_errors except ImportError: unicode_encode_errors = "strict" else: unicode_encode_errors = "htmlentityreplace" from html5lib.constants import entities encode_entity_map = {} for k, v in entities.items(): if v != "&" and encode_entity_map.get(v) != k.lower(): # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc. encode_entity_map[ord(v)] = k def htmlentityreplace_errors(exc): if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): res = [] codepoints = [] skip = False for i, c in enumerate(exc.object[exc.start:exc.end]): if skip: skip = False continue index = i + exc.start if utils.isSurrogatePair(exc.object[index:min([exc.end, index+2])]):
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from html5lib.constants import entities

# Lookup table derived from html5lib's ``entities`` mapping: every key has
# all ';' characters removed, and each value is carried over unchanged.
# When two source keys collapse to the same stripped key, the one iterated
# later wins.
html5_entities = dict(
    (entity_name.replace(';', ''), replacement)
    for entity_name, replacement in entities.items()
)