def _setup_entity_definitions(self): for key, value in entitydefs.items(): value = value self._entitydefs_inverted[value] = key self._badchars_regex = re.compile('|'.join(entitydefs.values())) self._been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')
def remove_html_entities(text):
    """Replace named HTML entities in *text* with their characters.

    Every ``&name;`` whose name is a key of ``entitydefs`` is replaced by
    the corresponding value; unknown names are left untouched.

    Note: when combined with ``remove_html_markup``, call this function
    after that one has run.

    The text is scanned once, so the output of one replacement is never
    decoded a second time (``&amp;lt;`` becomes ``&lt;``, not ``<``).
    """
    import re

    def _decode(match):
        # Fall back to the original text for names that are not entities.
        return entitydefs.get(match.group(1), match.group(0))

    return re.sub(r'&([A-Za-z0-9]+);', _decode, text)
def fix_special_chars(html_text):
    """Replace special characters in *html_text* with named entities.

    Each value of ``entitydefs`` found in the text is rewritten as
    ``&name;``. The entities ``amp``, ``frasl``, ``gt``, ``lt`` and
    ``quot`` are skipped, so their characters stay as they are.
    """
    untouched = ('amp', 'frasl', 'gt', 'lt', 'quot')
    for name, char in entitydefs.items():
        if name in untouched or char not in html_text:
            continue
        html_text = html_text.replace(char, "&" + name + ";")
    # As written, both arguments are the same character, so this call
    # leaves the text unchanged.
    html_text = html_text.replace('∙', "∙")
    return html_text
def description(item):
    """Return the description of *item* as a list of lines.

    The name is mapped to an oid through ``name_to_oid`` (0 when unknown)
    and the text is read from ``oid_to_desc``. Named entities are expanded
    using ``entitydefs``, carriage returns are removed, every
    ``(pattern, replacement)`` pair of ``regices`` is applied, and the
    stripped result is split on newlines. An empty list is returned when
    there is no (or an empty) description.
    """
    text = oid_to_desc.get(name_to_oid.get(item, 0), None)
    if not text:
        return []

    for name, char in entitydefs.items():
        text = text.replace("&%s;" % name, char)
    text = text.replace("\r", "")
    for pattern, replacement in regices:
        text = pattern.sub(replacement, text)
    return text.strip().split("\n")
def stringhtml(chaine):
    """Escape HTML-special and high Latin-1 characters in *chaine*.

    Runs of ``&``, ``<``, ``>``, ``"`` and characters in the range
    U+0080..U+00FF are rewritten character by character: a named entity
    (``&eacute;``) is used when ``entitydefs`` provides one, otherwise a
    decimal numeric character reference (``&#133;``).

    Returns the escaped string; all other characters are left as they are.
    """
    # Default every code point below 256 to a numeric reference. The '#'
    # is required: "&133;" is not a valid character reference.
    emap = {chr(code): "&#%d;" % code for code in range(256)}
    # Prefer the named entity wherever one exists.
    for entity, char in entitydefs.items():
        if char in emap:
            emap[char] = "&%s;" % entity

    def remplace(match):
        return "".join(emap[char] for char in match.group())

    return re.sub(r'[&<>\"\x80-\xff]+', remplace, chaine)
def htmlentitydecode(string_cp1252: bytes) -> str:
    """Decode CP1252 bytes and resolve some character references.

    The input is decoded as CP1252. Afterwards:

    * every ``entitydefs`` value that itself starts with ``&`` is replaced
      by the character for that entity name (``name2codepoint``);
    * every decimal reference ``&#N;`` whose code point ``N`` is a key of
      ``codepoint2name`` is replaced by ``chr(N)``. Other numeric
      references are left untouched.

    Numeric references are resolved in a single pass, so an already
    decoded ``&`` is never combined with the following text into a new
    reference (``&#38;#60;`` yields ``&#60;``).

    Always returns ``str``: bytes that CP1252 cannot decode are replaced
    with U+FFFD instead of handing the raw ``bytes`` object back.
    """
    import re

    try:
        ustr = string_cp1252.decode('cp1252')
    except UnicodeDecodeError:
        # Python's cp1252 codec leaves a few byte values (e.g. 0x81)
        # unmapped; keep going with replacement characters so that the
        # declared return type holds.
        ustr = string_cp1252.decode('cp1252', errors='replace')

    # With single-character table values only '&' itself qualifies, which
    # makes this loop a no-op; it matters for tables whose values are
    # character references.
    for key, value in entitydefs.items():
        if value.startswith('&'):
            ustr = ustr.replace(value, chr(name2codepoint[key]))

    def _resolve(match):
        codepoint = int(match.group(1))
        if codepoint in codepoint2name:
            return chr(codepoint)
        return match.group(0)

    return re.sub(r'&#([1-9][0-9]*);', _resolve, ustr)
def html_escape(text):
    """
    Escape HTML characters / entities in @text.

    Every character that is a value of ``entitydefs`` is replaced by its
    ``&name;`` form. Returns the escaped text.
    """
    # We start by replacing '&', so that the ampersands introduced by the
    # replacements below are not escaped a second time.
    text = text.replace("&", "&amp;")

    if isinstance(text, bytes):
        # We use this to avoid UnicodeDecodeError in text.replace()
        def convert(value):
            return value.decode("iso-8859-1")
    else:
        def convert(value):
            return value

    for name, value in entitydefs.items():
        # We don't want '&' here, as it would mess up the replace()
        # we have already done
        if value == "&":
            continue
        text = text.replace(convert(value), "&%s;" % name)
    return text
# Treat the non-breaking space as an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are kept in their own table and therefore excluded from
# entcharrefs below.
sgmlentity = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': '\'',
    'ndash': '-',
}
sgmlentityget = sgmlentity.get

# Maps both the entity name and its "#<decimal>" form to the character.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _name, _char in list(entitydefs.items()):
    if _name in sgmlentity:
        continue
    if _char[0:2] == '&#':
        # The table value is itself a numeric reference such as "&#913;".
        dec_code = _char[1:-1]
        _char = chr(int(_char[2:-1]))
    else:
        dec_code = '#' + str(ord(_char))
    entcharrefs[dec_code] = _char
    entcharrefs[_name] = _char
del _name, _char

# Decimal and hexadecimal spellings of the non-breaking space.
for dec_code_nbsp in ('#160', '#xA0', '#xa0', '#XA0'):
    entcharrefs[dec_code_nbsp] = ' '
del dec_code_nbsp
(FAN_PIECES[BLACK][BISHOP], 'J'), (FAN_PIECES[BLACK][ROOK], 'L'), (FAN_PIECES[BLACK][QUEEN], 'M'), (FAN_PIECES[BLACK][KING], 'N'), (FAN_PIECES[WHITE][KNIGHT], 'k'), (FAN_PIECES[WHITE][BISHOP], 'j'), (FAN_PIECES[WHITE][ROOK], 'l'), (FAN_PIECES[WHITE][QUEEN], 'm'), (FAN_PIECES[WHITE][KING], 'n'), ('†', '+'), ('‡', '+'), ('1/2', 'Z')) def fanconv(fan): for f, r in lisPieces: fan = fan.replace(f, r) return fan # Dictionaries and expressions for parsing diagrams entitydefs = dict(("&%s;" % a, chr(ord(b)).encode('utf-8')) for a, b in entitydefs.items() if len(b) == 1) def2entity = dict((b, a) for a, b in entitydefs.items()) style = """ @font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"), url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"), url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"), url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"), url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";} table.pychess {display:inline-block; vertical-align:top} table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em} table.pychess td.numa {width:0; text-align:right} table.pychess td.numa {width:0; text-align:right; padding-left:1em} table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em} table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1} """
if size < 1024.0: import math if math.floor(size) == size: return "%d %s" % (int(size), final_unit) else: return "%3.1f %s" % (size, final_unit) if unit != "Yotta": size /= 1024.0 return "%3.1f %s" % (size, final_unit) entitydefs_inverted = {} for k, v in list(entitydefs.items()): entitydefs_inverted[v] = k _badchars_regex = re.compile("|".join(list(entitydefs.values()))) _been_fixed_regex = re.compile("&\w+;|&#[0-9]+;") def html_entity_fixer(text, skipchars=[], extra_careful=1): # if extra_careful we don't attempt to do anything to # the string if it might have been converted already. if extra_careful and _been_fixed_regex.findall(text): return text if type(skipchars) == type("s"): skipchars = [skipchars]
# Handle HTML/XML/SGML entities.
try:
    from html.entities import entitydefs
except ImportError:
    from htmlentitydefs import entitydefs

# Work on a private copy so that the standard-library table is not modified.
entitydefs = entitydefs.copy()
entitydefsget = entitydefs.get
# Treat the non-breaking space as an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are kept in their own table and therefore excluded from
# entcharrefs below.
sgmlentity = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\'', 'ndash': '-'}
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# `unichr` only exists on Python 2; `chr` is its Python 3 equivalent.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# Maps both the entity name and its "#<decimal>" form to the character.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _k, _v in entitydefs.items():
    if _k in _sgmlentkeys:
        continue
    if _v[0:2] == '&#':
        # The table value is itself a numeric reference such as "&#913;".
        dec_code = _v[1:-1]
        _v = _unichr(int(_v[2:-1]))
    else:
        dec_code = '#' + str(ord(_v))
        if isinstance(_v, bytes):
            # Byte-string table values are Latin-1 encoded characters.
            _v = _v.decode('latin_1', 'replace')
    entcharrefs[dec_code] = _v
    entcharrefs[_k] = _v
del _sgmlentkeys, _unichr, _k, _v

# Decimal and hexadecimal spellings of the non-breaking space.
entcharrefs['#160'] = u' '
entcharrefs['#xA0'] = u' '
entcharrefs['#xa0'] = u' '
entcharrefs['#XA0'] = u' '
(FAN_PIECES[BLACK][ROOK], 'L'), (FAN_PIECES[BLACK][QUEEN], 'M'), (FAN_PIECES[BLACK][KING], 'N'), (FAN_PIECES[WHITE][KNIGHT], 'k'), (FAN_PIECES[WHITE][BISHOP], 'j'), (FAN_PIECES[WHITE][ROOK], 'l'), (FAN_PIECES[WHITE][QUEEN], 'm'), (FAN_PIECES[WHITE][KING], 'n'), ('†', '+'), ('‡', '+'), ('1/2', 'Z')) def fanconv(fan): for f, r in lisPieces: fan = fan.replace(f, r) return fan # Dictionaries and expressions for parsing diagrams entitydefs = dict(("&%s;" % a, chr(ord(b)).encode('utf-8')) for a, b in entitydefs.items() if len(b) == 1) def2entity = dict((b, a) for a, b in entitydefs.items()) style = """ @font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"), url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"), url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"), url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"), url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";} table.pychess {display:inline-block; vertical-align:top} table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em} table.pychess td.numa {width:0; text-align:right} table.pychess td.numa {width:0; text-align:right; padding-left:1em} table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em} table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1} """