Пример #1
0
    def __call__(self):
        """Render ``self.orig`` as an HTML fragment.

        ``None`` is treated as the empty string and byte strings are
        decoded as UTF-8 (undecodable bytes are replaced).  The result is
        returned encoded as UTF-8.
        """
        source = self.orig
        if source is None:
            source = ''
        if not isinstance(source, unicode):
            source = unicode(source, 'utf-8', 'replace')

        # The ampersand is escaped on its own, before any entity is written;
        # otherwise the '&' of a freshly inserted entity would be escaped
        # again and '<' would end up as '&amp;lt;' instead of '&lt;'.
        html = source.replace('&', '&amp;')
        # Every other character that has a named entity is swapped for it.
        for name, char in entitydefs.items():
            if name == 'amp':
                continue
            html = html.replace(char.decode('latin-1'), '&' + name + ';')

        # Run the urlRegexp, emailRegexp and indentRegexp substitutions,
        # in that order; only the substituted text of subn() is kept.
        html = self.urlRegexp.subn(self.replaceURL, html)[0]
        html = self.emailRegexp.subn(self.replaceEmail, html)[0]
        html = self.indentRegexp.subn(self.indentWhitespace, html)[0]

        # Normalise Windows line endings, then turn newlines into <br />.
        html = html.replace('\r\n', '\n').replace('\n', '<br />')

        return html.encode('utf-8')
Пример #2
0
    def __call__(self):
        """Return ``self.orig`` converted to HTML, as a UTF-8 byte string.

        A missing value (``None``) yields the conversion of the empty
        string; anything that is not already unicode is decoded as UTF-8
        with replacement of undecodable bytes.
        """
        raw = self.orig
        if raw is None:
            raw = ''
        if isinstance(raw, unicode):
            text = raw
        else:
            text = unicode(raw, 'utf-8', 'replace')

        # '&' goes first and separately: doing it after the other entities
        # would re-escape their leading '&' ('<' -> '&lt;' -> '&amp;lt;').
        text = text.replace('&', '&amp;')
        for entity in entitydefs:
            if entity != 'amp':
                letter = entitydefs[entity].decode('latin-1')
                text = text.replace(letter, '&' + entity + ';')

        # Regexp-driven rewrites; each subn() returns (new_text, count).
        text, _count = self.urlRegexp.subn(self.replaceURL, text)
        text, _count = self.emailRegexp.subn(self.replaceEmail, text)
        text, _count = self.indentRegexp.subn(self.indentWhitespace, text)

        # Windows line endings first, so each line break gives one <br />.
        text = text.replace('\r\n', '\n')
        text = text.replace('\n', '<br />')

        return text.encode('utf-8')
        def _parse_dates(self):
            '''

                Also, strptime returns datetime objects when 
                we really want date objects.

                Possible formats:
                * Friday, July 1, 2011 > Single
                * October 10 - 21, 2011 > Duration - Same Month
                * June 27 - August 26, 2011 > Duration - Different Month
                * October 17, 2011 - January 13, 2012 
                    >  Duration - Different month and year
                * March 21, 2011 - January 8, 2012 
                    and October 24, 2011 - January 8, 2012
                    > Multiple durations
            '''
            
            # Clean Up
            ## Replace any HTML entities and codes with a <space>
            for entity,code in entitydefs.items():
                self._when_text = self._when_text.replace('&'+entity+';', ' ')
                self._when_text = self._when_text.replace(code, ' ')
            ## Force to ascii
            self._when_text = self._when_text.encode('ascii', 'replace')
            ## Replace \xFFFD with <space>
            self._when_text = self._when_text.replace(u'\xFFFD', ' ')
            ## Normalize spacing
            self._when_text = re.sub('\s+', ' ', self._when_text)

            for duration in self._when_text.split('and'):
                date_split = duration.strip().split('-')

                start_date = None
                start_text = date_split[0].strip()
                end_date   = None
                
                if len(date_split) == 1:
                    try:
                        start_date = datetime.strptime(start_text, 
                            UCFAcademicCalendar._Event._DF_SINGLE)
                    except ValueError, e:
                        log.error('Unable to parse single date: ' + start_text)
                        raise UCFAcademicCalendar._Event.ParsingError()
                elif len(date_split) == 2:
                    end_text   = date_split[1].strip()
                    same_month = False

                    try: # Month Day Year
                        end_date = datetime.strptime(end_text,
                            UCFAcademicCalendar._Event._DF_MONTH_DAY_YEAR)
                    except ValueError, e:
                        try: # Day, Year
                            end_date = datetime.strptime(end_text,\
                                UCFAcademicCalendar._Event._DF_DAY_YEAR)
                            same_month = True
                        except ValueError, e:
                            log.error('Unable to parse start date: ' + end_text)
                            raise UCFAcademicCalendar._Event.ParsingFailure()
Пример #4
0
    def __entitytoletter(self, s):
        """Replace named HTML entities in ``s`` by their ``entitydefs`` value.

        The text is scanned once, so the output of one replacement is never
        decoded a second time: ``&amp;lt;`` becomes ``&lt;`` and not ``<``.
        (Chaining ``str.replace`` per entity made that depend on the
        iteration order of the ``entitydefs`` dictionary.)  Names that are
        not in ``entitydefs`` are left untouched.
        """
        import re

        def lookup(match):
            # Fall back to the matched text itself for unknown names.
            return entitydefs.get(match.group(1), match.group(0))

        return re.sub(r'&(\w+);', lookup, s)
    def _expand_entities(self, body):
        """Return ``body`` with named entities replaced by their characters.

        ``&nbsp;`` becomes a plain space.  ``&lt;`` and ``&gt;`` are kept as
        entities; every other entity from ``entitydefs`` is replaced by its
        value decoded from Latin-1.
        """
        expanded = body.replace('&nbsp;', ' ')
        for name, char in entitydefs.items():
            # &lt; and &gt; are left for plone.intelligenttext to handle;
            # expanding them here could create what looks like tags.
            if name in ('lt', 'gt'):
                continue
            expanded = expanded.replace('&%s;' % name,
                                        char.decode('latin-1'))
        return expanded
Пример #6
0
    def convert(self, orig, data, **kwargs):
        """Convert ``orig`` to HTML and store it with ``data.setData``.

        ``kwargs['encoding']`` is both the encoding of ``orig`` and the
        expected encoding of the value stored in ``data``.  Only the UTF-8
        spellings listed below are accepted; anything else raises
        ``ValueError``.  When the file ``TTM_BINARY`` exists it is run on
        ``orig`` wrapped in ``LATEX_PREAMBLE`` / ``LATEX_POSTAMBLE``;
        otherwise ``orig`` goes through
        ``convertWebIntelligentPlainTextToHtml``.  Returns ``data``.
        """
        encoding = kwargs['encoding']
        if encoding not in ['utf-8', 'utf_8', 'U8', 'UTF', 'utf8']:
            raise ValueError('Only support unicode, not %s' % encoding)

        if not os.path.isfile(TTM_BINARY):
            # No TTM available: fall back to the plain-text converter.
            html = convertWebIntelligentPlainTextToHtml(orig.decode(encoding)).decode('utf8')
            # Bodge back entities, to save space
            from htmlentitydefs import entitydefs
            for name, char in entitydefs.items():
                if name in ('amp', 'lt', 'gt',):
                    continue
                html = html.replace('&' + name + ';', char.decode('latin-1'))
            data.setData('<div class="parse-as-tex">%s</div>' % html.encode(encoding))
            return data

        ttm_command = [
            TTM_BINARY,
            '-a',   # Try to convert picture elements
            '-e3',  # inline epsfbox w/no icon
            '-r',   # Don't output a pre/postamble
            '-u2',   # Unicode please
        ]
        ttm = subprocess.Popen(
            ttm_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        (out, err) = ttm.communicate(input=(LATEX_PREAMBLE + orig + LATEX_POSTAMBLE))
        if '****' not in err:
            data.setData('<div class="ttm-output">%s</div>' % (
                out.strip(),
            ))
        else:
            # Probably an error, show it (escaped) above the output.
            data.setData('<pre class="ttm-output error">%s</pre>\n<div class="ttm-output">%s</div>' % (
                cgi.escape(err.strip()),
                out.strip(),
            ))
        return data
Пример #7
0
def cleanHTML(text, skipchars=(), extra_careful=1):
    """Replace characters that have a named HTML entity by ``&name;``.

    This is an attempt to get rid of raw characters such as an a-umlaut
    within a string by writing them as ``&auml;`` etc.

    text          -- the string to convert.
    skipchars     -- a single string, or a sequence of strings, that must
                     not be converted.  ``&`` is always escaped.
    extra_careful -- when true, ``text`` is returned unchanged if it already
                     contains something that looks like an entity
                     (``&name;`` or ``&#123;``), i.e. it might have been
                     converted before.
    """
    entitydefs_inverted = dict((v, k) for k, v in entitydefs.items())

    # Escape every value so that it is matched literally, whatever
    # characters the entity table contains.
    _badchars_regex = re.compile(
        '|'.join(re.escape(v) for v in entitydefs.values()))
    _been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')

    if extra_careful and _been_fixed_regex.search(text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Collect what has to be replaced *before* '&' is escaped below,
    # otherwise the ampersands of the new entities would be found as well.
    found = set(x for x in _badchars_regex.findall(text)
                if x not in skipchars)

    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&#8364;')
    for each in found:
        if each == '&':
            # already handled above
            continue
        text = text.replace(each, '&%s;' % entitydefs_inverted[each])
    return text
Пример #8
0
# URL ends in a known movie file extension (case-insensitive).
match_movie_url = re.compile(r""".*\.(mpg|mpeg|avi)$""", re.I).match

# Data starts with the byte sequence 00 00 01 BA or with a RIFF/AVI header.
match_movie = re.compile("^(\x00\x00\x01\xba|RIFF....AVI LIST)").match

# Splits "<prefix><digits>[.<3-4 char extension>]" into three groups.
match_enum_url = re.compile(r"(.*?)(\d+)(\.\w{3,4})?$").match

# Hosts directly below yimg.com.  The dots are escaped so that they only
# match a literal '.'; unescaped, hosts such as "xyimg-com..." matched too.
enum_blacklisted = re.compile(r"^http://[^.]*\.yimg\.com").match

	

# matches html entities
from htmlentitydefs import entitydefs

# Names of the entities whose replacement is exactly one character long.
entities = [name for (name, char) in entitydefs.items() if len(char) == 1]

entity_sub = re.compile("&(%s);" % '|'.join(entities)).sub


def get_match_entitydef(match):
    """Return the ``entitydefs`` value for the name captured by ``match``."""
    return entitydefs[match.group(1)]


#unescapes html entities
def unentify(s):
    """Return ``s`` with every ``&name;`` listed in ``entities`` replaced."""
    return entity_sub(get_match_entitydef, s)
    
import Queue
	
class MultiQueue(Queue.Queue):

    def _init(self, (maxq, maxsize)):
        self.maxsize = maxsize
Пример #9
0
        if size < 1024.0:
            import math

            if math.floor(size) == size:
                return "%d %s" % (int(size), final_unit)
            else:
                return "%3.1f %s" % (size, final_unit)

        if unit != "Yotta":
            size /= 1024.0

    return "%3.1f %s" % (size, final_unit)


# Reverse lookup table: entity value -> entity name.
entitydefs_inverted = dict((v, k) for k, v in entitydefs.items())

# Matches any value of the entity table.  The values are escaped so that
# each one is matched literally.
_badchars_regex = re.compile(
    "|".join(re.escape(v) for v in entitydefs.values()))
# Matches something that already looks like an entity: &name; or &#123;
# (raw string, so that "\w" reaches the regex engine unchanged).
_been_fixed_regex = re.compile(r"&\w+;|&#[0-9]+;")


def html_entity_fixer(text, skipchars=[], extra_careful=1):
    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and _been_fixed_regex.findall(text):
        return text

    if type(skipchars) == type("s"):
        skipchars = [skipchars]
Пример #10
0
from math import floor
from htmlentitydefs import entitydefs
import warnings, pytz
import logging

# import line used by textify
import formatter, htmllib, StringIO

# Markdown support is optional: when the ``markdown`` package cannot be
# imported, ``markdown_converter`` is set to None instead.
try:
    import markdown
    markdown_converter = markdown.Markdown(safe_mode="escape")
except ImportError:
    markdown_converter = None

# Reverse lookup table: entity value -> entity name.
entitydefs_inverted = dict(
    (value, name) for name, value in entitydefs.items())

# zope
from Products.PythonScripts.standard import html_quote, newline_to_br, \
         url_quote, url_quote_plus

try:
    # >= Zope 2.12
    from zope.structuredtext import stx2html
except ImportError:
    # < Zope 2.12
    from StructuredText.StructuredText import HTML as stx2html


def structured_text(txt):
Пример #11
0
# URL that contains a second http/ftp URL after the first one.
referlink = re.compile(r"""(http|ftp)://(.*?)(http|ftp)://""", re.I).match

# URL ends in a known movie file extension (case-insensitive).
match_movie_url = re.compile(r""".*\.(mpg|mpeg|avi)$""", re.I).match

# Data starts with the byte sequence 00 00 01 BA or with a RIFF/AVI header.
match_movie = re.compile("^(\x00\x00\x01\xba|RIFF....AVI LIST)").match

# Splits "<prefix><digits>[.<3-4 char extension>]" into three groups.
match_enum_url = re.compile(r"(.*?)(\d+)(\.\w{3,4})?$").match

# Hosts directly below yimg.com.  The dots are escaped so that they only
# match a literal '.'; unescaped, hosts such as "xyimg-com..." matched too.
enum_blacklisted = re.compile(r"^http://[^.]*\.yimg\.com").match

# matches html entities
from htmlentitydefs import entitydefs

# Only entities that stand for a single character are handled here.
entities = [e for e in entitydefs if len(entitydefs[e]) == 1]

entity_sub = re.compile("&(%s);" % '|'.join(entities)).sub


def get_match_entitydef(match):
    """Look up the entity name captured in group 1 of ``match``."""
    return entitydefs[match.group(1)]


#unescapes html entities
def unentify(s):
    """Replace each ``&name;`` from ``entities`` in ``s`` by its character."""
    return entity_sub(get_match_entitydef, s)


import Queue


class MultiQueue(Queue.Queue):
def decodeentities(string):
    """Return ``string`` with named HTML entities replaced by their value.

    The text is scanned once, so replaced text is never decoded again:
    ``&amp;lt;`` becomes ``&lt;`` rather than ``<``.  (Chaining
    ``str.replace`` per entity made that depend on the iteration order of
    ``entitydefs``.)  Names that are not in ``entitydefs`` are left as they
    are.
    """
    import re

    def expand(match):
        # Unknown names fall back to the matched text itself.
        return entitydefs.get(match.group(1), match.group(0))

    return re.sub(r'&(\w+);', expand, string)
Пример #13
0
# A non-breaking space is treated as an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are looked up through ``sgmlentity`` and therefore left out
# of ``entcharrefs`` below.
sgmlentity = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': '\'',
    'ndash': '-'
}
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# Maps both the entity name and its decimal reference ('#NNN') to the
# unicode character.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _k, _v in entitydefs.items():
    if _k not in _sgmlentkeys:
        if _v.startswith('&#'):
            # The value is itself a character reference such as '&#913;'.
            dec_code = _v[1:-1]
            _v = unichr(int(dec_code[1:]))
        else:
            # The value is a single Latin-1 character.
            dec_code = '#%s' % str(ord(_v))
            _v = unicode(_v, 'latin_1', 'replace')
        entcharrefs[dec_code] = _v
        entcharrefs[_k] = _v
del _sgmlentkeys, _k, _v
# Every spelling of the non-breaking space reference gives a plain space.
entcharrefs.update({
    '#160': u' ',
    '#xA0': u' ',
    '#xa0': u' ',
    '#XA0': u' ',
})
Пример #14
0
                         ("gacute", 501), ("Hacek", 711), ("Breve", 728),
                         ("DiacriticalDot", 729), ("ring", 730), ("ogon", 731),
                         ("DiacriticalTilde", 732),
                         ("DiacriticalDoubleAcute", 733), ("DownBreve", 785),
                         ("UnderBar", 818), ("varepsilon", 949),
                         ("varsigma", 962), ("varphi", 966), ("vartheta", 977),
                         ("Upsi", 978), ("straightphi", 981), ("varpi", 982),
                         ("Gammad", 988), ("digamma", 989), ("varkappa", 1008),
                         ("varrho", 1009), ("straightepsilon", 1013),
                         ("backepsilon", 1014)]

from htmlentitydefs import entitydefs

entitydefs['apos'] = "'"  # only XML entity that isn't an HTML entity as well

# (name, decimal code point as a string) pairs.  A value of the form
# '&#NNN;' contributes its digits; a single character contributes its
# ordinal.
List_HTML_Entities = [
    (name, value[2:-1] or str(ord(value)))
    for name, value in entitydefs.items()
]
# The MathML2 names are appended; none of them may clash with an HTML name.
for (name, value) in List_MathML2_Entities:
    assert name not in entitydefs
    List_HTML_Entities.append((name, str(value)))

########## CSS properties ##########

List_CSS_Props = "color display font font-family font-size font-style font-weight list-style margin margin-bottom margin-left margin-right margin-top max-width opacity padding padding-bottom padding-left padding-right padding-top page-break-after page-break-before text-align text-decoration text-indent text-underline white-space word-wrap"

########## CSS colors ##########

# array of name/value for css colors, value is what goes inside MKRGB()
# based on https://developer.mozilla.org/en/CSS/color_value
# TODO: add more colors
List_CSS_Colors = [
Пример #15
0
def convertWebIntelligentPlainTextToHtml(orig, tab_width=4):
    """Converts text/x-web-intelligent to text/html

    ``orig`` may be ``None`` (treated as the empty string), a unicode
    string, or a byte string, which is decoded as UTF-8 with undecodable
    bytes replaced.  ``tab_width`` is the number of ``&nbsp;`` written for
    a tab in the leading whitespace of a line.

    Characters with a named entity are escaped, URLs and e-mail addresses
    become links, leading whitespace becomes ``&nbsp;`` and newlines become
    ``<br />``.  The result is returned encoded as UTF-8.
    """
    # very long urls are abbreviated to allow nicer layout
    def abbreviateUrl(url, max_length=60, ellipsis="[&hellip;]"):
        if len(url) < max_length:
            return url
        protocolend = url.find("//")
        if protocolend == -1:
            protocol = ""
        else:
            protocol = url[0:protocolend + 2]
            url = url[protocolend + 2:]
        parts = url.split("/")
        if len(parts) < 3 or len(parts[0]) + len(parts[-1]) > max_length:
            url = protocol + url
            # Floor division: ``center`` is used as a slice index and has
            # to stay an integer whatever division semantics are active.
            center = (max_length - 5) // 2
            return url[:center] + ellipsis + url[-center:]

        return protocol + parts[0] + "/" + ellipsis + "/" + parts[-1]

    # In the IPv4 alternative the separator is an escaped, literal dot; an
    # unescaped '.' accepted any character between the number groups.
    urlRegexp = re.compile(r'((?:ftp|https?)://(localhost|([12]?[0-9]{1,2}\.){3}([12]?[0-9]{1,2})|(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+(?:com|edu|biz|org|gov|int|info|mil|net|name|museum|coop|aero|[a-z][a-z]))\b(?::\d+)?(?:\/[^"\'<>()\[\]{}\s\x7f-\xff]*(?:[.,?]+[^"\'<>()\[\]{}\s\x7f-\xff]+)*)?)', re.I|re.S|re.U)
    emailRegexp = re.compile(r'["=]?(\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)', re.I|re.S|re.U)
    indentRegexp = re.compile(r'^(\s+)', re.M|re.U)

    text = orig
    if text is None:
        text = ''
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8', 'replace')

    # Do &amp; separately, else, it may replace an already-inserted & from
    # an entity with &amp;, so < becomes &lt; becomes &amp;lt;
    text = text.replace('&', '&amp;')
    # Make funny characters into html entity defs
    for entity, letter in entitydefs.items():
        if entity != 'amp':
            text = text.replace(letter.decode('latin-1'), '&' + entity + ';')

    # Replace hyperlinks with clickable <a> tags
    def replaceURL(match):
        url = match.groups()[0]
        linktext = abbreviateUrl(url)
        # In urls we need the revert our earlier change to the ampersands.
        # We do not want something like:
        # http://google.com/ask?question=everything&amp;answer=42
        url = url.replace('&amp;', '&')
        # Also with <some link> we should only link to some link, not
        # including the brackets.
        end = ''
        # XXX Probably better to fix the regex above.  Maurits
        if url.endswith('&gt;'):
            url = url[:-len('&gt;')]
            linktext = linktext[:-len('&gt;')]
            end = '&gt;'

        # rel="nofollow" shall avoid spamming
        return '<a href="%s" rel="nofollow">%s</a>%s' % (url, linktext, end)
    text = urlRegexp.subn(replaceURL, text)[0]

    # Replace email strings with mailto: links
    def replaceEmail(match):
        url = match.groups()[0]
        # following unicode substitutions shall avoid email spam crawlers to pickup email addresses
        url = url.replace('@', '&#0064;')
        return '<a href="&#0109;ailto&#0058;%s">%s</a>' % (url, url)
    text = emailRegexp.subn(replaceEmail, text)[0]

    # Make leading whitespace on a line into &nbsp; to preserve indents
    def indentWhitespace(match):
        indent = match.groups()[0]
        indent = indent.replace(' ', '&nbsp;')
        return indent.replace('\t', '&nbsp;' * tab_width)
    text = indentRegexp.subn(indentWhitespace, text)[0]

    # Finally, make \n's into br's
    text = text.replace('\n', '<br />')

    text = text.encode('utf-8')

    return text
Пример #16
0
def _build_entiries_re():
    """Compile a case-insensitive regex matching ``&name;`` references.

    Group 1 captures the name.  Entities whose ``entitydefs`` value starts
    with ``&`` are left out of the pattern.
    """
    names = [re.escape(name)
             for name, value in entitydefs.items()
             if not value.startswith('&')]
    return re.compile("&(%s);" % "|".join(names), re.I)
Пример #17
0
			return string.split(pwd.getpwnam(user.id)[4], ",")[0]
		except: pass
	return ""


def urlquote(s):
    """Quote ``s`` with ``urllib.quote_plus``, treating no character as safe."""
    return urllib.quote_plus(s, safe="")
def urlunquote(s):
    """Undo ``urlquote``: decode %XX escapes and turn '+' into a space."""
    unquoted = urllib.unquote_plus(s)
    return unquoted

def quotedtext(text):
    """Return ``text`` with every line prefixed by ``"> "``.

    Lines are separated by ``"\\n"``; an empty ``text`` is one empty line.
    String methods are used in place of the deprecated ``string.join`` /
    ``string.split`` module functions.
    """
    return "\n".join("> %s" % line for line in text.split("\n"))


# Character -> HTML replacement: newline and tab first, then one
# '&name;' entry per value of the entity table.
replace_char = {"\n": "<br>\n", "\t": "&nbsp;" * 4}
replace_char.update(
    (char, "&%s;" % name) for name, char in entitydefs.items())


def escape(s, spaces=0):
	nbsp = entitydefs["nbsp"]
	last = ""
	new = StringIO()
	for char in s:
		if spaces and char == " " and last in [" ", nbsp]:
			char = nbsp
		new.write(replace_char.get(char, char))
		last = char
	newstr = new.getvalue()
	if spaces and newstr and newstr[0] == " ":
	    return "&nbsp;%s" % newstr[1:]
Пример #18
0
def convertHtmlToWebIntelligentPlainText(orig):
    """Converts text/html to text/x-web-intelligent.

    ``None`` is treated as the empty string.  The contents of ``<pre>``
    elements are set aside first and put back unchanged at the end; all
    other markup is reduced to plain text with newlines, tabs and
    indentation standing in for the block structure.
    """
    if orig is None:
        orig = ''

    any_whitespace = re.compile(r'\s+')

    # Save all <pre> sections and restore after other transforms
    saved_pre = {}

    def stash_pre(match):
        key = '__pre_marker__%d__' % (len(saved_pre),)
        saved_pre[key] = match.group(1)
        return key

    pre_pattern = re.compile(r'<\s*pre[^>]*>(.*?)<\s*/pre\s*>', re.I | re.S)
    text = pre_pattern.sub(stash_pre, orig)

    # Make whitespace-tag-whitespace into whitespace-tag (covers runs of
    # directly nested tags): drop the superfluous whitespace but keep one
    # leading space.
    def squeeze_tag_whitespace(match):
        return ' ' + any_whitespace.sub('', match.group(0))

    text = re.compile(r'\s+((<[^>]+>)\s+)+').sub(squeeze_tag_whitespace, text)

    # Make all whitespace into a single space
    text = any_whitespace.sub(' ', text)

    # Fix entities.  &lt; and &gt; are done last, else we may be creating
    # what looks like tags.
    text = text.replace('&nbsp;', ' ')
    for name, char in entitydefs.items():
        if name not in ('lt', 'gt'):
            text = text.replace('&' + name + ';', char)

    # XXX: Remove <head>, <script>, <style> ?

    # Tag rewrites, applied strictly in this order.
    tag_rewrites = (
        # td's become tabs
        (r'<\s*(td)([^>])*>', '\t'),
        # br's become newlines
        (r'<\s*(br)\s*/?>', '\n'),
        # the start of a dt starts a paragraph
        (r'<\s*(dt)[^>]*>', '\n\n'),
        # the close of p, div, tr, ul, ol and dl ends a paragraph
        (r'<\s*/\s*(p|div|tr|ul|ol|dl)[^>]*>', '\n\n'),
        # blockquotes and dd blocks are indented
        (r'<\s*(blockquote|dd)[^>]*>', '\n\n  '),
        # list items are indented and prefixed with -
        (r'<\s*(li)[^>]*>', '\n\n  - '),
    )
    for pattern, replacement in tag_rewrites:
        text = re.compile(pattern, re.I).sub(replacement, text)

    # Remove other tags
    text = re.compile(r'<[^>]+>', re.I | re.M).sub('', text)

    # Fix < and > entities
    text = text.replace('&lt;', '<').replace('&gt;', '>')

    # Restore pres
    for key, section in saved_pre.items():
        text = text.replace(key, '\n\n' + section + '\n\n')

    return text
Пример #19
0
#!/usr/bin/env python
# -*- coding: us-ascii -*-
# vim: syntax=python
#
# 2-clause BSD license
# Copyright 2009 Noriyuki Hosaka [email protected]
#

import sys
import os.path
from htmlentitydefs import entitydefs

from tonic.lineparser import LineParser

# Reverse lookup used by escape(): entity value -> '&name;' reference.
_d = dict((char, '&%s;' % name) for name, char in entitydefs.items())

def escape(s):
  """Return ``s`` with each character that has an entity written as ``&name;``."""
  return ''.join(_d.get(ch, ch) for ch in s)

def resource(name):
  """Return the raw bytes of the file ``name``.

  A ``name`` that does not start with ``os.path.sep`` is resolved against
  the directory that contains this module.
  """
  if not name.startswith(os.path.sep):
    here = os.path.split(os.path.abspath(__file__))[0]
    name = os.path.join(here, name)
  # ``open`` replaces the Python 2-only ``file`` builtin, and the ``with``
  # block closes the file even when reading fails.
  with open(name, 'rb') as f:
    return f.read()

class Formatter(LineParser):
  css_src = resource('python.css')
Пример #20
0
                        lastKey=lastKey)


# Handle HTML/XML/SGML entities.
from htmlentitydefs import entitydefs
# Copy the imported table before changing it below.
entitydefs = dict(entitydefs)
entitydefsget = entitydefs.get
# A non-breaking space is treated as an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are looked up through ``sgmlentity`` and therefore left out
# of ``entcharrefs`` below.
sgmlentity = {
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': '\'',
}
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# Maps both the entity name and its decimal reference ('#NNN') to the
# unicode character.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _k, _v in entitydefs.items():
    if _k in _sgmlentkeys:
        continue
    _is_reference = _v[0:2] == '&#'
    if _is_reference:
        # e.g. '&#913;' -> key '#913', character unichr(913)
        dec_code = _v[1:-1]
        _v = unichr(int(_v[2:-1]))
    else:
        # a single Latin-1 character -> key '#<ordinal>'
        dec_code = '#' + str(ord(_v))
        _v = unicode(_v, 'latin_1', 'replace')
    del _is_reference
    entcharrefs[dec_code] = _v
    entcharrefs[_k] = _v
del _sgmlentkeys, _k, _v
# Every spelling of the non-breaking space reference gives a plain space.
entcharrefs.update(dict.fromkeys(('#160', '#xA0', '#xa0', '#XA0'), u' '))
Пример #21
0
def htmlEncode(s):
    """Return the items of ``s`` joined, with entity characters as ``&name;``.

    An item is replaced when it, or failing that its ``str()`` form, is a
    value of ``entitydefs``.  The entity is looked up under the key that
    actually matched; looking up the item itself after only its ``str()``
    form had matched raised ``KeyError``.  All other items are joined
    unchanged.
    """
    entity_for = dict((val, key) for (key, val) in entitydefs.items())

    def encode(ch):
        if ch in entity_for:
            return "&" + entity_for[ch] + ";"
        as_text = str(ch)
        if as_text in entity_for:
            return "&" + entity_for[as_text] + ";"
        return ch

    return ''.join(encode(ch) for ch in s)
Пример #22
0
########## HTML and XML entities ##########

Template_Entities_Comment = """\
// map of entity names to their Unicode runes, cf.
// http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
// and http://www.w3.org/TR/MathML2/bycodes.html
"""

# selection of MathML2 entities that aren't HTML entities
List_MathML2_Entities = [("DoubleDot", 168), ("OverBar", 175), ("PlusMinus", 177), ("Cedilla", 184), ("Amacr", 256), ("amacr", 257), ("Abreve", 258), ("abreve", 259), ("Aogon", 260), ("aogon", 261), ("Cacute", 262), ("cacute", 263), ("Ccirc", 264), ("ccirc", 265), ("Cdot", 266), ("cdot", 267), ("Ccaron", 268), ("ccaron", 269), ("Dcaron", 270), ("dcaron", 271), ("Dstrok", 272), ("dstrok", 273), ("Emacr", 274), ("emacr", 275), ("Edot", 278), ("edot", 279), ("Eogon", 280), ("eogon", 281), ("Ecaron", 282), ("ecaron", 283), ("Gcirc", 284), ("gcirc", 285), ("Gbreve", 286), ("gbreve", 287), ("Gdot", 288), ("gdot", 289), ("Gcedil", 290), ("Hcirc", 292), ("hcirc", 293), ("Hstrok", 294), ("hstrok", 295), ("Itilde", 296), ("itilde", 297), ("Imacr", 298), ("imacr", 299), ("Iogon", 302), ("iogon", 303), ("Idot", 304), ("IJlig", 306), ("ijlig", 307), ("Jcirc", 308), ("jcirc", 309), ("Kcedil", 310), ("kcedil", 311), ("kgreen", 312), ("Lacute", 313), ("lacute", 314), ("Lcedil", 315), ("lcedil", 316), ("Lcaron", 317), ("lcaron", 318), ("Lmidot", 319), ("lmidot", 320), ("Lstrok", 321), ("lstrok", 322), ("Nacute", 323), ("nacute", 324), ("Ncedil", 325), ("ncedil", 326), ("Ncaron", 327), ("ncaron", 328), ("napos", 329), ("ENG", 330), ("eng", 331), ("Omacr", 332), ("omacr", 333), ("Odblac", 336), ("odblac", 337), ("Racute", 340), ("racute", 341), ("Rcedil", 342), ("rcedil", 343), ("Rcaron", 344), ("rcaron", 345), ("Sacute", 346), ("sacute", 347), ("Scirc", 348), ("scirc", 349), ("Scedil", 350), ("scedil", 351), ("Tcedil", 354), ("tcedil", 355), ("Tcaron", 356), ("tcaron", 357), ("Tstrok", 358), ("tstrok", 359), ("Utilde", 360), ("utilde", 361), ("Umacr", 362), ("umacr", 363), ("Ubreve", 364), ("ubreve", 365), ("Uring", 366), ("uring", 367), ("Udblac", 368), ("udblac", 369), ("Uogon", 370), ("uogon", 371), ("Wcirc", 372), ("wcirc", 373), ("Ycirc", 374), ("ycirc", 375), ("Zacute", 377), ("zacute", 378), ("Zdot", 379), ("zdot", 380), ("Zcaron", 381), ("zcaron", 382), ("imped", 437), 
("gacute", 501), ("Hacek", 711), ("Breve", 728), ("DiacriticalDot", 729), ("ring", 730), ("ogon", 731), ("DiacriticalTilde", 732), ("DiacriticalDoubleAcute", 733), ("DownBreve", 785), ("UnderBar", 818), ("varepsilon", 949), ("varsigma", 962), ("varphi", 966), ("vartheta", 977), ("Upsi", 978), ("straightphi", 981), ("varpi", 982), ("Gammad", 988), ("digamma", 989), ("varkappa", 1008), ("varrho", 1009), ("straightepsilon", 1013), ("backepsilon", 1014)]

from htmlentitydefs import entitydefs
entitydefs['apos'] = "'" # only XML entity that isn't an HTML entity as well


def _entity_codepoint(value):
    # '&#NNN;' -> 'NNN'; a single character -> its ordinal as a string
    return value[2:-1] or str(ord(value))


# (name, decimal code point as a string) pairs for every HTML entity ...
List_HTML_Entities = [(name, _entity_codepoint(value))
                      for name, value in entitydefs.items()]
# ... followed by the MathML2 names, none of which may clash with them.
for (name, value) in List_MathML2_Entities:
    assert name not in entitydefs
    List_HTML_Entities.append((name, str(value)))

########## CSS properties ##########

List_CSS_Props = "color display font font-family font-size font-style font-weight list-style margin margin-bottom margin-left margin-right margin-top max-width padding padding-bottom padding-left padding-right padding-top page-break-after page-break-before text-align text-decoration text-indent text-underline white-space word-wrap"

########## CSS colors ##########

# array of name/value for css colors, value is what goes inside MKRGB()
# based on https://developer.mozilla.org/en/CSS/color_value
# TODO: add more colors
List_CSS_Colors = [
Пример #23
0
def decodeentities(string):
    """Return ``string`` with named HTML entities replaced by their value.

    The replacement is done in a single pass, so text produced by one
    replacement is never decoded again: ``&amp;lt;`` becomes ``&lt;``, not
    ``<``.  (With one ``str.replace`` per entity that depended on the
    iteration order of ``entitydefs``.)  Names that are not in
    ``entitydefs`` are kept as written.
    """
    import re

    def expand(match):
        name = match.group(1)
        if name in entitydefs:
            return entitydefs[name]
        return match.group(0)

    return re.sub(r'&(\w+);', expand, string)