示例#1
0
def cleanHTML(text, skipchars=[], extra_careful=True):
    '''Replace characters that have a named HTML entity with that entity.

    Every character (or replacement text) listed in ``entitydefs`` that
    occurs in ``text`` is rewritten as ``&name;``; for example an
    a-umlaut becomes ``&auml;`` and ``&`` becomes ``&amp;``.

    text          -- the string to convert; empty/None is returned as is.
    skipchars     -- a single string or a list of strings that must be
                     left untouched (``&`` is always converted).
    extra_careful -- when true, text that already contains something
                     looking like ``&name;`` or ``&#123;`` is returned
                     unchanged so it is never encoded twice.

    Returns the converted string.
    '''
    if not text:
        # nothing to convert; also avoids running the regexes on None
        return text

    # reverse lookup: replacement text -> entity name
    entitydefs_inverted = dict(
        (value, name) for name, value in entitydefs.items())

    # values are escaped so that a regex metacharacter in the entity
    # table could never change the meaning of the alternation
    badchars_regex = re.compile(
        '|'.join(re.escape(value) for value in entitydefs.values()))
    been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')

    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and been_fixed_regex.search(text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # collect the distinct convertible strings *before* any entity is
    # inserted, so the '&' of a fresh entity is never picked up
    found = set(
        match for match in badchars_regex.findall(text)
        if match not in skipchars)

    # ampersands go first, otherwise the entities written below would
    # themselves be re-encoded
    text = text.replace('&', '&amp;')
    # presumably '\x80' is the Windows-1252 euro sign byte, which has no
    # entry in the entity table
    text = text.replace('\x80', '&euro;')

    for key in found:
        if key == '&':
            # already handled above
            continue
        text = text.replace(key, '&%s;' % entitydefs_inverted[key])
    return text
示例#2
0
def cleanHTML(text, skipchars=[], extra_careful=True):
	'''Replace characters that have a named HTML entity with that entity.

	Every character (or replacement text) listed in ``entitydefs`` that
	occurs in ``text`` is rewritten as ``&name;``; for example an
	a-umlaut becomes ``&auml;`` and ``&`` becomes ``&amp;``.

	text          -- the string to convert; empty/None is returned as is.
	skipchars     -- a single string or a list of strings that must be
	                 left untouched (``&`` is always converted).
	extra_careful -- when true, text that already contains something
	                 looking like ``&name;`` or ``&#123;`` is returned
	                 unchanged so it is never encoded twice.

	Returns the converted string.
	'''
	if not text:
		# nothing to convert; also avoids running the regexes on None
		return text

	# reverse lookup: replacement text -> entity name
	entitydefs_inverted = dict(
		(value, name) for name, value in entitydefs.items())

	# values are escaped so that a regex metacharacter in the entity
	# table could never change the meaning of the alternation
	badchars_regex = re.compile(
		'|'.join(re.escape(value) for value in entitydefs.values()))
	been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')

	# if extra_careful we don't attempt to do anything to
	# the string if it might have been converted already.
	if extra_careful and been_fixed_regex.search(text):
		return text

	if isinstance(skipchars, str):
		skipchars = [skipchars]

	# collect the distinct convertible strings *before* any entity is
	# inserted, so the '&' of a fresh entity is never picked up
	found = set(
		match for match in badchars_regex.findall(text)
		if match not in skipchars)

	# ampersands go first, otherwise the entities written below would
	# themselves be re-encoded
	text = text.replace('&', '&amp;')
	# presumably '\x80' is the Windows-1252 euro sign byte, which has no
	# entry in the entity table
	text = text.replace('\x80', '&euro;')

	for key in found:
		if key == '&':
			# already handled above
			continue
		text = text.replace(key, '&%s;' % entitydefs_inverted[key])
	return text
示例#3
0
            if math.floor(size) == size:
                return "%d %s" % (int(size), final_unit)
            else:
                return "%3.1f %s" % (size, final_unit)

        if unit != "Yotta":
            size /= 1024.0

    return "%3.1f %s" % (size, final_unit)


entitydefs_inverted = {}
for k, v in entitydefs.items():
    entitydefs_inverted[v] = k

_badchars_regex = re.compile("|".join(entitydefs.values()))
_been_fixed_regex = re.compile("&\w+;|&#[0-9]+;")


def html_entity_fixer(text, skipchars=[], extra_careful=1):
    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and _been_fixed_regex.findall(text):
        return text

    if type(skipchars) == type("s"):
        skipchars = [skipchars]

    keyholder = []
    for char in _badchars_regex.findall(text):
        if char not in skipchars:
示例#4
0
                joined = '%s<%s %s>' % (part1, tag, attribute)
                joined += '%s%s</%s>%s</p>' % (dashes, _p_splitted[0], tag,
                                               _p_splitted[1])
            else:
                joined = '%s<%s %s>%s%s</%s>' % (part1, tag, attribute, dashes,
                                                 part2, tag)
            return joined
        return text


def niceboolean(value):
    """ return False when the string form of value spells a 'false'
    word (case and surrounding whitespace ignored), True otherwise """
    normalized = str(value).lower().strip()
    if normalized in ('', 'no', 'off', 'false', 'none', '0', 'f', 'n'):
        return False
    return True


_badchars_regex = re.compile('|'.join(entitydefs.values()))
_been_fixed_regex = re.compile('&\w+;|&#[0-9]+;')


def html_entity_fixer(text, skipchars=[], extra_careful=1):
    """ return a text properly html fixed """
    if not text:
        # then don't even begin to try to do anything
        return text

    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and _been_fixed_regex.findall(text):
        return text

    if isinstance(skipchars, basestring):
示例#5
0
            part.__init__(self, 'tr', style=style, attributes=attributes)
            self.addPart('th', content=self.text)
            self.addPart('td', content=self.field)


# need some functions for HTML
# ought to be somewhere else in Python?
# cgi.escape only seems to do <, >, and &
from htmlentitydefs import entitydefs
import re

# reverse lookup table: replacement text -> entity name
entitydefs_inverted = dict(
    (value, name) for name, value in entitydefs.items())

# matches any replacement text that has a named entity; the values are
# escaped so a regex metacharacter in the table could never change the
# meaning of the alternation
needencoding = re.compile(
    '|'.join(re.escape(value) for value in entitydefs.values()))
# matches text that already looks encoded: '&name;' or '&#123;'
# (raw string: '\w' in a plain literal is an invalid escape sequence)
alreadyencoded = re.compile(r'&\w+;|&#[0-9]+;')


# encodes any special characters to their HTML equivalents
def encode(text, skip=None, once_only=1):
    # if extra_careful, check to see if this text has already been converted
    if not (once_only and alreadyencoded.findall(text)):
        if not isinstance(skip, list):
            skip = [skip]

        # do ampersands on their own or we might end up converting our conversions
        if '&' not in skip:
            text = text.replace('&', '&amp;')
            skip.append('&')
示例#6
0
            self.addPiece(self.text)
            self.addPiece(self.field)
        else:
            part.__init__(self, 'tr', style=style, attributes=attributes)
            self.addPart('th', content=self.text)
            self.addPart('td', content=self.field)


from htmlentitydefs import entitydefs
import re

#reverse lookup table: replacement text -> entity name
entitydefs_inverted = dict(
    (value, name) for name, value in entitydefs.items())

#matches any replacement text that has a named entity; the values are
#escaped so a regex metacharacter in the table could never change the
#meaning of the alternation
needencoding = re.compile(
    '|'.join(re.escape(value) for value in entitydefs.values()))
#matches text that already looks encoded: '&name;' or '&#123;'
#(raw string: '\w' in a plain literal is an invalid escape sequence)
alreadyencoded = re.compile(r'&\w+;|&#[0-9]+;')

#need some functions for HTML
#ought to be somewhere else in Python?
#cgi.escape only seems to do <, >, and &

#encodes any special characters to their HTML equivalents
def encode(text, skip=None, once_only=1):
    # if extra_careful, check to see if this text has already been converted
    if not (once_only and alreadyencoded.findall(text)):
        if not isinstance(skip, list):
            skip = [skip]

        #do ampersands on their own or we might end up converting our conversions
        if '&' not in skip: