from cStringIO import StringIO except ImportError: from StringIO import StringIO from london.utils.encoding import force_unicode from london.utils.functional import allow_lazy, SimpleLazyObject from london.utils.translation import ugettext_lazy, ugettext as _, pgettext try: unicode except NameError: unicode = str # Capitalizes the first letter of a string. capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:] capfirst = allow_lazy(capfirst, unicode) # Set up regular expressions re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') def wrap(text, width): """ A word-wrap function that preserves existing line breaks and most spaces in the text. Expects that existing line breaks are posix newlines. """ text = force_unicode(text) def _generator(): it = iter(text.split(' ')) word = it.next()
__Y2 = r'(?P<year>\d{2})' __T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) def urlquote(url, safe='/'): """ A version of Python's urllib.quote() function that can operate on unicode strings. The url is first UTF-8 encoded before quoting. The returned string can safely be used as part of an argument to a subsequent iri_to_uri() call without double-quoting occurring. """ return force_unicode(urllib.quote(smart_str(url), smart_str(safe))) urlquote = allow_lazy(urlquote, unicode) def urlquote_plus(url, safe=''): """ A version of Python's urllib.quote_plus() function that can operate on unicode strings. The url is first UTF-8 encoded before quoting. The returned string can safely be used as part of an argument to a subsequent iri_to_uri() call without double-quoting occurring. """ return force_unicode(urllib.quote_plus(smart_str(url), smart_str(safe))) urlquote_plus = allow_lazy(urlquote_plus, unicode) def urlencode(query, doseq=0): """ A version of Python's urllib.urlencode() function that can operate on unicode strings. The parameters are first case to UTF-8 encoded strings and
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]), '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION]))) simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') del x # Temporary variable def escape(html): """ Returns the given HTML with ampersands, quotes and angle brackets encoded. """ return mark_safe(force_unicode(html).replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''')) escape = allow_lazy(escape, unicode) _base_js_escapes = ( ('\\', r'\u005C'), ('\'', r'\u0027'), ('"', r'\u0022'), ('>', r'\u003E'), ('<', r'\u003C'), ('&', r'\u0026'), ('=', r'\u003D'), ('-', r'\u002D'), (';', r'\u003B'), (u'\u2028', r'\u2028'), (u'\u2029', r'\u2029') )