Пример #1
0
def smart_urlquote(url):
    """Return ``url`` as text, percent-quoting it unless it already looks quoted.

    The host part is first converted to its IDNA (ASCII) form when possible.
    The URL is then percent-quoted if it contains no ``%`` at all, or if
    ``_unquoted_percents_re`` finds a match in it (presumably a ``%`` that is
    not part of a valid escape; the pattern is defined elsewhere, confirm).
    The result is always passed through ``encoding.force_text``.
    """
    try:
        scheme, host, path, query, fragment = urlsplit(url)
        try:
            host = host.encode('idna').decode('ascii')
        except UnicodeError:
            # The host cannot be IDNA-encoded: leave the URL as it was given.
            pass
        else:
            url = urlunsplit((scheme, host, path, query, fragment))
    except ValueError:
        # Splitting or rebuilding the URL failed: continue with the raw value.
        pass

    # Same short-circuit as testing "no % present, or a stray % found":
    # the regex is only consulted when the URL contains a '%'.
    looks_quoted = '%' in url and not _unquoted_percents_re.search(url)
    if not looks_quoted:
        url = quote(encoding.force_bytes(url), safe=b'!*\'();:@&=+$,/?#[]~')

    return encoding.force_text(url)
Пример #2
0
def urlize(text):
    """Wrap URL-like words of ``text`` in ``<a target="_blank">`` anchors.

    ``text`` is coerced with ``encoding.force_text`` and split with
    ``_word_split_re``. Only pieces containing ``.``, ``@`` or ``:`` are
    examined. For each of those:

    * A leading opener from ``_WRAPPING_PUNCTUATION`` is moved into the text
      kept before the link. The matching closer at the end is moved after the
      link only when the word holds exactly one more closer than openers, so
      balanced pairs inside the URL are kept.
    * Each entry of ``_TRAILING_PUNCTUATION`` found at the end of the word is
      stripped once, in the order the entries are listed.
    * If the remainder matches ``_simple_url_re`` it is linked as is; if it
      matches ``_simple_url_2_re`` it is linked with ``http://`` prepended.
      The href is produced by ``smart_urlquote``.

    Entries of both punctuation collections are interpolated into regular
    expressions, so they must already be valid regex source.

    Returns the pieces joined back together with ``''.join``.

    NOTE(review): neither the href nor the link text is HTML-escaped here.
    Confirm that callers pass already-escaped text or escape the result.
    """
    words = _word_split_re.split(encoding.force_text(text))

    # Compile the punctuation patterns once per call rather than rebuilding
    # them for every candidate word.
    wrapping_patterns = [
        (
            re.compile('(^%s)' % opening),
            re.compile('(%s$)' % closing),
            re.compile(opening),
            re.compile(closing),
        )
        for opening, closing in _WRAPPING_PUNCTUATION
    ]
    trailing_patterns = [
        re.compile('(%s$)' % punctuation)
        for punctuation in _TRAILING_PUNCTUATION
    ]

    for i, word in enumerate(words):
        # Cheap pre-filter: a word without any of these cannot be a link.
        if not ('.' in word or '@' in word or ':' in word):
            continue

        lead, middle, trail = '', word, ''

        for lead_re, tail_re, opening_re, closing_re in wrapping_patterns:
            # With a capturing group, split() yields ['', opener, rest] when
            # the word starts with the opener, otherwise just [middle].
            pieces = lead_re.split(middle)
            middle = pieces[-1]
            if len(pieces) > 1:
                lead += pieces[1]

            pieces = tail_re.split(middle)
            opening_count = len(opening_re.findall(middle))
            closing_count = len(closing_re.findall(middle))
            # Strip a closer only when it is unbalanced by exactly one.
            if closing_count == opening_count + 1:
                middle = pieces[0]
                if len(pieces) > 1:
                    trail = pieces[1] + trail

        for tail_re in trailing_patterns:
            pieces = tail_re.split(middle)
            middle = pieces[0]
            if len(pieces) > 1:
                trail = pieces[1] + trail

        if _simple_url_re.match(middle):
            url = smart_urlquote(middle)
        elif _simple_url_2_re.match(middle):
            url = smart_urlquote('http://%s' % middle)
        else:
            # Not a recognised URL form: keep the word exactly as it was.
            continue

        if url:
            anchor = '<a target="_blank" href="%s">%s</a>' % (url, middle)
            words[i] = '%s%s%s' % (lead, anchor, trail)

    return ''.join(words)