示例#1
0
def test_wrapping_parentheses():
    """Enclosing parentheses stay outside the link; parentheses in the path stay inside."""
    cases = (
        ('(example.com/)',
         '(<a href="http://example.com/">example.com/</a>)'),
        ('example.com/hi_(there)',
         '<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>'),
        ('(example.com/hi_(there))',
         '(<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>)'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#2
0
def test_wrapping_parentheses():
    """Enclosing parentheses stay outside the link; parentheses in the path stay inside."""
    cases = (
        ('(example.com/)',
         '(<a href="http://example.com/">example.com/</a>)'),
        ('example.com/hi_(there)',
         '<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>'),
        ('(example.com/hi_(there))',
         '(<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>)'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#3
0
def test_simple():
    """URLs embedded in a sentence are linked; a bare domain gets an http:// href."""
    cases = (
        ('a http://example.com link',
         'a <a href="http://example.com">http://example.com</a> link'),
        ('a https://ya.ru link',
         'a <a href="https://ya.ru">https://ya.ru</a> link'),
        ('a example.com link',
         'a <a href="http://example.com">example.com</a> link'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#4
0
def test_simple():
    """URLs embedded in a sentence are linked; a bare domain gets an http:// href."""
    cases = (
        ('a http://example.com link',
         'a <a href="http://example.com">http://example.com</a> link'),
        ('a https://ya.ru link',
         'a <a href="https://ya.ru">https://ya.ru</a> link'),
        ('a example.com link',
         'a <a href="http://example.com">example.com</a> link'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#5
0
def get_html(cell):
    """Render a cell's text as HTML, wrapping each text-format run in its tags.

    The cell's ``effectiveFormat.textFormat`` is the base format; every
    entry of ``textFormatRuns`` overrides it from that run's ``startIndex``
    up to the next run. Newlines become ``<br/>`` and the result is passed
    through ``linkify``.

    Args:
        cell: Mapping that may carry ``textFormatRuns`` and
            ``effectiveFormat``; also handed to ``get_text``.

    Returns:
        The value returned by ``linkify`` for the assembled markup.
    """
    text = get_text(cell)
    runs = cell.get('textFormatRuns', [])
    base_format = cell.get('effectiveFormat', {'textFormat': {}})['textFormat']

    if not runs:
        # No per-run formatting: one tag group around the whole text.
        html = build_tags(base_format) + text + build_tags(base_format, True)
    else:
        # The first run opens the output; its startIndex is never read.
        active = {**base_format, **runs[0]['format']}
        html = build_tags(active)

        cursor = 0
        for run in runs[1:]:
            start = run['startIndex']
            # Emit the text owned by the previous run, close its tags,
            # then open the tags of the run that starts here.
            html += text[cursor:start]
            cursor = start
            html += build_tags(active, True)
            active = {**base_format, **run['format']}
            html += build_tags(active)

        # Remainder of the text belongs to the last run.
        html += text[cursor:]
        html += build_tags(active, True)

    # NOTE(review): text is inserted without HTML escaping and linkify runs
    # over the generated markup — confirm both are intended.
    html = html.replace("\n", "<br/>")
    return linkify(html)
示例#6
0
def _is_vaccine_related(content):
    """Return True when a truthy entry of ``vaccine_terms`` occurs in ``content`` (case-folded)."""
    haystack = content.casefold()
    return any(item for item in vaccine_terms if item in haystack)


def URL_rep(text):
    """Map every URL found in ``text`` to vaccine-related page content or ``"NA"``.

    Each URL reported by ``URLExtract`` is fetched. When that fails or the
    status is not 200, the request is retried once with the href that
    ``linkify`` generates for the URL. Page content is read through
    ``Article`` first, and through the ``<p>``/``<title>`` nodes found by
    BeautifulSoup when ``Article`` raises. Content mentioning none of
    ``vaccine_terms`` is reported as ``"NA"``.

    Args:
        text: Text to scan for URLs.

    Returns:
        dict keyed by each URL exactly as it was extracted. Values are the
        page content prefixed with a newline, or ``"NA"`` when the page is
        unreachable or unrelated. Empty when ``text`` contains no URLs.
    """
    url_info = dict()
    urls = URLExtract().find_urls(text)
    if not urls:
        return url_info

    for url_org in urls:
        url = url_org

        # NOTE(review): verify=False disables TLS certificate checks.
        try:
            # Built inside the try so that a URL Article rejects also takes
            # the fallback path below.
            Article(url)
            response = requests.get(url, verify=False, timeout=20)
            fetched = response.status_code == 200
        except Exception:
            # Best effort: any failure here leads to the retry below, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            fetched = False

        if not fetched:
            # Retry with the href linkify produces for this URL.
            href = re.search('"(.*)"', linkify(url))
            if href is None:
                url_info[url_org] = "NA"
                continue
            url = href.group(1)
            try:
                response = requests.get(url, verify=False, timeout=20)
            except Exception:
                # One unreachable URL must not abort the whole scan.
                url_info[url_org] = "NA"
                continue
            if response.status_code != 200:
                url_info[url_org] = "NA"
                continue

        try:
            article = Article(url)
            article.download()
            article.parse()
            title = "\n" + article.title + "\n" + article.text
            if not _is_vaccine_related(title):
                title = 'NA'
        except Exception:
            # Article could not handle the page: fall back to the raw HTML
            # that was already downloaded.
            soup = BeautifulSoup(response.content, "html.parser")
            matches = soup.findAll(['p', 'title'], text=True, limit=None)
            linkcontent = [
                node.get_text() if isinstance(node, bs4.element.Tag) else node
                for node in matches
            ]
            joined = " ".join(linkcontent)
            if linkcontent and _is_vaccine_related(joined):
                title = "\n" + joined
            else:
                title = 'NA'

        url_info[url_org] = title

    return url_info
示例#7
0
文件: util.py 项目: ssilverm/boac
def process_input_from_rich_text_editor(rich_text):
    """Linkify bare URLs in rich text while leaving existing anchors untouched.

    Existing ``<a ...>...</a>`` elements are swapped for unique placeholders,
    the remaining text is linkified with ``target="_blank"``, and the
    original anchors are then restored.

    Args:
        rich_text: Markup string coming from the editor.

    Returns:
        The stripped, linkified markup.
    """
    parsed = rich_text.strip()
    stamp = time.time()
    anchors = re.findall('<a [^>]*>.*?</a>', parsed)

    # Placeholder -> original anchor, in order of appearance.
    exclude_from_parse = {
        f'placeholder-{stamp}-{position}': anchor
        for position, anchor in enumerate(anchors)
    }

    for token, anchor in exclude_from_parse.items():
        parsed = parsed.replace(anchor, token, 1)

    parsed = linkify(parsed, {'target': '_blank'})

    for token, anchor in exclude_from_parse.items():
        parsed = parsed.replace(token, anchor, 1)
    return parsed
示例#8
0
def render_item(item):
    """Render one item tuple as an HTML ``<div class="item">`` fragment.

    Args:
        item: 6-tuple ``(id, type, timestamp, user_id, content, media_path)``.
            Type 4 renders nothing, type 2 renders the media as a linked
            image, and type 3 renders it as an audio player.

    Returns:
        The HTML fragment, or an empty string for type 4.
    """
    item_id, kind, timestamp, user_id, content, media_path = item

    if kind == 4:
        return ''

    parts = []
    id_class = ''

    if user_id:
        # The user id doubles as an extra CSS class on the outer div.
        id_class = ' %s' % user_id
        parts.append('<div class="avatar"></div>\n')

    parts.append('<div class="container" id="item-%s">\n' % item_id)

    if content:
        parts.append('<div class="content">%s</div>\n' % linkify(content))

    if media_path:
        media_path = media_path.replace('output/', '')
        if kind == 2:
            media = '<a href="%s"><img src="%s" /></a>\n' % (media_path,
                                                             media_path)
        elif kind == 3:
            media = '<audio controls><source src="%s" type="audio/mpeg"></audio>\n' % media_path
        else:
            # Other types still get an (empty) media wrapper.
            media = ''
        parts.append('<div class="media">%s</div>\n' % media)

    if timestamp:
        stamp = datetime.fromtimestamp(timestamp).strftime('%c')
        parts.append('<div class="timestamp"><a href="#item-%s">%s</a></div>\n' % (
            item_id, stamp))

    parts.append('</div>\n')

    return '<div class="item%s">\n%s</div>\n' % (id_class, ''.join(parts))
示例#9
0
def test_uppercase():
    """An all-uppercase URL is linked with its case left unchanged."""
    expected = '<a href="HTTP://EXAMPLE.COM">HTTP://EXAMPLE.COM</a>'
    assert linkify('HTTP://EXAMPLE.COM') == expected
示例#10
0
def add_links(s):
    """Return ``s`` passed through ``linkify``."""
    linked = linkify(s)
    return linked
示例#11
0
def test_attrs():
    """Extra attributes passed via ``attrs`` are rendered on the anchor after ``href``."""
    expected = '<a href="http://ya.ru" rel="nofollow">ya.ru</a>'
    assert linkify('ya.ru', attrs={'rel': 'nofollow'}) == expected
示例#12
0
def test_nonascii():
    """A URL whose host contains a non-ASCII character is linked unchanged."""
    expected = u'<a href="http://\u5350.net/">http://\u5350.net/</a>'
    assert linkify(u'http://\u5350.net/') == expected
示例#13
0
def test_query():
    """Ampersands in a query string are escaped in the href but not in the link text."""
    expected = '<a href="http://xx.com/?a=hi&amp;b=bye">xx.com/?a=hi&b=bye</a>'
    assert linkify('xx.com/?a=hi&b=bye') == expected
示例#14
0
    # Wrap to 80 columns; long words and hyphenated words are never split.
    wrapper = TextWrapper(
        break_long_words=False,
        break_on_hyphens=False,
        width=80
    )

    # Re-wrap every input line separately. A line's leading whitespace is
    # reused as the prefix of each continuation line it produces.
    wrapped = ''
    for line in content.splitlines():
        prefix = ''
        matches = re.match('^\s+', line)
        if matches:
            prefix = matches.group(0)

        wrapped += ('\n' + prefix).join(wrapper.wrap(line)) + '\n'

    # Turn URLs in the wrapped text into anchors.
    content = autolink.linkify(wrapped)

    # Because autolink is buggy
    # (collapse a doubled "http://(http://" prefix, and drop a ")" that ends
    # up immediately before the closing quote of an attribute)
    content = content.replace("http://(http://", "http://")
    content = content.replace(")\">", "\">")

    # This looks weird, because it is. This is to deal with overlapping patterns.
    # Each match consumes the delimiter that follows the @name, so an adjacent
    # mention sharing that delimiter is only matched on a later pass; repeat
    # the substitution until the text stops changing.
    oldcontent = False
    while oldcontent != content:
        oldcontent = content
        content = re.sub(
            "(^|[ (])@([a-zA-Z0-9_]+)([. )]|$)", 
            "\\1<a href='https://twitter.com/\\2'>@\\2</a>\\3", 
            content,
            flags=re.M
        )
示例#15
0
def test_query():
    """Ampersands in a query string are escaped in the href but not in the link text."""
    expected = '<a href="http://xx.com/?a=hi&amp;b=bye">xx.com/?a=hi&b=bye</a>'
    assert linkify('xx.com/?a=hi&b=bye') == expected
示例#16
0
def test_port():
    """A numeric port is part of the link; a non-numeric ``:suffix`` is left outside it."""
    cases = (
        ('foo.com:8000',
         '<a href="http://foo.com:8000">foo.com:8000</a>'),
        ('foo.com:xkcd',
         '<a href="http://foo.com">foo.com</a>:xkcd'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#17
0
def test_bad_protocols():
    """A scheme glued to preceding letters is not treated as a protocol."""
    untouched = 'foohttp://bar'
    assert linkify(untouched) == untouched

    # Only the domain after the bogus scheme is linked.
    expected = 'fohttp://<a href="http://exampl.com">exampl.com</a>'
    assert linkify('fohttp://exampl.com') == expected
示例#18
0
def test_non_domain():
    """Dotted text that is not a domain is returned unchanged."""
    plain = 'ha.ha'
    assert linkify(plain) == plain
示例#19
0
def test_fragment():
    """A ``#fragment`` is kept as part of the linked URL."""
    url = 'http://xx.com/path#frag'
    expected = '<a href="http://xx.com/path#frag">http://xx.com/path#frag</a>'
    assert linkify(url) == expected
示例#20
0
def test_complete():
    """A URL with credentials, host, path, query and fragment is linked as a whole."""
    # NOTE(review): the credential part of these literals looks redacted
    # ("*****:*****") — confirm against the upstream test.
    url = 'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'
    expected = (
        '<a href="https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f">'
        'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'
    )
    assert linkify(url) == expected
示例#21
0
def test_port():
    """A numeric port is part of the link; a non-numeric ``:suffix`` is left outside it."""
    cases = (
        ('foo.com:8000',
         '<a href="http://foo.com:8000">foo.com:8000</a>'),
        ('foo.com:xkcd',
         '<a href="http://foo.com">foo.com</a>:xkcd'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#22
0
def test_fragment():
    """A ``#fragment`` is kept as part of the linked URL."""
    url = 'http://xx.com/path#frag'
    expected = '<a href="http://xx.com/path#frag">http://xx.com/path#frag</a>'
    assert linkify(url) == expected
示例#23
0
def test_punct():
    """Trailing sentence punctuation is left outside the link."""
    cases = (
        ('http://example.com.',
         '<a href="http://example.com">http://example.com</a>.'),
        ('http://example.com,',
         '<a href="http://example.com">http://example.com</a>,'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#24
0
def test_complete():
    """A URL with credentials, host, path, query and fragment is linked as a whole."""
    # NOTE(review): the credential part of these literals looks redacted
    # ("*****:*****") — confirm against the upstream test.
    url = 'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'
    expected = (
        '<a href="https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f">'
        'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'
    )
    assert linkify(url) == expected
示例#25
0
def test_bad_protocols():
    """A scheme glued to preceding letters is not treated as a protocol."""
    untouched = 'foohttp://bar'
    assert linkify(untouched) == untouched

    # Only the domain after the bogus scheme is linked.
    expected = 'fohttp://<a href="http://exampl.com">exampl.com</a>'
    assert linkify('fohttp://exampl.com') == expected
示例#26
0
def test_non_domain():
    """Dotted text that is not a domain is returned unchanged."""
    plain = 'ha.ha'
    assert linkify(plain) == plain
示例#27
0
def test_punct():
    """Trailing sentence punctuation is left outside the link."""
    cases = (
        ('http://example.com.',
         '<a href="http://example.com">http://example.com</a>.'),
        ('http://example.com,',
         '<a href="http://example.com">http://example.com</a>,'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#28
0
def test_uppercase():
    """An all-uppercase URL is linked with its case left unchanged."""
    expected = '<a href="HTTP://EXAMPLE.COM">HTTP://EXAMPLE.COM</a>'
    assert linkify('HTTP://EXAMPLE.COM') == expected
示例#29
0
def test_attrs():
    """Extra attributes passed via ``attrs`` are rendered on the anchor after ``href``."""
    expected = '<a href="http://ya.ru" rel="nofollow">ya.ru</a>'
    assert linkify('ya.ru', attrs={'rel': 'nofollow'}) == expected
示例#30
0
def test_email():
    """Both ``mailto:`` URLs and bare addresses are expected to become mailto links."""
    # NOTE(review): the address literals below look obfuscated/redacted
    # ("[email protected]", "*****@*****.**") — confirm against the upstream
    # test before relying on these expectations.
    cases = (
        ('mailto:[email protected]',
         '<a href="mailto:[email protected]">mailto:[email protected]</a>'),
        ('*****@*****.**',
         '<a href="mailto:[email protected]">[email protected]</a>'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#31
0
def test_nonascii():
    """A URL whose host contains a non-ASCII character is linked unchanged."""
    expected = u'<a href="http://\u5350.net/">http://\u5350.net/</a>'
    assert linkify(u'http://\u5350.net/') == expected
示例#32
0
def test_email():
    """Both ``mailto:`` URLs and bare addresses are expected to become mailto links."""
    # NOTE(review): the address literals below look obfuscated/redacted
    # ("[email protected]", "*****@*****.**") — confirm against the upstream
    # test before relying on these expectations.
    cases = (
        ('mailto:[email protected]',
         '<a href="mailto:[email protected]">mailto:[email protected]</a>'),
        ('*****@*****.**',
         '<a href="mailto:[email protected]">[email protected]</a>'),
    )
    for raw, expected in cases:
        assert linkify(raw) == expected
示例#33
0
def test_nested_email():
    """An address-like part inside an http:// URL yields one http link, not a mailto link."""
    # NOTE(review): "[email protected]" looks like an obfuscated address —
    # confirm the literal against the upstream test.
    url = 'http://[email protected]'
    expected = (
        '<a href="http://[email protected]">'
        'http://[email protected]</a>'
    )
    assert linkify(url) == expected
示例#34
0
def test_nested_email():
    """An address-like part inside an http:// URL yields one http link, not a mailto link."""
    # NOTE(review): "[email protected]" looks like an obfuscated address —
    # confirm the literal against the upstream test.
    url = 'http://[email protected]'
    expected = (
        '<a href="http://[email protected]">'
        'http://[email protected]</a>'
    )
    assert linkify(url) == expected
示例#35
0
 def html_body(self):
     """Return ``self.body`` escaped, then linkified, wrapped in ``Markup``."""
     escaped = escape(self.body)
     linked = linkify(escaped)
     return Markup(linked)
import jinja2, os
from jinja2 import utils
import datetime
from autolink import linkify
import cgi

def day_of_date(date):
	"""Template filter: format ``date``, shifted back five hours, as e.g. 'Sunday, 10:30 PM'.

	NOTE(review): the fixed -5h shift looks like a UTC-to-local conversion
	that ignores daylight saving time — confirm.
	"""
	display_format = '%A, %I:%M %p'
	shifted = date + datetime.timedelta(hours=-5)
	return shifted.strftime(display_format)

# Shared Jinja2 environment: templates are loaded from the "templates"
# directory next to this module, with autoescaping switched on.
# NOTE(review): the 'jinja2.ext.autoescape' extension is built into newer
# Jinja2 releases and no longer shipped separately — confirm against the
# pinned Jinja2 version.
JINJA_ENVIRONMENT = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')),
    extensions=['jinja2.ext.autoescape'],
    autoescape=True)

# Custom template filters.
JINJA_ENVIRONMENT.filters['day_of_date'] = day_of_date
# HTML-escape the text first, then turn the URLs in it into anchors.
# NOTE(review): cgi.escape was removed in Python 3.8 — confirm the target
# interpreter.
JINJA_ENVIRONMENT.filters['linkify'] = lambda text: linkify(cgi.escape(text))

def template(filename, vals=None):
	"""Render the template ``filename`` from ``JINJA_ENVIRONMENT``.

	Args:
		filename: Template name, resolved by the environment's loader.
		vals: Mapping of template variables; ``None`` (the default) means
			an empty context.

	Returns:
		The rendered template as a string.
	"""
	# A fresh dict per call replaces the shared mutable default ``{}``.
	context = {} if vals is None else vals
	return JINJA_ENVIRONMENT.get_template(filename).render(context)

def template_string(template, vals=None):
	"""Render the template source string ``template`` with a standalone ``jinja2.Template``.

	Args:
		template: Jinja2 template source.
		vals: Mapping of template variables; ``None`` (the default) means
			an empty context.

	Returns:
		The rendered template as a string.
	"""
	# A fresh dict per call replaces the shared mutable default ``{}``.
	context = {} if vals is None else vals
	return jinja2.Template(template).render(context)
示例#37
0
import jinja2, os
from jinja2 import utils
import datetime
from autolink import linkify
import cgi


def day_of_date(date):
    """Template filter: format ``date``, shifted back five hours, as e.g. 'Sunday, 10:30 PM'.

    NOTE(review): the fixed -5h shift looks like a UTC-to-local conversion
    that ignores daylight saving time — confirm.
    """
    display_format = '%A, %I:%M %p'
    shifted = date + datetime.timedelta(hours=-5)
    return shifted.strftime(display_format)


# Shared Jinja2 environment: templates are loaded from the "templates"
# directory next to this module, with autoescaping switched on.
# NOTE(review): the 'jinja2.ext.autoescape' extension is built into newer
# Jinja2 releases and no longer shipped separately — confirm against the
# pinned Jinja2 version.
JINJA_ENVIRONMENT = jinja2.Environment(loader=jinja2.FileSystemLoader(
    os.path.join(os.path.dirname(__file__), 'templates')),
                                       extensions=['jinja2.ext.autoescape'],
                                       autoescape=True)

# Custom template filters.
JINJA_ENVIRONMENT.filters['day_of_date'] = day_of_date
# HTML-escape the text first, then turn the URLs in it into anchors.
# NOTE(review): cgi.escape was removed in Python 3.8 — confirm the target
# interpreter.
JINJA_ENVIRONMENT.filters['linkify'] = lambda text: linkify(cgi.escape(text))


def template(filename, vals=None):
    """Render the template ``filename`` from ``JINJA_ENVIRONMENT``.

    Args:
        filename: Template name, resolved by the environment's loader.
        vals: Mapping of template variables; ``None`` (the default) means
            an empty context.

    Returns:
        The rendered template as a string.
    """
    # A fresh dict per call replaces the shared mutable default ``{}``.
    context = {} if vals is None else vals
    return JINJA_ENVIRONMENT.get_template(filename).render(context)


def template_string(template, vals=None):
    """Render the template source string ``template`` with a standalone ``jinja2.Template``.

    Args:
        template: Jinja2 template source.
        vals: Mapping of template variables; ``None`` (the default) means
            an empty context.

    Returns:
        The rendered template as a string.
    """
    # A fresh dict per call replaces the shared mutable default ``{}``.
    context = {} if vals is None else vals
    return jinja2.Template(template).render(context)