def test_wrapping_parentheses():
    assert linkify('(example.com/)') \
        == '(<a href="http://example.com/">example.com/</a>)'
    assert linkify('example.com/hi_(there)') \
        == '<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>'
    assert linkify('(example.com/hi_(there))') \
        == '(<a href="http://example.com/hi_(there)">example.com/hi_(there)</a>)'

def test_simple():
    assert linkify('a http://example.com link') \
        == 'a <a href="http://example.com">http://example.com</a> link'
    assert linkify('a https://ya.ru link') \
        == 'a <a href="https://ya.ru">https://ya.ru</a> link'
    assert linkify('a example.com link') \
        == 'a <a href="http://example.com">example.com</a> link'

def get_html(cell):
    # get_text() and build_tags() are helpers defined elsewhere in the module.
    text = get_text(cell)
    spans = cell.get('textFormatRuns', [])
    eff = cell.get('effectiveFormat', {'textFormat': {}})['textFormat']
    if spans:
        # Walk the format runs, closing the previous run's tags and opening
        # the next run's tags at every startIndex boundary.
        fmt = {**eff, **spans[0]['format']}
        out = build_tags(fmt)
        spans = spans[1:]
        previdx = 0
        while spans:
            out += text[previdx:spans[0]['startIndex']]
            previdx = spans[0]['startIndex']
            out += build_tags(fmt, True)
            fmt = {**eff, **spans[0]['format']}
            out += build_tags(fmt)
            spans = spans[1:]
        out += text[previdx:]
        out += build_tags(fmt, True)
    else:
        out = build_tags(eff)
        out += text
        out += build_tags(eff, True)
    out = out.replace("\n", "<br/>")
    out = linkify(out)
    return out

def URL_rep(text):
    url_info = dict()
    extractor = URLExtract()
    urls = extractor.find_urls(text)
    if not urls:
        return url_info
    count = 0
    for url in urls:
        url_org = url
        try:
            article = Article(url)
            response = requests.get(url, verify=False, timeout=20)
            if response.status_code != 200:
                raise Exception('---------')
        except Exception:
            # The plain URL failed; pull the normalized href out of linkify()
            # (it adds a scheme to bare domains) and retry with that.
            temp_url = re.search('"(.*)"', linkify(url)).group(1)
            url = temp_url
            response = requests.get(url, verify=False, timeout=20)
            if response.status_code != 200:
                count += 1
                url_info[url_org] = "NA"
                continue
        try:
            article = Article(url)
            article.download()
            article.parse()
            title = "\n" + article.title + "\n" + article.text
            # Keep the page only if it mentions one of the vaccine terms.
            if not any(item for item in vaccine_terms if item in title.casefold()):
                title = 'NA'
            url_info[url_org] = title
        except Exception as e:
            # newspaper could not parse the page; fall back to scraping the
            # <p> and <title> text with BeautifulSoup.
            html = response.content
            soup = BeautifulSoup(html, "html.parser")  # features="lxml"
            matches = soup.findAll(['p', 'title'], text=True, limit=None)
            # text = re.compile('vaccin')
            linkcontent = []
            for node in matches:
                if type(node) is bs4.element.Tag:
                    linkcontent.append(node.get_text())
                else:
                    linkcontent.append(node)
            if not linkcontent:
                title = 'NA'
            elif not any(item for item in vaccine_terms
                         if item in " ".join(linkcontent).casefold()):
                title = 'NA'
            else:
                title = "\n" + " ".join(linkcontent)
            url_info[url_org] = title
    return url_info

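# Side note on the fallback above (standalone, hypothetical demo): linkify()
# wraps bare domains in an anchor with a scheme added, so pulling the href
# back out of its output is a cheap way to normalize a schemeless URL.
from autolink import linkify
import re

print(re.search('"(.*)"', linkify('example.com')).group(1))
# http://example.com
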
def process_input_from_rich_text_editor(rich_text):
    parsed = rich_text.strip()
    exclude_from_parse = {}
    now = time.time()
    # Swap existing <a> tags out for unique placeholders so linkify() does not
    # try to re-link their contents.
    for index, match in enumerate(re.findall('<a [^>]*>.*?</a>', parsed)):
        placeholder = f'placeholder-{now}-{index}'
        exclude_from_parse[placeholder] = match
        parsed = parsed.replace(match, placeholder, 1)
    parsed = linkify(parsed, {'target': '_blank'})
    # Put the original anchors back.
    for placeholder, match in exclude_from_parse.items():
        parsed = parsed.replace(placeholder, match, 1)
    return parsed

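# Hedged usage sketch for the function above (hypothetical input; assumes the
# module imports re, time and autolink.linkify): anchors already present in
# the editor HTML survive untouched, while bare domains get linked with
# target="_blank".
html = 'see <a href="https://a.example">a</a> and also b.example.com'
print(process_input_from_rich_text_editor(html))
# expected, approximately:
# see <a href="https://a.example">a</a> and also
#   <a href="http://b.example.com" target="_blank">b.example.com</a>
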
def render_item(item):
    id, type, timestamp, user_id, content, media_path = item
    if type == 4:  # type 4 items are skipped entirely
        return ''
    inner = ''
    id_class = ''
    if user_id:
        id_class = ' %s' % user_id
    inner += '<div class="avatar"></div>\n'
    inner += '<div class="container" id="item-%s">\n' % id
    if content:
        inner += '<div class="content">%s</div>\n' % linkify(content)
    if media_path:
        media_path = media_path.replace('output/', '')
        media = ''
        if type == 2:  # image
            media = '<a href="%s"><img src="%s" /></a>\n' % (media_path, media_path)
        elif type == 3:  # audio
            media = '<audio controls><source src="%s" type="audio/mpeg"></audio>\n' % media_path
        inner += '<div class="media">%s</div>\n' % media
    if timestamp:
        time = datetime.fromtimestamp(timestamp).strftime('%c')
        inner += '<div class="timestamp"><a href="#item-%s">%s</a></div>\n' % (
            id, time)
    inner += '</div>\n'
    return '<div class="item%s">\n%s</div>\n' % (id_class, inner)

def test_uppercase():
    assert linkify('HTTP://EXAMPLE.COM') \
        == '<a href="HTTP://EXAMPLE.COM">HTTP://EXAMPLE.COM</a>'

def add_links(s):
    return linkify(s)

def test_attrs():
    assert linkify('ya.ru', attrs={'rel': 'nofollow'}) \
        == '<a href="http://ya.ru" rel="nofollow">ya.ru</a>'

def test_nonascii():
    assert linkify(u'http://\u5350.net/') \
        == u'<a href="http://\u5350.net/">http://\u5350.net/</a>'

def test_query():
    assert linkify('xx.com/?a=hi&b=bye') \
        == '<a href="http://xx.com/?a=hi&b=bye">xx.com/?a=hi&b=bye</a>'

wrapper = TextWrapper(
    break_long_words=False,
    break_on_hyphens=False,
    width=80
)
wrapped = ''
for line in content.splitlines():
    prefix = ''
    matches = re.match(r'^\s+', line)
    if matches:
        prefix = matches.group(0)
    wrapped += ('\n' + prefix).join(wrapper.wrap(line)) + '\n'
content = autolink.linkify(wrapped)
# Because autolink is buggy
content = content.replace("http://(http://", "http://")
content = content.replace(")\">", "\">")
# This looks weird, because it is. This is to deal with overlapping patterns.
oldcontent = False
while oldcontent != content:
    oldcontent = content
    content = re.sub(
        "(^|[ (])@([a-zA-Z0-9_]+)([. )]|$)",
        "\\1<a href='https://twitter.com/\\2'>@\\2</a>\\3",
        content,
        flags=re.M
    )

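# Quick demo of the @handle pass on a sample line (hypothetical text): the
# first pass cannot match "@bob" because the space before it was consumed by
# the "@alice" match, which is exactly why the loop re-runs the substitution.
import re

text = "thanks @alice @bob (and @carol)"
previous = None
while previous != text:
    previous = text
    text = re.sub(
        "(^|[ (])@([a-zA-Z0-9_]+)([. )]|$)",
        "\\1<a href='https://twitter.com/\\2'>@\\2</a>\\3",
        text, flags=re.M)
print(text)
# thanks <a href='https://twitter.com/alice'>@alice</a> <a href='https://twitter.com/bob'>@bob</a> (and <a href='https://twitter.com/carol'>@carol</a>)
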
def test_port():
    assert linkify('foo.com:8000') \
        == '<a href="http://foo.com:8000">foo.com:8000</a>'
    assert linkify('foo.com:xkcd') \
        == '<a href="http://foo.com">foo.com</a>:xkcd'

def test_bad_protocols():
    assert linkify('foohttp://bar') == 'foohttp://bar'
    assert linkify('fohttp://exampl.com') \
        == 'fohttp://<a href="http://exampl.com">exampl.com</a>'

def test_non_domain():
    assert linkify('ha.ha') == 'ha.ha'

def test_fragment():
    assert linkify('http://xx.com/path#frag') \
        == '<a href="http://xx.com/path#frag">http://xx.com/path#frag</a>'

def test_complete():
    assert linkify('https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f') \
        == '<a href="https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f">' \
           'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'

def test_punct():
    assert linkify('http://example.com.') \
        == '<a href="http://example.com">http://example.com</a>.'
    assert linkify('http://example.com,') \
        == '<a href="http://example.com">http://example.com</a>,'

def test_email():
    assert linkify('mailto:[email protected]') \
        == '<a href="mailto:[email protected]">mailto:[email protected]</a>'
    assert linkify('*****@*****.**') \
        == '<a href="mailto:[email protected]">[email protected]</a>'

def test_nested_email():
    assert linkify('http://[email protected]') \
        == '<a href="http://[email protected]">' \
           'http://[email protected]</a>'

def html_body(self):
    # Escape the raw body first so user-supplied HTML is neutralized, then let
    # linkify() insert anchors; Markup keeps the generated markup from being
    # re-escaped by the template engine.
    return Markup(linkify(escape(self.body)))

import jinja2, os
from jinja2 import utils
import datetime
from autolink import linkify
import cgi  # cgi.escape() was removed in Python 3.8; html.escape() is the modern equivalent


def day_of_date(date):
    """A filter to convert dates into strings suitable for display on the
    day-of page."""
    offset = datetime.timedelta(hours=-5)
    return (date + offset).strftime('%A, %I:%M %p')


JINJA_ENVIRONMENT = jinja2.Environment(
    loader=jinja2.FileSystemLoader(
        os.path.join(os.path.dirname(__file__), 'templates')),
    extensions=['jinja2.ext.autoescape'],
    autoescape=True)
JINJA_ENVIRONMENT.filters['day_of_date'] = day_of_date
# Escape the text before linkifying so user-supplied HTML is neutralized.
JINJA_ENVIRONMENT.filters['linkify'] = lambda text: linkify(cgi.escape(text))


def template(filename, vals={}):
    return JINJA_ENVIRONMENT.get_template(filename).render(vals)


def template_string(template, vals={}):
    return jinja2.Template(template).render(vals)