def test_protocol_relative():
    """A ``//host`` href is not treated as a relative link."""
    markup = 'bad <a href="//ex.mp">link</a>'
    stripped = 'bad link'

    # Removed by default, and still removed when relative links are allowed.
    for options in ({}, {'allow_relative': True}):
        eq_(stripped, bleach.delinkify(markup, **options))

    # Kept untouched once its domain is explicitly allowed.
    eq_(markup, bleach.delinkify(markup, allow_domains='ex.mp'))
def test_relative_allow_domains():
    """``allow_relative`` decides the fate of relative links, whatever
    ``allow_domains`` contains."""
    markup = 'some <a href="/foo/bar">link</a>'

    # (expected output, keyword options passed to delinkify)
    cases = (
        ('some link', {}),
        (markup, {'allow_relative': True, 'allow_domains': ['ex.mp']}),
        ('some link', {'allow_relative': False, 'allow_domains': ['ex.mp']}),
    )
    for expected, options in cases:
        eq_(expected, bleach.delinkify(markup, **options))
def extract_friend(self, friend_elem, friend_type='in-game'):
    """Build a dict describing one friend from its scraped markup.

    Args:
        friend_elem: Query object for a single friend entry. It must offer
            ``find()`` returning objects with ``attr()``, ``text()`` and
            ``html()`` (presumably a PyQuery selection -- confirm with the
            caller).
        friend_type: Suffix of the ``a.linkFriend_<type>`` CSS class that
            identifies the friend's profile link.

    Returns:
        A dict with the keys ``url``, ``current_name``, ``avatar``,
        ``username`` and ``status``. ``status`` is one of ``'offline'``,
        ``'online'``, ``'ingame'`` or ``'unknown'``; a ``game`` key is added
        only when the status is ``'ingame'``.
    """
    # Look the profile link up once; both the URL and the name come from it.
    profile_link = friend_elem.find('a.linkFriend_{0}'.format(friend_type))
    friend_data = {
        'url': profile_link.attr('href'),
        'current_name': profile_link.text(),
        'avatar': friend_elem.find('.avatarIcon img').attr('src'),
        'status': 'unknown',
    }
    friend_data['username'] = self.parse_username_from_url(friend_data['url'])

    # What follows hurts. Hooray for scraping!
    raw_status_html = friend_elem.find('.friendSmallText').html()
    if raw_status_html is None:
        # No status markup was found (assumes html() yields None for an empty
        # selection, as PyQuery does -- confirm). Leave the status 'unknown'
        # instead of crashing while cleaning up a non-string.
        return friend_data

    # Turn line breaks into spaces, drop the remaining tags, then collapse
    # every whitespace run so the prefix checks below see tidy text.
    raw_status = raw_status_html.replace('<br/>', ' ')
    clean_status = bleach.clean(raw_status, strip=True)
    friend_status = re.sub(r'\s+', ' ', clean_status).strip()

    if not friend_status or friend_status.startswith(('Last', 'Friends')):
        friend_data['status'] = 'offline'
    elif friend_status.startswith('Online'):
        friend_data['status'] = 'online'
    elif friend_status.startswith('In'):
        friend_data['status'] = 'ingame'
        # Get the game name as well.
        game_text = friend_status.replace('In-Game', '').strip()
        friend_data['game'] = bleach.delinkify(game_text)

    return friend_data
def test_mailto_allow_some():
    """With ``allow_domains=['ex.mp']`` only the first of four mailto links
    is expected to survive; the others are reduced to their text."""
    # NOTE(review): the "[email protected]" addresses look like a redaction
    # artifact rather than the original fixtures -- confirm and restore.
    anchors = (
        '<a href="mailto:[email protected]?subject=foo">a</a>',
        '<a href="mailto:[email protected]?subject=wtf">b</a>',
        '<a href="mailto:Hah%20%3Celse%40evil.com%3E?subject=ow">c</a> ',
        '<a href="mailto:BadGuy%20%3Csomeone%40evil.com%3E">d</a>',
    )
    markup = ''.join(anchors)
    expected = '<a href="mailto:[email protected]?subject=foo">a</a>bc d'

    eq_(expected, bleach.delinkify(markup, allow_domains=['ex.mp']))
def test_delinkify():
    """By default a link is replaced with its text; text around it stays."""
    # (expected output, input markup)
    cases = (
        ('test', '<a href="http://ex.mp">test</a>'),
        ('footestbar', 'foo<a href="http://ex.mp">test</a>bar'),
    )
    for expected, markup in cases:
        eq_(expected, bleach.delinkify(markup))
def test_relative():
    """Relative links are removed unless ``allow_relative`` is set."""
    markup = 'some <a href="/foo/bar">link</a>'

    without_link = bleach.delinkify(markup)
    eq_('some link', without_link)

    with_link = bleach.delinkify(markup, allow_relative=True)
    eq_(markup, with_link)
def test_a_name():
    """An ``<a>`` that only carries ``name`` (no ``href``) is left alone."""
    anchor = '<a name="foo">bar</a>'
    result = bleach.delinkify(anchor)
    eq_(anchor, result)
def test_nested_tag():
    """Removing a link keeps the markup nested inside it."""
    inner = 'test<span>test</span>'
    markup = '<a href="http://ex.mp">test<span>test</span></a>'
    eq_(inner, bleach.delinkify(markup))
def test_nested_a():
    """Links written inside one another are each handled on their own."""
    markup = '<a href="http://ex.mp">test<a href="http://foo.bar">test</a></a>'

    # Nothing allowed: both links collapse to their text.
    eq_('testtest', bleach.delinkify(markup))

    # Only ex.mp allowed: the first link stays, the second becomes plain text.
    only_ex_mp = bleach.delinkify(markup, allow_domains=['ex.mp'])
    eq_('<a href="http://ex.mp">test</a>test', only_ex_mp)
def test_whitelist():
    """``allow_domains`` keeps links to listed domains and strips the rest."""
    link = '<a href="http://ex.mp">test</a>'

    kept = bleach.delinkify(link, allow_domains=['ex.mp'])
    eq_(link, kept)

    removed = bleach.delinkify(link, allow_domains=['ex2.mp'])
    eq_('test', removed)

    # Allow a single domain as a special case.
    kept_with_string = bleach.delinkify(link, allow_domains='ex.mp')
    eq_(link, kept_with_string)
def _check(html, text):
    """Assert the result of delinkifying ``html`` with the ``domains`` whitelist.

    ``text`` is the expected output; pass ``None`` to state that ``html`` must
    come back unchanged. ``domains`` is read from the surrounding scope.
    """
    expected = text if text is not None else html
    eq_(expected, bleach.delinkify(html, allow_domains=domains))
def test_mailto_disallow_all():
    """``allow_mailto=False`` strips every mailto link, even when
    ``allow_domains`` is also given."""
    # NOTE(review): the "[email protected]" addresses look like a redaction
    # artifact rather than the original fixtures -- confirm and restore.
    first = '<a href="mailto:[email protected]">Unchanged</a> '
    second = '<a href="mailto:[email protected]">mail</a>'
    markup = first + second

    result = bleach.delinkify(markup, allow_mailto=False,
                              allow_domains=['ex.mp'])
    eq_('Unchanged mail', result)
def test_mailto_allow_all():
    """``allow_mailto=True`` leaves every mailto link untouched."""
    # NOTE(review): the "[email protected]" addresses look like a redaction
    # artifact rather than the original fixtures -- confirm and restore.
    first = '<a href="mailto:[email protected]">Unchanged</a> '
    second = '<a href="mailto:[email protected]">mail</a>'
    markup = first + second

    eq_(markup, bleach.delinkify(markup, allow_mailto=True))
def test_mailto_allow_example():
    """Only the mailto link for the allowed domain is expected to survive."""
    # example.com is allowed, while ex.mp is not.
    # NOTE(review): both addresses read "[email protected]", which looks like
    # a redaction artifact; as written the two hrefs are identical -- confirm
    # and restore the original fixtures.
    first = '<a href="mailto:[email protected]">Unchanged</a> '
    second = '<a href="mailto:[email protected]">mail</a>'
    markup = first + second
    expected = '<a href="mailto:[email protected]">Unchanged</a> mail'

    eq_(expected, bleach.delinkify(markup, allow_domains=['example.com']))