def parse_links(html, request_path=None, encoding=None):
    """Process all links in given html and replace them if markup is added.

    Args:
        html: The HTML to process, as either a string or bytes.
        request_path: Passed through unchanged to ``add_link_markup`` for
            every link. Defaults to ``None``.
        encoding: Encoding used to turn ``html`` into text. Falls back to
            ``settings.DEFAULT_CHARSET`` when ``None``.

    Returns:
        The HTML as text, after Wagtail reference expansion, with each link
        for which ``add_link_markup`` returned a truthy value replaced by
        that value. When ``get_body_html`` returns ``None`` the expanded
        HTML is returned without any link processing.
    """
    if encoding is None:
        encoding = settings.DEFAULT_CHARSET

    # The passed HTML may be a string or bytes, depending on what is calling
    # this method. For example, Django response.content is always bytes. We
    # always want this content to be a string for our purposes.
    html_as_text = force_str(html, encoding=encoding)

    # This call invokes Wagtail-specific logic that converts references to
    # Wagtail pages, documents, and images to their proper link URLs.
    expanded_html = expand_db_html(html_as_text)

    # Parse links only in the <body> of the HTML.
    body_html = get_body_html(expanded_html)
    if body_html is None:
        return expanded_html

    # str.replace() below rewrites every occurrence of a tag in one go, so a
    # link that is repeated on the page only needs to be handled the first
    # time it is seen. Skipping the repeats avoids redundant calls to
    # add_link_markup and a second replacement pass over the same text.
    seen_tags = set()
    for tag in get_link_tags(body_html):
        if tag in seen_tags:
            continue
        seen_tags.add(tag)

        tag_with_markup = add_link_markup(tag, request_path)
        if tag_with_markup:
            expanded_html = expanded_html.replace(tag, tag_with_markup)

    return expanded_html
def parse_links(html, encoding=None):
    """Add markup to the links found in ``html`` and return the result.

    Args:
        html: The HTML to process, as either a string or bytes.
        encoding: Encoding used to turn ``html`` into text. Falls back to
            ``settings.DEFAULT_CHARSET`` when ``None``.

    Returns:
        The HTML as text, after Wagtail reference expansion, with each link
        for which ``add_link_markup`` returned a truthy value replaced by
        that value.
    """
    charset = settings.DEFAULT_CHARSET if encoding is None else encoding

    # Callers may hand over either text or bytes (Django's response.content
    # is always bytes, for instance); everything below works on text.
    text = force_text(html, encoding=charset)

    # Wagtail-specific step: references to Wagtail pages, documents, and
    # images are converted to their proper link URLs.
    result = expand_db_html(text)

    soup = BeautifulSoup(result, 'html.parser')
    for link in get_link_tags(soup):
        # Serialize the tag before calling add_link_markup, keeping the
        # same order of operations in case that call touches the tag
        # (its implementation is not visible here).
        serialized = str(link)
        replacement = add_link_markup(link)
        if not replacement:
            continue
        result = result.replace(serialized, replacement)

    return result
def test_ask_short_url(self):
    # Checks add_link_markup against Ask CFPB URLs: valid ones must yield
    # markup carrying the short "pretty" href, invalid ones must yield None.
    # Each URL runs in its own subTest so that a failure names the offending
    # URL and the remaining URLs are still checked.
    path = '/'

    # Valid Ask CFPB URLs
    valid_urls = [
        '/ask-cfpb/what-is-a-construction-loan-en-108/',
        'https://cfpb.gov/ask-cfpb/what-is-a-construction-loan-en-108/',
        'https://consumerfinance.gov/ask-cfpb/what-is-a-construction-loan-en-108/',  # noqa: E501
        'https://www.consumerfinance.gov/ask-cfpb/what-is-a-construction-loan-en-108/',  # noqa: E501
    ]
    for url in valid_urls:
        with self.subTest(url=url):
            tag = "<a href='{}'>foo</a>".format(url)
            self.assertIn(
                'data-pretty-href="cfpb.gov/askcfpb/108"',
                add_link_markup(tag, path)
            )

    # Invalid Ask CFPB URLs
    invalid_urls = [
        '/ask-cfpb/not-a-valid-link/',
        '/askcfpb/123',
        'https://consumerfinance.gov/ask-cfpb-in-the-url',
        'https://consumerfinance.gov/ask-cfpb-in-the-url/123',
    ]
    for url in invalid_urls:
        with self.subTest(url=url):
            tag = "<a href='{}'>foo</a>".format(url)
            self.assertIsNone(add_link_markup(tag, path))
def parse_links(html, encoding=None):
    """Process all links in given html and replace them if markup is added.

    Args:
        html: The HTML to process. Bytes (or a bytearray) are decoded with
            ``encoding``; text is used as-is.
        encoding: Encoding used to decode the input and to encode the
            result. Falls back to ``settings.DEFAULT_CHARSET`` when ``None``.

    Returns:
        The processed HTML encoded to bytes with ``encoding``, regardless of
        whether the input was bytes or text.
    """
    if encoding is None:
        encoding = settings.DEFAULT_CHARSET

    # Only decode byte input. Text has no .decode() on Python 3, so decoding
    # unconditionally raised AttributeError for callers that already held a
    # string.
    if isinstance(html, (bytes, bytearray)):
        html = html.decode(encoding)

    html = expand_db_html(html)
    soup = BeautifulSoup(html, 'html.parser')

    for tag in get_link_tags(soup):
        # Serialize the tag before handing it to add_link_markup, in case
        # that call modifies the tag it is given (not visible here).
        original_link = str(tag)
        link_with_markup = add_link_markup(tag)
        if link_with_markup:
            html = html.replace(original_link, link_with_markup)

    return html.encode(encoding)
def parse_links(html, encoding=None):
    """Return ``html`` as text with link markup applied where available.

    Args:
        html: The HTML to process, as either a string or bytes.
        encoding: Encoding used to turn ``html`` into text. Falls back to
            ``settings.DEFAULT_CHARSET`` when ``None``.

    Returns:
        The HTML as text, after Wagtail reference expansion, with each link
        for which ``add_link_markup`` returned a truthy value replaced by
        that value.
    """
    if encoding is None:
        encoding = settings.DEFAULT_CHARSET

    # Input may be bytes (e.g. Django's response.content) or a string, so it
    # is coerced to text first. The text is then run through Wagtail's
    # expansion, which converts references to Wagtail pages, documents, and
    # images into their proper link URLs.
    markup = expand_db_html(force_text(html, encoding=encoding))

    parsed = BeautifulSoup(markup, 'html.parser')
    for anchor in get_link_tags(parsed):
        # The serialized form is captured first and used as the search text
        # for the replacement below.
        before = str(anchor)
        after = add_link_markup(anchor)
        if after:
            markup = markup.replace(before, after)

    return markup
def test_add_link_markup_download(self):
    # A link pointing at a PDF file must come back with markup that matches
    # VALID_LINK_MARKUP.
    markup = add_link_markup(
        '<a href="https://example.com/file.pdf">foo</a>',
        '/about-us/blog/'
    )
    self.assertRegex(markup, VALID_LINK_MARKUP)
def test_add_link_markup_external(self):
    # A link routed through /external-site/ with an ext_url query parameter
    # must come back with markup that matches VALID_LINK_MARKUP.
    external_link = '<a href="/external-site/?ext_url=https%3A%2F%2Fexample.com">foo</a>'  # noqa: E501
    markup = add_link_markup(external_link, '/about-us/blog/')
    self.assertRegex(markup, VALID_LINK_MARKUP)
def test_add_link_markup_anchor(self):
    # When the link's href is the current request path plus a fragment, the
    # returned tag keeps only the fragment and gains an empty class.
    current_path = '/about-us/blog/'
    anchor_link = '<a href="/about-us/blog/#anchor">bar</a>'
    expected = '<a class="" href="#anchor">bar</a>'
    self.assertEqual(add_link_markup(anchor_link, current_path), expected)
def test_add_link_markup_invalid(self):
    # Input that is not a link tag at all must produce no markup.
    # assertIsNone checks identity with None (rather than ==) and matches
    # how test_ask_short_url asserts the same "no markup" result.
    tag = 'not a valid tag'
    path = '/about-us/blog/'
    self.assertIsNone(add_link_markup(tag, path))