def parse_emails(self, response):
    """Yield one ``EmailItem`` per email address extracted from *response*.

    The response body is passed through ``remove_tags`` with
    ``['script', 'style']`` before extraction (presumably so that script
    and stylesheet content is not scanned for addresses -- confirm against
    the helper's implementation).

    Args:
        response: Object exposing ``url``, ``body`` and a ``meta`` mapping
            that contains a ``'depth'`` entry.

    Yields:
        EmailItem: Built from a mapping keyed by ``PageUrl``, ``Depth`` and
        ``Email`` (names resolved from the enclosing scope), holding the
        page URL, the crawl depth and the extracted address.

    Note:
        This is a generator, so nothing runs until it is iterated; a
        missing ``'depth'`` entry in ``response.meta`` surfaces at that
        point (``KeyError`` for a plain dict).
    """
    # Resolved once up front: these are the same for every item on the page.
    url = response.url
    depth = response.meta['depth']
    markup = remove_tags(response.body, ['script', 'style'])

    # Parse the page response data for email links
    for email in extract_emails(markup):
        yield EmailItem({PageUrl: url, Depth: depth, Email: email})
def test_remove_tags_raises_error():
    """``remove_tags`` raises ``ValueError`` for a bare-string tag argument.

    The full-page fixture is passed together with the plain string
    ``'script'`` as the second argument, and the call is expected to fail.
    """
    # Fetch the fixture outside the ``raises`` context so that only the
    # ``remove_tags`` call itself is checked for the exception.
    page_markup = full_page.FULL_PAGE

    with pytest.raises(ValueError):
        remove_tags(page_markup, 'script')