def test_create_transaction_does_not_create_new_link_for_an_anchor_tag_with_no_href_attribute(
        self):
    """An anchor tag whose attributes lack ``href`` must not start a new link."""
    builder = mock()

    # Only a ``class`` attribute is supplied -- there is no ``href``.
    LinkTagParser(builder).create_transaction("a", [('class', '.sp-link')])

    # The builder must never have been asked to create a link.
    verify(builder, times=0).create_new(ANY)
def test_commit_returns_nothing_when_there_is_no_existing_transaction(self):
    """Committing without a prior ``create_transaction`` call yields ``None``."""
    parser = LinkTagParser(mock())

    self.assertIsNone(parser.commit("a"))
def test_create_transaction_does_not_create_new_link_for_non_anchor_tag(self):
    """A non-anchor tag (here ``span``) must not start a new link."""
    builder = mock()

    LinkTagParser(builder).create_transaction("span", [('id', '_about')])

    # The builder must never have been asked to create a link.
    verify(builder, times=0).create_new(ANY)
def test_create_transaction_creates_new_link_for_anchor_tag_with_valid_attributes(
        self):
    """An anchor tag carrying an ``href`` asks the builder for a new link."""
    builder = mock()

    LinkTagParser(builder).create_transaction("a", [('href', '/about')])

    # The href value is what gets forwarded to the builder.
    verify(builder).create_new("/about")
def test_do_not_add_data_to_link_when_no_transaction_exists(self):
    """Content added while no transaction is open is not passed to the builder."""
    builder = mock()
    # Stub create_new to hand back the same mock builder.
    when(builder).create_new("/about").thenReturn(builder)

    # No create_transaction call precedes add_content.
    LinkTagParser(builder).add_content("About")

    verify(builder, times=0).with_label("About")
def test_return_created_link_from_existing_transaction_on_commit(self):
    """Committing an open anchor transaction returns the link built by the builder."""
    expected_link = Link(url="/about", label="About",
                         parent_url="http://something.com")
    mock_link_builder = mock()
    # Stub create_new to hand back the same mock builder, and build() to
    # return the link this test expects to get from commit().
    when(mock_link_builder).create_new("/about").thenReturn(mock_link_builder)
    when(mock_link_builder).build().thenReturn(expected_link)
    tag_parser = LinkTagParser(mock_link_builder)
    tag_parser.create_transaction("a", [('href', '/about')])

    link = tag_parser.commit("a")

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_link, link)
def test_add_data_to_link_when_a_transaction_exists(self):
    """Content added during an open transaction is forwarded as the link label."""
    builder = mock()
    # Stub create_new to hand back the same mock builder.
    when(builder).create_new("/about").thenReturn(builder)
    parser = LinkTagParser(builder)

    parser.create_transaction("a", [('href', '/about')])
    parser.add_content("About")

    verify(builder).with_label("About")
def test_create_transaction_does_not_create_new_link_when_a_transaction_exists(
        self):
    """A second ``create_transaction`` while one is open creates no further link."""
    builder = mock()
    # Stub create_new to hand back the same mock builder.
    when(builder).create_new("/about").thenReturn(builder)
    parser = LinkTagParser(builder)

    # First call opens the transaction; the second arrives before any commit.
    parser.create_transaction("a", [('href', '/about')])
    parser.create_transaction("a", [('href', '/blog')])

    verify(builder).create_new("/about")
    verify(builder, times=0).create_new("/blog")
def test_commit_returns_nothing_when_called_the_second_after_a_transaction_commit(
        self):
    """The first commit returns the built link; a second commit returns ``None``."""
    expected_link = Link(url="/about", label="About",
                         parent_url="http://something.com")
    mock_link_builder = mock()
    # Stub create_new to hand back the same mock builder, and build() to
    # return the link expected from the first commit().
    when(mock_link_builder).create_new("/about").thenReturn(mock_link_builder)
    when(mock_link_builder).build().thenReturn(expected_link)
    tag_parser = LinkTagParser(mock_link_builder)
    tag_parser.create_transaction("a", [('href', '/about')])

    link = tag_parser.commit("a")
    link_second_call = tag_parser.commit("a")

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_link, link)
    self.assertIsNone(link_second_call)
def test_can_parse_anchor_tags(self):
    """``can_parse`` reports true for the ``a`` tag."""
    parser = LinkTagParser(mock())

    self.assertTrue(parser.can_parse("a"))
def test_cannot_parse_non_anchor_tags(self):
    """``can_parse`` reports false for a tag other than ``a`` (here ``html``)."""
    parser = LinkTagParser(mock())

    self.assertFalse(parser.can_parse("html"))
def get_tag_parser(self, url):
    """Return a ``LinkTagParser`` wrapping a ``LinkBuilder`` created from *url*."""
    return LinkTagParser(LinkBuilder(url))
import unittest

from crawler.html_parser import HtmlParser
from crawler.link_tag_parser import LinkTagParser, LinkBuilder
from crawler.link import Link

# Module-level fixtures shared by every test in this file.
parent_url = "http://parentlink.com"
# NOTE(review): the same parser instances are reused across tests; this
# presumably relies on them carrying no state between parse() calls -- confirm.
links_parser = LinkTagParser(LinkBuilder(parent_url))
html_parser = HtmlParser()


class HtmlParserTest(unittest.TestCase):
    """Tests for ``HtmlParser.parse`` driven by a ``LinkTagParser``."""

    def test_parse_html_page_with_no_links_return_empty_list(self):
        """A page without anchor tags yields no parsed links."""
        parsed_links = html_parser.parse(
            links_parser,
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title></title></head><body><p>Some test</p></body></html>'
        )

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(0, len(parsed_links))

    def test_parse_a_link_in_html_page(self):
        """A page with one anchor yields one link with its url, label and parent url."""
        parsed_links = html_parser.parse(
            links_parser,
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><title></title></head><body><p>Some test<a href="http://somelink.com">SomeLink</a></p></body></html>'
        )

        self.assertEqual(1, len(parsed_links))
        self.assertEqual(
            Link(url="http://somelink.com", label="SomeLink",
                 parent_url=parent_url),
            parsed_links[0])