def test_follow_redirects(self):
    """Check ``URLFinder.follow_redirects`` against a patched ``requests.get``.

    Three scenarios are exercised:

    * the response reports a different URL than the requested one;
    * the response reports the same URL that was requested;
    * ``requests.get`` raises ``requests.RequestException``, in which case
      ``None`` is expected.
    """
    finder = URLFinder()

    # ``url`` is attached as a PropertyMock on the mock's type so that every
    # read of ``response.url`` is counted.
    mock_response = MagicMock()
    mock_response_url = PropertyMock()
    type(mock_response).url = mock_response_url

    successful_get = MagicMock(return_value=mock_response)
    with patch("requests.get", successful_get) as mock_get:
        # A request is sent to the URL. If there was a redirect return the
        # final URL.
        mock_response_url.return_value = "http://finalurl.com"
        self.assertEqual(
            finder.follow_redirects("http://redirects.to"),
            "http://finalurl.com",
        )
        self.assertEqual(mock_get.call_count, 1)
        self.assertEqual(mock_response_url.call_count, 1)

        # If there was no redirect return the original URL.
        self.assertEqual(
            finder.follow_redirects("http://finalurl.com"),
            "http://finalurl.com",
        )
        # The counters are cumulative: the mocks are not reset between calls.
        self.assertEqual(mock_get.call_count, 2)
        self.assertEqual(mock_response_url.call_count, 2)

    failing_get = MagicMock(side_effect=requests.RequestException)
    with patch("requests.get", failing_get) as mock_get:
        # If request failed return None
        self.assertIsNone(finder.follow_redirects("http://redirects.to"))
        self.assertEqual(mock_get.call_count, 1)
def test_find_urls(self):
    """Check that ``find_urls`` keeps only URLs for which ``clean_url`` returns a value.

    ``clean_url`` is replaced by a stub that passes ``http://good.com``
    through unchanged and returns ``None`` for anything else. The result is
    expected to be a set containing only the good URL, with ``clean_url``
    having been called twice for the two URLs in the text.
    """
    def fake_clean_url(url):
        # Only the "good" URL survives cleaning; everything else is dropped.
        return url if url == "http://good.com" else None

    finder = URLFinder()
    finder.clean_url = MagicMock(side_effect=fake_clean_url)

    self.assertEqual(
        finder.find_urls("For http://good.com bar http://bad.com"),
        {"http://good.com"},
    )
    self.assertEqual(finder.clean_url.call_count, 2)
def test_clean_url(self):
    """Check ``clean_url`` with all of its collaborators mocked out.

    ``is_blacklisted``, ``untiny.extract``, ``follow_redirects`` and
    ``clean_params`` are replaced by mocks. First a blacklisted URL is
    expected to yield ``None``; then a non-blacklisted URL is expected to be
    mapped through the mocked steps to ``http://cleaned.com``.
    """
    finder = URLFinder()
    finder.is_blacklisted = MagicMock()
    finder.untiny.extract = MagicMock()
    finder.follow_redirects = MagicMock()
    finder.clean_params = MagicMock()

    # Blacklisted URLs are rejected.
    finder.is_blacklisted.return_value = True
    self.assertIsNone(finder.clean_url("http://tiny.com"))
    self.assertEqual(finder.is_blacklisted.call_count, 1)

    # Each stub rewrites exactly one known URL and passes anything else
    # through unchanged.
    finder.is_blacklisted.return_value = False
    finder.untiny.extract.side_effect = (
        lambda url: "http://redirect.com" if url == "http://tiny.com" else url
    )
    finder.follow_redirects.side_effect = (
        lambda url: "http://final.com" if url == "http://redirect.com" else url
    )
    finder.clean_params.side_effect = (
        lambda url: "http://cleaned.com" if url == "http://final.com" else url
    )

    self.assertEqual(
        finder.clean_url("http://tiny.com"),
        "http://cleaned.com",
    )
    # The mocks are never reset, so these counts are cumulative over both
    # clean_url calls above (is_blacklisted was already at 1).
    self.assertEqual(finder.is_blacklisted.call_count, 4)
    self.assertEqual(finder.untiny.extract.call_count, 3)
    self.assertEqual(finder.follow_redirects.call_count, 2)
    self.assertEqual(finder.clean_params.call_count, 1)
def test_clean_params(self):
    """Check ``clean_params`` against a table of (input URL, expected URL) pairs."""
    finder = URLFinder()

    youtube_url = u"http://www.youtube.com/watch?v=DDjpOrlfh0Y"

    cases = [
        # All query parameters are utm_* and the expected URL has no query.
        (
            u"http://www.slideshare.net/mpirnat/web-development-with-python-and-django?utm_source=Python+Weekly+Newsletter&utm_campaign=7fc9a4c2e2-Python_Weekly_Issue_70_January_17_2013&utm_medium=email",
            u"http://www.slideshare.net/mpirnat/web-development-with-python-and-django",
        ),
        # A URL with only the "v" parameter is expected back unchanged.
        (
            youtube_url,
            youtube_url,
        ),
        # utm_medium is dropped while "v" is kept.
        (
            u"http://www.youtube.com/watch?v=DDjpOrlfh0Y&utm_medium=email",
            u"http://www.youtube.com/watch?v=DDjpOrlfh0Y",
        ),
        # The nested "url" parameter value comes back percent-encoded and the
        # utm_* parameters are dropped.
        (
            u"http://baunerreon.com.br/compartilhar.php?url=http://cinema.uol.com.br/ultnot/reuters/2013/01/19/bonecos-de-django-livre-sao-considerados-ofensivos-por-afro-americanos.jhtm+&titulo=Bonecos+de+%22Django+Livre%22+s%C3%83%C2%83%C3%82%C2%A3o+considerados+ofensivos+por+afro-americanos&utm_medium=twitter+@revhomemmoderno&utm_source=Not%C3%83%C2%ADcias+Cinema",
            u"http://baunerreon.com.br/compartilhar.php?url=http%3A%2F%2Fcinema.uol.com.br%2Fultnot%2Freuters%2F2013%2F01%2F19%2Fbonecos-de-django-livre-sao-considerados-ofensivos-por-afro-americanos.jhtm+&titulo=Bonecos+de+%22Django+Livre%22+s%C3%83%C2%83%C3%82%C2%A3o+considerados+ofensivos+por+afro-americanos",
        ),
        # The "spref" parameter is dropped as well.
        (
            u"http://www.brankovukelic.com/2013/01/two-scoops-of-django-book-review.html?spref=tw",
            u"http://www.brankovukelic.com/2013/01/two-scoops-of-django-book-review.html",
        ),
    ]

    for url, reference in cases:
        self.assertEqual(finder.clean_params(url), reference)
def test_is_blacklisted(self):
    """Check ``is_blacklisted`` on one URL expected to match and one expected not to."""
    url_finder = URLFinder()

    # An instagr.am link is expected to be reported as blacklisted.
    blocked = url_finder.is_blacklisted("http://instagr.am/12345")
    self.assertTrue(blocked)

    # example.com is expected to be allowed.
    allowed = url_finder.is_blacklisted("http://example.com")
    self.assertFalse(allowed)