def test_normalize_relative_root_url(self):
    """Check that normalize() expands relative hrefs against the page URL.

    Two forms are exercised: a root-relative path (``/blog/``) and a
    scheme-relative reference (``//host/path``).
    """
    consumer = Consumer()

    # Root-relative path: expected to be appended to the page's origin.
    resolved = consumer.normalize({'href': '/blog/'}, 'http://ajankovic.com')
    self.assertEqual('http://ajankovic.com/blog/', resolved)

    # Scheme-relative reference: expected to pick up the page's scheme
    # (https here).
    resolved = consumer.normalize(
        {'href': '//ajankovic.com/blog/'},
        'https://ajankovic.com',
    )
    self.assertEqual('https://ajankovic.com/blog/', resolved)
def test_run(self, mock_get):
    """Feed two (url, markup) pairs through Consumer.run() and check output.

    The input queue is terminated with a ``None`` entry. Every item read
    back from the output queue, up to the first ``None``, must be one of
    EXTRACTED_URLS.

    ``mock_get`` is not used in the body; presumably it is injected by a
    patch decorator that is outside this view.
    """
    consumer = Consumer()

    markupq = Queue()
    entries = (
        ('http://ajankovic.com/blog/', BLOG_RESPONSE),
        ('http://test.com', TEST_RESPONSE),
        None,  # trailing None terminates the input
    )
    for entry in entries:
        markupq.put(entry)

    outq = Queue()
    consumer.run(markupq, outq)

    # NOTE(review): if drain(outq) yields nothing, no assertion runs and the
    # test passes vacuously -- consider also checking that something was
    # extracted.
    for extracted in drain(outq):
        if extracted is not None:
            self.assertIn(extracted, EXTRACTED_URLS)
        else:
            # A None item marks the end of the output; stop checking.
            break
def test_normalize_invalid_url(self):
    """Check that normalize() returns None for a ``skype:`` href."""
    consumer = Consumer()
    anchor = {'href': 'skype:ajankovic?chat'}
    page_url = 'http://ajankovic.com'

    normalized = consumer.normalize(anchor, page_url)

    self.assertIsNone(normalized)
def test_normalize_valid_url(self):
    """Check that normalize() returns an absolute http href unchanged."""
    consumer = Consumer()
    expected = 'http://ajankovic.com/blog/'

    actual = consumer.normalize(
        {'href': 'http://ajankovic.com/blog/'},
        'http://ajankovic.com',
    )

    self.assertEqual(expected, actual)