Пример #1
0
 def test_start(self):
     """Check that ``start()`` moves the URL from ``processing`` to ``processed``.

     The crawler is flagged as closing, the URL is registered by hand in
     ``crawler.processing`` and the HTTP GET is answered by ``requests_mock``
     with an HTML content type.
     """
     target = self.url
     crawler = self.get_crawler()
     # NOTE(review): presumably ``closing`` keeps start() from scheduling
     # follow-up work -- confirm against the crawler implementation.
     crawler.closing = True
     crawler_url = CrawlerUrl(crawler, target)
     # Register the URL as "in progress" so its removal can be asserted below.
     crawler.processing[target] = crawler_url
     with requests_mock.mock() as http_mock:
         http_mock.get(target, headers={'Content-Type': 'text/html'})
         crawler_url.start()
     self.assertIn(target, crawler.processed)
     self.assertNotIn(target, crawler.processing)
Пример #2
0
 def test_session_exception(self, req_mock):
     """Check ``start()`` when the mocked GET raises ``ConnectTimeout``.

     ``start()`` must return the ``CrawlerUrl`` itself, the crawler's
     ``current_processed_count`` must be 1, and the patched
     ``CrawlerUrl.close`` must have been called exactly once.

     :param req_mock: request mocker supplied by the caller; the GET for
         ``self.url`` is registered on it so that it raises.
     """
     # Every GET to the test URL raises a connection timeout.
     req_mock.get(self.url, exc=requests.exceptions.ConnectTimeout)
     crawler = self.get_crawler()
     with patch('dirhunt.crawler_url.CrawlerUrl.close') as close_mock:
         crawler_url = CrawlerUrl(crawler, self.url)
         started = crawler_url.start()
         self.assertEqual(started, crawler_url)
         self.assertEqual(crawler.current_processed_count, 1)
         close_mock.assert_called_once()
Пример #3
0
 def test_session_read_exception(self):
     """Check ``start()`` when reading the response body raises ``ConnectTimeout``.

     The crawler's ``sessions`` attribute is replaced by a ``Mock`` whose
     session returns a response with status code 200 but whose ``raw.read``
     raises. ``start()`` must return the ``CrawlerUrl`` itself, the
     crawler's ``current_processed_count`` must be 1, and the patched
     ``CrawlerUrl.close`` must have been called exactly once.
     """
     crawler = self.get_crawler()
     crawler.sessions = Mock()
     # The object handed back by ``sessions.get_session().get(...)``.
     fake_response = crawler.sessions.get_session.return_value.get.return_value
     fake_response.status_code = 200
     # The request itself succeeds; the failure happens on the raw read.
     fake_response.raw.read.side_effect = requests.exceptions.ConnectTimeout()
     with patch('dirhunt.crawler_url.CrawlerUrl.close') as close_mock:
         crawler_url = CrawlerUrl(crawler, self.url)
         started = crawler_url.start()
         self.assertEqual(started, crawler_url)
         self.assertEqual(crawler.current_processed_count, 1)
         close_mock.assert_called_once()