def test_no_retry_on_non_timeout_ftp_errors(self):
    """A FTP error which is not a timeout (here a 422 `error_temp`)
    must be propagated as is, without `connect()` being called again
    """
    with mock.patch('ftplib.FTP'):
        ftp_crawler = crawlers.FTPCrawler('ftp://foo')
        # make the folder listing fail with a non-timeout error
        ftp_crawler.ftp.nlst.side_effect = ftplib.error_temp('422')

        with mock.patch.object(ftp_crawler, 'connect') as connect_mock, \
                self.assertRaises(ftplib.error_temp):
            ftp_crawler._list_folder_contents('/')

        # no reconnection attempt should have been made
        connect_mock.assert_not_called()
def _create_crawlers(self):
    """Return a list containing one FTPCrawler for each URL found under
    the 'urls' key of the configuration.

    Each crawler receives the 'username', 'password' and 'include'
    settings read from the configuration (None when a setting is
    absent), and the time range returned by `get_time_range()`.
    """
    ftp_crawlers = []
    for url in self.config['urls']:
        ftp_crawlers.append(crawlers.FTPCrawler(
            root_url=url,
            username=self.config.get('username', None),
            password=self.config.get('password'),
            time_range=self.get_time_range(),
            include=self.config.get('include', None),
        ))
    return ftp_crawlers
def test_retry_on_timeout_decorator_timeout_error(self):
    """The retry_on_timeout decorator should re-create the connection
    when a FTP timeout error (421) occurs, and re-run the method in
    which the error occurred once.
    Here the listing always fails, so the error must still be raised
    and the re-initialization of the connection must be logged
    """
    expected_message = "Re-initializing the FTP connection"

    with mock.patch('ftplib.FTP'):
        ftp_crawler = crawlers.FTPCrawler('ftp://foo')
        ftp_crawler.ftp.nlst.side_effect = ftplib.error_temp('421')

        with self.assertRaises(ftplib.error_temp):
            with self.assertLogs(ftp_crawler.LOGGER, level=logging.INFO) as captured_logs:
                ftp_crawler._list_folder_contents('/')

        first_record = captured_logs.records[0]
        self.assertEqual(first_record.getMessage(), expected_message)
def test_retry_on_timeout_decorator_connection_error(self):
    """The retry_on_timeout decorator should try to re-create the
    connection when a connection error occurs, and re-run the method
    in which the error occurred 5 times
    """
    connection_errors = (ConnectionError, ConnectionRefusedError, ConnectionResetError)

    with mock.patch('ftplib.FTP'):
        crawler = crawlers.FTPCrawler('ftp://foo')

        for error in connection_errors:
            # One sub-test per exception type: a failure names the
            # offending exception and the remaining types still get checked
            with self.subTest(error=error.__name__):
                crawler.ftp.nlst.side_effect = error

                with mock.patch.object(crawler, 'connect') as mock_connect:
                    with self.assertRaises(error), \
                            self.assertLogs(crawler.LOGGER, level=logging.INFO):
                        crawler._list_folder_contents('/')

                # the connection must have been re-created 5 times
                # before the error was finally propagated
                self.assertEqual(mock_connect.call_count, 5)
def test_ftp_correct_exception(self, mock_ftp):
    """set_initial_state() should not raise an error in case of 503 or 230
    responses from FTP.login(), but it should for other error codes.
    """
    # The pattern is a raw string: '\.' is not a valid escape sequence
    # in a regular string literal and triggers a warning at compile time
    test_crawler = crawlers.FTPCrawler(
        'ftp://', username="******", password="******", include=r'\.gz$')

    # 503 and 230 responses must be tolerated
    mock_ftp.side_effect = ftplib.error_perm("503")
    test_crawler.set_initial_state()

    mock_ftp.side_effect = ftplib.error_perm("230")
    test_crawler.set_initial_state()

    # any other code must be propagated to the caller
    mock_ftp.side_effect = ftplib.error_perm("999")
    with self.assertRaises(ftplib.error_perm):
        test_crawler.set_initial_state()
def test_ftp_correct_navigation(self, mock_ftp):
    """check that file URLs and folders paths are added to the right stacks"""
    # The pattern is a raw string: '\.' is not a valid escape sequence
    # in a regular string literal and triggers a warning at compile time
    test_crawler = crawlers.FTPCrawler('ftp://foo', include=r'\.gz$')

    # contents reported by the (mocked) FTP server for the listed folder
    test_crawler.ftp.nlst.return_value = [
        'file1.gz',
        'folder_name',
        'file3.bb',
        'file2.gz',
    ]
    test_crawler.ftp.cwd = self.emulate_cwd_of_ftp
    test_crawler.ftp.host = ''

    with self.assertLogs('geospaas_harvesting.crawlers.FTPCrawler'):
        test_crawler._process_folder('')

    # '.gz' files must be in the "_urls" list
    # Other type of files should not be in the "_urls" list
    self.assertCountEqual(
        ['ftp://foo/file1.gz', 'ftp://foo/file2.gz'],
        test_crawler._urls)

    # folder with 'folder_name' must be in the "_to_process" list
    self.assertCountEqual(['/', 'folder_name'], test_crawler._to_process)
def test_ftp_incorrect_entry(self):
    """Instantiating a FTPCrawler with an incorrect FTP address
    ('ft:///') must raise a ValueError
    """
    self.assertRaises(ValueError, crawlers.FTPCrawler, 'ft:///')