def test_no_retry_on_non_timeout_ftp_errors(self):
        """A temporary FTP error which is not a timeout must be
        propagated without any attempt to re-create the connection
        """
        with mock.patch('ftplib.FTP'):
            ftp_crawler = crawlers.FTPCrawler('ftp://foo')
            # listing a folder fails with a non-timeout temporary error
            ftp_crawler.ftp.nlst.side_effect = ftplib.error_temp('422')

            with mock.patch.object(ftp_crawler, 'connect') as connect_mock, \
                 self.assertRaises(ftplib.error_temp):
                ftp_crawler._list_folder_contents('/')

            # no reconnection means connect() was never called
            connect_mock.assert_not_called()
 def _create_crawlers(self):
     """Build one FTPCrawler for each URL found under the 'urls' key
     of self.config. The 'username', 'password' and 'include' settings
     are optional and are passed as None when absent.
     """
     def build_crawler(root):
         # the settings are looked up again for each URL
         return crawlers.FTPCrawler(
             root_url=root,
             username=self.config.get('username'),
             password=self.config.get('password'),
             time_range=self.get_time_range(),
             include=self.config.get('include'),
         )

     return [build_crawler(root) for root in self.config['urls']]
    def test_retry_on_timeout_decorator_timeout_error(self):
        """When a FTP timeout error occurs, the retry_on_timeout
        decorator should re-create the connection and re-run the
        method in which the error occurred once
        """
        with mock.patch('ftplib.FTP'):
            ftp_crawler = crawlers.FTPCrawler('ftp://foo')
            ftp_crawler.ftp.nlst.side_effect = ftplib.error_temp('421')

            # the error is still raised because the mock fails on every call
            with self.assertRaises(ftplib.error_temp):
                with self.assertLogs(ftp_crawler.LOGGER, level=logging.INFO) as logs:
                    ftp_crawler._list_folder_contents('/')

            first_message = logs.records[0].getMessage()
            self.assertEqual(first_message, "Re-initializing the FTP connection")
    def test_retry_on_timeout_decorator_connection_error(self):
        """The retry_on_timeout decorator should try to re-create
        the connection when a connection error occurs, and re-run
        the method in which the error occurred 5 times
        """
        connection_errors = (
            ConnectionError,
            ConnectionRefusedError,
            ConnectionResetError,
        )
        with mock.patch('ftplib.FTP'):
            ftp_crawler = crawlers.FTPCrawler('ftp://foo')

            for error_class in connection_errors:
                ftp_crawler.ftp.nlst.side_effect = error_class

                # a fresh mock of connect() is used for each error type,
                # so the call count starts from zero every time
                with mock.patch.object(ftp_crawler, 'connect') as connect_mock, \
                     self.assertRaises(error_class), \
                     self.assertLogs(ftp_crawler.LOGGER, level=logging.INFO):
                    ftp_crawler._list_folder_contents('/')
                self.assertEqual(connect_mock.call_count, 5)
    def test_ftp_correct_exception(self, mock_ftp):
        """set_initial_state() should not raise an error in case of
        503 or 230 responses from FTP.login(), but it should for
        other error codes.
        """

        # The regular expression is written as a raw string: '\.' is not
        # a valid escape sequence in a regular string literal.
        test_crawler = crawlers.FTPCrawler('ftp://',
                                           username="******",
                                           password="******",
                                           include=r'\.gz$')

        # these two error codes must be tolerated
        mock_ftp.side_effect = ftplib.error_perm("503")
        test_crawler.set_initial_state()

        mock_ftp.side_effect = ftplib.error_perm("230")
        test_crawler.set_initial_state()

        # any other permanent error must be propagated
        mock_ftp.side_effect = ftplib.error_perm("999")
        with self.assertRaises(ftplib.error_perm):
            test_crawler.set_initial_state()
    def test_ftp_correct_navigation(self, mock_ftp):
        """check that file URLs and folders paths are added to the right stacks"""

        # The regular expression is written as a raw string: '\.' is not
        # a valid escape sequence in a regular string literal.
        test_crawler = crawlers.FTPCrawler('ftp://foo', include=r'\.gz$')
        test_crawler.ftp.nlst.return_value = [
            'file1.gz',
            'folder_name',
            'file3.bb',
            'file2.gz',
        ]
        # cwd() is replaced by a helper of the test case
        test_crawler.ftp.cwd = self.emulate_cwd_of_ftp
        test_crawler.ftp.host = ''
        with self.assertLogs('geospaas_harvesting.crawlers.FTPCrawler'):
            test_crawler._process_folder('')
        # '.gz' files must be in the "_urls" list
        # Other type of files should not be in the "_urls" list
        self.assertCountEqual(['ftp://foo/file1.gz', 'ftp://foo/file2.gz'],
                              test_crawler._urls)
        # folder with 'folder_name' must be in the "_to_process" list
        self.assertCountEqual(['/', 'folder_name'], test_crawler._to_process)
 def test_ftp_incorrect_entry(self):
     """A ValueError should be raised when the FTP address given to
     the crawler is incorrect"""
     # 'ft:///' is the incorrect address used for this check
     self.assertRaises(ValueError, crawlers.FTPCrawler, 'ft:///')