Example #1
 def test_download_files_fails_and_dies(self):
     # When the rsync binary fails (simulated here with /bin/false), the
     # scraper should log an ERROR and raise RecoverableScraperException
     # rather than crash.
     with testfixtures.LogCapture() as log:
         with self.assertRaises(scraper.RecoverableScraperException):
             scraper.download_files(
                 '/usr/bin/timeout', '/bin/false', 'localhost/', [
                     scraper.RemoteFile('2016/10/26/DNE1', 0),
                     scraper.RemoteFile('2016/10/26/DNE2', 0)
                 ], '/tmp')
         self.assertIn('ERROR', [x.levelname for x in log.records])
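
For context, here is a minimal sketch of the failure path this test exercises, assuming download_files wraps the rsync invocation in subprocess.check_call. The exception class, command layout, and timeout value below are illustrative guesses reconstructed from the test, not the scraper's actual implementation:

import logging
import subprocess


class RecoverableScraperException(Exception):
    """Raised when a scrape fails in a way that may succeed on retry."""


def download_files(timeout_binary, rsync_binary, remote, files, destination):
    # Hypothetical reconstruction: run rsync under a timeout and convert a
    # non-zero exit status into a logged, recoverable failure.  (The real
    # function also passes the file list; that plumbing is omitted here.)
    command = [timeout_binary, '3600', rsync_binary, remote, destination]
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError as error:
        logging.error('rsync failed with exit status %d', error.returncode)
        raise RecoverableScraperException(str(error))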
Example #2
    def test_download_files_breaks_up_long_file_list(self, patched_call):
        files_to_download = [
            scraper.RemoteFile('2016/10/26/DNE.%d' % i, 0)
            for i in range(100070)
        ]
        files_downloaded = []

        def verify_contents(args):
            # Verify that the third-to-last argument to check_call is a filename
            # that contains the right data (specifically, the filenames).  This
            # test needs to be kept in sync with the order of command-line
            # arguments passed to the rsync call.
            file_with_filenames = args[-3]
            files = open(file_with_filenames).read().split('\0')
            self.assertGreater(len(files), 0)
            self.assertLessEqual(len(files), 1000)
            files_downloaded.extend(files)
            return 0

        patched_call.side_effect = verify_contents
        scraper.download_files('/usr/bin/timeout', '/bin/true', 'localhost/',
                               files_to_download, '/tmp')
        self.assertEqual(set(x.filename for x in files_to_download),
                         set(files_downloaded))
        self.assertEqual(patched_call.call_count, 101)
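
The expected call count follows from batching: 100070 filenames in batches of at most 1000 give 100 full batches of 1000 plus one batch of 70, i.e. 101 rsync invocations. A minimal sketch of the chunking and the NUL-separated list file that the test's split('\0') implies; the helper names are assumptions, though --files-from plus --from0 is real rsync syntax for reading a NUL-separated file list:

import tempfile


def chunks(filenames, size=1000):
    # Yield successive batches of at most `size` filenames.
    for start in range(0, len(filenames), size):
        yield filenames[start:start + size]


def write_null_separated(filenames):
    # Write one batch to a temp file suitable for rsync's
    # --files-from=FILE --from0 options.
    listfile = tempfile.NamedTemporaryFile(delete=False)
    listfile.write('\0'.join(filenames).encode())
    listfile.flush()
    return listfile.name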
Example #3
    def test_download_files(self, patched_call):
        files_to_download = [
            scraper.RemoteFile('2016/10/26/DNE1', 0),
            scraper.RemoteFile('2016/10/26/DNE2', 0)
        ]

        def verify_contents(args):
            # Verify that the third-to-last argument to check_call is a filename
            # that contains the right data (specifically, the filenames).  This
            # test needs to be kept in sync with the order of command-line
            # arguments passed to the rsync call.
            file_with_filenames = args[-3]
            files_downloaded = open(file_with_filenames).read().split('\0')
            self.assertEqual(files_downloaded,
                             [x.filename for x in files_to_download])
            return 0

        patched_call.side_effect = verify_contents
        self.assertEqual(patched_call.call_count, 0)
        scraper.download_files('/usr/bin/timeout', '/bin/true', 'localhost/',
                               files_to_download, '/tmp')
        self.assertEqual(patched_call.call_count, 1)
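
Examples #2 and #3 both take a patched_call parameter, which implies the test methods are decorated so that subprocess.check_call is replaced by a mock. The decorator is not shown in these excerpts; a plausible setup would be:

import subprocess
import unittest

import mock  # the standalone mock package; unittest.mock in Python 3


class DownloadFilesTest(unittest.TestCase):

    # patch.object swaps subprocess.check_call for a mock.Mock and passes
    # it to the test method as the extra `patched_call` argument.
    @mock.patch.object(subprocess, 'check_call')
    def test_download_files(self, patched_call):
        pass  # body as in the excerpts above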
Example #4
 def test_list_rsync_files_returns_24(self, patched_subprocess):
     # rsync exits with status 24 to signal 'partial transfer due to
     # vanished source files'; list_rsync_files should tolerate that and
     # still parse the listing, skipping directory entries and malformed
     # lines such as BADBADBAD.
     # pylint: disable=line-too-long
     serverfiles = textwrap.dedent("""\
         .
         2016/
         2016/01/ 2016/01/06-05:12:07
         2016/01/06/ 2016/01/06-05:12:07
         2016/01/06/.gz 2016/01/06-05:12:07
         2016/01/06/20160106T05:43:32.741066000Z_:0.cputime.gz 2016/01/06-05:43:32
         2016/01/06/20160106T05:43:32.741066000Z_:0.meta 2016/01/06-05:43:32
         2016/01/06/20160106T18:07:33.122784000Z_:0.cputime.gz 2016/01/06-18:07:33
         BADBADBAD
         2016/01/06/20160106T18:07:33.122784000Z_:0.meta 2016/01/06-18:07:33
         2016/01/06/20160106T22:31:57.229531000Z_:0.cputime.gz 2016/01/06-22:31:57"""
                                   )
     mock_process = mock.Mock()
     mock_process.returncode = 24
     patched_subprocess.return_value = mock_process
     mock_process.stdout = serverfiles.splitlines()
     files = set(
         scraper.list_rsync_files('/usr/bin/timeout', '/usr/bin/rsync',
                                  'localhost', ''))
     self.assertSetEqual(
         set([
             scraper.RemoteFile('2016/01/06/.gz',
                                datetime.datetime(2016, 1, 6, 5, 12, 7)),
             scraper.RemoteFile(
                 '2016/01/06/20160106T05:43:32.741066000Z_:0.cputime.gz',
                 datetime.datetime(2016, 1, 6, 5, 43, 32)),
             scraper.RemoteFile(
                 '2016/01/06/20160106T05:43:32.741066000Z_:0.meta',
                 datetime.datetime(2016, 1, 6, 5, 43, 32)),
             scraper.RemoteFile(
                 '2016/01/06/20160106T18:07:33.122784000Z_:0.cputime.gz',
                 datetime.datetime(2016, 1, 6, 18, 7, 33)),
             scraper.RemoteFile(
                 '2016/01/06/20160106T18:07:33.122784000Z_:0.meta',
                 datetime.datetime(2016, 1, 6, 18, 7, 33)),
             scraper.RemoteFile(
                 '2016/01/06/20160106T22:31:57.229531000Z_:0.cputime.gz',
                 datetime.datetime(2016, 1, 6, 22, 31, 57))
         ]), files)
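
A minimal sketch of the line parsing this fixture implies: a listing record is 'path YYYY/MM/DD-HH:MM:SS', while directory entries (trailing '/') and junk lines like BADBADBAD must be skipped. The regex and helper are illustrative, not the scraper's actual code:

import datetime
import re

# Directory entries end in '/' and junk such as BADBADBAD carries no
# timestamp, so neither matches this pattern.
LISTING_LINE = re.compile(
    r'^(?P<name>\S*[^/\s]) (?P<ts>\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2})$')


def parse_listing_line(line):
    match = LISTING_LINE.match(line.strip())
    if match is None:
        return None  # chatter, directory, or malformed line
    timestamp = datetime.datetime.strptime(match.group('ts'),
                                           '%Y/%m/%d-%H:%M:%S')
    return match.group('name'), timestamp

Applied to the fixture above, this filtering yields exactly the six RemoteFile entries the test expects.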
Example #5
 def test_list_rsync_files(self):
     # The fixture mixes rsync -vv chatter, 'is uptodate' entries, and
     # directory lines in with the 'path YYYY/MM/DD-HH:MM:SS' records that
     # list_rsync_files must extract.
     # pylint: disable=line-too-long
     serverfiles = textwrap.dedent("""\
     opening tcp connection to ndt.iupui.mlab1.lga05.measurement-lab.org port 7999
     sending daemon args: --server --sender -vvnlogDtprze.iLsfxC --timeout=300 --bwlimit=10000 . iupui_ndt/  (7 args)
     receiving incremental file list
     delta-transmission enabled
     2017/10/12/.gz is uptodate
     2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:20450.s2c_snaplog.gz is uptodate
     2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:5231.cputime.gz is uptodate
     2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:5231.meta is uptodate
     2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58633.cputime.gz is uptodate
     2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58633.meta is uptodate
     2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58634.s2c_snaplog.gz is uptodate
     [receiver] expand file_list pointer array to 524288 bytes, did move
     [generator] expand file_list pointer array to 524288 bytes, did move
     2017/10/12/ 2017/10/13-08:51:08
     2017/10/12/20171012T22:11:56.252172000Z_72.89.41.162.s2c_ndttrace.gz 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.cputime.gz 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.meta 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58178.s2c_snaplog.gz 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215.s2c_ndttrace 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51934.cputime 2017/10/12-22:12:07
     2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51935.s2c_snaplog 2017/10/12-22:12:07
     2017/10/12/20171012T22:12:00.876568000Z_71.187.248.40.c2s_ndttrace.gz 2017/10/12-22:12:24"""
                                   )
     with tempfile.NamedTemporaryFile() as temp:
         temp.write(serverfiles)
         temp.flush()
         # Stand a real process (cat-ing the fixture) in for rsync so the
         # code under test iterates over genuine subprocess stdout.
         fake_process = subprocess.Popen(['/bin/cat', temp.name],
                                         stdout=subprocess.PIPE)
         with mock.patch.object(subprocess, 'Popen') as mock_subprocess:
             mock_subprocess.return_value = fake_process
             files = scraper.list_rsync_files('/usr/bin/timeout',
                                              '/usr/bin/rsync', 'localhost',
                                              '/tmp')
     self.assertSetEqual(
         set([
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.252172000Z_72.89.41.162.s2c_ndttrace.gz',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.cputime.gz',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.meta',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58178.s2c_snaplog.gz',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215.s2c_ndttrace',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51934.cputime',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51935.s2c_snaplog',
                 datetime.datetime(2017, 10, 12, 22, 12, 7)),
             scraper.RemoteFile(
                 '2017/10/12/20171012T22:12:00.876568000Z_71.187.248.40.c2s_ndttrace.gz',
                 datetime.datetime(2017, 10, 12, 22, 12, 24))
         ]), set(files))
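
Note how the two list_rsync_files tests differ in their plumbing: example #4 hands the parser a mock process whose stdout is a pre-split list of lines, while example #5 pipes the raw fixture through a real /bin/cat process so the code under test iterates over genuine subprocess stdout. In both cases only the 'path YYYY/MM/DD-HH:MM:SS' records survive; rsync chatter, 'is uptodate' entries, and directory lines are dropped.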