def test_download_files_fails_and_dies(self):
    with testfixtures.LogCapture() as log:
        with self.assertRaises(scraper.RecoverableScraperException):
            scraper.download_files(
                '/usr/bin/timeout', '/bin/false', 'localhost/',
                [scraper.RemoteFile('2016/10/26/DNE1', 0),
                 scraper.RemoteFile('2016/10/26/DNE2', 0)],
                '/tmp')
        self.assertIn('ERROR', [x.levelname for x in log.records])
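
# '/bin/false' stands in for the rsync binary so the downloader's subprocess
# exits nonzero every time; the test pins down that download_files() surfaces
# that failure as a RecoverableScraperException and logs at ERROR level
# instead of letting the raw subprocess error escape.
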
# Patch target assumed from the check_call convention described in the
# verify_contents comment below.
@mock.patch.object(subprocess, 'check_call')
def test_download_files_breaks_up_long_file_list(self, patched_call):
    files_to_download = [
        scraper.RemoteFile('2016/10/26/DNE.%d' % i, 0)
        for i in range(100070)
    ]
    files_downloaded = []

    def verify_contents(args):
        # Verify that the third-to-last argument to check_call is a
        # filename that contains the right data (specifically, the
        # filenames). This test needs to be kept in sync with the order of
        # command-line arguments passed to the rsync call.
        file_with_filenames = args[-3]
        files = open(file_with_filenames).read().split('\0')
        self.assertTrue(len(files) > 0)
        self.assertTrue(len(files) <= 1000)
        files_downloaded.extend(files)
        return 0

    patched_call.side_effect = verify_contents
    scraper.download_files('/usr/bin/timeout', '/bin/true', 'localhost/',
                           files_to_download, '/tmp')
    self.assertEqual(set(x.filename for x in files_to_download),
                     set(files_downloaded))
    self.assertEqual(patched_call.call_count, 101)
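
# A sketch of the batching the test above relies on; the 1000-file cap and
# the NUL-separated temp file are assumptions inferred from this test rather
# than a documented API. All the test itself pins down is that 100070 files
# produce ceil(100070 / 1000) == 101 rsync invocations.
def _example_batches(filenames, batch_size=1000):
    # Yield successive slices of at most batch_size names, mirroring how
    # download_files() presumably splits a long list across rsync calls.
    for start in range(0, len(filenames), batch_size):
        yield filenames[start:start + batch_size]
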
# Patch target assumed from the check_call convention described in the
# verify_contents comment below.
@mock.patch.object(subprocess, 'check_call')
def test_download_files(self, patched_call):
    files_to_download = [
        scraper.RemoteFile('2016/10/26/DNE1', 0),
        scraper.RemoteFile('2016/10/26/DNE2', 0)
    ]

    def verify_contents(args):
        # Verify that the third-to-last argument to check_call is a
        # filename that contains the right data (specifically, the
        # filenames). This test needs to be kept in sync with the order of
        # command-line arguments passed to the rsync call.
        file_with_filenames = args[-3]
        files_downloaded = open(file_with_filenames).read().split('\0')
        self.assertEqual(files_downloaded,
                         [x.filename for x in files_to_download])
        return 0

    patched_call.side_effect = verify_contents
    self.assertEqual(patched_call.call_count, 0)
    scraper.download_files('/usr/bin/timeout', '/bin/true', 'localhost/',
                           files_to_download, '/tmp')
    self.assertEqual(patched_call.call_count, 1)
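
# The args[-3] convention checked in both download tests matches an rsync
# command line in which the temp file of NUL-separated names is followed by
# exactly two arguments, source and destination. One plausible layout (flag
# spelling and ordering are assumptions; --files-from and --from0 are real
# rsync options that read NUL-terminated names from a file):
#
#   [timeout_binary, duration, rsync_binary, '--from0',
#    '--files-from', temp_filename, remote_url, local_destination]
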
# Patch target assumed: the test drives list_rsync_files() through a fake
# subprocess.Popen result.
@mock.patch.object(subprocess, 'Popen')
def test_list_rsync_files_returns_24(self, patched_subprocess):
    # pylint: disable=line-too-long
    # The BADBADBAD entry is deliberately malformed and must be skipped.
    serverfiles = textwrap.dedent("""\
        .
        2016/
        2016/01/ 2016/01/06-05:12:07
        2016/01/06/ 2016/01/06-05:12:07
        2016/01/06/.gz 2016/01/06-05:12:07
        2016/01/06/20160106T05:43:32.741066000Z_:0.cputime.gz 2016/01/06-05:43:32
        2016/01/06/20160106T05:43:32.741066000Z_:0.meta 2016/01/06-05:43:32
        2016/01/06/20160106T18:07:33.122784000Z_:0.cputime.gz 2016/01/06-18:07:33
        BADBADBAD
        2016/01/06/20160106T18:07:33.122784000Z_:0.meta 2016/01/06-18:07:33
        2016/01/06/20160106T22:31:57.229531000Z_:0.cputime.gz 2016/01/06-22:31:57""")
    mock_process = mock.Mock()
    mock_process.returncode = 24
    patched_subprocess.return_value = mock_process
    mock_process.stdout = serverfiles.splitlines()
    files = set(
        scraper.list_rsync_files('/usr/bin/timeout', '/usr/bin/rsync',
                                 'localhost', ''))
    self.assertSetEqual(
        set([
            scraper.RemoteFile('2016/01/06/.gz',
                               datetime.datetime(2016, 1, 6, 5, 12, 7)),
            scraper.RemoteFile(
                '2016/01/06/20160106T05:43:32.741066000Z_:0.cputime.gz',
                datetime.datetime(2016, 1, 6, 5, 43, 32)),
            scraper.RemoteFile(
                '2016/01/06/20160106T05:43:32.741066000Z_:0.meta',
                datetime.datetime(2016, 1, 6, 5, 43, 32)),
            scraper.RemoteFile(
                '2016/01/06/20160106T18:07:33.122784000Z_:0.cputime.gz',
                datetime.datetime(2016, 1, 6, 18, 7, 33)),
            scraper.RemoteFile(
                '2016/01/06/20160106T18:07:33.122784000Z_:0.meta',
                datetime.datetime(2016, 1, 6, 18, 7, 33)),
            scraper.RemoteFile(
                '2016/01/06/20160106T22:31:57.229531000Z_:0.cputime.gz',
                datetime.datetime(2016, 1, 6, 22, 31, 57))
        ]), files)
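
# rsync exit code 24 means "partial transfer due to vanished source files",
# which is routine on a live server where logs rotate away between the
# listing and the transfer. The test above therefore encodes the assumption
# that list_rsync_files() treats 24 as a success code and still parses the
# listing rather than raising.
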
def test_list_rsync_files(self):
    # pylint: disable=line-too-long
    serverfiles = textwrap.dedent("""\
        opening tcp connection to ndt.iupui.mlab1.lga05.measurement-lab.org port 7999
        sending daemon args: --server --sender -vvnlogDtprze.iLsfxC --timeout=300 --bwlimit=10000 . iupui_ndt/ (7 args)
        receiving incremental file list
        delta-transmission enabled
        2017/10/12/.gz is uptodate
        2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:20450.s2c_snaplog.gz is uptodate
        2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:5231.cputime.gz is uptodate
        2017/10/12/20171012T22:00:14.809574000Z_66-87-124-30.pools.spcsdns.net:5231.meta is uptodate
        2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58633.cputime.gz is uptodate
        2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58633.meta is uptodate
        2017/10/12/20171012T22:11:50.24974000Z_pool-71-187-248-40.nwrknj.fios.verizon.net:58634.s2c_snaplog.gz is uptodate
        [receiver] expand file_list pointer array to 524288 bytes, did move
        [generator] expand file_list pointer array to 524288 bytes, did move
        2017/10/12/ 2017/10/13-08:51:08
        2017/10/12/20171012T22:11:56.252172000Z_72.89.41.162.s2c_ndttrace.gz 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.cputime.gz 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.meta 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58178.s2c_snaplog.gz 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215.s2c_ndttrace 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51934.cputime 2017/10/12-22:12:07
        2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51935.s2c_snaplog 2017/10/12-22:12:07
        2017/10/12/20171012T22:12:00.876568000Z_71.187.248.40.c2s_ndttrace.gz 2017/10/12-22:12:24""")
    with tempfile.NamedTemporaryFile() as temp:
        temp.write(serverfiles)
        temp.flush()
        fake_process = subprocess.Popen(['/bin/cat', temp.name],
                                        stdout=subprocess.PIPE)
        with mock.patch.object(subprocess, 'Popen') as mock_subprocess:
            mock_subprocess.return_value = fake_process
            files = scraper.list_rsync_files('/usr/bin/timeout',
                                             '/usr/bin/rsync', 'localhost',
                                             '/tmp')
    self.assertSetEqual(
        set([
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.252172000Z_72.89.41.162.s2c_ndttrace.gz',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.cputime.gz',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58176.meta',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.252172000Z_pool-72-89-41-162.nycmny.fios.verizon.net:58178.s2c_snaplog.gz',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215.s2c_ndttrace',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51934.cputime',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:11:56.911421000Z_98.217.9.215:51935.s2c_snaplog',
                datetime.datetime(2017, 10, 12, 22, 12, 7)),
            scraper.RemoteFile(
                '2017/10/12/20171012T22:12:00.876568000Z_71.187.248.40.c2s_ndttrace.gz',
                datetime.datetime(2017, 10, 12, 22, 12, 24))
        ]),
        set(files))
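
# A minimal sketch of the parse the two listing tests expect: real entries
# look like "<filename> YYYY/MM/DD-HH:MM:SS", while rsync chatter
# ("delta-transmission enabled", "... is uptodate", "[receiver] ...") and
# directory lines must be ignored. The exact rules inside
# scraper.list_rsync_files() are assumptions inferred from the fixtures.
def _example_parse_listing_line(line):
    parts = line.split()
    if len(parts) != 2 or parts[0].endswith('/'):
        return None  # chatter, directories, or malformed lines (BADBADBAD)
    try:
        mtime = datetime.datetime.strptime(parts[1], '%Y/%m/%d-%H:%M:%S')
    except ValueError:
        return None  # second token was not a timestamp
    return scraper.RemoteFile(parts[0], mtime)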