def test_main_with_enough_data_for_early_upload(self, mock_sleep):
    now = datetime.datetime.now()
    slept_seconds = []
    mock_sleep.side_effect = slept_seconds.append
    # Add files for 2.1 hours ago and right now. Only the older should
    # get uploaded.
    older = now - datetime.timedelta(minutes=126)
    self.create_file(older)
    self.create_file(now)
    # Run the scraper, which should upload only the older data.
    run_scraper.main([
        'run_as_e2e_test',
        '--num_runs', '1',
        '--rsync_host', 'ndt.iupui.mlab4.xxx08.measurement-lab.org',
        '--rsync_module', 'iupui_ndt',
        '--data_dir', '/scraper_data',
        '--metrics_port', str(EndToEndWithFakes.prometheus_port),
        '--max_uncompressed_size', '1024',
        '--data_buffer_threshold', '1023'
    ])
    # Verify that the datastore high-water mark reflects the older file
    # (roughly 2.1 hours ago), not the file created just now.
    datastore_client = datastore.Client()
    key = datastore_client.key(
        'dropboxrsyncaddress',
        'rsync://ndt.iupui.mlab4.xxx08.measurement-lab.org'
        ':7999/iupui_ndt')
    value = datastore_client.get(key)
    time_since_epoch = scraper.datetime_to_epoch(now)
    self.assertLess(value['maxrawfilemtimearchived'], time_since_epoch)
    # Verify that the storage service received exactly one file.
    tgzfiles = os.listdir(self.cloud_upload_dir)
    self.assertEqual(len(tgzfiles), 1)
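
# A minimal sketch (not part of the original suite) of the patching
# these tests assume: the mock_sleep argument presumably comes from a
# decorator such as @mock.patch.object(time, 'sleep') on each method,
# which lives outside this excerpt. Wiring side_effect to list.append
# makes the patched sleep return immediately while recording every
# requested duration:
#
#   import time
#   import unittest.mock as mock
#
#   @mock.patch.object(time, 'sleep')
#   def test_example(self, mock_sleep):
#       slept_seconds = []
#       mock_sleep.side_effect = slept_seconds.append
#       time.sleep(42)  # returns immediately
#       self.assertEqual(slept_seconds, [42])
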
def test_main_with_no_data(self, mock_sleep):
    now = datetime.datetime.now()
    slept_seconds = []
    mock_sleep.side_effect = slept_seconds.append
    # Run the scraper against an empty data directory.
    run_scraper.main([
        'run_as_e2e_test',
        '--num_runs', '1',
        '--rsync_host', 'ndt.iupui.mlab4.xxx08.measurement-lab.org',
        '--rsync_module', 'iupui_ndt',
        '--data_dir', '/scraper_data',
        '--metrics_port', str(EndToEndWithFakes.prometheus_port),
        '--max_uncompressed_size', '1024'
    ])
    # Verify that the sleep time is never too long.
    for time_slept in slept_seconds:
        self.assertLessEqual(time_slept, 3600)
    # Verify that the datastore high-water mark has been set to
    # midnight last night.
    datastore_client = datastore.Client()
    key = datastore_client.key(
        'dropboxrsyncaddress',
        'rsync://ndt.iupui.mlab4.xxx08.measurement-lab.org'
        ':7999/iupui_ndt')
    value = datastore_client.get(key)
    midnight = datetime.datetime(year=now.year, month=now.month,
                                 day=now.day)
    time_since_epoch = (
        midnight - datetime.datetime(1970, 1, 1)).total_seconds()
    self.assertTrue(
        abs(value['maxrawfilemtimearchived'] - time_since_epoch) < 5)
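
# An assumption worth noting: the inline midnight-to-epoch arithmetic
# above presumably computes the same quantity that
# scraper.datetime_to_epoch produces in
# test_main_with_enough_data_for_early_upload. A hypothetical
# equivalent:
#
#   def datetime_to_epoch(datetime_value):
#       return (datetime_value -
#               datetime.datetime(1970, 1, 1)).total_seconds()
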
def test_main_with_recoverable_failure(self, _mock_sleep, mock_download):
    mock_download.side_effect = scraper.RecoverableScraperException(
        'fake_label', 'faked_exception')
    # Verify that the recoverable exception does not rise to the top
    # level.
    run_scraper.main([
        'run_as_e2e_test',
        '--num_runs', '1',
        '--rsync_host', 'ndt.iupui.mlab4.xxx08.measurement-lab.org',
        '--rsync_module', 'iupui_ndt',
        '--data_dir', '/scraper_data',
        '--metrics_port', str(EndToEndWithFakes.prometheus_port),
        '--max_uncompressed_size', '1024'
    ])
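
# A sketch of the failure injection above: assigning an exception
# instance to side_effect makes every call to the patched download
# function raise it, so the test can verify that run_scraper.main
# catches the exception instead of letting it propagate:
#
#   fake_download = mock.Mock(
#       side_effect=scraper.RecoverableScraperException(
#           'fake_label', 'faked_exception'))
#   fake_download()  # raises RecoverableScraperException
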
def test_main(self, _mock_sleep):
    # Add files for yesterday and today. Only yesterday's should get
    # uploaded.
    now = datetime.datetime.now()
    self.create_file(now - datetime.timedelta(days=1, hours=9))
    self.create_file(now)
    # Exactly one tarfile should be uploaded, because today's data is
    # too new.
    run_scraper.main([
        'run_as_e2e_test',
        '--num_runs', '1',
        '--rsync_host', 'ndt.iupui.mlab4.xxx08.measurement-lab.org',
        '--rsync_module', 'iupui_ndt',
        '--data_dir', '/scraper_data',
        '--metrics_port', str(EndToEndWithFakes.prometheus_port),
        '--max_uncompressed_size', '1024'
    ])
    # Verify that the storage service received the file.
    tgzfiles = os.listdir(self.cloud_upload_dir)
    self.assertEqual(len(tgzfiles), 1)