def test_complete_download_from_scratch(self):
    """Run a brand-new download to the end and verify the aggregated outcome.

    Every result yielded by the downloader is folded into flat lists of
    failed and succeeded URLs. Those lists, together with the downloader's
    final progress info, are compared with the expected values. The expected
    URL names presumably come from a fixture prepared outside this method.
    """
    app_state = AppState()
    dconf = DownloadConfiguration(
        number_of_images=10,
        images_per_category=10,
        download_destination=self.image_net_home,
    )
    app_state.set_configuration(dconf)
    downloader = StatefulDownloader(app_state)

    failed_urls = []
    successful_urls = []
    for result in downloader:
        failed_urls.extend(result.failed_urls)
        successful_urls.extend(result.succeeded_urls)

    self.assertEqual(failed_urls, [])
    self.assertEqual(successful_urls,
                     ['url1', 'url2', 'url3', 'url4', 'url5'])

    progress = downloader.progress_info
    self.assertEqual(progress.total_downloaded, 5)
    self.assertEqual(progress.total_failed, 0)
    self.assertTrue(progress.finished)
def test_stopping_and_resuming_with_new_instance(self):
    """Interrupt a download after one batch and resume it with new objects.

    The first downloader is abandoned after yielding a single result. A
    fresh AppState and StatefulDownloader are then created and one more
    result is consumed; the test expects it to contain the remaining URLs
    and the progress info to report the download as finished.
    """
    state = AppState()
    configuration = DownloadConfiguration(
        number_of_images=10,
        images_per_category=12,
        batch_size=3,
        download_destination=self.image_net_home,
    )
    state.set_configuration(configuration)
    loader = StatefulDownloader(state)
    for _ in loader:
        # Stop right after the first batch.
        break

    # Rebind the same names, as a brand-new session would.
    state = AppState()
    loader = StatefulDownloader(state)

    failures = []
    successes = []
    for outcome in loader:
        failures.extend(outcome.failed_urls)
        successes.extend(outcome.succeeded_urls)
        break

    self.assertEqual(failures, [])
    self.assertEqual(successes, ['url4', 'url5'])
    self.assertEqual(loader.progress_info.total_downloaded, 5)
    self.assertEqual(loader.progress_info.total_failed, 0)
    self.assertTrue(loader.progress_info.finished)
def test_remembers_number_of_images_downloaded_for_each_category(self):
    """Check that per-category limits still apply after a resume.

    With one image allowed per category, the download is interrupted after
    the first batch and resumed with fresh AppState / StatefulDownloader
    instances. The single batch consumed after the resume is compared with
    the expected URLs and progress counters.
    """
    state = AppState()
    configuration = DownloadConfiguration(
        number_of_images=4,
        images_per_category=1,
        batch_size=2,
        download_destination=self.image_net_home,
    )
    state.set_configuration(configuration)
    loader = StatefulDownloader(state)
    for _ in loader:
        # Only the first batch is taken before the "restart".
        break

    state = AppState()
    loader = StatefulDownloader(state)

    failures = []
    successes = []
    for outcome in loader:
        failures.extend(outcome.failed_urls)
        successes.extend(outcome.succeeded_urls)
        break

    self.assertEqual(failures, [])
    self.assertEqual(successes, ['url4', 'url5'])
    self.assertEqual(loader.progress_info.total_downloaded, 4)
    self.assertEqual(loader.progress_info.total_failed, 0)
    self.assertTrue(loader.progress_info.finished)
def test_that_images_per_category_parameter_works_correctly(self):
    """Verify which URLs get downloaded when images_per_category is 2.

    The mocked download reports every URL handed to it as succeeded, so
    the collected list shows exactly which URLs the batch passed on.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            # Nothing fails; everything requested counts as downloaded.
            return [], urls

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=2,
        download_destination=self.dataset_location,
        batch_size=3,
    )
    batch = BatchDownloadMocked(conf)
    downloaded = []

    for wn_id, url in (('n123', 'url1'), ('n999', 'url2'), ('n123', 'url3')):
        batch.add(wn_id, url)
    _, succeeded = batch.flush()
    downloaded.extend(succeeded)

    for wn_id, url in (('n123', 'url4'), ('n999', 'url5'),
                       ('n555', 'url6'), ('n555', 'url7')):
        batch.add(wn_id, url)
    _, succeeded = batch.flush()
    downloaded.extend(succeeded)

    # 'url4' is absent from the expectation: it is the third URL for 'n123'.
    self.assertEqual(
        downloaded,
        ['url1', 'url2', 'url3', 'url5', 'url6', 'url7'],
    )
def test_creates_files_as_expected(self):
    """Check the file names on disk after an interrupted and resumed run.

    The first downloader is stopped after one batch; a second one, built
    on a fresh AppState, is run to exhaustion. All file names found under
    the destination directory are then compared with the expected set.
    """
    state = AppState()
    configuration = DownloadConfiguration(
        number_of_images=4,
        images_per_category=1,
        batch_size=2,
        download_destination=self.image_net_home,
    )
    state.set_configuration(configuration)
    loader = StatefulDownloader(state)
    for _ in loader:
        break

    state = AppState()
    loader = StatefulDownloader(state)
    for _ in loader:
        # Drain the resumed downloader completely.
        pass

    found_names = set()
    for _root, _dirs, file_names in os.walk(self.image_net_home):
        found_names.update(file_names)

    self.assertEqual(found_names, {'1', '2', '3', '4'})
def test_completed(self):
    """Check when the `complete` flag flips with two images requested.

    The mocked download fails the first URL of each batch and succeeds on
    the second, so each flush of two URLs yields one successful download.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            # First URL fails, second succeeds; any others are ignored.
            return [urls[0]], [urls[1]]

    conf = DownloadConfiguration(
        number_of_images=2,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=2,
    )
    batch = BatchDownloadMocked(conf)
    self.assertFalse(batch.complete)

    batch.add('wn1', 'url1')
    batch.add('wn2', 'url3')
    batch.flush()
    self.assertFalse(batch.complete)

    batch.add('wn1', 'url42')
    batch.add('wn5', 'url2')
    # Adding alone is expected not to complete the batch; flushing does.
    self.assertFalse(batch.complete)
    batch.flush()
    self.assertTrue(batch.complete)
def test_start_pause_and_resume(self):
    """Start a download, pause it, resume it, and wait for it to finish.

    Checks that `downloadPaused` is emitted after pausing and that
    `allDownloaded` is emitted after resuming, then verifies the resulting
    directories and files through the class's helper assertions.

    The manager thread is stopped in a ``finally`` clause so that a failed
    assertion does not leave it running for the rest of the test run.
    """
    app_state = AppState()
    manager = DownloadManager(app_state)
    conf = DownloadConfiguration(
        number_of_images=5,
        images_per_category=1,
        download_destination=self.image_net_home,
    )
    app_state.set_configuration(conf)

    paused_spy = QSignalSpy(manager.downloadPaused)
    # Created as in the original test, although it is never inspected here.
    resumed_spy = QSignalSpy(manager.downloadResumed)
    finished_spy = QSignalSpy(manager.allDownloaded)

    manager.start()
    try:
        manager.pause_download()
        received = paused_spy.wait(timeout=500)
        self.assertTrue(received)

        time.sleep(0.5)

        manager.resume_download()
        received = finished_spy.wait(timeout=500)
        self.assertTrue(received)

        self._assert_expected_directories_exist()
        self._assert_files_are_correct()
    finally:
        self.stop_the_thread(manager)
def test_reconfiguration(self):
    """Apply a new configuration after a partial download and re-run.

    One batch is downloaded with the first configuration, the destination
    directory is wiped and recreated, and a new downloader is created
    before a different configuration is set on its AppState. One batch
    from that downloader is then checked, along with the files on disk.
    """
    state = AppState()
    first_conf = DownloadConfiguration(
        number_of_images=4,
        images_per_category=1,
        batch_size=2,
        download_destination=self.image_net_home,
    )
    state.set_configuration(first_conf)
    loader = StatefulDownloader(state)
    for _ in loader:
        break

    # Start again from an empty destination directory.
    shutil.rmtree(self.image_net_home)
    os.makedirs(self.image_net_home)

    state = AppState()
    loader = StatefulDownloader(state)
    # The new configuration is applied only after the downloader exists.
    first_conf = DownloadConfiguration(
        number_of_images=2,
        images_per_category=2,
        batch_size=2,
        download_destination=self.image_net_home,
    )
    state.set_configuration(first_conf)

    failures = []
    successes = []
    for outcome in loader:
        failures.extend(outcome.failed_urls)
        successes.extend(outcome.succeeded_urls)
        break

    self.assertEqual(successes, ['url1', 'url2'])
    self.assertEqual(loader.progress_info.total_downloaded, 2)
    self.assertEqual(loader.progress_info.total_failed, 0)

    found_names = set()
    for _root, _dirs, file_names in os.walk(self.image_net_home):
        found_names.update(file_names)
    self.assertEqual(found_names, {'1', '2'})
def test_progress_zero_by_zero(self):
    """Progress is expected to be 0 when zero images are requested."""
    state = AppState()
    zero_images_conf = DownloadConfiguration(
        number_of_images=0,
        images_per_category=83,
        download_destination='481516',
    )
    state.set_configuration(zero_images_conf)

    progress = state.calculate_progress()
    self.assertAlmostEqual(progress, 0)
def test_is_not_valid(self):
    """Invalid configurations report `is_valid` False and list their errors.

    Two cases are covered: an empty destination with a negative image
    count, and a non-existent destination with zero images per category.
    """
    # Case 1: no destination, negative number of images.
    conf = DownloadConfiguration(
        number_of_images=-212,
        images_per_category=33,
        download_destination='',
    )
    self.assertFalse(conf.is_valid)
    expected_errors = [
        'Destination folder for ImageNet was not specified',
        'Number of images must be greater than 0',
    ]
    self.assertEqual(conf.errors, expected_errors)

    # Case 2: destination that does not exist, zero images per category.
    conf = DownloadConfiguration(
        number_of_images=1,
        images_per_category=0,
        download_destination='faef',
    )
    self.assertFalse(conf.is_valid)
    missing_path = os.path.abspath('faef')
    expected_errors = [
        'Path "{}" does not exist'.format(missing_path),
        'Images per category must be greater than 0',
    ]
    self.assertEqual(conf.errors, expected_errors)
def test_errors_after_new_configuration(self):
    """An error recorded earlier is expected to be gone after reconfiguring."""
    state = AppState()
    replacement_conf = DownloadConfiguration(
        number_of_images=0,
        images_per_category=83,
        download_destination='481516',
    )

    state.add_error('abc')
    state.set_configuration(replacement_conf)

    self.assertEqual(state.errors, [])
def test_inprogress_after_configuring(self):
    """`inprogress` is expected to be False once a new configuration is set.

    The state first receives a progress update and is marked finished;
    only then is the new configuration applied.
    """
    state = AppState()
    earlier_result = Result([''], ['afef'])
    state.update_progress(result=earlier_result)
    state.mark_finished()

    replacement_conf = DownloadConfiguration(
        number_of_images=0,
        images_per_category=83,
        download_destination='481516',
    )
    state.set_configuration(replacement_conf)

    self.assertFalse(state.inprogress)
def configure(self, destination, number_of_images, images_per_category):
    """Build a download configuration from user input and apply it.

    Does nothing unless the current state is 'initial' or 'ready'.
    Otherwise the app state is reset and a DownloadConfiguration is built
    with the default batch size. A valid configuration moves the state to
    'ready' and is stored with its destination replaced by the result of
    `_parse_url`. An invalid one moves the state back to 'initial' and its
    errors are passed to `_generate_error_messages`. `stateChanged` is
    emitted in both cases.

    :param destination: destination as supplied by the caller.
    :param number_of_images: total number of images requested.
    :param images_per_category: per-category image limit.
    """
    if self._state not in ('initial', 'ready'):
        return

    self._app_state.reset()

    conf = DownloadConfiguration(
        number_of_images=number_of_images,
        images_per_category=images_per_category,
        download_destination=destination,
        batch_size=config.default_batch_size,
    )

    if not conf.is_valid:
        self._state = 'initial'
        self._generate_error_messages(conf)
    else:
        self._state = 'ready'
        # Validation ran on the raw destination; store the parsed form.
        conf.download_destination = self._parse_url(destination)
        self._app_state.set_configuration(conf)

    self.stateChanged.emit()
def test_folders_are_created(self):
    """Run a download to completion and check the expected directories.

    The manager thread is stopped in a ``finally`` clause so it is cleaned
    up even when waiting or the directory assertion fails.
    """
    app_state = AppState()
    manager = DownloadManager(app_state)
    conf = DownloadConfiguration(
        number_of_images=5,
        images_per_category=10,
        download_destination=self.image_net_home,
    )
    app_state.set_configuration(conf)

    # NOTE(review): assumes stop_the_thread is safe to call even when
    # wait_for_completion failed early - confirm against the helper.
    try:
        self.wait_for_completion(manager)
        self._assert_expected_directories_exist()
    finally:
        self.stop_the_thread(manager)
def test_case_when_requested_number_of_images_is_greater_than_total(self):
    """Request 50 images and verify the downloaded files after completion.

    Judging by the test name, 50 exceeds the number of images the fixture
    provides. The manager thread is stopped in a ``finally`` clause so it
    is cleaned up even when waiting or the file assertion fails.
    """
    app_state = AppState()
    manager = DownloadManager(app_state)
    conf = DownloadConfiguration(
        number_of_images=50,
        images_per_category=100,
        download_destination=self.image_net_home,
    )
    app_state.set_configuration(conf)

    # NOTE(review): assumes stop_the_thread is safe to call even when
    # wait_for_completion failed early - confirm against the helper.
    try:
        self.wait_for_completion(manager)
        self._assert_files_are_correct()
    finally:
        self.stop_the_thread(manager)
def _assert_signal_emitted(self, signal):
    """Start a download and assert that the named manager signal fires.

    :param signal: attribute name of the signal on the DownloadManager.

    The signal must be received within 500 ms of starting the manager.
    The manager thread is stopped in a ``finally`` clause so a missed
    signal does not leave the thread running.
    """
    app_state = AppState()
    manager = DownloadManager(app_state)
    conf = DownloadConfiguration(
        number_of_images=5,
        images_per_category=10,
        download_destination=self.image_net_home,
    )
    app_state.set_configuration(conf)

    # Keep the parameter (a name) distinct from the looked-up signal object.
    bound_signal = getattr(manager, signal)
    spy = QSignalSpy(bound_signal)

    manager.start()
    try:
        received = spy.wait(timeout=500)
        self.assertTrue(received)
    finally:
        self.stop_the_thread(manager)
def test_progress(self):
    """With 9 of 10 requested images downloaded, progress should be 0.9."""
    state = AppState()
    conf = DownloadConfiguration(
        number_of_images=10,
        images_per_category=83,
        download_destination='481516',
    )
    latest = Result(failed_urls=['1', 'one'], succeeded_urls=['x'])
    info = ProgressInfo(
        total_downloaded=9,
        total_failed=38,
        finished=False,
        last_result=latest,
    )

    state.set_configuration(conf)
    state.set_progress_info(info)

    self.assertAlmostEqual(state.calculate_progress(), 0.9)
def test_data_persistence(self):
    """Saved state must be readable from a newly created AppState.

    Configuration, progress info and internal state are set on one
    AppState and saved; a second AppState is then constructed and each
    stored field is compared with what was written.
    """
    state = AppState()
    conf = DownloadConfiguration(
        number_of_images=9309,
        images_per_category=83,
        download_destination='481516',
    )
    latest = Result(failed_urls=['1', 'one'], succeeded_urls=['x'])
    info = ProgressInfo(
        total_downloaded=192,
        total_failed=38,
        finished=False,
        last_result=latest,
    )
    position = Position(3, 1)
    counts = {'wnid1': 29, 'wnid10': 3}
    internal = InternalState(
        iterator_position=position,
        category_counts=counts,
        file_index=322,
    )

    state.set_configuration(conf)
    state.set_progress_info(info)
    state.set_internal_state(internal)
    state.save()

    # A new instance should come up with the data saved above.
    state = AppState()

    restored_conf = state.download_configuration
    self.assertEqual(restored_conf.download_destination, '481516')
    self.assertEqual(restored_conf.number_of_images, 9309)
    self.assertEqual(restored_conf.images_per_category, 83)

    restored_info = state.progress_info
    self.assertEqual(restored_info.total_downloaded, 192)
    self.assertEqual(restored_info.total_failed, 38)
    self.assertEqual(restored_info.finished, False)
    self.assertEqual(restored_info.last_result.failed_urls, ['1', 'one'])
    self.assertEqual(restored_info.last_result.succeeded_urls, ['x'])

    restored_internal = state.internal_state
    self.assertEqual(restored_internal.iterator_position.word_id_offset, 3)
    self.assertEqual(restored_internal.iterator_position.url_offset, 1)
    self.assertEqual(restored_internal.category_counts, counts)
    self.assertEqual(restored_internal.file_index, 322)
def test_images_per_category_argument(self):
    """Download with one image per category and count the files on disk.

    The expected total of 2 files depends on the fixture, which is not
    visible in this method. The manager thread is stopped in a ``finally``
    clause so it is cleaned up even when the count assertion fails.
    """
    app_state = AppState()
    manager = DownloadManager(app_state)
    conf = DownloadConfiguration(
        number_of_images=5,
        images_per_category=1,
        download_destination=self.image_net_home,
        batch_size=1,
    )
    app_state.set_configuration(conf)

    # NOTE(review): assumes stop_the_thread is safe to call even when
    # wait_for_completion failed early - confirm against the helper.
    try:
        self.wait_for_completion(manager)
        files_count = sum(
            len(file_names)
            for _root, _dirs, file_names in os.walk(self.image_net_home)
        )
        self.assertEqual(files_count, 2)
    finally:
        self.stop_the_thread(manager)
def test_flush_downloads_correctly(self):
    """`flush` must return exactly what `do_download` reports.

    The mocked download returns fixed failed/succeeded lists regardless of
    its input, and the test checks that `flush` passes them through.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            return ['url1', 'url3'], ['url2']

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=100,
    )
    batch = BatchDownloadMocked(conf)
    batch.add('wn1', 'url1')
    batch.add('wn1', 'url2')
    batch.add('wn3', 'url3')

    failed, downloaded = batch.flush()

    self.assertEqual(failed, ['url1', 'url3'])
    self.assertEqual(downloaded, ['url2'])
def test_flush_creates_directories(self):
    """Flushing must create one directory per category that was added.

    The mocked download reports every URL as succeeded. Afterwards all
    directory names found under the dataset location are collected and
    compared with the expected category names.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            return [], urls

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=100,
    )
    d = BatchDownloadMocked(conf)
    for wn_id, url in [('wn1', 'url1'), ('wn2', 'url2'), ('wn2', 'x')]:
        d.add(wn_id, url)
    d.flush()

    dirs = []
    for _dirname, dir_names, _file_names in os.walk(self.dataset_location):
        dirs.extend(dir_names)

    # os.walk lists directory entries in arbitrary, filesystem-dependent
    # order, so sort before comparing. Sorting keeps duplicates visible,
    # which a set comparison would hide.
    self.assertEqual(sorted(dirs), ['wn1', 'wn2'])
def test_with_both_limiting_parameters(self):
    """Exercise `batch_ready` and `complete` with both limits in force.

    The configuration limits the download to 7 images in total and 2 per
    category, with a batch size of 2. The mocked download reports every
    URL as succeeded.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            return [], urls

    conf = DownloadConfiguration(
        number_of_images=7,
        images_per_category=2,
        download_destination=self.dataset_location,
        batch_size=2,
    )
    batch = BatchDownloadMocked(conf)

    # Round 1: two URLs for 'n1' are expected to make a ready batch.
    batch.add('n1', 'url1')
    batch.add('n1', 'url2')
    self.assertTrue(batch.batch_ready)
    batch.flush()

    # Round 2: a third 'n1' URL plus one 'n2' URL is expected NOT to be
    # ready; presumably 'url3' is rejected by the per-category limit -
    # confirm against BatchDownload.add.
    batch.add('n1', 'url3')
    batch.add('n2', 'url4')
    self.assertFalse(batch.batch_ready)
    batch.add('n3', 'url5')
    self.assertTrue(batch.batch_ready)
    batch.flush()

    # Round 3: two more 'n3' URLs are expected to make a ready batch.
    batch.add('n3', 'url6')
    batch.add('n3', 'url7')
    self.assertTrue(batch.batch_ready)
    batch.flush()

    # Round 4: neither ready nor complete until a final add and flush.
    batch.add('n3', 'url8')
    batch.add('n4', 'url9')
    self.assertFalse(batch.batch_ready)
    self.assertFalse(batch.complete)
    batch.add('n5', 'url10')
    batch.flush()
    self.assertTrue(batch.complete)
def test_to_json(self):
    """`to_json` must expose configuration, progress and errors.

    An AppState is given a configuration, progress info, internal state
    and one error. Its JSON output is parsed and each expected key is
    checked.
    """
    state = AppState()
    conf = DownloadConfiguration(
        number_of_images=9309,
        images_per_category=83,
        download_destination='481516',
    )
    latest = Result(failed_urls=['1', 'one'], succeeded_urls=['x'])
    info = ProgressInfo(
        total_downloaded=192,
        total_failed=38,
        finished=False,
        last_result=latest,
    )
    position = Position(3, 1)
    counts = {'wnid1': 29, 'wnid10': 3}
    internal = InternalState(
        iterator_position=position,
        category_counts=counts,
        file_index=322,
    )

    state.set_configuration(conf)
    state.set_progress_info(info)
    state.set_internal_state(internal)
    state.add_error('Some error')

    serialized = state.to_json()
    payload = json.loads(serialized)

    self.assertEqual(payload['downloadPath'], '481516')
    self.assertEqual(payload['numberOfImages'], 9309)
    self.assertEqual(payload['imagesPerCategory'], 83)
    self.assertNotEqual(payload['timeLeft'], '')
    self.assertEqual(payload['imagesLoaded'], 192)
    self.assertEqual(payload['failures'], 38)
    self.assertEqual(payload['failedUrls'], ['1', 'one'])
    self.assertEqual(payload['succeededUrls'], ['x'])
    self.assertEqual(payload['errors'], ['Some error'])
    self.assertAlmostEqual(payload['progress'], 192.0 / 9309)
def test_complete_after_getting_more_images_than_was_requested(self):
    """`complete` must become True even when the requested count is passed.

    Three images are requested but each flush downloads two, so the
    second flush brings the total to four.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            return [], urls

    conf = DownloadConfiguration(
        number_of_images=3,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=2,
    )
    batch = BatchDownloadMocked(conf)

    for wn_id, url in (('wn1', 'url1'), ('wn2', 'url3')):
        batch.add(wn_id, url)
    batch.flush()
    self.assertFalse(batch.complete)

    for wn_id, url in (('wn1', 'url42'), ('wn5', 'url2')):
        batch.add(wn_id, url)
    batch.flush()
    self.assertTrue(batch.complete)
def test_flush_removes_elements_in_buffer(self):
    """A second `flush` with nothing new added must return empty lists.

    The mocked download reports every URL it receives as failed, so a
    non-empty second result would show that the buffer was not cleared.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            # Everything handed in is reported as failed.
            return urls, []

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=2,
    )
    batch = BatchDownloadMocked(conf)
    for wn_id, url in (('wn1', 'url1'), ('wn2', 'url2'), ('wn3', 'url3')):
        batch.add(wn_id, url)

    batch.flush()
    failed, downloaded = batch.flush()

    self.assertEqual(failed, [])
    self.assertEqual(downloaded, [])
def test_batch_ready(self):
    """`batch_ready` must turn True once `batch_size` URLs were added.

    The mocked download fails the first URL and succeeds on the second,
    which the test also verifies through the result of `flush`.
    """
    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            return [urls[0]], [urls[1]]

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=2,
    )
    batch = BatchDownloadMocked(conf)

    batch.add('wn1', 'url1')
    self.assertFalse(batch.batch_ready)
    batch.add('wn1', 'url2')
    self.assertTrue(batch.batch_ready)

    failed, downloaded = batch.flush()
    self.assertEqual(failed, ['url1'])
    self.assertEqual(downloaded, ['url2'])
def test_destination_paths(self):
    """Check the destination paths that `flush` hands to `do_download`.

    The mocked download records the destinations it receives. Each is
    expected to be <dataset>/<category>/<index><extension>, with the index
    running across categories and the extension taken from the URL.
    """
    recorded = []

    class BatchDownloadMocked(batch_download.BatchDownload):
        def do_download(self, urls, destinations):
            recorded.extend(destinations)
            return [], urls

    conf = DownloadConfiguration(
        number_of_images=100,
        images_per_category=100,
        download_destination=self.dataset_location,
        batch_size=3,
    )
    batch = BatchDownloadMocked(conf)
    batch.add('dogs', 'url1.jpg')
    batch.add('cats', 'url2.png')
    batch.add('dogs', 'url2.gif')
    batch.flush()

    root = self.dataset_location
    expected = [
        os.path.join(root, 'dogs', '1.jpg'),
        os.path.join(root, 'cats', '2.png'),
        os.path.join(root, 'dogs', '3.gif'),
    ]
    self.assertEqual(recorded, expected)
def test_is_valid(self):
    """A configuration with positive counts and destination 'temp' is valid.

    NOTE(review): presumably relies on a 'temp' directory existing in the
    working directory - confirm against the test fixture.
    """
    valid_conf = DownloadConfiguration(
        number_of_images=1,
        images_per_category=1,
        download_destination='temp',
    )

    self.assertTrue(valid_conf.is_valid)
    self.assertEqual(valid_conf.errors, [])