예제 #1
0
    def test_complete_download_from_scratch(self):
        """Run a fresh download to the end and check the aggregate outcome.

        Every result yielded by the downloader contributes its failed and
        succeeded URLs to running lists.  The run is expected to report the
        five URLs 'url1'..'url5' as succeeded, nothing as failed, and to
        leave the progress info marked as finished.
        """
        app_state = AppState()

        dconf = DownloadConfiguration(number_of_images=10,
                                      images_per_category=10,
                                      download_destination=self.image_net_home)
        app_state.set_configuration(dconf)
        downloader = StatefulDownloader(app_state)

        # Only the URL lists are asserted on, so the yielded result objects
        # themselves are not retained.
        failed_urls = []
        successful_urls = []
        for result in downloader:
            failed_urls.extend(result.failed_urls)
            successful_urls.extend(result.succeeded_urls)

        self.assertEqual(failed_urls, [])
        self.assertEqual(successful_urls,
                         ['url1', 'url2', 'url3', 'url4', 'url5'])

        self.assertEqual(downloader.progress_info.total_downloaded, 5)
        self.assertEqual(downloader.progress_info.total_failed, 0)

        self.assertTrue(downloader.progress_info.finished)
예제 #2
0
    def test_stopping_and_resuming_with_new_instance(self):
        """Abandon a download after one result and resume it afresh.

        The first downloader is dropped after yielding a single result.  A
        second downloader built on a newly constructed ``AppState`` is then
        expected to yield 'url4' and 'url5' as its first result, and to
        report five downloads in total, no failures and a finished run.
        """
        state = AppState()
        state.set_configuration(
            DownloadConfiguration(number_of_images=10,
                                  images_per_category=12,
                                  batch_size=3,
                                  download_destination=self.image_net_home))
        downloader = StatefulDownloader(state)

        # Take exactly one result, then walk away from the iteration.
        for _ in downloader:
            break

        state = AppState()
        downloader = StatefulDownloader(state)

        failed, succeeded = [], []
        for batch_result in downloader:
            failed += batch_result.failed_urls
            succeeded += batch_result.succeeded_urls
            break

        self.assertEqual(failed, [])
        self.assertEqual(succeeded, ['url4', 'url5'])

        progress = downloader.progress_info
        self.assertEqual(progress.total_downloaded, 5)
        self.assertEqual(progress.total_failed, 0)
        self.assertTrue(progress.finished)
예제 #3
0
    def test_remembers_number_of_images_downloaded_for_each_category(self):
        """Resume an interrupted run limited to one image per category.

        The first downloader is stopped after a single result.  The first
        result of a second downloader, built on a new ``AppState``, is
        expected to hold 'url4' and 'url5'; the progress info must then
        show four downloads, no failures and a finished run.
        """
        state = AppState()
        state.set_configuration(
            DownloadConfiguration(number_of_images=4,
                                  images_per_category=1,
                                  batch_size=2,
                                  download_destination=self.image_net_home))
        downloader = StatefulDownloader(state)

        # Take exactly one result, then walk away from the iteration.
        for _ in downloader:
            break

        state = AppState()
        downloader = StatefulDownloader(state)

        failed, succeeded = [], []
        for batch_result in downloader:
            failed += batch_result.failed_urls
            succeeded += batch_result.succeeded_urls
            break

        self.assertEqual(failed, [])
        self.assertEqual(succeeded, ['url4', 'url5'])

        progress = downloader.progress_info
        self.assertEqual(progress.total_downloaded, 4)
        self.assertEqual(progress.total_failed, 0)
        self.assertTrue(progress.finished)
    def test_that_images_per_category_parameter_works_correctly(self):
        """Check that URLs beyond the per-category quota are not downloaded.

        The download stub reports every URL it receives as succeeded.  With
        ``images_per_category=2``, 'url4' (a third URL for 'n123') is
        expected to be absent from the URLs returned by the two flushes.
        """
        class SucceedingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [], urls

        collected = []

        batch = SucceedingBatchDownload(DownloadConfiguration(
            number_of_images=100,
            images_per_category=2,
            download_destination=self.dataset_location,
            batch_size=3))

        first_round = [('n123', 'url1'), ('n999', 'url2'), ('n123', 'url3')]
        for word_id, url in first_round:
            batch.add(word_id, url)
        _, succeeded = batch.flush()
        collected += succeeded

        second_round = [('n123', 'url4'), ('n999', 'url5'),
                        ('n555', 'url6'), ('n555', 'url7')]
        for word_id, url in second_round:
            batch.add(word_id, url)
        _, succeeded = batch.flush()
        collected += succeeded

        self.assertEqual(collected,
                         ['url1', 'url2', 'url3', 'url5', 'url6', 'url7'])
예제 #5
0
    def test_creates_files_as_expected(self):
        """Check the files left on disk by an interrupted, then resumed run.

        A first downloader is stopped at its first result; a second one,
        built on a new ``AppState``, is drained completely.  The files found
        anywhere under ``self.image_net_home`` are then expected to be named
        exactly '1', '2', '3' and '4'.
        """
        state = AppState()
        state.set_configuration(
            DownloadConfiguration(number_of_images=4,
                                  images_per_category=1,
                                  batch_size=2,
                                  download_destination=self.image_net_home))
        downloader = StatefulDownloader(state)

        # Take exactly one result, then walk away from the iteration.
        for _ in downloader:
            break

        state = AppState()
        downloader = StatefulDownloader(state)

        # Exhaust the resumed downloader; the results are not inspected.
        for _ in downloader:
            pass

        found_names = set()
        for _, _, file_names in os.walk(self.image_net_home):
            found_names.update(file_names)

        self.assertEqual(found_names, {'1', '2', '3', '4'})
    def test_completed(self):
        """Follow the ``complete`` flag across two flushed batches.

        The download stub fails the first URL of a batch and succeeds on the
        second.  With ``number_of_images=2`` the flag is expected to stay
        false before and after the first flush, remain false once the second
        batch is queued, and turn true only after the second flush.
        """
        class HalfFailingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [urls[0]], [urls[1]]

        batch = HalfFailingBatchDownload(DownloadConfiguration(
            number_of_images=2,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=2))

        self.assertFalse(batch.complete)

        for word_id, url in (('wn1', 'url1'), ('wn2', 'url3')):
            batch.add(word_id, url)
        batch.flush()

        self.assertFalse(batch.complete)

        for word_id, url in (('wn1', 'url42'), ('wn5', 'url2')):
            batch.add(word_id, url)
        self.assertFalse(batch.complete)

        batch.flush()
        self.assertTrue(batch.complete)
    def test_start_pause_and_resume(self):
        """Pause a started manager, resume it, and expect it to finish.

        Spies are attached to the ``downloadPaused``, ``downloadResumed``
        and ``allDownloaded`` signals.  The pause and the final completion
        must each be signalled within their ``timeout=500`` wait; afterwards
        the expected directories and files are verified and the manager
        thread is stopped.
        """
        state = AppState()
        manager = DownloadManager(state)
        state.set_configuration(
            DownloadConfiguration(number_of_images=5,
                                  images_per_category=1,
                                  download_destination=self.image_net_home))

        paused_spy = QSignalSpy(manager.downloadPaused)
        # Attached like the other spies, but never waited on below.
        resumed_spy = QSignalSpy(manager.downloadResumed)
        finished_spy = QSignalSpy(manager.allDownloaded)

        manager.start()
        manager.pause_download()
        pause_seen = paused_spy.wait(timeout=500)
        self.assertTrue(pause_seen)

        # Stay paused for half a second before resuming.
        time.sleep(0.5)
        manager.resume_download()

        finish_seen = finished_spy.wait(timeout=500)
        self.assertTrue(finish_seen)

        self._assert_expected_directories_exist()
        self._assert_files_are_correct()

        self.stop_the_thread(manager)
예제 #8
0
    def test_reconfiguration(self):
        """Reconfigure an interrupted download and check the new run.

        A first run (four images, one per category) is stopped after its
        first result, and the destination directory is wiped and recreated.
        A new ``AppState`` is then given a different configuration (two
        images, two per category).  The first result of the new downloader
        is expected to contain 'url1' and 'url2' with no failures, the
        progress counters to read 2 downloaded / 0 failed, and the files on
        disk to be named exactly '1' and '2'.
        """
        app_state = AppState()

        dconf = DownloadConfiguration(number_of_images=4,
                                      images_per_category=1,
                                      batch_size=2,
                                      download_destination=self.image_net_home)
        app_state.set_configuration(dconf)
        downloader = StatefulDownloader(app_state)

        # Take exactly one result, then walk away from the iteration.
        for _ in downloader:
            break

        # Start from an empty destination directory again.
        shutil.rmtree(self.image_net_home)
        os.makedirs(self.image_net_home)

        app_state = AppState()

        # The downloader is created before the new configuration is applied
        # to the state object it was given.
        downloader = StatefulDownloader(app_state)
        dconf = DownloadConfiguration(number_of_images=2,
                                      images_per_category=2,
                                      batch_size=2,
                                      download_destination=self.image_net_home)

        app_state.set_configuration(dconf)

        failed_urls = []
        successful_urls = []
        for result in downloader:
            failed_urls.extend(result.failed_urls)
            successful_urls.extend(result.succeeded_urls)
            break

        # The failed URLs were collected but never checked; assert on them
        # the same way the other resume tests do.
        self.assertEqual(failed_urls, [])
        self.assertEqual(successful_urls, ['url1', 'url2'])

        self.assertEqual(downloader.progress_info.total_downloaded, 2)
        self.assertEqual(downloader.progress_info.total_failed, 0)

        fnames = []
        for dirname, dirs, file_names in os.walk(self.image_net_home):
            fnames.extend(file_names)

        expected_names = ['1', '2']

        self.assertEqual(set(fnames), set(expected_names))
    def test_progress_zero_by_zero(self):
        """Expect a progress of 0 when zero images are requested.

        No progress info is set explicitly in this test; the check pins the
        value ``calculate_progress`` returns for ``number_of_images=0``.
        """
        state = AppState()
        state.set_configuration(
            DownloadConfiguration(number_of_images=0,
                                  images_per_category=83,
                                  download_destination='481516'))

        progress = state.calculate_progress()
        self.assertAlmostEqual(progress, 0)
    def test_is_not_valid(self):
        """Check ``is_valid`` and ``errors`` for two invalid configurations.

        The first has an empty destination and a negative image count; the
        second has a destination 'faef' and zero images per category.  Each
        must be reported invalid with exactly two error messages, in the
        order asserted below.
        """
        unset_destination = DownloadConfiguration(number_of_images=-212,
                                                  images_per_category=33,
                                                  download_destination='')
        self.assertFalse(unset_destination.is_valid)
        self.assertEqual(unset_destination.errors, [
            'Destination folder for ImageNet was not specified',
            'Number of images must be greater than 0'
        ])

        missing_path = DownloadConfiguration(number_of_images=1,
                                             images_per_category=0,
                                             download_destination='faef')
        self.assertFalse(missing_path.is_valid)

        # The path error is expected to quote the absolute form of 'faef'.
        absolute = os.path.abspath('faef')
        self.assertEqual(missing_path.errors, [
            'Path "{}" does not exist'.format(absolute),
            'Images per category must be greater than 0'
        ])
    def test_errors_after_new_configuration(self):
        """Expect ``errors`` to be empty after a new configuration is set.

        An error is added to the state first; ``set_configuration`` is then
        called and ``errors`` must compare equal to an empty list.
        """
        state = AppState()
        state.add_error('abc')

        state.set_configuration(
            DownloadConfiguration(number_of_images=0,
                                  images_per_category=83,
                                  download_destination='481516'))

        self.assertEqual(state.errors, [])
    def test_inprogress_after_configuring(self):
        """Expect ``inprogress`` to be false after a new configuration.

        The state first receives a progress update and is marked finished;
        a new configuration is then applied before the flag is checked.
        """
        state = AppState()
        state.update_progress(result=Result([''], ['afef']))
        state.mark_finished()

        state.set_configuration(
            DownloadConfiguration(number_of_images=0,
                                  images_per_category=83,
                                  download_destination='481516'))

        self.assertFalse(state.inprogress)
    def configure(self, destination, number_of_images,
                  images_per_category):
        """Build a download configuration and move to 'ready' or 'initial'.

        Does nothing unless the current state is 'initial' or 'ready'.
        Otherwise the application state is reset and a configuration is
        built from the arguments, with ``config.default_batch_size`` as the
        batch size.  A valid configuration gets its destination replaced by
        ``self._parse_url(destination)``, is stored in the application state
        and the state becomes 'ready'.  An invalid one sets the state to
        'initial' and is handed to ``self._generate_error_messages``.
        ``stateChanged`` is emitted in both cases.
        """
        if self._state not in ('initial', 'ready'):
            return

        self._app_state.reset()

        settings = DownloadConfiguration(
            number_of_images=number_of_images,
            images_per_category=images_per_category,
            download_destination=destination,
            batch_size=config.default_batch_size)

        if not settings.is_valid:
            self._state = 'initial'
            self._generate_error_messages(settings)
        else:
            self._state = 'ready'
            # Validation ran on the raw destination; the stored
            # configuration carries the parsed one.
            settings.download_destination = self._parse_url(destination)
            self._app_state.set_configuration(settings)

        self.stateChanged.emit()
    def test_folders_are_created(self):
        """Expect the expected directories to exist after a completed run.

        The manager is driven through ``self.wait_for_completion`` and the
        outcome is checked by ``self._assert_expected_directories_exist``
        before the manager thread is stopped.
        """
        state = AppState()
        manager = DownloadManager(state)
        state.set_configuration(
            DownloadConfiguration(number_of_images=5,
                                  images_per_category=10,
                                  download_destination=self.image_net_home))

        self.wait_for_completion(manager)
        self._assert_expected_directories_exist()
        self.stop_the_thread(manager)
    def test_case_when_requested_number_of_images_is_greater_than_total(self):
        """Request 50 images and check the files of the completed run.

        The manager is driven through ``self.wait_for_completion`` and the
        outcome is checked by ``self._assert_files_are_correct`` before the
        manager thread is stopped.
        """
        state = AppState()
        manager = DownloadManager(state)
        state.set_configuration(
            DownloadConfiguration(number_of_images=50,
                                  images_per_category=100,
                                  download_destination=self.image_net_home))

        self.wait_for_completion(manager)
        self._assert_files_are_correct()
        self.stop_the_thread(manager)
    def _assert_signal_emitted(self, signal):
        """Assert that a started manager emits the named signal.

        ``signal`` is the attribute name of a signal on a freshly built
        ``DownloadManager``.  A spy is attached to that signal, the manager
        is started, and the spy's ``wait(timeout=500)`` must report success.
        The manager thread is stopped afterwards.
        """
        state = AppState()
        manager = DownloadManager(state)
        state.set_configuration(
            DownloadConfiguration(number_of_images=5,
                                  images_per_category=10,
                                  download_destination=self.image_net_home))

        # Resolve the attribute name to the signal object itself.
        bound_signal = getattr(manager, signal)
        spy = QSignalSpy(bound_signal)

        manager.start()
        self.assertTrue(spy.wait(timeout=500))

        self.stop_the_thread(manager)
    def test_progress(self):
        """Expect a progress of 0.9 for 9 downloads out of 10 requested.

        The state is given a configuration with ``number_of_images=10`` and
        a progress info with ``total_downloaded=9`` before
        ``calculate_progress`` is checked.
        """
        state = AppState()

        settings = DownloadConfiguration(number_of_images=10,
                                         images_per_category=83,
                                         download_destination='481516')
        snapshot = ProgressInfo(
            total_downloaded=9,
            total_failed=38,
            finished=False,
            last_result=Result(failed_urls=['1', 'one'],
                               succeeded_urls=['x']))

        state.set_configuration(settings)
        state.set_progress_info(snapshot)

        progress = state.calculate_progress()
        self.assertAlmostEqual(progress, 0.9)
    def test_data_persistence(self):
        """Save an application state and read it back via a new instance.

        Configuration, progress info and internal state are set on one
        ``AppState`` and saved.  A second ``AppState()`` is then expected to
        expose the same values through ``download_configuration``,
        ``progress_info`` and ``internal_state``.
        """
        category_counts = {'wnid1': 29, 'wnid10': 3}

        state = AppState()
        state.set_configuration(
            DownloadConfiguration(number_of_images=9309,
                                  images_per_category=83,
                                  download_destination='481516'))
        state.set_progress_info(
            ProgressInfo(total_downloaded=192,
                         total_failed=38,
                         finished=False,
                         last_result=Result(failed_urls=['1', 'one'],
                                            succeeded_urls=['x'])))
        state.set_internal_state(
            InternalState(iterator_position=Position(3, 1),
                          category_counts=category_counts,
                          file_index=322))
        state.save()

        # A second instance is the only source for everything checked below.
        state = AppState()

        restored_conf = state.download_configuration
        self.assertEqual(restored_conf.download_destination, '481516')
        self.assertEqual(restored_conf.number_of_images, 9309)
        self.assertEqual(restored_conf.images_per_category, 83)

        restored_progress = state.progress_info
        self.assertEqual(restored_progress.total_downloaded, 192)
        self.assertEqual(restored_progress.total_failed, 38)
        self.assertEqual(restored_progress.finished, False)
        self.assertEqual(restored_progress.last_result.failed_urls,
                         ['1', 'one'])
        self.assertEqual(restored_progress.last_result.succeeded_urls, ['x'])

        restored_internal = state.internal_state
        self.assertEqual(restored_internal.iterator_position.word_id_offset,
                         3)
        self.assertEqual(restored_internal.iterator_position.url_offset, 1)
        self.assertEqual(restored_internal.category_counts, category_counts)
        self.assertEqual(restored_internal.file_index, 322)
    def test_images_per_category_argument(self):
        """Expect two files on disk with one image allowed per category.

        The manager is driven through ``self.wait_for_completion``; the
        files found anywhere under ``self.image_net_home`` are then counted
        and the manager thread is stopped.
        """
        state = AppState()
        manager = DownloadManager(state)
        state.set_configuration(
            DownloadConfiguration(number_of_images=5,
                                  images_per_category=1,
                                  download_destination=self.image_net_home,
                                  batch_size=1))

        self.wait_for_completion(manager)

        total_files = sum(
            len(file_names)
            for _, _, file_names in os.walk(self.image_net_home))
        self.assertEqual(total_files, 2)

        self.stop_the_thread(manager)
    def test_flush_downloads_correctly(self):
        """Expect ``flush`` to hand back what ``do_download`` reported.

        The download stub always reports 'url1' and 'url3' as failed and
        'url2' as succeeded; ``flush`` must return the same two lists.
        """
        class FixedOutcomeBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return ['url1', 'url3'], ['url2']

        batch = FixedOutcomeBatchDownload(DownloadConfiguration(
            number_of_images=100,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=100))

        batch.add('wn1', 'url1')
        batch.add('wn1', 'url2')
        batch.add('wn3', 'url3')

        failed, succeeded = batch.flush()

        self.assertEqual(failed, ['url1', 'url3'])
        self.assertEqual(succeeded, ['url2'])
    def test_flush_creates_directories(self):
        """Expect ``flush`` to leave 'wn1' and 'wn2' directories on disk.

        Three URLs for the word ids 'wn1' and 'wn2' are queued and flushed
        through a download stub that reports every URL as succeeded.  The
        directory names found under ``self.dataset_location`` must then be
        exactly 'wn1' and 'wn2'.
        """
        class BatchDownloadMocked(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [], urls

        conf = DownloadConfiguration(
            number_of_images=100,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=100)
        d = BatchDownloadMocked(conf)

        for wn_id, url in [('wn1', 'url1'), ('wn2', 'url2'), ('wn2', 'x')]:
            d.add(wn_id, url)

        d.flush()

        dirs = []
        for dirname, dir_names, file_names in os.walk(self.dataset_location):
            dirs.extend(dir_names)

        # os.walk lists directory entries in arbitrary order, so compare
        # the sorted names instead of relying on the file system's order.
        self.assertEqual(sorted(dirs), ['wn1', 'wn2'])
    def test_with_both_limiting_parameters(self):
        """Track ``batch_ready`` and ``complete`` with both limits active.

        The batch is configured with ``number_of_images=7``,
        ``images_per_category=2`` and ``batch_size=2`` and uses a download
        stub that reports every URL as succeeded.  A fixed sequence of adds
        and flushes is replayed, with the two flags checked at the points
        asserted below.
        """
        class SucceedingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [], urls

        batch = SucceedingBatchDownload(DownloadConfiguration(
            number_of_images=7,
            images_per_category=2,
            download_destination=self.dataset_location,
            batch_size=2))

        batch.add('n1', 'url1')
        batch.add('n1', 'url2')
        self.assertTrue(batch.batch_ready)
        batch.flush()

        # Two adds here are expected to leave the batch not ready; the
        # third add makes it ready.
        batch.add('n1', 'url3')
        batch.add('n2', 'url4')
        self.assertFalse(batch.batch_ready)

        batch.add('n3', 'url5')
        self.assertTrue(batch.batch_ready)
        batch.flush()

        batch.add('n3', 'url6')
        batch.add('n3', 'url7')
        self.assertTrue(batch.batch_ready)
        batch.flush()

        batch.add('n3', 'url8')
        batch.add('n4', 'url9')
        self.assertFalse(batch.batch_ready)
        self.assertFalse(batch.complete)

        batch.add('n5', 'url10')
        batch.flush()

        self.assertTrue(batch.complete)
    def test_to_json(self):
        """Check the keys and values of the JSON produced by ``to_json``.

        The state is given a configuration, a progress info, an internal
        state and one error.  The parsed JSON must carry the values asserted
        below, a non-empty 'timeLeft', and a 'progress' of 192 / 9309.
        """
        state = AppState()

        state.set_configuration(
            DownloadConfiguration(number_of_images=9309,
                                  images_per_category=83,
                                  download_destination='481516'))
        state.set_progress_info(
            ProgressInfo(total_downloaded=192,
                         total_failed=38,
                         finished=False,
                         last_result=Result(failed_urls=['1', 'one'],
                                            succeeded_urls=['x'])))
        state.set_internal_state(
            InternalState(iterator_position=Position(3, 1),
                          category_counts={'wnid1': 29, 'wnid10': 3},
                          file_index=322))

        state.add_error('Some error')

        payload = json.loads(state.to_json())

        for key, expected in (('downloadPath', '481516'),
                              ('numberOfImages', 9309),
                              ('imagesPerCategory', 83)):
            self.assertEqual(payload[key], expected)

        self.assertNotEqual(payload['timeLeft'], '')

        for key, expected in (('imagesLoaded', 192),
                              ('failures', 38),
                              ('failedUrls', ['1', 'one']),
                              ('succeededUrls', ['x']),
                              ('errors', ['Some error'])):
            self.assertEqual(payload[key], expected)

        self.assertAlmostEqual(payload['progress'], 192.0 / 9309)
    def test_complete_after_getting_more_images_than_was_requested(self):
        """Expect ``complete`` once more URLs succeeded than were requested.

        With ``number_of_images=3``, ``batch_size=2`` and a download stub
        that reports every URL as succeeded, the flag must be false after
        the first flush and true after the second.
        """
        class SucceedingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [], urls

        batch = SucceedingBatchDownload(DownloadConfiguration(
            number_of_images=3,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=2))

        for word_id, url in (('wn1', 'url1'), ('wn2', 'url3')):
            batch.add(word_id, url)
        batch.flush()
        self.assertFalse(batch.complete)

        for word_id, url in (('wn1', 'url42'), ('wn5', 'url2')):
            batch.add(word_id, url)
        batch.flush()
        self.assertTrue(batch.complete)
    def test_flush_removes_elements_in_buffer(self):
        """Expect a second ``flush`` right after the first to return nothing.

        Three URLs are queued and flushed through a download stub that
        reports every URL as failed.  An immediate second ``flush`` must
        return two empty lists.
        """
        class FailingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return urls, []

        batch = FailingBatchDownload(DownloadConfiguration(
            number_of_images=100,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=2))

        for word_id, url in (('wn1', 'url1'),
                             ('wn2', 'url2'),
                             ('wn3', 'url3')):
            batch.add(word_id, url)

        batch.flush()
        failed, succeeded = batch.flush()

        self.assertEqual(failed, [])
        self.assertEqual(succeeded, [])
    def test_batch_ready(self):
        """Expect ``batch_ready`` only once ``batch_size`` URLs are queued.

        With ``batch_size=2`` the flag must be false after one add and true
        after two.  The download stub fails the first URL and succeeds on
        the second, and ``flush`` must return exactly those two lists.
        """
        class HalfFailingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                return [urls[0]], [urls[1]]

        batch = HalfFailingBatchDownload(DownloadConfiguration(
            number_of_images=100,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=2))

        batch.add('wn1', 'url1')
        self.assertFalse(batch.batch_ready)

        batch.add('wn1', 'url2')
        self.assertTrue(batch.batch_ready)

        failed, succeeded = batch.flush()
        self.assertEqual(failed, ['url1'])
        self.assertEqual(succeeded, ['url2'])
    def test_destination_paths(self):
        """Check the destination paths handed to ``do_download``.

        The download stub records the ``destinations`` it receives.  For
        three queued URLs the recorded paths must be
        ``<dataset_location>/dogs/1.jpg``, ``<dataset_location>/cats/2.png``
        and ``<dataset_location>/dogs/3.gif``, in that order.
        """
        recorded = []

        class RecordingBatchDownload(batch_download.BatchDownload):
            def do_download(self, urls, destinations):
                recorded.extend(destinations)
                return [], urls

        batch = RecordingBatchDownload(DownloadConfiguration(
            number_of_images=100,
            images_per_category=100,
            download_destination=self.dataset_location,
            batch_size=3))

        for word_id, url in (('dogs', 'url1.jpg'),
                             ('cats', 'url2.png'),
                             ('dogs', 'url2.gif')):
            batch.add(word_id, url)
        batch.flush()

        expected = [
            os.path.join(self.dataset_location, folder, file_name)
            for folder, file_name in (('dogs', '1.jpg'),
                                      ('cats', '2.png'),
                                      ('dogs', '3.gif'))
        ]
        self.assertEqual(recorded, expected)
 def test_is_valid(self):
     """Expect a configuration with destination 'temp' to be valid.

     ``is_valid`` must be true and ``errors`` must be an empty list for
     one image, one image per category and destination 'temp'.
     """
     settings = DownloadConfiguration(number_of_images=1,
                                      images_per_category=1,
                                      download_destination='temp')
     valid = settings.is_valid
     self.assertTrue(valid)
     self.assertEqual(settings.errors, [])