class TestImageDownloader(unittest.TestCase):
    """Unit tests for ``ImageDownloader``.

    Two downloader instances are created before every test and released
    afterwards; both are built from the same URL-list file and destination
    folder.
    """

    @classmethod
    def setUpClass(cls):
        # No class-level fixtures are needed.
        pass

    @classmethod
    def tearDownClass(cls):
        # Nothing to clean up at class level.
        pass

    def setUp(self):
        """Build two fresh downloaders for the test about to run."""
        urls_path = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
        images_dir = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'
        self.downloader_1 = ImageDownloader(file_urls=urls_path,
                                            destination=images_dir)
        self.downloader_2 = ImageDownloader(file_urls=urls_path,
                                            destination=images_dir)

    def tearDown(self):
        """Drop the references to the downloaders created in ``setUp``."""
        self.downloader_1 = None
        self.downloader_2 = None

    def test_download_image(self):
        """Check ``download_image`` results with ``requests.get`` and ``open`` patched."""
        image_url = 'http://company.com/image1.png'
        with patch('image_downloader.requests.get') as fake_get, \
                patch('builtins.open', unittest.mock.mock_open()):
            fake_response = fake_get.return_value

            # Response reports ok: the download is expected to succeed.
            fake_response.ok = True
            fake_response.content = b'imagevalue'
            success_result = self.downloader_1.download_image(image_url)
            fake_get.assert_called_with(image_url, timeout=10)
            self.assertEqual(success_result, 'Download Success')

            # Response reports not ok: the download is expected to fail.
            fake_response.ok = False
            failure_result = self.downloader_2.download_image(image_url)
            fake_get.assert_called_with(image_url, timeout=10)
            self.assertEqual(failure_result, 'Download Failed')

    def test_download_images(self):
        """``download_images`` raises ``FileNotFoundError`` for a bad URL-list path."""
        with self.assertRaises(FileNotFoundError):
            self.downloader_1.file_urls = "wrongurl"
            self.downloader_1.download_images()
def download_and_classify_in_batches(complete_links_list, classifier):
    """Download and classify images in consecutive batches of links.

    The links are processed ``BATCH_SIZE`` at a time. Each batch is handed to
    ``ImageDownloader.download_images`` together with
    ``NUM_DOWNLOAD_THREADS``; the returned images are passed to
    ``classifier.classify_image_tensors``, the result is converted with
    ``DataHandler.convert_classification_result_to_dataframe`` and then given
    to ``DataHandler.write_classification_result`` along with
    ``PARQUET_FILE_OUTPUT_LOCATION``. A batch whose download returns nothing
    is skipped. Progress and per-batch duration are printed.

    Args:
        complete_links_list: Sliceable sequence of image links.
        classifier: Object exposing ``classify_image_tensors``.
    """
    total_links = len(complete_links_list)
    print("Total amount of images to be downloaded and classified: %d"
          % total_links)

    for index in range(0, total_links, BATCH_SIZE):
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments.
        time_start = time.perf_counter()

        # Clamp the upper bound so the last (partial) batch is reported with
        # its real end index instead of one past the end of the list.
        batch_end = min(index + BATCH_SIZE, total_links)
        print("Downloading and classifying batch: %d -> %d"
              % (index, batch_end))

        links_batch = complete_links_list[index:batch_end]
        tensor_images = ImageDownloader.download_images(
            links_batch, NUM_DOWNLOAD_THREADS)

        # Explicit length check kept on purpose: the download result may be
        # array-like, where plain truthiness can be ambiguous (unverified).
        if len(tensor_images) == 0:
            print("Skipping classification of empty list")
            continue

        results = classifier.classify_image_tensors(tensor_images)
        results_df = DataHandler.convert_classification_result_to_dataframe(
            results)
        DataHandler.write_classification_result(
            results_df, PARQUET_FILE_OUTPUT_LOCATION)

        duration = time.perf_counter() - time_start
        print("Duration of downloading and classification for batch: %.2f"
              % duration)
from image_downloader import ImageDownloader


def main():
    """Build an ``ImageDownloader`` for the configured paths and run it."""
    # Text file holding the image URLs, and the folder receiving the images.
    urls_path = 'C:/Users/brian.dsouza/Pictures/image_downloader/fileurls.txt'
    images_dir = 'C:/Users/brian.dsouza/Pictures/image_downloader/Images'

    downloader = ImageDownloader(file_urls=urls_path, destination=images_dir)
    downloader.download_images()


if __name__ == '__main__':
    main()