Пример #1
0
def download_and_classify_in_batches(complete_links_list, classifier):
    """Download and classify images batch by batch, writing each result.

    The links are processed in consecutive slices of ``BATCH_SIZE``. Every
    slice is passed to ``ImageDownloader.download_images``; the returned
    images are classified with ``classifier.classify_image_tensors``,
    converted to a dataframe by ``DataHandler`` and written to
    ``PARQUET_FILE_OUTPUT_LOCATION``. A batch for which the downloader
    returns nothing is skipped. Progress and per-batch timing are printed
    to stdout.

    Args:
        complete_links_list: Sequence of image links; must support
            ``len()`` and slicing.
        classifier: Object exposing ``classify_image_tensors``.

    Returns:
        None. Results are handed to ``DataHandler`` for writing.
    """
    total_links = len(complete_links_list)
    print("Total amount of images to be downloaded and classified: %d" %
          total_links)

    for index in range(0, total_links, BATCH_SIZE):
        time_start = time.time()
        # Clamp the upper bound so the progress message for a short final
        # batch reports the real end index instead of overshooting the list.
        batch_end = min(index + BATCH_SIZE, total_links)
        print("Downloading and classifying batch: %d -> %d" %
              (index, batch_end))

        links_batch = complete_links_list[index:batch_end]
        tensor_images = ImageDownloader.download_images(
            links_batch, NUM_DOWNLOAD_THREADS)

        # Explicit length check rather than truthiness: the downloader's
        # return type is not visible here, and truthiness would be ambiguous
        # if it is an array-like object -- TODO confirm.
        if len(tensor_images) == 0:
            print("Skipping classification of empty list")
            continue

        results = classifier.classify_image_tensors(tensor_images)
        results_df = DataHandler.convert_classification_result_to_dataframe(
            results)
        DataHandler.write_classification_result(results_df,
                                                PARQUET_FILE_OUTPUT_LOCATION)

        duration = time.time() - time_start
        print("Duration of downloading and classification for batch: %.2f" %
              duration)