Пример #1
0
    def test_download_and_prepare_dataset(self):
        """Download the dataset, prepare the images and check the PNG count.

        The dataset is extracted into ``temp/open_omr_raw2`` and the prepared
        images are written to ``temp/open_omr_image2``. The test passes when
        exactly 503 ``*.png`` files are found (recursively) below the image
        directory.

        The downloaded archive and the ``temp`` directory are removed even if
        the download, the preparation or the assertion fails, so that a failed
        run does not leave stale files behind for subsequent runs.
        """
        # Arrange
        dataset_downloader = OpenOmrDatasetDownloader()
        expected_number_of_images = 503

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(
                "temp/open_omr_raw2")
            image_generator = OpenOmrImagePreparer()
            image_generator.prepare_dataset("temp/open_omr_raw2",
                                            "temp/open_omr_image2")
            # Collect every PNG in the image directory and all of its
            # sub-directories.
            all_image_files = [
                y for x in os.walk("temp/open_omr_image2")
                for y in glob(os.path.join(x[0], '*.png'))
            ]
            actual_number_of_images = len(all_image_files)

            # Assert
            self.assertEqual(expected_number_of_images,
                             actual_number_of_images)
        finally:
            # Cleanup: runs on success and on failure. The existence check and
            # ignore_errors keep a missing archive/directory (e.g. after a
            # failed download) from masking the original error.
            dataset_archive = dataset_downloader.get_dataset_filename()
            if os.path.exists(dataset_archive):
                os.remove(dataset_archive)
            shutil.rmtree("temp", ignore_errors=True)
            destination_folder = os.path.join(image_dataset_directory,
                                              destination_class_name)
            os.makedirs(destination_folder, exist_ok=True)
            dir_util.copy_tree(source_folder, destination_folder)


if __name__ == "__main__":
    # Script entry point: download the raw dataset into one directory and
    # then run the image preparer to fill the image directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        # Fixed copy-paste error: this script handles the OpenOMR dataset,
        # not Muscima++.
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # parse_known_args tolerates unrecognised command-line arguments; the
    # leftovers are intentionally discarded.
    flags, _ = parser.parse_known_args()

    # Download the dataset
    dataset_downloader = OpenOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(
        flags.raw_dataset_directory)

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory,
                                     flags.image_dataset_directory)