def download_datasets(self, root_dir):
    """Download and extract four OMR datasets into sub-folders of ``root_dir``.

    Every dataset gets its own sub-folder. When that sub-folder already
    exists, the download is skipped and ``"<folder> already exists"`` is
    printed instead.

    Args:
        root_dir: Directory that receives one sub-folder per dataset. It is
            created, including any missing parent directories, when absent.
    """
    # makedirs(exist_ok=True) also creates missing parents and does not fail
    # when the directory shows up between an existence check and the create.
    os.makedirs(root_dir, exist_ok=True)

    # (sub-folder name, name of the OmrDataset member extracted into it).
    # The member is looked up only when a download is actually required.
    datasets = (
        ("MuscimaPlusPlus_V2", "MuscimaPlusPlus_V2"),
        ("CVC_Muscima_Augmented", "CvcMuscima_MultiConditionAligned"),
        ("MuscimaPlusPlus_Measure_Annotations",
         "MuscimaPlusPlus_MeasureAnnotations"),
        ("Measure_Bounding_Box_Annotations_v2",
         "MeasureBoundingBoxAnnotations_v2"),
    )
    for folder_name, dataset_name in datasets:
        target_path = os.path.join(root_dir, folder_name)
        if os.path.exists(target_path):
            print(folder_name + " already exists")
            continue
        Downloader().download_and_extract_dataset(
            getattr(OmrDataset, dataset_name), target_path)
def download_dataset_and_verify_correct_extraction(
        self: unittest.TestCase,
        destination_directory: str,
        number_of_samples_in_the_dataset: int,
        target_file_extension: str,
        dataset: OmrDataset):
    """Download ``dataset``, extract it and assert the number of extracted files.

    Args:
        self: Test case whose assertion methods are used for verification.
        destination_directory: Directory the dataset is extracted into. It is
            deleted before and after the check.
        number_of_samples_in_the_dataset: Expected number of matching files.
        target_file_extension: File name glob pattern that is searched for
            recursively below ``destination_directory``.
        dataset: Dataset to download; its ``get_dataset_filename()`` names the
            downloaded archive, which is removed before and after the check.
    """
    archive_filename = dataset.get_dataset_filename()

    # Arrange and Cleanup
    if os.path.exists(archive_filename):
        os.remove(archive_filename)
    shutil.rmtree(destination_directory, ignore_errors=True)
    downloader = Downloader()

    try:
        # Act
        downloader.download_and_extract_dataset(dataset, destination_directory)

        # Assert
        search_pattern = os.path.join(destination_directory, "**",
                                      target_file_extension)
        all_files = glob(search_pattern, recursive=True)
        actual_number_of_files = len(all_files)
        self.assertEqual(number_of_samples_in_the_dataset,
                         actual_number_of_files)
        self.assertTrue(os.path.exists(archive_filename))
    finally:
        # Cleanup, also after a failed download or assertion, so that a
        # failing run does not leave files behind for the next one.
        if os.path.exists(archive_filename):
            os.remove(archive_filename)
        shutil.rmtree(destination_directory, ignore_errors=True)


if __name__ == '__main__':
    unittest.main()
def test_download_extract_and_draw_bitmaps(self):
    """Download Homus_V2, draw its symbols as images and count the PNG files."""
    # Arrange
    dataset_downloader = Downloader()

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(
            OmrDataset.Homus_V2, "temp/homus_raw")
        HomusImageGenerator.create_images(
            "temp/homus_raw", "temp/homus_img", [3], 96, 192, 14)
        all_image_files = [
            y for x in os.walk("temp/homus_img")
            for y in glob(os.path.join(x[0], '*.png'))
        ]
        actual_number_of_files = len(all_image_files)

        # Assert
        self.assertEqual(15200, actual_number_of_files)
    finally:
        # Cleanup, also when the download, rendering or assertion failed, so
        # that a failing run does not leave files behind for the next one.
        if os.path.exists("HOMUS-2.0.zip"):
            os.remove("HOMUS-2.0.zip")
        shutil.rmtree("temp", ignore_errors=True)
def test_download_and_prepare_dataset(self):
    """Download OpenOmr, prepare its images and verify the number of PNG files."""
    # Arrange
    dataset_downloader = Downloader()
    expected_number_of_images = 503

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(
            OmrDataset.OpenOmr, "temp/open_omr_raw2")
        image_generator = OpenOmrImagePreparer()
        image_generator.prepare_dataset("temp/open_omr_raw2",
                                        "temp/open_omr_image2")
        all_image_files = [
            y for x in os.walk("temp/open_omr_image2")
            for y in glob(os.path.join(x[0], '*.png'))
        ]
        actual_number_of_images = len(all_image_files)

        # Assert
        self.assertEqual(expected_number_of_images, actual_number_of_images)
    finally:
        # Cleanup, also when the download, preparation or assertion failed,
        # so that a failing run does not leave files behind for the next one.
        archive_filename = OmrDataset.OpenOmr.get_dataset_filename()
        if os.path.exists(archive_filename):
            os.remove(archive_filename)
        shutil.rmtree("temp", ignore_errors=True)
def test_download_extract_and_render_training_symbols(self, tmp_path):
    """Check that 44809 training symbol PNGs are extracted from MuscimaPlusPlus_V2.

    Everything is written below ``tmp_path``: the raw dataset goes to
    ``muscima_pp_raw`` and the extracted symbol images to ``muscima_img``.
    """
    # Arrange
    downloader = Downloader()
    expected_image_count = 44809
    raw_directory = str(tmp_path / "muscima_pp_raw")
    image_directory = tmp_path / "muscima_img"

    # Act
    downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2,
                                            raw_directory)
    symbol_extractor = MuscimaPlusPlusImageGenerator2()
    symbol_extractor.extract_symbols_for_training(raw_directory,
                                                  str(image_directory))

    # Collect the PNG files of every directory below the image directory.
    png_files = []
    for directory, _, _ in os.walk(image_directory):
        png_files.extend(glob(os.path.join(directory, '*.png')))
    actual_image_count = len(png_files)

    # Assert
    assert expected_image_count == actual_image_count
def test_download_and_prepare_dataset(self, tmp_path):
    """Check that preparing the OpenOmr dataset yields 503 PNG images.

    Everything is written below ``tmp_path``: the raw dataset goes to
    ``open_omr_raw2`` and the prepared images to ``open_omr_image2``.
    """
    # Arrange
    downloader = Downloader()
    expected_image_count = 503
    raw_directory = str(tmp_path / "open_omr_raw2")
    image_directory = tmp_path / "open_omr_image2"

    # Act
    downloader.download_and_extract_dataset(OmrDataset.OpenOmr, raw_directory)
    preparer = OpenOmrImagePreparer()
    preparer.prepare_dataset(raw_directory, str(image_directory))

    # Collect the PNG files of every directory below the image directory.
    png_files = []
    for directory, _, _ in os.walk(image_directory):
        png_files.extend(glob(os.path.join(directory, '*.png')))
    actual_image_count = len(png_files)

    # Assert
    assert expected_image_count == actual_image_count
def test_download_extract_and_crop_bitmaps(self):
    """Download Audiveris, extract its symbols and verify the number of PNG files."""
    # Arrange
    dataset_downloader = Downloader()

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(
            OmrDataset.Audiveris, "temp/audiveris_omr_raw")
        image_generator = AudiverisOmrImageGenerator()
        image_generator.extract_symbols("temp/audiveris_omr_raw",
                                        "temp/audiveris_omr_img")
        all_image_files = [
            y for x in os.walk("temp/audiveris_omr_img")
            for y in glob(os.path.join(x[0], '*.png'))
        ]
        actual_number_of_files = len(all_image_files)

        # Assert
        self.assertEqual(1056, actual_number_of_files)
    finally:
        # Cleanup, also when the download, extraction or assertion failed,
        # so that a failing run does not leave files behind for the next one.
        if os.path.exists("AudiverisOmrDataset.zip"):
            os.remove("AudiverisOmrDataset.zip")
        shutil.rmtree("temp", ignore_errors=True)
def test_download_extract_and_render_all_symbols(self):
    """Download MuscimaPlusPlus_V2, render all symbol masks and count the PNGs."""
    # Arrange
    dataset_downloader = Downloader()
    expected_number_of_symbols = 102914

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(
            OmrDataset.MuscimaPlusPlus_V2, "temp/muscima_pp")
        image_generator = MuscimaPlusPlusSymbolImageGenerator()
        image_generator.extract_and_render_all_symbol_masks(
            "temp/muscima_pp", "temp/muscima_pp/symbols")
        all_image_files = [
            y for x in os.walk("temp/muscima_pp/symbols")
            for y in glob(os.path.join(x[0], '*.png'))
        ]
        actual_number_of_symbols = len(all_image_files)

        # Assert
        self.assertEqual(expected_number_of_symbols, actual_number_of_symbols)
    finally:
        # Cleanup, also when the download, rendering or assertion failed, so
        # that a failing run does not leave files behind for the next one.
        archive_filename = OmrDataset.MuscimaPlusPlus_V2.get_dataset_filename()
        if os.path.exists(archive_filename):
            os.remove(archive_filename)
        shutil.rmtree("temp", ignore_errors=True)
def test_download_extract_and_render_training_symbols(self):
    """Download MuscimaPlusPlus_V2, extract training symbols and count the PNGs."""
    # Arrange
    dataset_downloader = Downloader()
    expected_number_of_images = 44809

    try:
        # Act
        dataset_downloader.download_and_extract_dataset(
            OmrDataset.MuscimaPlusPlus_V2, "temp/muscima_pp_raw")
        image_generator = MuscimaPlusPlusImageGenerator2()
        image_generator.extract_symbols_for_training("temp/muscima_pp_raw",
                                                     "temp/muscima_img")
        all_image_files = [
            y for x in os.walk("temp/muscima_img")
            for y in glob(os.path.join(x[0], '*.png'))
        ]
        actual_number_of_images = len(all_image_files)

        # Assert
        self.assertEqual(expected_number_of_images, actual_number_of_images)
    finally:
        # Cleanup, also when the download, extraction or assertion failed,
        # so that a failing run does not leave files behind for the next one.
        archive_filename = OmrDataset.MuscimaPlusPlus_V2.get_dataset_filename()
        if os.path.exists(archive_filename):
            os.remove(archive_filename)
        shutil.rmtree("temp", ignore_errors=True)
def __download_and_extract_datasets(
        self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
        staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
        random_position_on_canvas: bool):
    """Download each requested dataset and place its images in the image directory.

    Raw downloads are stored below ``self.dataset_directory``; the resulting
    images end up in ``self.image_dataset_directory``. Recognized entries of
    ``datasets`` are ``'homus'``, ``'rebelo1'``, ``'rebelo2'``, ``'printed'``,
    ``'fornes'``, ``'audiveris'``, ``'muscima_pp'`` and ``'openomr'``; they
    are processed in that order.

    Args:
        datasets: Container that is checked for the dataset names above.
        width: Image width handed to the Homus image generator when a fixed
            canvas is used.
        height: Image height handed to the Homus image generator when a fixed
            canvas is used.
        use_fixed_canvas: When falsy, ``None`` is passed as width and height
            to the Homus image generator.
        staff_line_spacing: Forwarded to the Homus image generator.
        staff_line_vertical_offsets: Forwarded to the Homus image generator.
        stroke_thicknesses_for_generated_symbols: Forwarded to the Homus image
            generator.
        random_position_on_canvas: Forwarded to the Homus image generator.
    """
    downloader = Downloader()

    if 'homus' in datasets:
        homus_raw_directory = os.path.join(self.dataset_directory, "homus_raw")
        downloader.download_and_extract_dataset(OmrDataset.Homus_V2,
                                                homus_raw_directory)

        # Without a fixed canvas the size arguments are dropped, which allows
        # symbols to be drawn at their original shapes.
        canvas_width, canvas_height = (
            (width, height) if use_fixed_canvas else (None, None))

        symbol_bounding_boxes = HomusImageGenerator.create_images(
            homus_raw_directory,
            self.image_dataset_directory,
            stroke_thicknesses_for_generated_symbols,
            canvas_width,
            canvas_height,
            staff_line_spacing,
            staff_line_vertical_offsets,
            random_position_on_canvas)

        # The returned bounding boxes are pickled next to the raw data.
        cache_path = os.path.join(self.dataset_directory, "bounding_boxes.txt")
        with open(cache_path, "wb") as cache_file:
            pickle.dump(symbol_bounding_boxes, cache_file)

    # These datasets are extracted straight into the image directory.
    directly_extracted = (
        ('rebelo1', OmrDataset.Rebelo1),
        ('rebelo2', OmrDataset.Rebelo2),
        ('printed', OmrDataset.Printed),
    )
    for dataset_key, omr_dataset in directly_extracted:
        if dataset_key in datasets:
            downloader.download_and_extract_dataset(
                omr_dataset, self.image_dataset_directory)

    if 'fornes' in datasets:
        fornes_raw_directory = os.path.join(self.dataset_directory,
                                            "fornes_raw")
        downloader.download_and_extract_dataset(OmrDataset.Fornes,
                                                fornes_raw_directory)
        fornes_preparer = FornesMusicSymbolsImagePreparer()
        fornes_preparer.prepare_dataset(fornes_raw_directory,
                                        self.image_dataset_directory)

    if 'audiveris' in datasets:
        audiveris_raw_directory = os.path.join(self.dataset_directory,
                                               "audiveris_omr_raw")
        audiveris_image_directory = os.path.join(self.dataset_directory,
                                                 "audiveris_omr_images")
        downloader.download_and_extract_dataset(OmrDataset.Audiveris,
                                                audiveris_raw_directory)
        # Two stages: extract symbols into an intermediate directory, then
        # prepare the final images from there.
        symbol_generator = AudiverisOmrImageGenerator()
        symbol_generator.extract_symbols(audiveris_raw_directory,
                                         audiveris_image_directory)
        audiveris_preparer = AudiverisOmrImageExtractor()
        audiveris_preparer.prepare_dataset(audiveris_image_directory,
                                           self.image_dataset_directory)

    if 'muscima_pp' in datasets:
        muscima_raw_directory = os.path.join(self.dataset_directory,
                                             "muscima_pp_raw")
        downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2,
                                                muscima_raw_directory)
        muscima_generator = MuscimaPlusPlusImageGenerator2()
        muscima_generator.extract_symbols_for_training(
            muscima_raw_directory, self.image_dataset_directory)

    if 'openomr' in datasets:
        open_omr_raw_directory = os.path.join(self.dataset_directory,
                                              "open_omr_raw")
        downloader.download_and_extract_dataset(OmrDataset.OpenOmr,
                                                open_omr_raw_directory)
        open_omr_preparer = OpenOmrImagePreparer()
        open_omr_preparer.prepare_dataset(open_omr_raw_directory,
                                          self.image_dataset_directory)
import os

from omrdatasettools.Downloader import Downloader, OmrDataset

from image_color_inverter import ImageColorInverter

if __name__ == "__main__":
    # Download MuscimaPlusPlus_V2 into data/muscima_pp and invert the colors
    # of its PNG images in place.
    muscima_pp_dataset_directory = os.path.join("data", "muscima_pp")

    # NOTE(review): the dataset downloaded below is MuscimaPlusPlus_V2, whose
    # content is expected below a "v2.0" folder; the previous "v1.0" path
    # does not belong to that dataset version. Confirm against the extracted
    # archive layout.
    muscima_image_directory = os.path.join(muscima_pp_dataset_directory,
                                           "v2.0", "data", "images")

    downloader = Downloader()
    downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2,
                                            muscima_pp_dataset_directory)

    inverter = ImageColorInverter()
    # We would like to work with black-on-white images instead of
    # white-on-black images
    inverter.invert_images(muscima_image_directory, "*.png")
i for i in os.listdir(source_folder) if i.endswith(".png") and i not in broken_symbols ] for image in all_png_images: shutil.copy(os.path.join(source_folder, image), destination_folder) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--raw_dataset_directory", type=str, default="../data/fornes_raw", help="The directory, where the raw Muscima++ dataset can be found") parser.add_argument( "--image_dataset_directory", type=str, default="../data/images", help="The directory, where the images will be copied to") flags, unparsed = parser.parse_known_args() dataset_downloader = Downloader() dataset_downloader.download_and_extract_dataset( OmrDataset.Fornes, flags.raw_dataset_directory) dataset_preparer = FornesMusicSymbolsImagePreparer() dataset_preparer.prepare_dataset(flags.raw_dataset_directory, flags.image_dataset_directory)
action='store', default="../data/mungs", help='The root directory that will contain the MuNG XMLs.') parser.add_argument( '-i', '--image_root', action='store', default="../data/images", help='The root directory that will contain the images of' ' scores that are represented by the MuNGs. The' ' image names must correspond to the MuNG file' ' names, up to the file type suffix.') args = parser.parse_args() mung_root_directory = args.mung_root image_root_directory = args.image_root os.makedirs(mung_root_directory, exist_ok=True) os.makedirs(image_root_directory, exist_ok=True) temporary_directory = "temp" dataset_downloader = Downloader() dataset_downloader.download_and_extract_dataset( OmrDataset.MuscimaPlusPlus_V2, temporary_directory) copy_tree(os.path.join(temporary_directory, "v2.0", "data", "annotations"), mung_root_directory) copy_tree(os.path.join(temporary_directory, "v2.0", "data", "images"), image_root_directory) shutil.rmtree(temporary_directory)