Пример #1
0
    def download_datasets(self, root_dir):
        """Download the datasets used by this project into ``root_dir``.

        Every dataset is extracted into its own sub-directory of ``root_dir``.
        A dataset whose sub-directory already exists is not downloaded again;
        a short notice is printed instead.

        Args:
            root_dir: Directory that receives one sub-directory per dataset.
                It is created, together with any missing parent directories,
                if it does not exist yet.
        """
        # makedirs(exist_ok=True) copes with nested paths and with the directory
        # being created concurrently, both of which made os.mkdir() raise.
        os.makedirs(root_dir, exist_ok=True)

        # (sub-directory name, dataset to download), processed in this order.
        required_datasets = (
            ("MuscimaPlusPlus_V2",
             OmrDataset.MuscimaPlusPlus_V2),
            ("CVC_Muscima_Augmented",
             OmrDataset.CvcMuscima_MultiConditionAligned),
            ("MuscimaPlusPlus_Measure_Annotations",
             OmrDataset.MuscimaPlusPlus_MeasureAnnotations),
            ("Measure_Bounding_Box_Annotations_v2",
             OmrDataset.MeasureBoundingBoxAnnotations_v2),
        )

        for folder_name, dataset in required_datasets:
            target_path = os.path.join(root_dir, folder_name)
            if os.path.exists(target_path):
                print(folder_name + " already exists")
                continue
            Downloader().download_and_extract_dataset(dataset, target_path)
Пример #2
0
    def download_dataset_and_verify_correct_extraction(
            self: unittest.TestCase, destination_directory: str,
            number_of_samples_in_the_dataset: int, target_file_extension: str,
            dataset: OmrDataset):
        """Download ``dataset`` and verify the number of extracted files.

        Any previously downloaded archive and any existing
        ``destination_directory`` are removed first. After the download the
        files below ``destination_directory`` that match
        ``target_file_extension`` are counted recursively and compared with
        ``number_of_samples_in_the_dataset``; the downloaded archive must also
        exist. Archive and destination directory are removed again afterwards,
        whether or not the checks passed.

        Args:
            destination_directory: Directory the dataset is extracted into.
            number_of_samples_in_the_dataset: Expected number of matching files.
            target_file_extension: Glob pattern for the file names to count,
                appended to ``destination_directory + "/**/"``.
            dataset: The dataset to download.
        """
        archive_path = dataset.get_dataset_filename()

        # Arrange and Cleanup
        if os.path.exists(archive_path):
            os.remove(archive_path)
        shutil.rmtree(destination_directory, ignore_errors=True)
        downloader = Downloader()

        try:
            # Act
            downloader.download_and_extract_dataset(dataset,
                                                    destination_directory)

            # Assert
            all_files = glob(destination_directory + "/**/" +
                             target_file_extension,
                             recursive=True)
            actual_number_of_files = len(all_files)
            self.assertEqual(number_of_samples_in_the_dataset,
                             actual_number_of_files)
            self.assertTrue(os.path.exists(archive_path))
        finally:
            # Cleanup: executed even when the download or an assertion fails,
            # so a failing test does not leave large files behind.
            if os.path.exists(archive_path):
                os.remove(archive_path)
            shutil.rmtree(destination_directory, ignore_errors=True)

        # The test runner is deliberately not started from inside this helper:
        # calling unittest.main() here would re-run the whole suite from within
        # a running test.
Пример #3
0
    def test_download_extract_and_draw_bitmaps(self):
        """Download Homus_V2, generate images from it with HomusImageGenerator
        and verify that 15200 PNG files were produced.
        """
        # Arrange
        dataset_downloader = Downloader()

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(OmrDataset.Homus_V2, "temp/homus_raw")
            HomusImageGenerator.create_images("temp/homus_raw", "temp/homus_img", [3], 96, 192, 14)
            all_image_files = [y for x in os.walk("temp/homus_img") for y in glob(os.path.join(x[0], '*.png'))]
            actual_number_of_files = len(all_image_files)

            # Assert
            self.assertEqual(15200, actual_number_of_files)
        finally:
            # Cleanup: runs even if a step above fails, and tolerates files
            # that were never created, so the original error is not masked.
            if os.path.exists("HOMUS-2.0.zip"):
                os.remove("HOMUS-2.0.zip")
            shutil.rmtree("temp", ignore_errors=True)
    def test_download_and_prepare_dataset(self):
        """Download the OpenOmr dataset, prepare it with OpenOmrImagePreparer
        and verify that 503 PNG files were produced.
        """
        # Arrange
        dataset_downloader = Downloader()
        expected_number_of_images = 503

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(OmrDataset.OpenOmr, "temp/open_omr_raw2")
            image_generator = OpenOmrImagePreparer()
            image_generator.prepare_dataset("temp/open_omr_raw2", "temp/open_omr_image2")
            all_image_files = [y for x in os.walk("temp/open_omr_image2") for y in glob(os.path.join(x[0], '*.png'))]
            actual_number_of_images = len(all_image_files)

            # Assert
            self.assertEqual(expected_number_of_images, actual_number_of_images)
        finally:
            # Cleanup: runs even if a step above fails, and tolerates files
            # that were never created, so the original error is not masked.
            archive_path = OmrDataset.OpenOmr.get_dataset_filename()
            if os.path.exists(archive_path):
                os.remove(archive_path)
            shutil.rmtree("temp", ignore_errors=True)
    def test_download_extract_and_render_training_symbols(self, tmp_path):
        """Extract the MuscimaPlusPlus_V2 training symbols below ``tmp_path``
        and check that 44809 PNG files were written.
        """
        # Arrange
        downloader = Downloader()
        expected_image_count = 44809
        raw_directory = str(tmp_path / "muscima_pp_raw")
        image_directory = str(tmp_path / "muscima_img")

        # Act
        downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2,
                                                raw_directory)
        symbol_extractor = MuscimaPlusPlusImageGenerator2()
        symbol_extractor.extract_symbols_for_training(raw_directory,
                                                      image_directory)

        # Count the PNG files directory by directory.
        actual_image_count = 0
        for current_directory, _, _ in os.walk(image_directory):
            png_files = glob(os.path.join(current_directory, '*.png'))
            actual_image_count += len(png_files)

        # Assert
        assert expected_image_count == actual_image_count
    def test_download_and_prepare_dataset(self, tmp_path):
        """Prepare the OpenOmr dataset below ``tmp_path`` and check that
        503 PNG files were written.
        """
        # Arrange
        downloader = Downloader()
        expected_image_count = 503
        raw_directory = str(tmp_path / "open_omr_raw2")
        image_directory = str(tmp_path / "open_omr_image2")

        # Act
        downloader.download_and_extract_dataset(OmrDataset.OpenOmr,
                                                raw_directory)
        preparer = OpenOmrImagePreparer()
        preparer.prepare_dataset(raw_directory, image_directory)

        # Count the PNG files directory by directory.
        actual_image_count = 0
        for current_directory, _, _ in os.walk(image_directory):
            png_files = glob(os.path.join(current_directory, '*.png'))
            actual_image_count += len(png_files)

        # Assert
        assert expected_image_count == actual_image_count
    def test_download_extract_and_crop_bitmaps(self):
        """Download the Audiveris dataset, extract its symbols with
        AudiverisOmrImageGenerator and verify that 1056 PNG files were
        produced.
        """
        # Arrange
        dataset_downloader = Downloader()

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(
                OmrDataset.Audiveris, "temp/audiveris_omr_raw")
            image_generator = AudiverisOmrImageGenerator()
            image_generator.extract_symbols("temp/audiveris_omr_raw",
                                            "temp/audiveris_omr_img")
            all_image_files = [
                y for x in os.walk("temp/audiveris_omr_img")
                for y in glob(os.path.join(x[0], '*.png'))
            ]
            actual_number_of_files = len(all_image_files)

            # Assert
            self.assertEqual(1056, actual_number_of_files)
        finally:
            # Cleanup: runs even if a step above fails, and tolerates files
            # that were never created, so the original error is not masked.
            if os.path.exists("AudiverisOmrDataset.zip"):
                os.remove("AudiverisOmrDataset.zip")
            shutil.rmtree("temp", ignore_errors=True)
    def test_download_extract_and_render_all_symbols(self):
        """Download MuscimaPlusPlus_V2, render all symbol masks with
        MuscimaPlusPlusSymbolImageGenerator and verify that 102914 PNG files
        were produced.
        """
        # Arrange
        dataset_downloader = Downloader()
        expected_number_of_symbols = 102914

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(
                OmrDataset.MuscimaPlusPlus_V2, "temp/muscima_pp")
            image_generator = MuscimaPlusPlusSymbolImageGenerator()
            image_generator.extract_and_render_all_symbol_masks(
                "temp/muscima_pp", "temp/muscima_pp/symbols")
            all_image_files = [
                y for x in os.walk("temp/muscima_pp/symbols")
                for y in glob(os.path.join(x[0], '*.png'))
            ]
            actual_number_of_symbols = len(all_image_files)

            # Assert
            self.assertEqual(expected_number_of_symbols,
                             actual_number_of_symbols)
        finally:
            # Cleanup: runs even if a step above fails, and tolerates files
            # that were never created, so the original error is not masked.
            archive_path = OmrDataset.MuscimaPlusPlus_V2.get_dataset_filename()
            if os.path.exists(archive_path):
                os.remove(archive_path)
            shutil.rmtree("temp", ignore_errors=True)
    def test_download_extract_and_render_training_symbols(self):
        """Download MuscimaPlusPlus_V2, extract the training symbols with
        MuscimaPlusPlusImageGenerator2 and verify that 44809 PNG files were
        produced.
        """
        # Arrange
        dataset_downloader = Downloader()
        expected_number_of_images = 44809

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(
                OmrDataset.MuscimaPlusPlus_V2, "temp/muscima_pp_raw")
            image_generator = MuscimaPlusPlusImageGenerator2()
            image_generator.extract_symbols_for_training("temp/muscima_pp_raw",
                                                         "temp/muscima_img")
            all_image_files = [
                y for x in os.walk("temp/muscima_img")
                for y in glob(os.path.join(x[0], '*.png'))
            ]
            actual_number_of_images = len(all_image_files)

            # Assert
            self.assertEqual(expected_number_of_images,
                             actual_number_of_images)
        finally:
            # Cleanup: runs even if a step above fails, and tolerates files
            # that were never created, so the original error is not masked.
            archive_path = OmrDataset.MuscimaPlusPlus_V2.get_dataset_filename()
            if os.path.exists(archive_path):
                os.remove(archive_path)
            shutil.rmtree("temp", ignore_errors=True)
Пример #10
0
    def __download_and_extract_datasets(
            self, datasets, width, height, use_fixed_canvas,
            staff_line_spacing, staff_line_vertical_offsets,
            stroke_thicknesses_for_generated_symbols,
            random_position_on_canvas: bool):
        """Download each requested dataset and place its images in
        ``self.image_dataset_directory``.

        ``datasets`` is checked for the keys 'homus', 'rebelo1', 'rebelo2',
        'printed', 'fornes', 'audiveris', 'muscima_pp' and 'openomr', in that
        order. Datasets that need a conversion step are first extracted into a
        raw directory below ``self.dataset_directory``. The remaining
        parameters are only used for 'homus', where they are forwarded to
        ``HomusImageGenerator.create_images``; its result is pickled to
        ``bounding_boxes.txt`` inside ``self.dataset_directory``.
        """
        downloader = Downloader()

        if 'homus' in datasets:
            homus_raw = os.path.join(self.dataset_directory, "homus_raw")
            downloader.download_and_extract_dataset(OmrDataset.Homus_V2,
                                                    homus_raw)
            # Only a fixed canvas gets explicit dimensions; otherwise None is
            # passed so symbols are drawn at their original shapes.
            if use_fixed_canvas:
                canvas_width, canvas_height = width, height
            else:
                canvas_width, canvas_height = None, None
            symbol_boxes = HomusImageGenerator.create_images(
                homus_raw, self.image_dataset_directory,
                stroke_thicknesses_for_generated_symbols,
                canvas_width, canvas_height,
                staff_line_spacing, staff_line_vertical_offsets,
                random_position_on_canvas)

            cache_path = os.path.join(self.dataset_directory,
                                      "bounding_boxes.txt")
            with open(cache_path, "wb") as cache_file:
                pickle.dump(symbol_boxes, cache_file)

        # These three archives are extracted straight into the image directory.
        if 'rebelo1' in datasets:
            downloader.download_and_extract_dataset(
                OmrDataset.Rebelo1, self.image_dataset_directory)
        if 'rebelo2' in datasets:
            downloader.download_and_extract_dataset(
                OmrDataset.Rebelo2, self.image_dataset_directory)
        if 'printed' in datasets:
            downloader.download_and_extract_dataset(
                OmrDataset.Printed, self.image_dataset_directory)

        if 'fornes' in datasets:
            fornes_raw = os.path.join(self.dataset_directory, "fornes_raw")
            downloader.download_and_extract_dataset(OmrDataset.Fornes,
                                                    fornes_raw)
            FornesMusicSymbolsImagePreparer().prepare_dataset(
                fornes_raw, self.image_dataset_directory)

        if 'audiveris' in datasets:
            audiveris_raw = os.path.join(self.dataset_directory,
                                         "audiveris_omr_raw")
            audiveris_images = os.path.join(self.dataset_directory,
                                            "audiveris_omr_images")
            downloader.download_and_extract_dataset(OmrDataset.Audiveris,
                                                    audiveris_raw)
            # Two steps: extract symbols into an intermediate directory, then
            # prepare them into the image directory.
            AudiverisOmrImageGenerator().extract_symbols(audiveris_raw,
                                                         audiveris_images)
            AudiverisOmrImageExtractor().prepare_dataset(
                audiveris_images, self.image_dataset_directory)

        if 'muscima_pp' in datasets:
            muscima_raw = os.path.join(self.dataset_directory,
                                       "muscima_pp_raw")
            downloader.download_and_extract_dataset(
                OmrDataset.MuscimaPlusPlus_V2, muscima_raw)
            MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(
                muscima_raw, self.image_dataset_directory)

        if 'openomr' in datasets:
            open_omr_raw = os.path.join(self.dataset_directory,
                                        "open_omr_raw")
            downloader.download_and_extract_dataset(OmrDataset.OpenOmr,
                                                    open_omr_raw)
            OpenOmrImagePreparer().prepare_dataset(
                open_omr_raw, self.image_dataset_directory)
import os

from omrdatasettools.Downloader import Downloader, OmrDataset

from image_color_inverter import ImageColorInverter

if __name__ == "__main__":
    # Script entry point: download MuscimaPlusPlus_V2 and invert the colors of
    # its PNG images.
    muscima_pp_dataset_directory = os.path.join("data", "muscima_pp")
    # The version folder must match the dataset that is downloaded below: the
    # MuscimaPlusPlus_V2 archive unpacks into "v2.0", so the former "v1.0" path
    # pointed at a directory this download does not create.
    muscima_image_directory = os.path.join(muscima_pp_dataset_directory, "v2.0", "data", "images")

    downloader = Downloader()
    downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2, muscima_pp_dataset_directory)

    inverter = ImageColorInverter()
    # We would like to work with black-on-white images instead of white-on-black images
    inverter.invert_images(muscima_image_directory, "*.png")
Пример #12
0
                i for i in os.listdir(source_folder)
                if i.endswith(".png") and i not in broken_symbols
            ]
            for image in all_png_images:
                shutil.copy(os.path.join(source_folder, image),
                            destination_folder)


if __name__ == "__main__":
    # Script entry point: download the Fornes dataset into the raw directory
    # and prepare its images into the image directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/fornes_raw",
        # The help text used to name Muscima++, but this script handles the
        # Fornes dataset.
        help="The directory, where the raw Fornes dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # parse_known_args() is kept so that unrecognised command line arguments
    # are tolerated instead of aborting the script.
    flags, _ = parser.parse_known_args()

    dataset_downloader = Downloader()
    dataset_downloader.download_and_extract_dataset(
        OmrDataset.Fornes, flags.raw_dataset_directory)

    dataset_preparer = FornesMusicSymbolsImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory,
                                     flags.image_dataset_directory)
Пример #13
0
        action='store',
        default="../data/mungs",
        help='The root directory that will contain the MuNG XMLs.')
    parser.add_argument(
        '-i',
        '--image_root',
        action='store',
        default="../data/images",
        help='The root directory that will contain the images of'
        ' scores that are represented by the MuNGs. The'
        ' image names must correspond to the MuNG file'
        ' names, up to the file type suffix.')
    args = parser.parse_args()
    mung_root_directory = args.mung_root
    image_root_directory = args.image_root

    os.makedirs(mung_root_directory, exist_ok=True)
    os.makedirs(image_root_directory, exist_ok=True)

    temporary_directory = "temp"
    dataset_downloader = Downloader()
    dataset_downloader.download_and_extract_dataset(
        OmrDataset.MuscimaPlusPlus_V2, temporary_directory)

    copy_tree(os.path.join(temporary_directory, "v2.0", "data", "annotations"),
              mung_root_directory)
    copy_tree(os.path.join(temporary_directory, "v2.0", "data", "images"),
              image_root_directory)

    shutil.rmtree(temporary_directory)