Пример #1
0
    def test_download_and_extract_openomr_dataset_expect_folder_to_be_created(
            self):
        """Run the shared download-and-verify helper for the OpenOMR dataset.

        The helper receives, in order: the destination folder, the number
        of samples expected in the dataset, the file pattern of the target
        files, the archive file name reported by the downloader, and the
        downloader itself.
        """
        openomr_downloader = OpenOmrDatasetDownloader()
        archive_filename = openomr_downloader.get_dataset_filename()
        extraction_folder = "OpenOMR"
        expected_sample_count = 706
        image_file_pattern = "*.png"

        self.download_dataset_and_verify_correct_extraction(
            extraction_folder,
            expected_sample_count,
            image_file_pattern,
            archive_filename,
            openomr_downloader,
        )
    def __download_and_extract_datasets(self, datasets, width, height, use_fixed_canvas, staff_line_spacing,
                                        staff_line_vertical_offsets, stroke_thicknesses_for_generated_symbols,
                                        random_position_on_canvas: bool):
        """Download every requested dataset and prepare its images.

        Each supported key found in ``datasets`` ('homus', 'rebelo1',
        'rebelo2', 'printed', 'fornes', 'audiveris', 'muscima_pp',
        'openomr') triggers its own download step. Datasets that need
        conversion are first downloaded into a ``*_raw`` folder below
        ``self.dataset_directory`` and then written to
        ``self.image_dataset_directory``; the Rebelo and printed-symbol
        downloaders are given ``self.image_dataset_directory`` directly.

        Args:
            datasets: Container of dataset keys to process.
            width: Canvas width for generated HOMUS images (used only with
                a fixed canvas).
            height: Canvas height for generated HOMUS images (used only
                with a fixed canvas).
            use_fixed_canvas: When falsy, ``None`` is passed for both canvas
                dimensions to the HOMUS image generator.
            staff_line_spacing: Forwarded to the HOMUS image generator.
            staff_line_vertical_offsets: Forwarded to the HOMUS image
                generator.
            stroke_thicknesses_for_generated_symbols: Forwarded to the HOMUS
                image generator.
            random_position_on_canvas: Forwarded to the HOMUS image
                generator.
        """

        def raw_directory(folder_name):
            # Resolved lazily so that nothing is touched when no dataset
            # that needs a raw folder was requested.
            return os.path.join(self.dataset_directory, folder_name)

        if 'homus' in datasets:
            homus_raw = raw_directory("homus_raw")
            HomusDatasetDownloader(homus_raw).download_and_extract_dataset()

            # Without a fixed canvas no size is passed, which lets the
            # symbols be drawn at their original shapes.
            canvas_width, canvas_height = (width, height) if use_fixed_canvas else (None, None)
            symbol_boxes = HomusImageGenerator.create_images(
                homus_raw,
                self.image_dataset_directory,
                stroke_thicknesses_for_generated_symbols,
                canvas_width,
                canvas_height,
                staff_line_spacing,
                staff_line_vertical_offsets,
                random_position_on_canvas)

            # The cache is a pickle (binary) despite its .txt extension.
            boxes_cache_path = os.path.join(self.dataset_directory, "bounding_boxes.txt")
            with open(boxes_cache_path, "wb") as boxes_cache_file:
                pickle.dump(symbol_boxes, boxes_cache_file)

        # These three downloaders are pointed straight at the image folder.
        if 'rebelo1' in datasets:
            RebeloMusicSymbolDataset1Downloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'rebelo2' in datasets:
            RebeloMusicSymbolDataset2Downloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'printed' in datasets:
            PrintedMusicSymbolsDatasetDownloader(self.image_dataset_directory).download_and_extract_dataset()

        if 'fornes' in datasets:
            fornes_raw = raw_directory("fornes_raw")
            FornesMusicSymbolsDatasetDownloader(fornes_raw).download_and_extract_dataset()
            FornesMusicSymbolsImagePreparer().prepare_dataset(fornes_raw, self.image_dataset_directory)

        if 'audiveris' in datasets:
            audiveris_raw = raw_directory("audiveris_omr_raw")
            audiveris_intermediate = raw_directory("audiveris_omr_images")
            AudiverisOmrDatasetDownloader(audiveris_raw).download_and_extract_dataset()
            # Two stages: extract symbols into an intermediate folder,
            # then prepare the final images from there.
            AudiverisOmrImageGenerator().extract_symbols(audiveris_raw, audiveris_intermediate)
            AudiverisOmrImageExtractor().prepare_dataset(audiveris_intermediate, self.image_dataset_directory)

        if 'muscima_pp' in datasets:
            muscima_raw = raw_directory("muscima_pp_raw")
            MuscimaPlusPlusDatasetDownloader(muscima_raw).download_and_extract_dataset()
            MuscimaPlusPlusImageGenerator2().extract_symbols_for_training(muscima_raw,
                                                                          self.image_dataset_directory)

        if 'openomr' in datasets:
            openomr_raw = raw_directory("open_omr_raw")
            OpenOmrDatasetDownloader(openomr_raw).download_and_extract_dataset()
            OpenOmrImagePreparer().prepare_dataset(openomr_raw, self.image_dataset_directory)
Пример #3
0
    def test_download_and_prepare_dataset(self):
        """Download OpenOMR, prepare its images and expect 503 PNG files.

        Cleanup runs in a ``finally`` block so that the downloaded archive
        and the ``temp`` working directory are removed even when the
        download, the preparation or the assertion fails.
        """
        # Arrange
        dataset_downloader = OpenOmrDatasetDownloader()
        expected_number_of_images = 503
        raw_directory = "temp/open_omr_raw2"
        image_directory = "temp/open_omr_image2"

        try:
            # Act
            dataset_downloader.download_and_extract_dataset(raw_directory)
            image_preparer = OpenOmrImagePreparer()
            image_preparer.prepare_dataset(raw_directory, image_directory)
            # Collect the PNG files from every directory below the output.
            all_image_files = [
                image_path
                for directory, _, _ in os.walk(image_directory)
                for image_path in glob(os.path.join(directory, '*.png'))
            ]
            actual_number_of_images = len(all_image_files)

            # Assert
            self.assertEqual(expected_number_of_images,
                             actual_number_of_images)
        finally:
            # Cleanup: only remove what actually exists, so that a failure
            # early in the test is not masked by a missing-file error here.
            archive_path = dataset_downloader.get_dataset_filename()
            if os.path.exists(archive_path):
                os.remove(archive_path)
            if os.path.isdir("temp"):
                shutil.rmtree("temp")
            source_folder = os.path.join(raw_dataset_directory, symbol_class)
            destination_folder = os.path.join(image_dataset_directory,
                                              destination_class_name)
            os.makedirs(destination_folder, exist_ok=True)
            dir_util.copy_tree(source_folder, destination_folder)


if __name__ == "__main__":
    # Command-line entry point: prepare the OpenOMR images from an existing
    # raw dataset directory. Both directories can be overridden via flags.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # parse_known_args tolerates unrecognised arguments; they are collected
    # in `unparsed` instead of aborting the script.
    flags, unparsed = parser.parse_known_args()

    # NOTE: the download step is currently disabled, so the raw dataset must
    # already be present in flags.raw_dataset_directory.
    dataset_downloader = OpenOmrDatasetDownloader(flags.raw_dataset_directory)
    #dataset_downloader.download_and_extract_dataset()

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory,
                                     flags.image_dataset_directory)
            destination_folder = os.path.join(image_dataset_directory,
                                              destination_class_name)
            os.makedirs(destination_folder, exist_ok=True)
            dir_util.copy_tree(source_folder, destination_folder)


if __name__ == "__main__":
    # Command-line entry point: download the OpenOMR dataset into the raw
    # directory and then prepare its images. Both directories can be
    # overridden via flags.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_dataset_directory",
        type=str,
        default="../data/open_omr_raw",
        help="The directory, where the raw OpenOMR dataset can be found")
    parser.add_argument(
        "--image_dataset_directory",
        type=str,
        default="../data/images",
        help="The directory, where the images will be copied to")

    # parse_known_args tolerates unrecognised arguments; they are collected
    # in `unparsed` instead of aborting the script.
    flags, unparsed = parser.parse_known_args()

    # Download the dataset
    dataset_downloader = OpenOmrDatasetDownloader()
    dataset_downloader.download_and_extract_dataset(
        flags.raw_dataset_directory)

    # Actually prepare our dataset
    dataset_preparer = OpenOmrImagePreparer()
    dataset_preparer.prepare_dataset(flags.raw_dataset_directory,
                                     flags.image_dataset_directory)
Пример #6
0
#%% Audiveris
# Download the Audiveris OMR dataset, then extract its symbols as images.
from omrdatasettools.downloaders.AudiverisOmrDatasetDownloader import AudiverisOmrDatasetDownloader
AudiverisOmrDatasetDownloader("./data/audiveris").download_and_extract_dataset()

from omrdatasettools.image_generators.AudiverisOmrImageGenerator import AudiverisOmrImageGenerator, AudiverisOmrSymbol

audiveris_image_generator = AudiverisOmrImageGenerator()
audiveris_image_generator.extract_symbols(
    raw_data_directory='./data/audiveris',
    destination_directory='./data/audiveris/images',
)

#%% Open OMR
# Download the OpenOMR dataset.
from omrdatasettools.downloaders.OpenOmrDatasetDownloader import OpenOmrDatasetDownloader

OpenOmrDatasetDownloader("./data/openomr").download_and_extract_dataset()

#%% Capitan
# Disabled cell, kept for reference:
# from omrdatasettools.downloaders.CapitanDatasetDownloader import CapitanDatasetDownloader
# dataset_downloader = CapitanDatasetDownloader("./data/capitan")
# dataset_downloader.download_and_extract_dataset()

#%% MUSCIMA
# Download the MUSCIMA++ dataset.
from omrdatasettools.downloaders.MuscimaPlusPlusDatasetDownloader import MuscimaPlusPlusDatasetDownloader

MuscimaPlusPlusDatasetDownloader("./data/muscima").download_and_extract_dataset()