Пример #1
0
    def get(self, request, book, page):
        """Return the PcGts of a page as a JSON response.

        The 'pcgts' file of the page is created first when it does not exist.
        If loading it raises ``JSONDecodeError`` the error is logged, the file
        is deleted and created again, and the fresh file is returned instead.
        """
        db_page = DatabasePage(DatabaseBook(book), page)
        pcgts_file = DatabaseFile(db_page, 'pcgts')

        if not pcgts_file.exists():
            pcgts_file.create()

        try:
            return Response(PcGts.from_file(pcgts_file).to_json())
        except JSONDecodeError as err:
            # The stored file could not be decoded: rebuild it from scratch.
            logger.error(err)
            pcgts_file.delete()
            pcgts_file.create()

        return Response(PcGts.from_file(pcgts_file).to_json())
Пример #2
0
def dataset_by_locked_pages(
        n_train,
        locks: List[LockState],
        shuffle: bool = True,
        datasets: List[DatabaseBook] = None
) -> Tuple[List[PcGts], List[PcGts]]:
    """Load the PcGts of all pages matching ``locks`` and split them.

    :param n_train: factor applied to the number of loaded files to obtain
        the size of the training split.
    :param locks: lock states forwarded to ``pages_with_lock`` of each book.
    :param shuffle: shuffle the loaded files (in place) before splitting.
    :param datasets: books to scan; when empty or ``None`` the result of
        ``DatabaseBook.list_available()`` is used.
    :return: tuple ``(train, validation)`` of PcGts lists.
    :raises ValueError: if one of the books does not exist.
    :raises EmptyDataSetException: if no file was found, or if
        ``0 < n_train < 1`` and one of the two splits is empty.
    """
    logger.info("Finding PcGts files with valid ground truth")
    books = datasets or DatabaseBook.list_available()

    loaded = []
    for book in books:
        logger.debug("Listing files of dataset '{}'".format(book.book))
        if not book.exists():
            raise ValueError("Dataset '{}' does not exist at '{}'".format(
                book.book, book.local_path()))

        loaded.extend(
            PcGts.from_file(page.file('pcgts'))
            for page in book.pages_with_lock(locks)
        )

    if not loaded:
        raise EmptyDataSetException()

    if shuffle:
        random.shuffle(loaded)

    split_at = int(len(loaded) * n_train)
    train_pcgts = loaded[:split_at]
    # The validation split starts where the training split actually ends.
    val_pcgts = loaded[len(train_pcgts):]

    if 0 < n_train < 1 and not (train_pcgts and val_pcgts):
        raise EmptyDataSetException()

    return train_pcgts, val_pcgts
Пример #3
0
    def test_upgrade(self):
        """An upgraded PcGts JSON must survive a from_json/to_json round trip."""
        fixture_path = os.path.join(raw_storage, 'page_test_upgrade_001',
                                    'pcgts.json')
        with open(fixture_path) as handle:
            stored = json.load(handle)

        # Upgrade a copy so the loaded fixture data stays untouched.
        upgraded = deepcopy(stored)
        self.assertTrue(update_pcgts(upgraded))

        # Show the complete diff if the comparison below fails.
        self.maxDiff = None
        round_tripped = PcGts.from_json(upgraded, None).to_json()
        self.assertEqual(upgraded, round_tripped)
Пример #4
0
    def put(self, request, book, page):
        """Store the PcGts sent in the request body for the given page.

        The request body is parsed as JSON, converted to a ``PcGts`` and
        written to the page's 'pcgts' file. A pretty-printed copy is also
        appended to the page's 'pcgts_backup' zip archive under a name that
        contains the current timestamp.

        :return: an empty ``Response``.
        """
        book = DatabaseBook(book)
        page = DatabasePage(book, page)
        # json.loads() decodes UTF-8 bytes on its own. The legacy ``encoding``
        # keyword was removed in Python 3.9 and raises TypeError there.
        obj = json.loads(request.body)

        pcgts = PcGts.from_json(obj, page)
        pcgts_path = page.file('pcgts').local_path()
        pcgts.to_file(pcgts_path)
        # add to backup archive
        with zipfile.ZipFile(page.file('pcgts_backup').local_path(), 'a', compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr('pcgts_{}.json'.format(datetime.datetime.now()), json.dumps(pcgts.to_json(), indent=2))

        logger.debug('Successfully saved pcgts file to {}'.format(pcgts_path))

        return Response()
Пример #5
0
    def create(self):
        """Create the local file of this database file if it is missing.

        The work is done while holding a lock that is looked up by the local
        path of this file. Nothing happens if the file already exists.
        Otherwise every file listed in ``self.definition.requires`` is created
        first, and then the file itself is produced depending on
        ``self.definition.id``.

        :raises Exception: if there is no creation rule for the definition id.
        """

        # dict.get() never stores its default, so a fresh, unshared Lock was
        # used on every call and nothing was actually serialized.
        # setdefault() registers the lock so all callers of one path share it.
        # NOTE(review): assumes mutex_dict is a dict-like mapping - confirm.
        with mutex_dict.setdefault(self.local_path(), Lock()):
            if self.exists():
                # check if exists
                return

            # check if requirement files exist
            for file in self.definition.requires:
                DatabaseFile(self.page, file).create()

            # check again if exists since the requirements might have created that file!
            if self.exists():
                return

            from omr.steps.preprocessing.preprocessing import Preprocessing

            # create local file
            logger.info('Creating local file {}'.format(self.local_path()))
            if self.definition.id == 'statistics' \
                    or self.definition.id == 'page_progress':
                # start with an empty JSON object
                import json
                with open(self.local_path(), 'w') as f:
                    json.dump({}, f)
            elif self.definition.id == 'page_progress_backup' \
                    or self.definition.id == 'statistics_backup':
                # start with an empty zip archive
                import zipfile
                zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
                zf.close()
            elif self.definition.id == 'annotation':
                import json
                with open(self.local_path(), 'w') as f:
                    json.dump({}, f)
            elif self.definition.id == 'pcgts':
                # new PcGts that only carries the size of the original image
                from database.file_formats.pcgts import PcGts, Page, Meta
                img = Image.open(DatabaseFile(self.page, 'color_original').local_path())
                pcgts = PcGts(
                    meta=Meta(),
                    page=Page(location=self.page),
                )
                pcgts.page.image_width, pcgts.page.image_height = img.size
                pcgts.to_file(self.local_path())
            elif self.definition.id == 'pcgts_backup':
                import zipfile
                zf = zipfile.ZipFile(self.local_path(), mode='w', compression=zipfile.ZIP_DEFLATED)
                zf.close()
            elif self.definition.id == 'color_original':
                # create preview
                img = Image.open(self.local_path())
                img.thumbnail(thumbnail_size)
                img.save(self.local_thumbnail_path())
            elif self.definition.id == 'color_highres_preproc':
                meta = self.page.meta()
                preproc = Preprocessing()
                img = Image.open(self.page.local_file_path('color_original.jpg'))
                w, h = img.size
                # limit the width and keep the aspect ratio
                out_w = min(high_res_max_width, w)
                out_h = (out_w * h) // w
                c_hr = img.resize((out_w, out_h), Image.BILINEAR)
                c_hr, g_hr, b_hr = preproc.preprocess(c_hr)
                # remember the deskewing angle reported by the preprocessing
                meta.preprocessing.deskewing_degrees = preproc.deskewed_angle
                meta.save(self.page)
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_lowres_preproc':
                c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
                b_hr = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
                g_hr = Image.open(self.page.local_file_path('gray_highres_preproc.jpg'))
                w, h = c_hr.size
                out_w = min(low_res_max_width, w)
                out_h = (out_w * h) // w
                size = (out_w, out_h)
                c_hr = c_hr.resize(size, Image.BILINEAR)
                g_hr = g_hr.resize(size, Image.BILINEAR)
                b_hr = b_hr.resize(size, Image.NEAREST)

                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_norm':
                meta = self.page.meta()
                c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))
                if meta.preprocessing.auto_line_distance:
                    # compute the line distance on the high-res binary image
                    # and store it in the page meta
                    from omr.steps.preprocessing.scale.scale import LineDistanceComputer
                    ldc = LineDistanceComputer()
                    low_binary = Image.open(self.page.local_file_path('binary_highres_preproc.png'))
                    line_distance = ldc.get_line_distance(np.array(low_binary) / 255).line_distance
                    meta.preprocessing.average_line_distance = line_distance
                    meta.save(self.page)
                else:
                    # average_line_distance is expected to be computed on the original image
                    c_orig = Image.open(self.page.local_file_path('color_original.jpg'))
                    line_distance = int(np.round(meta.preprocessing.average_line_distance * c_hr.size[0] / c_orig.size[0]))

                assert(line_distance > 0)

                # rescale original image
                scaling = line_distance / target_staff_line_distance
                size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
                c_hr = c_hr.resize(size, Image.BILINEAR)

                # compute gray and binary based on normalized color image
                preproc = Preprocessing()
                g_hr = preproc.im2gray(c_hr)
                b_hr = preproc.binarize(c_hr)

                # save output
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_norm_x2':
                meta = self.page.meta()
                line_distance = meta.preprocessing.average_line_distance
                if line_distance <= 0:
                    # no usable line distance stored yet: rebuild 'color_norm'
                    # and read the meta again afterwards
                    nf = self.page.file('color_norm')
                    nf.delete()
                    nf.create()
                    meta = self.page.meta()
                    line_distance = meta.preprocessing.average_line_distance

                assert(line_distance > 0)
                c_hr = Image.open(self.page.local_file_path('color_highres_preproc.jpg'))

                # rescale original image
                scaling = line_distance / (target_staff_line_distance * 2)
                size = (int(c_hr.size[0] / scaling), int(c_hr.size[1] / scaling))
                c_hr = c_hr.resize(size, Image.BILINEAR)

                # compute gray and binary based on normalized color image
                preproc = Preprocessing()
                g_hr = preproc.im2gray(c_hr)
                b_hr = preproc.binarize(c_hr)

                # save output
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'connected_components_norm':
                # pickle the connected components of the 'binary_norm' image
                import pickle
                from omr.steps.preprocessing.util.connected_compontents import connected_compontents_with_stats
                binary = np.array(Image.open(DatabaseFile(self.page, 'binary_norm').local_path()))
                with open(self.local_path(), 'wb') as f:
                    pickle.dump(connected_compontents_with_stats(binary), f)
            else:
                raise Exception("Cannot create file for {}".format(self.definition.id))
Пример #6
0
                    return target_cell_origin + rel * target_cell_extend

        return p

    def transform_points(self, ps):
        """Apply ``transform_point`` to every point of ``ps``.

        :return: the transformed points collected in a numpy array.
        """
        transformed = list(map(self.transform_point, ps))
        return np.array(transformed)


if __name__ == '__main__':
    # Manual demo: mark a few test points on the images of the first page of
    # the 'Gothic_Test' book, plot them and dewarp the images.
    from database import DatabaseBook
    from database.file_formats.pcgts import PageScaleReference
    import matplotlib.pyplot as plt
    page = DatabaseBook('Gothic_Test').pages()[0]
    binary = Image.open(page.file('binary_highres_preproc', create_if_not_existing=True).local_path())
    gray = Image.open(page.file('gray_highres_preproc').local_path())
    pcgts = PcGts.from_file(page.file('pcgts', create_if_not_existing=True))
    overlay = np.array(gray)

    points_to_transform = np.array([(100, 50), (200, 50), (300, 50), (400, 50), (600, 50), (800, 50), (100, 100), (200, 150), (300, 200)], dtype=int)

    # staffs.draw(overlay)
    images = [binary, gray, Image.fromarray(overlay)]
    # sharex/sharey are keyword-only in current Matplotlib releases; passing
    # them positionally raises a TypeError there.
    f, ax = plt.subplots(2, len(images), sharex=True, sharey=True)
    for a, image in enumerate(images):
        marked = np.array(image)
        for p in points_to_transform:
            # paint a 10x10 square around the point (p is (x, y))
            marked[p[1]-5:p[1]+5, p[0]-5:p[0]+5] = 255
        ax[0, a].imshow(marked)

    dewarper = Dewarper(images[0].size, pcgts.page.all_staves_staff_line_coords(scale=PageScaleReference.HIGHRES))
    images = dewarper.dewarp(images)
Пример #7
0
        pad=[0, 10, 0, 40],
        dewarp=True,
        center=True,
        staff_lines_only=True,
        cut_region=False,
        height=120,
    )

    print(params.to_json())

    at = AlgorithmTypes.SYMBOLS_PC

    if at == AlgorithmTypes.SYMBOLS_SEQUENCE_TO_SEQUENCE:
        f, ax = plt.subplots(9, max(2, len(pages)), sharex='all', sharey='all')
        for i, p in enumerate(pages):
            pcgts = PcGts.from_file(p.file('pcgts'))
            dataset = SymbolDetectionDataset([pcgts], params)
            calamari_dataset = dataset.to_calamari_dataset(train=True)
            for a, (sample, out) in enumerate(
                    zip(calamari_dataset.samples(), dataset.load())):
                img, region, mask = out.line_image, out.region, out.mask
                img = sample['image'].transpose()
                ax[a, i].imshow(img)
    elif at == AlgorithmTypes.SYMBOLS_PC:
        page = pages[0]
        pcgts = PcGts.from_file(page.file('pcgts'))
        dataset = SymbolDetectionDataset([pcgts], params)
        ps_dataset = dataset.to_page_segmentation_dataset()

        canvas_ol = PcGtsCanvas(
            pcgts.page, scale_reference=PageScaleReference.NORMALIZED_X2)
Пример #8
0
 def pcgts_from_dict(self, d: dict) -> 'PcGts':
     """Build a PcGts from the dict ``d``, cache it on this object and return it."""
     # Imported locally, as in the rest of this class.
     from database.file_formats.pcgts import PcGts
     parsed = PcGts.from_json(d, self)
     self._pcgts = parsed
     return parsed
Пример #9
0
 def pcgts(self, create_if_not_existing=True) -> 'PcGts':
     """Return the cached PcGts, loading it from the 'pcgts' file on first use.

     ``create_if_not_existing`` is forwarded to ``self.file`` when the
     PcGts has to be loaded.
     """
     if self._pcgts:
         # already loaded (or set) earlier
         return self._pcgts

     from database.file_formats.pcgts import PcGts
     self._pcgts = PcGts.from_file(self.file('pcgts', create_if_not_existing))
     return self._pcgts
Пример #10
0
                p_to_np(p.get('text', []), page),
                BlockType.MUSIC:
                p_to_np(p.get('system', []), page),
            }, )


if __name__ == "__main__":
    # Manual demo: predict the blocks of one page of the 'demo' book and
    # draw the music and lyrics regions into a mask image.
    from database import DatabaseBook
    from PIL import Image
    import matplotlib.pyplot as plt

    b = DatabaseBook('demo')
    p = b.page('page00000001')
    img = np.array(Image.open(p.file('color_norm').local_path()))
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; the builtin yields the same float64 array.
    mask = np.zeros(img.shape, float) + 255
    val_pcgts = [PcGts.from_file(p.file('pcgts'))]

    params = AlgorithmPredictorSettings(model=Meta.best_model_for_book(b), )
    pred = Predictor(params)

    def s(c):
        # scale page coordinates to the scale reference used by the predictor
        return val_pcgts[0].page.page_to_image_scale(
            c, pred.dataset_params.page_scale_reference)

    for p in pred.predict(val_pcgts):
        # music regions are drawn with color (255, 0, 0)
        for mr_c in p.blocks.get(BlockType.MUSIC, []):
            s(mr_c.coords).draw(mask, (255, 0, 0), fill=True, thickness=0)

        # lyrics regions are drawn with color (0, 255, 0)
        for mr_c in p.blocks.get(BlockType.LYRICS, []):
            s(mr_c.coords).draw(mask, (0, 255, 0), fill=True, thickness=0)
Пример #11
0
    def create(self):
        """Create the local file of this database file.

        The work is done while holding a lock that is looked up by the local
        path of this file. Nothing happens if the file already exists, unless
        ``self.definition.recalculate`` is set. Otherwise every file listed in
        ``self.definition.requires`` is created first, and then the file
        itself is produced depending on ``self.definition.id``.

        :raises Exception: if there is no creation rule for the definition id.
        """

        # dict.get() never stores its default, so a fresh, unshared Lock was
        # used on every call and nothing was actually serialized.
        # setdefault() registers the lock so all callers of one path share it.
        # NOTE(review): assumes mutex_dict is a dict-like mapping - confirm.
        with mutex_dict.setdefault(self.local_path(), Lock()):
            if self.exists() and not self.definition.recalculate:
                # check if exists
                return

            # check if requirement files exist
            for file in self.definition.requires:
                DatabaseFile(self.page, file).create()

            # check again if exists since the requirements might have created that file!
            if self.exists() and not self.definition.recalculate:
                return

            from omr.steps.preprocessing.preprocessing import Preprocessing

            # create local file
            logger.info('Creating local file {}'.format(self.local_path()))
            if self.definition.id == 'statistics' \
                    or self.definition.id == 'page_progress':
                # start with an empty JSON object
                import json
                with open(self.local_path(), 'w') as f:
                    json.dump({}, f)
            elif self.definition.id == 'page_progress_backup' \
                    or self.definition.id == 'statistics_backup':
                # start with an empty zip archive
                import zipfile
                zf = zipfile.ZipFile(self.local_path(),
                                     mode='w',
                                     compression=zipfile.ZIP_DEFLATED)
                zf.close()
            elif self.definition.id == 'annotation':
                import json
                with open(self.local_path(), 'w') as f:
                    json.dump({}, f)
            elif self.definition.id == 'pcgts':
                # new PcGts that only carries the size of the original image
                from database.file_formats.pcgts import PcGts, Page, Meta
                img = Image.open(
                    DatabaseFile(self.page, 'color_original').local_path())
                pcgts = PcGts(
                    meta=Meta(),
                    page=Page(location=self.page),
                )
                pcgts.page.image_width, pcgts.page.image_height = img.size
                pcgts.to_file(self.local_path())
            elif self.definition.id == 'pcgts_backup':
                import zipfile
                zf = zipfile.ZipFile(self.local_path(),
                                     mode='w',
                                     compression=zipfile.ZIP_DEFLATED)
                zf.close()
            elif self.definition.id == 'color_original':
                # create preview
                img = Image.open(self.local_path())
                img.thumbnail(thumbnail_size)
                img.save(self.local_thumbnail_path())
            elif self.definition.id == 'color_highres_preproc':
                meta = self.page.meta()
                preproc = Preprocessing()
                img = Image.open(
                    self.page.local_file_path('color_original.jpg'))
                w, h = img.size
                # limit the width and keep the aspect ratio
                out_w = min(high_res_max_width, w)
                out_h = (out_w * h) // w
                c_hr = img.resize((out_w, out_h), Image.BILINEAR)
                c_hr, g_hr, b_hr = preproc.preprocess(c_hr)
                # remember the deskewing angle reported by the preprocessing
                meta.preprocessing.deskewing_degrees = preproc.deskewed_angle
                meta.save(self.page)
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_lowres_preproc':
                c_hr = Image.open(
                    self.page.local_file_path('color_highres_preproc.jpg'))
                b_hr = Image.open(
                    self.page.local_file_path('binary_highres_preproc.png'))
                g_hr = Image.open(
                    self.page.local_file_path('gray_highres_preproc.jpg'))
                w, h = c_hr.size
                out_w = min(low_res_max_width, w)
                out_h = (out_w * h) // w
                size = (out_w, out_h)
                c_hr = c_hr.resize(size, Image.BILINEAR)
                g_hr = g_hr.resize(size, Image.BILINEAR)
                b_hr = b_hr.resize(size, Image.NEAREST)

                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_norm':
                meta = self.page.meta()
                c_hr = Image.open(
                    self.page.local_file_path('color_highres_preproc.jpg'))
                if meta.preprocessing.auto_line_distance:
                    # compute the line distance on the high-res binary image
                    # and store it in the page meta
                    from omr.steps.preprocessing.scale.scale import LineDistanceComputer
                    ldc = LineDistanceComputer()
                    low_binary = Image.open(
                        self.page.local_file_path(
                            'binary_highres_preproc.png'))
                    line_distance = ldc.get_line_distance(
                        np.array(low_binary) / 255).line_distance
                    meta.preprocessing.average_line_distance = line_distance
                    meta.save(self.page)
                else:
                    # average_line_distance is expected to be computed on the original image
                    c_orig = Image.open(
                        self.page.local_file_path('color_original.jpg'))
                    line_distance = int(
                        np.round(meta.preprocessing.average_line_distance *
                                 c_hr.size[0] / c_orig.size[0]))

                assert (line_distance > 0)

                # rescale original image
                scaling = line_distance / target_staff_line_distance
                size = (int(c_hr.size[0] / scaling),
                        int(c_hr.size[1] / scaling))
                c_hr = c_hr.resize(size, Image.BILINEAR)

                # compute gray and binary based on normalized color image
                preproc = Preprocessing()
                g_hr = preproc.im2gray(c_hr)
                b_hr = preproc.binarize(c_hr)

                # save output
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'color_norm_x2':
                meta = self.page.meta()
                line_distance = meta.preprocessing.average_line_distance
                if line_distance <= 0:
                    # no usable line distance stored yet: rebuild 'color_norm'
                    # and read the meta again afterwards
                    nf = self.page.file('color_norm')
                    nf.delete()
                    nf.create()
                    meta = self.page.meta()
                    line_distance = meta.preprocessing.average_line_distance

                assert (line_distance > 0)
                c_hr = Image.open(
                    self.page.local_file_path('color_highres_preproc.jpg'))

                # rescale original image
                scaling = line_distance / (target_staff_line_distance * 2)
                size = (int(c_hr.size[0] / scaling),
                        int(c_hr.size[1] / scaling))
                c_hr = c_hr.resize(size, Image.BILINEAR)

                # compute gray and binary based on normalized color image
                preproc = Preprocessing()
                g_hr = preproc.im2gray(c_hr)
                b_hr = preproc.binarize(c_hr)

                # save output
                self._save_and_thumbnail(c_hr, 0)
                self._save_and_thumbnail(g_hr, 1)
                self._save_and_thumbnail(b_hr, 2)
            elif self.definition.id == 'connected_components_norm':
                # pickle the connected components of the 'binary_norm' image
                import pickle
                from omr.steps.preprocessing.util.connected_compontents import connected_compontents_with_stats
                binary = np.array(
                    Image.open(
                        DatabaseFile(self.page, 'binary_norm').local_path()))
                with open(self.local_path(), 'wb') as f:
                    pickle.dump(connected_compontents_with_stats(binary), f)
            elif self.definition.id == 'monodiplus':
                # convert the page's PcGts and dump the converter root as JSON
                import json
                import database.file_formats.pcgts as ns_pcgts
                pcgts = ns_pcgts.PcGts.from_file(
                    DatabaseFile(self.page, 'pcgts'))
                root = PcgtsToMonodiConverter([pcgts]).root
                with open(self.local_path(), 'w', encoding='utf-8') as f:
                    json.dump(root.to_json(), f, ensure_ascii=False, indent=4)
            elif self.definition.id == 'monodiplus_svg':
                # run the external 'one-shot' renderer on the 'monodiplus' file
                path = DatabaseFile(self.page, 'monodiplus').local_path()
                from ommr4all.settings import BASE_DIR
                script_path = os.path.join(BASE_DIR, 'internal_storage',
                                           'resources', 'monodi_svg_render',
                                           'bin', 'one-shot')
                import subprocess
                proc = subprocess.Popen(
                    [script_path, path, "-o",
                     self.local_path()],
                    stdout=subprocess.PIPE)
                result, err = proc.communicate()
                # error code in the java script is to be ignored for now
                exit_code = proc.wait()
            else:
                raise Exception("Cannot create file for {}".format(
                    self.definition.id))