Пример #1
0
    def _bounding_box(self):
        """Return a new :obj:`mupdf.Rect` passed through :func:`mupdf.bound_page` for this page."""

        # Determine the size of the page at 72 dpi.
        page_bounds = mupdf.Rect()
        mupdf.bound_page(self._context, self._c_page, page_bounds)

        return page_bounds
Пример #2
0
    def _make_transform(self, scale=1, rotation=0):
        """Build a :obj:`mupdf.Matrix` from a uniform scale and a rotation.

        The matrix is first set up with :func:`mupdf.scale`, using *scale* on both
        axes, then :func:`mupdf.pre_rotate` is applied to it with *rotation*.
        """

        matrix = mupdf.Matrix()
        # Alternative kept for reference: mupdf.rotate() followed by mupdf.pre_scale().
        mupdf.scale(matrix, scale, scale)
        mupdf.pre_rotate(matrix, rotation)

        return matrix
Пример #3
0
    def __del__(self):
        """Free the cached pages, then drop the MuPDF document and context.

        ``__del__`` is also invoked when ``__init__`` raised before every
        attribute was assigned, so each attribute is read defensively instead
        of assuming it exists.  Handles are reset once dropped so that calling
        this method a second time does not drop them again.
        """

        # Fixme: manage properly
        pages = getattr(self, '_pages', None)
        context = getattr(self, '_context', None)
        c_document = getattr(self, '_c_document', None)

        if pages:
            for page in pages.values():
                page._free() # require context
            # Forget the freed pages so they cannot be freed twice.
            pages.clear()
        if c_document is not None:
            mupdf.drop_document(context, c_document)
            self._c_document = None
        # The context is dropped last: the calls above still need it.
        if context is not None:
            mupdf.drop_context(context)
            self._context = None
Пример #4
0
def get_font_name(font):
    """ Return the name of a MuPDF font.

    Everything up to and including the first ``+`` is removed from the decoded
    name (presumably a font subset prefix, e.g. ``ABCDEF+Times`` gives
    ``Times``).  A name without ``+`` is returned unchanged.
    """

    font_name = mupdf.decode_utf8(mupdf.get_font_name(font))
    # str.find returns -1 when '+' is absent and 0 when the name starts with
    # it, so compare against -1 explicitly rather than testing truthiness.
    plus_index = font_name.find('+')
    if plus_index != -1:
        font_name = font_name[plus_index + 1:]

    return font_name
Пример #5
0
def get_font_name(font):

    """ Return the name of a MuPDF font.

    The decoded name is cut after its first ``+`` (presumably a font subset
    prefix, e.g. ``ABCDEF+Times`` gives ``Times``); a name that contains no
    ``+`` is returned as is.
    """

    font_name = mupdf.decode_utf8(mupdf.get_font_name(font))
    # A truthiness test on str.find is wrong on both ends: -1 (not found) is
    # truthy and 0 (leading '+') is falsy.  Compare against -1 explicitly.
    plus_index = font_name.find('+')
    if plus_index != -1:
        font_name = font_name[plus_index+1:]

    return font_name
Пример #6
0
    def _make_display_list(self, no_cache=False):
        """Run the page contents through a list device into a new display list.

        The display list is stored on ``self._page_list``.

        :param no_cache: when true, the ``mupdf.FZ_NO_CACHE`` hint is enabled on
            the list device before the page contents are run.
        """

        # Fixme: use it

        self._page_list = mupdf.new_display_list(self._context, mupdf.NULL)
        # Use the instance attributes: this method has no `page_list` or `page`
        # local, so referring to them by bare name raises NameError.
        device = mupdf.new_list_device(self._context, self._page_list)
        if no_cache:
            mupdf.enable_device_hints(self._context, device, mupdf.FZ_NO_CACHE)
        mupdf.run_page_contents(self._context, self._c_page, device, mupdf.identity, mupdf.NULL)
        mupdf.close_device(self._context, device)
        mupdf.drop_device(self._context, device)
Пример #7
0
def to_text_style(style):
    """ Build a :obj:`.TextStyle` object from a MuPDF style instance.

    The id and the size are copied from *style*; the family name and the
    bold/italic flags are derived from its font.
    """

    mupdf_font = style.font

    return TextStyle(
        id=style.id,
        font_family=get_font_name(mupdf_font),
        font_size=style.size,
        is_bold=bool(mupdf.font_is_bold(mupdf_font)),
        is_italic=bool(mupdf.font_is_italic(mupdf_font)),
    )
Пример #8
0
def to_text_style(style):

    """ Translate a MuPDF style instance into a :obj:`.TextStyle` object. """

    mupdf_font = style.font

    # Gather the attributes one by one, then build the object in a single call.
    style_id = style.id
    family = get_font_name(mupdf_font)
    size = style.size
    bold = bool(mupdf.font_is_bold(mupdf_font))
    italic = bool(mupdf.font_is_italic(mupdf_font))

    return TextStyle(id=style_id, font_family=family, font_size=size, is_bold=bold, is_italic=italic)
Пример #9
0
    def _to_style(self, c_char):
        """Return the :obj:`TextStyle` built from the size and the font of *c_char*."""

        context = self._context
        char_size = c_char.size
        font = c_char.font

        bold = mupdf.font_is_bold(context, font)
        italic = mupdf.font_is_italic(context, font)
        family = mupdf.font_name(context, font)

        return TextStyle(font_family=family, font_size=char_size, is_bold=bold, is_italic=italic)
Пример #10
0
    def _to_style(self, c_char):
        """Describe the font of *c_char* as a :obj:`TextStyle`.

        The bold/italic flags and the family name are queried from MuPDF for
        the font of the character; the size is taken from the character.
        """

        font_size = c_char.size
        font = c_char.font

        bold_flag = mupdf.font_is_bold(self._context, font)
        italic_flag = mupdf.font_is_italic(self._context, font)
        family_name = mupdf.font_name(self._context, font)

        attributes = {
            'font_family': family_name,
            'font_size': font_size,
            'is_bold': bold_flag,
            'is_italic': italic_flag,
        }

        return TextStyle(**attributes)
Пример #11
0
    def dump_text_page_xml(self, dump_char=True):
        """Return an XML dump of the structured text page as a string.

        The output nests ``<page>``, ``<block>`` and ``<line>`` elements
        following the block and line iterators of ``mupdf_iter``.

        :param dump_char: when true, one empty ``<char/>`` element is emitted
            for each character of each line.
        :return: the whole dump as a single string.
        """

        # Fixme: old and historical code, move elsewhere ?

        from xml.sax.saxutils import quoteattr

        # Collect the fragments and join once instead of growing a string.
        parts = ['<page page_number="{}">\n'.format(self._page_number)]
        for block in mupdf_iter.text_block_iterator(self._text_page):
            parts.append('<block bbox="{}">\n'.format(format_bounding_box(block)))
            for line in mupdf_iter.text_line_iterator(block):
                # The bbox attribute must be closed before wmode starts.
                parts.append(' ' * 2 + '<line bbox="{}" wmode="{}" dir="{}">\n'.format(
                    format_bounding_box(line),
                    line.wmode,
                    '{0.x} {0.y}'.format(line.dir),  # :.2f
                ))
                # for span in mupdf_iter.TextSpanIterator(line):
                if dump_char:
                    for char in mupdf_iter.text_char_iterator(line):
                        # quoteattr adds the quotes itself and escapes markup
                        # characters such as '"', '<' and '&'; the element is
                        # self-closed since it has no content.
                        parts.append(' ' * 4 + '<char c={} bbox="{}" font="{}" size="{:.2f}"/>\n'.format(
                            quoteattr(chr(char.c)),
                            # char.origin
                            format_bounding_box(char),
                            mupdf.font_name(self._context, char.font),
                            char.size,
                        ))
                parts.append(' ' * 2 + '</line>\n')
            parts.append('</block>\n')
        parts.append('</page>\n')

        return ''.join(parts)
Пример #12
0
    def dump_text_page_xml(self, dump_char=True):
        """Dump the structured text page as an XML string.

        Blocks and lines are walked with the ``mupdf_iter`` iterators and
        written as nested ``<page>``, ``<block>`` and ``<line>`` elements.

        :param dump_char: when true, every character of a line is written as
            an empty ``<char/>`` element inside its ``<line>``.
        :return: the complete dump as one string.
        """

        # Fixme: old and historical code, move elsewhere ?

        from xml.sax.saxutils import quoteattr

        line_indent = ' '*2
        char_indent = ' '*4

        # Fragments are accumulated in a list and joined once at the end.
        fragments = ['<page page_number="{}">\n'.format(self._page_number)]
        for block in mupdf_iter.text_block_iterator(self._text_page):
            fragments.append('<block bbox="{}">\n'.format(format_bounding_box(block)))
            for line in mupdf_iter.text_line_iterator(block):
                # Each attribute value is closed by its own quote.
                fragments.append(line_indent + '<line bbox="{}" wmode="{}" dir="{}">\n'.format(
                    format_bounding_box(line),
                    line.wmode,
                    '{0.x} {0.y}'.format(line.dir), # :.2f
                    ))
                # for span in mupdf_iter.TextSpanIterator(line):
                if dump_char:
                    for char in mupdf_iter.text_char_iterator(line):
                        # The character is quoted and escaped by quoteattr so
                        # that '"', '<' or '&' cannot break the markup; the
                        # element carries no content and is self-closed.
                        fragments.append(char_indent + '<char c={} bbox="{}" font="{}" size="{:.2f}"/>\n'.format(
                            quoteattr(chr(char.c)),
                            # char.origin
                            format_bounding_box(char),
                            mupdf.font_name(self._context, char.font),
                            char.size,
                        ))
                fragments.append(line_indent + '</line>\n')
            fragments.append('</block>\n')
        fragments.append('</page>\n')

        return ''.join(fragments)
Пример #13
0
    def __init__(self, document, page_number): # or page_index
        """Load page *page_number* of *document* and keep the handles needed to use it.

        The MuPDF context and document handles are taken from *document*.
        """

        self._document = document
        context = document._context
        c_document = document._c_document

        self._context = context
        self._c_document = c_document
        self._page_number = page_number
        self._c_page = mupdf.load_page(context, c_document, page_number)
        # No structured text page is attached at construction time.
        self._text_page = None
Пример #14
0
    def __init__(self, path):
        """Open the document located at *path* with MuPDF.

        :raises MupdfError: when ``mupdf.open_document`` returns
            ``mupdf.NULL``; the message caught by the context is logged and
            used as the exception message.
        """

        super().__init__(path)

        self._context = None
        self._c_document = None
        self._pages = {} # page cache

        encoded_path = str(self._path).encode('utf-8')

        # Create a context to hold the exception stack and various caches
        self._context = mupdf.new_context()
        context = self._context
        # Register the default file types to handle
        mupdf.register_document_handlers(context)
        self._c_document = mupdf.open_document(context, encoded_path)
        c_document = self._c_document
        if c_document == mupdf.NULL:
            error_message = mupdf.decode_utf8(mupdf.caught_message(context))
            self._logger.error(error_message)
            raise MupdfError(error_message)

        self._metadata = MetaData(self)
        self._number_of_pages = mupdf.count_pages(context, c_document)
        self._document_words = None
        self._image_cache = None
Пример #15
0
    def _to_text(self, scale=1, rotation=0):

        """ Run the page through a structured text device and return a :obj:`.TextPage` instance.

        *scale* and *rotation* are forwarded to :meth:`_make_transform` to
        build the matrix used when the page is run.
        """

        page_bounds = self._bounding_box()
        matrix = self._make_transform(scale, rotation)
        options = mupdf.StructuredTextOptions()

        context = self._context
        stext_page = mupdf.new_stext_page(context, page_bounds)
        stext_device = mupdf.new_stext_device(context, stext_page, options)
        mupdf.run_page(context, self._c_page, stext_device, matrix, mupdf.NULL)
        # The device is closed and then dropped once the page has been run.
        mupdf.close_device(context, stext_device)
        mupdf.drop_device(context, stext_device)

        return TextPage(self, stext_page)
Пример #16
0
    def text_direct(self):
        """Return the page text decoded from a MuPDF buffer created from the page."""

        # Fixme: versus text

        options = mupdf.StructuredTextOptions()
        context = self._context
        page_buffer = mupdf.new_buffer_from_page(context, self._c_page, options)
        raw_text = mupdf.string_from_buffer(context, page_buffer)
        # The string is extracted before the buffer is dropped.
        mupdf.drop_buffer(context, page_buffer)

        return mupdf.decode_utf8(raw_text)
Пример #17
0
    def __init__(self, document):
        """Fill the dictionary with the ``info:*`` entries read from *document*.

        A ``'metadata'`` entry is also stored; it is currently always an empty
        string.
        """

        super(MetaData, self).__init__()

        context, c_document = document._context, document._c_document

        info_keys = ('Title', 'Subject', 'Author', 'Creator', 'Producer', 'CreationDate', 'ModDate')
        for key in info_keys:
            # Fixme: buffer size
            self._dictionary[key] = mupdf.get_meta_info(context, c_document, 'info:' + key, size=1024)

        # The lookup based on mupdf.pdf_metadata is disabled: store an empty string.
        self._dictionary['metadata'] = ''
Пример #18
0
    def _transform_bounding_box(self,
                                rotation=0,
                                resolution=72,
                                width=0, height=0, fit=False):
        """Compute the page transform and the rounded bounding box of the transformed page.

        :param rotation: rotation given to :func:`mupdf.rotate`.
        :param resolution: resolution in dpi; the scale factor is ``resolution / 72``.
        :param width: width constraint, ``0`` or ``None`` for no constraint
            (presumably in pixels -- confirm against callers).
        :param height: height constraint, ``0`` or ``None`` for no constraint.
        :param fit: when true the two axes are scaled independently (aspect
            ratio ignored); otherwise the same scale is used on both axes.
        :return: a ``(transform, ibounds)`` tuple, a :obj:`mupdf.Matrix` and a
            :obj:`mupdf.IRect`.
        """

        # Callers may pass None for "no constraint": normalise it to 0 so the
        # divisions below do not fail when only one of the two is given.
        width = width or 0
        height = height or 0

        bounds = self._bounding_box()
        scale = resolution / 72.
        transform = mupdf.Matrix()
        mupdf.pre_scale(mupdf.rotate(transform, rotation), scale, scale)
        tmp_bounds = mupdf.Rect()
        mupdf.copy_rect(tmp_bounds, bounds)
        ibounds = mupdf.IRect()
        mupdf.round_rect(ibounds, mupdf.transform_rect(tmp_bounds, transform))

        # If a resolution is specified, check to see whether width/height are exceeded if not, unset them.
        if resolution != 72:
            actual_width = ibounds.x1 - ibounds.x0
            actual_height = ibounds.y1 - ibounds.y0
            if width and actual_width <= width:
                width = 0
            if height and actual_height <= height:
                height = 0

        # Now width or height will be 0 unless they need to be enforced.
        if width or height:
            scale_x = width  / (tmp_bounds.x1 - tmp_bounds.x0)
            scale_y = height / (tmp_bounds.y1 - tmp_bounds.y0)
            if fit: # ignore aspect
                if not scale_x:
                    scale_x = 1.0 # keep computed width
                elif not scale_y:
                    scale_y = 1.0 # keep computed height
            else:
                if not scale_x:
                    scale_x = scale_y
                elif not scale_y:
                    scale_y = scale_x
                else:
                    # take the smallest scale
                    if scale_x > scale_y:
                        scale_x = scale_y
                    else:
                        scale_y = scale_x
            scale_mat = mupdf.Matrix()
            mupdf.scale(scale_mat, scale_x, scale_y)
            mupdf.concat(transform, transform, scale_mat)
            # Recompute the transformed bounds from the original bounds with
            # the updated transform.
            mupdf.copy_rect(tmp_bounds, bounds)
            mupdf.transform_rect(tmp_bounds, transform)

        mupdf.round_rect(ibounds, tmp_bounds)

        return transform, ibounds
Пример #19
0
    def _free(self):
        """Drop the MuPDF structured text page held by this object."""

        context, text_page = self._context, self._text_page
        mupdf.drop_stext_page(context, text_page)
Пример #20
0
    def _free(self):
        """Hand the structured text page back to MuPDF through ``drop_stext_page``."""

        text_page = self._text_page
        mupdf.drop_stext_page(self._context, text_page)
Пример #21
0
    def to_pixmap(self,
                  rotation=0,
                  resolution=72,
                  width=None, height=None, fit=False,
                  antialiasing_level=8,
                  ):
        """Render the page and return the result as a Numpy array.

        The transform and the bounding box come from
        :meth:`_transform_bounding_box`.  The returned array has the shape
        ``(height, width, 4)`` and dtype ``uint8``, where width and height are
        those of the bounding box.  Its data pointer is handed to the pixmap
        the page is drawn on (presumably MuPDF writes the pixels straight into
        the array -- confirm against the binding).

        :param antialiasing_level: value given to :func:`mupdf.set_aa_level`.
        """

        context = self._context

        transform, bounding_box = self._transform_bounding_box(
            rotation, resolution, width, height, fit)

        pixel_width, pixel_height = mupdf.rect_width_height(bounding_box)
        pixels = np.zeros((pixel_height, pixel_width, 4), dtype=np.uint8)
        rgb_color_space = mupdf.device_rgb(context)
        pixmap = mupdf.new_pixmap_with_bbox_and_data(
            context,
            rgb_color_space,
            bounding_box,
            mupdf.NULL,
            True,  # use alpha
            mupdf.np_array_uint8_ptr(pixels),
        )
        mupdf.clear_pixmap_with_value(context, pixmap, 255) # 0xff

        draw_device = mupdf.new_draw_device(context, mupdf.NULL, pixmap)
        mupdf.set_aa_level(context, antialiasing_level)
        mupdf.run_page(context, self._c_page, draw_device, transform, mupdf.NULL)
        mupdf.close_device(context, draw_device)
        mupdf.drop_device(context, draw_device)
        # Only the pixmap is dropped here; the array itself is returned.
        mupdf.drop_pixmap(context, pixmap)

        return pixels
Пример #22
0
    def _free(self):
        """Free the attached text page, if there is one, then drop the MuPDF page."""

        text_page = self._text_page
        if text_page is not None:
            text_page._free()
        mupdf.drop_page(self._context, self._c_page)