Exemplo n.º 1
0
    def extract_images(self, processed_records, output_dir):
        '''
        Save the not yet processed image records into ``<output_dir>/images``.

        Records are visited from the header's ``first_image_index`` (or from 0
        when that value is missing or out of range) up to
        ``self.num_sections``. Every visited record number is appended to
        ``processed_records``, which is mutated in place. The base names of
        the files that were written end up in ``self.image_names``.
        '''
        non_image_magic = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}
        raster_formats = {'jpg', 'jpeg', 'gif', 'png', 'bmp'}

        self.log.debug('Extracting images...')
        images_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        counter = 0
        self.image_names = []
        first = getattr(self.book_header, 'first_image_index', -1)
        if not 0 <= first <= self.num_sections:
            # BAEN PRC files have bad headers
            first = 0
        for recno in range(first, self.num_sections):
            if recno in processed_records:
                continue
            processed_records.append(recno)
            payload = self.sections[recno][0]
            # The counter advances for every visited record, image or not,
            # so file names follow the record position.
            counter += 1
            if payload[:4] in non_image_magic:
                # Known non-image record type, no point probing it.
                continue

            fname = '%05d.jpg' % counter
            destination = os.path.join(images_dir, fname)
            try:
                is_image = what(None, payload) in raster_formats
                if is_image:
                    save_cover_data_to(payload, destination,
                            minify_to=(10000, 10000))
            except Exception:
                # Best effort: an unreadable record is simply skipped.
                continue
            if is_image:
                self.image_names.append(fname)
Exemplo n.º 2
0
    def extract_images(self, processed_records, output_dir):
        '''
        Write the book's image records as files under ``<output_dir>/images``.

        Scanning starts at ``self.book_header.first_image_index`` and falls
        back to record 0 when that value is absent, negative or larger than
        ``self.num_sections``. Record numbers already present in
        ``processed_records`` are skipped; the others are appended to it.
        ``self.image_names`` is reset and filled with the saved file names.
        '''
        self.log.debug('Extracting images...')
        target = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(target):
            os.makedirs(target)

        skip_magic = {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
                b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}
        accepted = {'jpg', 'jpeg', 'gif', 'png', 'bmp'}

        seq = 0
        self.image_names = []
        begin = getattr(self.book_header, 'first_image_index', -1)
        if begin < 0 or begin > self.num_sections:
            # BAEN PRC files have bad headers
            begin = 0

        for index in range(begin, self.num_sections):
            if index in processed_records:
                continue
            processed_records.append(index)
            record = self.sections[index][0]
            seq += 1
            if record[:4] in skip_magic:
                # Known non-image record, nothing to load.
                continue

            dest = os.path.join(target, '%05d.jpg' % seq)
            try:
                recognised = what(None, record) in accepted
                if recognised:
                    save_cover_data_to(record, dest, minify_to=(10000, 10000))
            except Exception:
                # Any failure while probing or saving drops this record.
                recognised = False
            if recognised:
                self.image_names.append(os.path.basename(dest))
Exemplo n.º 3
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print its metadata.

    Options are parsed from ``args``. The first result returned by
    ``identify`` is printed, either as OPF (``opts.opf``) or as UTF-8
    encoded text. When ``opts.cover`` is set a cover is downloaded and saved
    to that path.

    Returns 0 on success. Raises ``SystemExit(1)`` when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    results = identify(log,
                       abort,
                       title=opts.title,
                       authors=authors,
                       identifiers=identifiers,
                       timeout=int(opts.timeout),
                       allowed_plugins=allowed_plugins or None)

    if not results:
        # Print the captured log text; printing ``log`` itself would only
        # show the log object.
        print(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log,
                               title=opts.title,
                               authors=authors,
                               identifiers=result.identifiers,
                               timeout=int(opts.timeout))
        if cover is None:
            # Nothing to save. Stay quiet in OPF mode so the message does
            # not accompany the OPF output.
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result)
              if opts.opf else unicode(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemplo n.º 4
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print its metadata.

    Identifiers are taken from ``opts.identifier`` (``key:value`` strings)
    and ``opts.isbn``. The first result returned by ``identify`` is printed,
    as OPF when ``opts.opf`` is set and as UTF-8 encoded text otherwise.
    When ``opts.cover`` is set a cover is downloaded and saved to that path.

    Returns 0 on success. Raises ``SystemExit`` for a malformed identifier
    and ``SystemExit(1)`` when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    for idspec in opts.identifier:
        # partition() yields (key, ':', value); [::2] drops the separator.
        k, v = idspec.partition(':')[::2]
        if not k or not v:
            raise SystemExit('Not a valid identifier: {}'.format(idspec))
        identifiers[k] = v
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout),
            allowed_plugins=allowed_plugins or None)

    if not results:
        # Print the captured log text; printing ``log`` itself would only
        # show the log object.
        print(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            # Nothing to save. Stay quiet in OPF mode so the message does
            # not accompany the OPF output.
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
                    unicode_type(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemplo n.º 5
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print its metadata.

    Builds the search query from the parsed options, runs ``identify`` and
    writes the first result to stdout (OPF bytes when ``opts.opf`` is set,
    plain text otherwise). With ``opts.cover`` a cover is downloaded and
    saved to that path.

    Returns 0. Raises ``SystemExit`` on a malformed identifier and
    ``SystemExit(1)`` when nothing is found.
    '''
    opts, args = option_parser().parse_args(args)

    log_buffer = BytesIO()
    log = create_log(log_buffer)
    abort = Event()
    patch_plugins()

    authors = string_to_authors(opts.authors) if opts.authors else []

    identifiers = {}
    for spec in opts.identifier:
        key, _sep, value = spec.partition(':')
        if not (key and value):
            raise SystemExit('Not a valid identifier: {}'.format(spec))
        identifiers[key] = value
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    results = identify(
        log, abort,
        title=opts.title,
        authors=authors,
        identifiers=identifiers,
        timeout=int(opts.timeout),
        allowed_plugins=allowed_plugins or None,
    )

    if not results:
        prints(log_buffer.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    # ``results`` is known to be non-empty here, so only the option matters.
    if opts.cover:
        cover = download_cover(
            log,
            title=opts.title,
            authors=authors,
            identifiers=result.identifiers,
            timeout=int(opts.timeout),
        )
        if cover is not None:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover
        elif not opts.opf:
            prints('No cover found', file=sys.stderr)

    if opts.verbose:
        prints(log_buffer.getvalue(), file=sys.stderr)

    if opts.opf:
        # Write the OPF bytes to the binary layer of stdout when there is one.
        getattr(sys.stdout, 'buffer', sys.stdout).write(metadata_to_opf(result))
        print()
    else:
        prints(str(result))
        if opts.cover:
            prints('Cover               :', cf)

    return 0
Exemplo n.º 6
0
    def extract_images(self, processed_records, output_dir):
        '''
        Save the not yet processed image records into ``<output_dir>/images``.

        Static GIFs are converted to PNG; animated GIFs are left as GIF. PNG
        data is written verbatim, everything else goes through
        ``save_cover_data_to``. Every visited record number is appended to
        ``processed_records`` (mutated in place) and ``self.image_names``
        collects the names of the files that were written.

        Returns a dict mapping the 1-based image counter to the file name
        chosen for that record.
        '''
        non_image_magic = {
            b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC',
            b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'
        }
        raster_formats = {'jpg', 'jpeg', 'gif', 'png', 'bmp'}

        self.log.debug('Extracting images...')
        images_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        counter = 0
        self.image_names = []
        name_by_index = {}
        first = getattr(self.book_header, 'first_image_index', -1)
        if first < 0 or first > self.num_sections:
            # BAEN PRC files have bad headers
            first = 0
        for recno in range(first, self.num_sections):
            if recno in processed_records:
                continue
            processed_records.append(recno)
            payload = self.sections[recno][0]
            counter += 1
            if payload[:4] in non_image_magic:
                # Known non-image record type, no need to probe it.
                continue

            try:
                kind = what(None, payload)
            except Exception:
                continue
            if kind not in raster_formats:
                continue
            if kind == 'jpeg':
                kind = 'jpg'
            elif kind == 'gif':
                try:
                    payload = gif_data_to_png_data(payload)
                except AnimatedGIF:
                    # Keep animated GIFs in their original format.
                    pass
                else:
                    kind = 'png'

            fname = '%05d.%s' % (counter, kind)
            destination = os.path.join(images_dir, fname)
            # The mapping is recorded before the file is written.
            name_by_index[counter] = fname
            if kind == 'png':
                with open(destination, 'wb') as out:
                    out.write(payload)
            else:
                try:
                    save_cover_data_to(payload, destination,
                                       minify_to=(10000, 10000))
                except Exception:
                    continue
            self.image_names.append(fname)
        return name_by_index
Exemplo n.º 7
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print its metadata.

    Options are parsed from ``args``. The first result returned by
    ``identify`` is printed, either as OPF (``opts.opf``) or as UTF-8
    encoded text. When ``opts.cover`` is set a cover is downloaded and saved
    to that path.

    Returns 0 on success. Raises ``SystemExit(1)`` when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout))

    if not results:
        # Print the captured log text; printing ``log`` itself would only
        # show the log object.
        print (buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            # Nothing to save. Stay quiet in OPF mode so the message does
            # not accompany the OPF output.
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
                    unicode(result).encode('utf-8'))

    if opts.verbose:
        print (log, file=sys.stderr)

    print (result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemplo n.º 8
0
    def fb2mlize_images(self):
        '''
        Build the FB2 ``<binary>`` elements for the images used in the text.

        This function uses the self.image_hrefs dictionary mapping. It is
        populated by the dump_text function. Raster images that are not JPEG
        are re-encoded with ``save_cover_data_to`` before being embedded.

        Returns the concatenated ``<binary>`` elements as one string. An image
        that cannot be processed is logged and left out.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        # Characters per line of base64 output. 71 is the width the original
        # per-character loop produced.
        step = 71
        images = []
        for item in self.oeb_book.manifest:
            # Don't write the image if it's not referenced in the document's text.
            if item.href not in self.image_hrefs:
                continue
            if item.media_type in OEB_RASTER_IMAGES:
                try:
                    if item.media_type != 'image/jpeg':
                        imdata = save_cover_data_to(item.data, compression_quality=70)
                        raw_data = b64encode(imdata)
                    else:
                        raw_data = b64encode(item.data)
                    # b64encode gives bytes on Python 3. Iterating bytes
                    # yields ints, so turn it into text before slicing.
                    if not isinstance(raw_data, str):
                        raw_data = raw_data.decode('ascii')
                    # Don't put the encoded image on a single line.
                    data = '\n'.join(raw_data[i:i + step]
                                     for i in range(0, len(raw_data), step))
                    images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data))
                except Exception as e:
                    self.log.error('Error: Could not include file %s because '
                        '%s.' % (item.href, e))
        return ''.join(images)
Exemplo n.º 9
0
    def fb2mlize_images(self):
        '''
        Build the FB2 ``<binary>`` elements for the images used in the text.

        This function uses the self.image_hrefs dictionary mapping. It is
        populated by the dump_text function. Raster images that are not JPEG
        are re-encoded with ``save_cover_data_to`` before being embedded.

        Returns the concatenated ``<binary>`` elements as one string. An image
        that cannot be processed is logged and left out.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        # Characters per line of base64 output. 71 is the width the original
        # per-character loop produced.
        step = 71
        images = []
        for item in self.oeb_book.manifest:
            # Don't write the image if it's not referenced in the document's text.
            if item.href not in self.image_hrefs:
                continue
            if item.media_type in OEB_RASTER_IMAGES:
                try:
                    if item.media_type != 'image/jpeg':
                        imdata = save_cover_data_to(item.data,
                                                    compression_quality=70)
                        raw_data = b64encode(imdata)
                    else:
                        raw_data = b64encode(item.data)
                    # b64encode gives bytes on Python 3. Iterating bytes
                    # yields ints, so turn it into text before slicing.
                    if not isinstance(raw_data, str):
                        raw_data = raw_data.decode('ascii')
                    # Don't put the encoded image on a single line.
                    data = '\n'.join(raw_data[i:i + step]
                                     for i in range(0, len(raw_data), step))
                    images.append(
                        '<binary id="%s" content-type="image/jpeg">%s\n</binary>'
                        % (self.image_hrefs[item.href], data))
                except Exception as e:
                    self.log.error('Error: Could not include file %s because '
                                   '%s.' % (item.href, e))
        return ''.join(images)
Exemplo n.º 10
0
def get_metadata(stream):
    '''
    Read metadata, and the cover when one is found, from a MOBI ``stream``.

    A stream starting with ``b'TPZ'`` is handed to the Topaz metadata reader
    instead. Otherwise the metadata comes from the EXTH header when present.
    Without an EXTH header, files smaller than 4 MiB are fully extracted
    into a temporary directory to look for embedded metadata.

    Returns the metadata object. Its ``cover_data`` is set to
    ``('jpg', data)`` when a readable cover image is found.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # Fall back to a generic title, e.g. for streams without a usable
        # ``name``. KeyboardInterrupt and SystemExit are no longer swallowed.
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # Assume a huge file unless the real size can be measured.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        # No explicit cover offset: try the first image record.
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None,
                     data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Exemplo n.º 11
0
    def fb2mlize_images(self):
        '''
        Build the FB2 ``<binary>`` elements for the images used in the text.

        Only manifest items whose href is a key of self.image_hrefs (filled
        in by the dump_text function) are embedded. JPEG and PNG data is
        embedded as is; any other raster image is first re-encoded with
        ``save_cover_data_to`` and labelled ``image/jpeg``.

        Returns the elements joined by newlines. An image that cannot be
        processed is logged and left out.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        line_width = 72
        binaries = []
        for entry in self.oeb_book.manifest:
            # Skip anything the text never references, and anything that is
            # not a raster image.
            if entry.href not in self.image_hrefs:
                continue
            if entry.media_type not in OEB_RASTER_IMAGES:
                continue
            try:
                if entry.media_type in ('image/jpeg', 'image/png'):
                    encoded = as_base64_unicode(entry.data)
                    mime = entry.media_type
                else:
                    converted = save_cover_data_to(entry.data,
                                                   compression_quality=70)
                    encoded = as_base64_unicode(converted)
                    mime = 'image/jpeg'
                # Wrap the base64 text so it is not one enormous line.
                pieces = []
                for offset in range(0, len(encoded), line_width):
                    pieces.append(encoded[offset:offset + line_width])
                wrapped = '\n'.join(pieces)
                binaries.append(
                    '<binary id="%s" content-type="%s">%s</binary>' %
                    (self.image_hrefs[entry.href], mime, wrapped))
            except Exception as err:
                self.log.error('Error: Could not include file %s because '
                               '%s.' % (entry.href, err))
        return '\n'.join(binaries)
Exemplo n.º 12
0
    def get_cover(self, id, thumbnail=False, thumb_width=60, thumb_height=80):
        '''
        Return the JPEG cover data for the book ``id``.

        Falls back to ``self.default_cover`` when the book has no cover. With
        ``thumbnail`` set, the cover is scaled to ``thumb_width`` x
        ``thumb_height`` using the configured compression quality, clamped
        to the range 50-99. Otherwise the cover is minified to the server's
        maximum cover size.

        Raises ``cherrypy.HTTPError`` (404) when the cover cannot be produced.
        '''
        try:
            cherrypy.response.headers['Content-Type'] = 'image/jpeg'
            cherrypy.response.timeout = 3600
            cover = self.db.cover(id, index_is_id=True)
            if cover is None:
                cover = self.default_cover
                updated = self.build_time
            else:
                updated = self.db.cover_last_modified(id, index_is_id=True)
            cherrypy.response.headers['Last-Modified'] = self.last_modified(updated)

            if thumbnail:
                quality = tweaks['content_server_thumbnail_compression_quality']
                if quality < 50:
                    quality = 50
                elif quality > 99:
                    quality = 99
                return scale_image(cover, thumb_width, thumb_height, compression_quality=quality)[-1]

            return save_cover_data_to(cover, None, minify_to=(self.max_cover_width, self.max_cover_height))
        except Exception as err:
            import traceback
            cherrypy.log.error('Failed to generate cover:')
            # format_exc() returns the traceback text. print_exc() returns
            # None, so the log entry used to carry no traceback.
            cherrypy.log.error(traceback.format_exc())
            raise cherrypy.HTTPError(404, 'Failed to generate cover: %r'%err)
Exemplo n.º 13
0
    def get_cover(self, id, thumbnail=False, thumb_width=60, thumb_height=80):
        '''
        Return the JPEG cover data for the book ``id``.

        Falls back to ``self.default_cover`` when the book has no cover. With
        ``thumbnail`` set, the cover is scaled to ``thumb_width`` x
        ``thumb_height`` using the configured compression quality, clamped
        to the range 50-99. Otherwise the cover is minified to the server's
        maximum cover size.

        Raises ``cherrypy.HTTPError`` (404) when the cover cannot be produced.
        '''
        try:
            cherrypy.response.headers['Content-Type'] = 'image/jpeg'
            cherrypy.response.timeout = 3600
            cover = self.db.cover(id, index_is_id=True)
            if cover is None:
                cover = self.default_cover
                updated = self.build_time
            else:
                updated = self.db.cover_last_modified(id, index_is_id=True)
            cherrypy.response.headers['Last-Modified'] = self.last_modified(
                updated)

            if thumbnail:
                quality = tweaks[
                    'content_server_thumbnail_compression_quality']
                if quality < 50:
                    quality = 50
                elif quality > 99:
                    quality = 99
                return scale_image(cover,
                                   thumb_width,
                                   thumb_height,
                                   compression_quality=quality)[-1]

            return save_cover_data_to(cover,
                                      None,
                                      minify_to=(self.max_cover_width,
                                                 self.max_cover_height))
        except Exception as err:
            import traceback
            cherrypy.log.error('Failed to generate cover:')
            # format_exc() returns the traceback text. print_exc() returns
            # None, so the log entry used to carry no traceback.
            cherrypy.log.error(traceback.format_exc())
            raise cherrypy.HTTPError(404, 'Failed to generate cover: %r' % err)
Exemplo n.º 14
0
def get_metadata(stream):
    '''
    Read metadata, and the cover when one is found, from a MOBI ``stream``.

    A stream starting with ``b'TPZ'`` is handed to the Topaz metadata reader
    instead. Otherwise the metadata comes from the EXTH header when present.
    Without an EXTH header, files smaller than 4 MiB are fully extracted
    into a temporary directory to look for embedded metadata.

    Returns the metadata object. Its ``cover_data`` is set to
    ``('jpg', data)`` when a readable cover image is found.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        # Bytes fallback, since it is compared against b'TPZ' below.
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # Fall back to a generic title, e.g. for streams without a usable
        # ``name``. KeyboardInterrupt and SystemExit are no longer swallowed.
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # Assume a huge file unless the real size can be measured.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        # No explicit cover offset: try the first image record.
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
Exemplo n.º 15
0
 def image_to_hexstring(self, data):
     '''
     Hex-encode an image, 128 hex characters (64 bytes) per line.

     The image is first passed through ``save_cover_data_to``. Returns a
     ``(hex_string, width, height)`` tuple, the dimensions coming from
     ``identify``.
     '''
     bytes_per_line = 64
     converted = save_cover_data_to(data)
     width, height = identify(converted)[1:]
     # A memoryview lets each chunk be sliced without copying the image.
     view = memoryview(converted)
     chunks = [
         hexlify(view[offset:offset + bytes_per_line])
         for offset in range(0, len(converted), bytes_per_line)
     ]
     return b'\n'.join(chunks).decode('ascii'), width, height
Exemplo n.º 16
0
def process_result(log, result):
    '''
    Validate and normalise one downloaded cover.

    ``result`` is a ``(plugin, data)`` pair. Borders are trimmed when the
    plugin sets ``auto_trim_covers``. Images whose dimensions cannot be read
    or that are smaller than 50 pixels on either side are rejected.

    Returns ``(plugin, width, height, fmt, data)``, or None (after logging)
    when the cover is invalid.
    '''
    plugin, raw = result
    try:
        if getattr(plugin, 'auto_trim_covers', False):
            untrimmed = image_from_data(raw)
            trimmed = remove_borders_from_image(untrimmed)
            # Re-encode only when trimming produced a different image object.
            if trimmed is not untrimmed:
                raw = image_to_data(trimmed)
        fmt, width, height = identify(raw)
        if width < 0 or height < 0:
            problem = 'Could not read cover image dimensions'
        elif width < 50 or height < 50:
            problem = 'Image too small'
        else:
            problem = None
        if problem is not None:
            raise ValueError(problem)
        normalised = save_cover_data_to(raw)
    except Exception:
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, normalised)
Exemplo n.º 17
0
def process_result(log, result):
    '''
    Check a ``(plugin, data)`` cover download and convert it for use.

    When the plugin has a true ``auto_trim_covers`` attribute the image
    borders are removed first. A cover with unreadable dimensions, or with a
    width or height under 50, counts as invalid.

    Returns ``(plugin, width, height, fmt, data)`` for a valid cover. An
    invalid one is logged and None is returned.
    '''
    plugin, payload = result
    wants_trim = getattr(plugin, 'auto_trim_covers', False)
    try:
        if wants_trim:
            before = image_from_data(payload)
            after = remove_borders_from_image(before)
            if after is not before:
                payload = image_to_data(after)
        fmt, width, height = identify(payload)
        if width < 0 or height < 0:
            raise ValueError('Could not read cover image dimensions')
        if width < 50 or height < 50:
            raise ValueError('Image too small')
        payload = save_cover_data_to(payload)
    except Exception:
        log.exception('Invalid cover from', plugin.name)
        outcome = None
    else:
        outcome = (plugin, width, height, fmt, payload)
    return outcome
Exemplo n.º 18
0
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Convert image setting all transparent pixels to white and changing format
    to JPEG. Ensure the resultant image has a byte size less than
    maxsizeb.

    If dimen is not None, generate a thumbnail of
    width=dimen, height=dimen or width, height = dimen (depending on the type
    of dimen)

    Size reduction is attempted in two stages: first by re-encoding at
    decreasing quality, then by repeatedly shrinking the image. The result
    can still exceed maxsizeb if both stages run out of steps.

    Returns the image as a bytestring
    '''
    if dimen is not None:
        if hasattr(dimen, '__len__'):
            width, height = dimen
        else:
            width = height = dimen
        data = scale_image(data,
                           width=width,
                           height=height,
                           compression_quality=90)[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data)
    if len(data) <= maxsizeb:
        return data
    orig_data = data  # save it in case compression fails
    quality = 90
    # Stage 1: always re-encode from the unmodified original so that quality
    # losses do not accumulate.
    while len(data) > maxsizeb and quality >= 5:
        data = image_to_data(image_from_data(orig_data),
                             compression_quality=quality)
        quality -= 5
    if len(data) <= maxsizeb:
        return data

    # Stage 2: shrink the image that stage 1 produced, step by step.
    # NOTE(review): ``quality`` has dropped below 5 by now and is reused
    # unchanged for every encode below. Confirm this is intended.
    scale = 0.9
    while len(data) > maxsizeb and scale >= 0.05:
        img = image_from_data(data)
        w, h = img.width(), img.height()
        img = resize_image(img, int(scale * w), int(scale * h))
        data = image_to_data(img, compression_quality=quality)
        scale -= 0.05
    return data
Exemplo n.º 19
0
    def image_to_hexstring(self, data):
        '''
        Hex-encode an image, 128 hex characters per line.

        The image is first passed through ``save_cover_data_to``. Returns a
        ``(hex_string, width, height)`` tuple, the dimensions coming from
        ``identify``.
        '''
        from binascii import hexlify

        data = save_cover_data_to(data)
        width, height = identify(data)[1:]

        # hexlify yields the same lowercase two-digit pairs as the old
        # per-byte loop, and also works where iterating bytes yields ints.
        raw_hex = hexlify(data)
        if not isinstance(raw_hex, str):
            raw_hex = raw_hex.decode('ascii')

        # Images must be broken up so that they are no longer than 129 chars
        # per line (128 hex characters plus the newline)
        line_len = 128
        hex_string = '\n'.join(raw_hex[i:i + line_len]
                               for i in range(0, len(raw_hex), line_len))

        return (hex_string, width, height)
Exemplo n.º 20
0
    def image_to_hexstring(self, data):
        '''
        Hex-encode an image, 128 hex characters per line.

        The image is first passed through ``save_cover_data_to``. Returns a
        ``(hex_string, width, height)`` tuple, the dimensions coming from
        ``identify``.
        '''
        from binascii import hexlify

        data = save_cover_data_to(data)
        width, height = identify(data)[1:]

        # hexlify yields the same lowercase two-digit pairs as the old
        # per-byte loop, and also works where iterating bytes yields ints.
        raw_hex = hexlify(data)
        if not isinstance(raw_hex, str):
            raw_hex = raw_hex.decode('ascii')

        # Images must be broken up so that they are no longer than 129 chars
        # per line (128 hex characters plus the newline)
        line_len = 128
        hex_string = '\n'.join(raw_hex[i:i + line_len]
                               for i in range(0, len(raw_hex), line_len))

        return (hex_string, width, height)
Exemplo n.º 21
0
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Convert image setting all transparent pixels to white and changing format
    to JPEG. Ensure the resultant image has a byte size less than
    maxsizeb.

    If dimen is not None, generate a thumbnail of
    width=dimen, height=dimen or width, height = dimen (depending on the type
    of dimen)

    Size reduction is attempted in two stages: first by re-encoding at
    decreasing quality, then by repeatedly shrinking the image. The result
    can still exceed maxsizeb if both stages run out of steps.

    Returns the image as a bytestring
    '''
    if dimen is not None:
        if hasattr(dimen, '__len__'):
            width, height = dimen
        else:
            width = height = dimen
        data = scale_image(data, width=width, height=height, compression_quality=90)[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data)
    if len(data) <= maxsizeb:
        return data
    orig_data = data  # save it in case compression fails
    quality = 90
    # Stage 1: always re-encode from the unmodified original so that quality
    # losses do not accumulate.
    while len(data) > maxsizeb and quality >= 5:
        data = image_to_data(image_from_data(orig_data), compression_quality=quality)
        quality -= 5
    if len(data) <= maxsizeb:
        return data

    # Stage 2: shrink the image that stage 1 produced, step by step.
    # NOTE(review): ``quality`` has dropped below 5 by now and is reused
    # unchanged for every encode below. Confirm this is intended.
    scale = 0.9
    while len(data) > maxsizeb and scale >= 0.05:
        img = image_from_data(data)
        w, h = img.width(), img.height()
        img = resize_image(img, int(scale*w), int(scale*h))
        data = image_to_data(img, compression_quality=quality)
        scale -= 0.05
    return data
Exemplo n.º 22
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path`` as an HTMLZ archive.

        A temporary directory is filled with the HTML file, an optional
        external stylesheet, the images referenced by the HTML, the cover
        (``cover.jpg``) and ``metadata.opf``. The directory is then added to
        a zip file created at ``output_path``.

        A failure while writing the cover is printed and otherwise ignored;
        the archive is still produced, without a cover.
        '''
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(
                    unicode_type(oeb_book.metadata.title[0])), ))[0]
            with open(os.path.join(tdir, fname + u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so text is encoded first.
                if isinstance(css, unicode_type):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, u'images')):
                    os.makedirs(os.path.join(tdir, u'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            data = unicode_type(
                                etree.tostring(item.data,
                                               encoding=unicode_type))
                        else:
                            data = item.data
                        # SVG is serialised as text, but the file is opened
                        # in binary mode: encode it like the HTML above.
                        if isinstance(data, unicode_type):
                            data = data.encode('utf-8')
                        fname = os.path.join(tdir, u'images',
                                             images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the (empty) file before the image is saved to it.
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # Best effort: a broken cover must not abort the conversion.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(
                    io.BytesIO(
                        etree.tostring(oeb_book.metadata.to_opf1(),
                                       encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            # Close the archive explicitly so it is complete on disk before
            # the temporary directory is removed.
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
Exemplo n.º 23
0
def serialize_cover_data(new_cdata, cpath):
    """Return ``save_cover_data_to`` output for *new_cdata*.

    The target format handed to ``save_cover_data_to`` as ``data_fmt`` is
    the file extension of *cpath* with its leading dot removed.

    :param new_cdata: Cover data passed through unchanged.
    :param cpath: Path whose extension selects the format.
    :return: Whatever ``save_cover_data_to`` returns.
    """
    from calibre.utils.img import save_cover_data_to
    extension = os.path.splitext(cpath)[1]
    # splitext keeps the dot ('.png'); the format name must not have it.
    fmt = extension[1:]
    return save_cover_data_to(new_cdata, data_fmt=fmt)
Exemplo n.º 24
0
def _encode_into_jpeg(data):
    """Return *data* run through ``save_cover_data_to`` and base64 encoded.

    ``save_cover_data_to`` is called without a path, so its return value is
    what gets encoded (presumably JPEG bytes, going by this function's name).
    The encoding is done by ``as_base64_unicode``.
    """
    return as_base64_unicode(save_cover_data_to(data))
Exemplo n.º 25
0
def _encode_into_jpeg(data):
    """Return *data* run through ``save_cover_data_to`` and ``b64encode``.

    ``save_cover_data_to`` is called without a path, so its return value is
    what gets encoded (presumably JPEG bytes, going by this function's name).
    """
    converted = save_cover_data_to(data)
    encoded = b64encode(converted)
    return encoded
Exemplo n.º 26
0
def _write_new_cover(new_cdata, cpath):
    """Save *new_cdata* into a fresh persistent temporary file.

    The temporary file gets the same extension as *cpath*. It is closed
    before ``save_cover_data_to`` is given its name as the destination.

    :param new_cdata: Cover data to save.
    :param cpath: Path whose extension is reused as the file suffix.
    :return: The (already closed) ``PersistentTemporaryFile`` object.
    """
    from calibre.utils.img import save_cover_data_to
    suffix = os.path.splitext(cpath)[1]
    tmp_cover = PersistentTemporaryFile(suffix=suffix)
    # Only the path is needed from here on; release the handle first.
    tmp_cover.close()
    destination = tmp_cover.name
    save_cover_data_to(new_cdata, destination)
    return tmp_cover
Exemplo n.º 27
0
    def extract_content(self, output_dir):
        """Write all text and image records to disk and convert them to OEB.

        Each text record listed in ``self.uid_text_secion_number`` is written
        as ``<uid>.html`` while ``CurrentDir(output_dir)`` is active. Each
        image record is written as ``<uid>.jpg`` while
        ``CurrentDir(output_dir/images/)`` is active, and composite images
        are then assembled from those already written files. Image failures
        are logged and do not abort the extraction. Finally the home record
        is handed to the HTML input plugin.

        :param output_dir: Directory that receives the generated files.
        :return: The object returned by the HTML input plugin's ``convert``.
        :raises Exception: If the home.html record cannot be determined.
        """
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text record into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    # NOTE(review): only the compressed branch decodes the
                    # result of process_phtml; confirm the uncompressed
                    # branch already yields text.
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Remember the uids of the images that were written successfully,
        # because composite images may only be built from those.
        images = set()
        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        with CurrentDir(images_dir):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    # Stays None for an unknown type or compression; the
                    # save call below then fails and the error is logged.
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70)
                        images.add(uid)
                        self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error('Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height: the widest row and
                    # the sum of the tallest image of every row.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' % col)
                            # Close the part file as soon as it is measured.
                            with lopen('%s.jpg' % col, 'rb') as part:
                                w, h = identify(part)[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                with lopen('%s.jpg' % col, 'rb') as part:
                                    im = image_from_data(part.read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    # Export before opening the target so that a failed
                    # export does not leave an empty file behind.
                    composite = canvas.export(compression_quality=70)
                    # The file must be opened for binary writing; the
                    # default read mode made every write fail.
                    with lopen('%s.jpg' % uid, 'wb') as out:
                        out.write(composite)
                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        try:
            # Determine the home.html record uid. This should be set in the
            # reserved values in the metadata record. home.html is the first
            # text record (should have hyper link references to other
            # records) in the document.
            try:
                home_html = self.header_record.home_html
                if not home_html:
                    # First key of the mapping; dict views cannot be
                    # indexed on Python 3.
                    home_html = next(iter(self.uid_text_secion_number))
            except Exception:
                raise Exception('Could not determine home.html')
            # Generate oeb from html conversion.
            # NOTE(review): the path is relative to the current working
            # directory; presumably the caller runs with output_dir as cwd.
            with open('%s.html' % home_html, 'rb') as home_file:
                oeb = html_input.convert(home_file, self.options, 'html', self.log, {})
        finally:
            # Restore the caller's setting even when the conversion fails.
            self.options.debug_pipeline = odi

        return oeb
Exemplo n.º 28
0
def _encode_into_jpeg(data):
    """Base64 encode the result of ``save_cover_data_to(data)``.

    No destination path is given to ``save_cover_data_to``, so its return
    value (presumably JPEG bytes, per this function's name) is what is
    passed to ``b64encode``.
    """
    return b64encode(save_cover_data_to(data))
Exemplo n.º 29
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write *oeb_book* to *output_path* as a zipped HTML bundle.

        The book is rendered to one HTML file in a temporary directory,
        together with an optional ``style.css``, the referenced images under
        ``images/``, an optional ``cover.jpg`` and a ``metadata.opf``. The
        directory is then added to a zip archive at *output_path*.

        :param oeb_book: Book to serialize; its manifest, metadata and guide
            are read.
        :param output_path: Path of the archive to create.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options; ``htmlz_css_type``,
            ``htmlz_class_style`` and ``htmlz_title_filename`` are consulted
            here.
        :param log: Logger handed to the HTML serializer.
        """
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        from calibre.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory(u'_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = u'index'
            if opts.htmlz_title_filename:
                from calibre.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
                if isinstance(html, unicode_type):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so text must be
                # encoded first (same guard as for the HTML above).
                if isinstance(css, unicode_type):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, u'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                images_dir = os.path.join(tdir, u'images')
                if not os.path.exists(images_dir):
                    os.makedirs(images_dir)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG items hold a parsed tree; serialize it.
                            data = etree.tostring(item.data, encoding='unicode')
                        else:
                            data = item.data
                        # tostring(encoding='unicode') yields text, which
                        # cannot be written to a binary file as is.
                        if isinstance(data, unicode_type):
                            data = data.encode('utf-8')
                        fname = os.path.join(images_dir, images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from calibre.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, u'cover.jpg')
                    # Create the (empty) file before saving into it.
                    with lopen(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # Best effort: a broken cover must not abort the conversion.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf:
                opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(), encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = u'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                # Close explicitly instead of relying on garbage collection
                # to release the archive file.
                htmlz.close()
Exemplo n.º 30
0
def serialize_cover_data(new_cdata, cpath):
    """Pass *new_cdata* through ``save_cover_data_to`` and return the result.

    ``data_fmt`` is set to the extension of *cpath* without the leading dot.

    :param new_cdata: Cover data passed through unchanged.
    :param cpath: Path whose extension selects the format.
    :return: Whatever ``save_cover_data_to`` returns.
    """
    from calibre.utils.img import save_cover_data_to
    _, dotted_ext = os.path.splitext(cpath)
    # Strip the dot that splitext leaves in front of the extension.
    return save_cover_data_to(new_cdata, data_fmt=dotted_ext[1:])
Exemplo n.º 31
0
def _write_new_cover(new_cdata, cpath):
    """Store *new_cdata* in a new persistent temporary file and return it.

    The file suffix is taken from the extension of *cpath*. The temporary
    file is closed first and ``save_cover_data_to`` then receives its name
    as the destination.

    :param new_cdata: Cover data to save.
    :param cpath: Path whose extension is reused as the file suffix.
    :return: The (already closed) ``PersistentTemporaryFile`` object.
    """
    from calibre.utils.img import save_cover_data_to
    _, extension = os.path.splitext(cpath)
    cover_file = PersistentTemporaryFile(suffix=extension)
    # Release the handle; only the file name is used below.
    cover_file.close()
    save_cover_data_to(new_cdata, cover_file.name)
    return cover_file
Exemplo n.º 32
0
def _encode_into_jpeg(data):
    """Return ``as_base64_unicode(save_cover_data_to(data))``.

    ``save_cover_data_to`` gets no destination path, so its return value
    (presumably JPEG bytes, per this function's name) is what is encoded.
    """
    converted = save_cover_data_to(data)
    encoded = as_base64_unicode(converted)
    return encoded
Exemplo n.º 33
0
    def extract_content(self, output_dir):
        """Write all text and image records to disk and convert them to OEB.

        Each text record listed in ``self.uid_text_secion_number`` is written
        as ``<uid>.html`` while ``CurrentDir(output_dir)`` is active. Each
        image record is written as ``<uid>.jpg`` while
        ``CurrentDir(output_dir/images/)`` is active, and composite images
        are then assembled from those already written files. Image failures
        are logged and do not abort the extraction. Finally the home record
        is handed to the HTML input plugin.

        :param output_dir: Directory that receives the generated files.
        :return: The object returned by the HTML input plugin's ``convert``.
        :raises Exception: If the home.html record cannot be determined.
        """
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text record into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' %
                               (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    # NOTE(review): only the compressed branch decodes the
                    # result of process_phtml; confirm the uncompressed
                    # branch already yields text.
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(
                            section_data.data,
                            section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        html += self.process_phtml(
                            d, section_data.header.paragraph_offsets).decode(
                                self.get_text_uid_encoding(section_header.uid),
                                'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Remember the uids of the images that were written successfully,
        # because composite images may only be built from those.
        images = set()
        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)
        with CurrentDir(images_dir):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    # Stays None for an unknown type or compression; the
                    # save call below then fails and the error is logged.
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata,
                                           '%s.jpg' % uid,
                                           compression_quality=70)
                        images.add(uid)
                        self.log.debug(
                            'Wrote image with uid %s to images/%s.jpg' %
                            (uid, uid))
                    except Exception as e:
                        self.log.error(
                            'Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error(
                        'Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height: the widest row and
                    # the sum of the tallest image of every row.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' %
                                                col)
                            # Close the part file as soon as it is measured.
                            with lopen('%s.jpg' % col, 'rb') as part:
                                w, h = identify(part)[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                with lopen('%s.jpg' % col, 'rb') as part:
                                    im = image_from_data(part.read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    # Export before opening the target so that a failed
                    # export does not leave an empty file behind.
                    composite = canvas.export(compression_quality=70)
                    # The file must be opened for binary writing; the
                    # default read mode made every write fail.
                    with lopen('%s.jpg' % uid, 'wb') as out:
                        out.write(composite)
                    self.log.debug(
                        'Wrote composite image with uid %s to images/%s.jpg' %
                        (uid, uid))
                except Exception as e:
                    self.log.error(
                        'Failed to write composite image with uid %s: %s' %
                        (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        try:
            # Determine the home.html record uid. This should be set in the
            # reserved values in the metadata record. home.html is the first
            # text record (should have hyper link references to other
            # records) in the document.
            try:
                home_html = self.header_record.home_html
                if not home_html:
                    # First key of the mapping; dict views cannot be
                    # indexed on Python 3.
                    home_html = next(iter(self.uid_text_secion_number))
            except Exception:
                raise Exception('Could not determine home.html')
            # Generate oeb from html conversion.
            # NOTE(review): the path is relative to the current working
            # directory; presumably the caller runs with output_dir as cwd.
            with open('%s.html' % home_html, 'rb') as home_file:
                oeb = html_input.convert(home_file, self.options, 'html',
                                         self.log, {})
        finally:
            # Restore the caller's setting even when the conversion fails.
            self.options.debug_pipeline = odi

        return oeb