示例#1
0
文件: utils.py 项目: JimmXinu/calibre
def get_table(raw, name):
    '''
    Find the table whose tag matches ``name`` (compared case-insensitively)
    among the entries yielded by ``get_tables(raw)``.

    Returns a ``(table, table_index, table_offset, table_checksum)`` tuple for
    the first matching entry, or four ``None`` values when nothing matches.
    '''
    wanted = as_bytes(name.lower())
    for tag, data, index, offset, checksum in get_tables(raw):
        if tag.lower() != wanted:
            continue
        return data, index, offset, checksum
    return (None,) * 4
示例#2
0
    def __call__(self, **kwargs):
        '''
        Serialize this header to bytes.

        Keyword arguments assign values to existing fields first; a name that
        is not already a field raises KeyError. Every field is then passed
        through self.format_value() and written after self.HEADER_NAME.
        Integral values are packed big-endian (2 bytes for names in
        self.SHORT_FIELDS, 4 bytes otherwise); any other value is written
        as-is. A field whose formatted value is None raises ValueError.

        For each ``pos_field -> field`` pair in self.POSITIONS, the slot of
        ``pos_field`` is overwritten with the offset at which ``field`` was
        written. The result is passed through align_block() when
        self.ALIGN_BLOCK is true.
        '''
        for field, value in iteritems(kwargs):
            if field not in self:
                raise KeyError('Not a valid header field: %r'%field)
            self[field] = value

        offsets = {}
        out = BytesIO()
        out.write(as_bytes(self.HEADER_NAME))
        for field, value in iteritems(self):
            value = self.format_value(field, value)
            # Record where this field starts so it can be referenced below
            offsets[field] = out.tell()
            if value is None:
                raise ValueError('Dynamic field %r not set'%field)
            if isinstance(value, numbers.Integral):
                code = b'H' if field in self.SHORT_FIELDS else b'I'
                value = pack(b'>'+code, value)
            out.write(value)

        # Back-patch the slots that hold the offsets of other fields
        for pos_field, field in iteritems(self.POSITIONS):
            out.seek(offsets[pos_field])
            out.write(pack(b'>I', offsets[field]))

        result = out.getvalue()
        return align_block(result) if self.ALIGN_BLOCK else result
示例#3
0
文件: t2b.py 项目: j-howell/calibre
def write_t2b(t2bfile, coverdata=None):
    '''
    Write T2B cover data to ``t2bfile``.

    t2bfile is a file handle ready to write binary data to disk.
    coverdata is the raw bytes of an image file (it is wrapped in a BytesIO
    and opened with PIL). When coverdata is None, DEFAULT_T2B_DATA is written
    instead.

    The image is converted to greyscale, shrunk to fit 96x144, centred on a
    white 96x144 canvas and then written out four pixels per output byte.
    '''
    from PIL import Image
    if coverdata is None:
        t2bfile.write(DEFAULT_T2B_DATA)
        return

    cover = Image.open(io.BytesIO(coverdata)).convert("L")
    cover.thumbnail((96, 144), Image.ANTIALIAS)
    t2bcover = Image.new('L', (96, 144), 'white')

    # Centre the thumbnail on the canvas
    x, y = cover.size
    t2bcover.paste(cover, ((96-x)//2, (144-y)//2))

    px = []
    for value in t2bcover.getdata():
        px.append(value)
        if len(px) >= 4:
            binstr = i2b(reduce_color(px[0])) + i2b(reduce_color(px[1])) + i2b(reduce_color(px[2])) + i2b(reduce_color(px[3]))
            # Write exactly one raw byte. Going through chr() and a text
            # encoder turns values above 127 into a multi-byte sequence on
            # Python 3, which corrupts the fixed-size bitmap.
            # NOTE(review): assumes i2b() yields two bits per pixel so the
            # packed value fits in 0-255 - confirm against i2b/reduce_color.
            t2bfile.write(bytes(bytearray((int(binstr, 2),))))
            px = []
示例#4
0
文件: server.py 项目: cbhaley/calibre
 def export_template(self):
     ''' Ask the user for a destination file and save the serialized current template to it '''
     destination = choose_save_file(
         self, 'custom-list-template', _('Choose template file'),
         filters=[(_('Template files'), ['json'])], initial_filename='custom-list-template.json')
     if not destination:
         # Nothing was chosen, so there is nothing to write
         return
     serialized = self.serialize(self.current_template)
     with lopen(destination, 'wb') as out:
         out.write(as_bytes(serialized))
示例#5
0
文件: input.py 项目: JimmXinu/calibre
    def write(self, name='styles.css'):
        '''
        Write self.CSS to the file ``name``, followed by one rule for every
        non-empty entry of self.text_styles (selectors ``.ts<index>``) and
        self.block_styles (selectors ``.bs<index>``).
        '''

        def declarations(style):
            # "key : value" pairs separated by ";", with no trailing semicolon
            return ';\n\t'.join('%s : %s'%(k, v) for k, v in style.items())

        with open(name, 'wb') as out:
            out.write(as_bytes(self.CSS))
            for styles, prefix in ((self.text_styles, 'ts'), (self.block_styles, 'bs')):
                for index, style in enumerate(styles):
                    if style:
                        selector = '.%s%d'%(prefix, index)
                        body = declarations(style)
                        out.write(as_bytes(selector + ' {\n\t' + body + '\n}\n\n'))
示例#6
0
文件: opds.py 项目: JimmXinu/calibre
def NAVCATALOG_ENTRY(url_for, updated, title, description, query):
    '''
    Build the entry element for one navigation catalog.

    The link target is the /opds/navcatalog URL for the hex-encoded ``query``.
    The entry id is 'calibre-navcatalog:' followed by the SHA-1 hex digest of
    that URL.
    '''
    href = url_for('/opds/navcatalog', which=as_hex_unicode(query))
    digest = hashlib.sha1(as_bytes(href)).hexdigest()
    children = (
        TITLE(title),
        ID('calibre-navcatalog:' + digest),
        UPDATED(updated),
        E.content(description, type='text'),
        NAVLINK(href=href),
    )
    return E.entry(*children)
示例#7
0
文件: view.py 项目: j-howell/calibre
 def load(data):
     # Build a QPixmap from raw image data, scaled for the device pixel
     # ratio of the enclosing widget (``self`` comes from the outer scope).
     pixmap = QPixmap()
     pixmap.loadFromData(as_bytes(data))
     try:
         ratio = self.devicePixelRatioF()
     except AttributeError:
         # Fall back to the other accessor when the F variant is missing
         ratio = self.devicePixelRatio()
     pixmap.setDevicePixelRatio(ratio)
     if data and pixmap.isNull():
         # Data was supplied but could not be decoded
         return self.failed_img
     return pixmap
示例#8
0
文件: server.py 项目: cbhaley/calibre
 def commit(self):
     '''
     Persist the current template and return True.

     When it equals the default template, the custom template file is removed
     (a missing file is not an error). Otherwise the serialized template is
     written to that file.
     '''
     template = self.current_template
     is_default = template == self.default_template
     if not is_default:
         raw = self.serialize(template)
         with lopen(custom_list_template.path, 'wb') as f:
             f.write(as_bytes(raw))
         return True
     try:
         os.remove(custom_list_template.path)
     except EnvironmentError as err:
         # Only "file does not exist" is ignored
         if err.errno != errno.ENOENT:
             raise
     return True
示例#9
0
文件: apnx.py 项目: cbhaley/calibre
    def generate_apnx(self, pages, apnx_meta):
        '''
        Build and return the bytes of an APNX file.

        pages is a sized iterable of integers, each written as a big-endian
        unsigned 32-bit value after the headers.
        apnx_meta is a mapping with the keys 'guid', 'asin', 'cdetype',
        'format' and 'acr', which are interpolated into the two JSON-like
        header strings.
        '''
        if DEBUG:
            prints('APNX META: guid:', apnx_meta['guid'])
            prints('APNX META: ASIN:', apnx_meta['asin'])
            prints('APNX META: CDE:', apnx_meta['cdetype'])
            prints('APNX META: format:', apnx_meta['format'])
            prints('APNX META: Name:', apnx_meta['acr'])

        # Updated header if we have a KF8 file...
        if apnx_meta['format'] == 'MOBI_8':
            content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","format":"%(format)s","fileRevisionId":"1","acr":"%(acr)s"}' % apnx_meta  # noqa
        else:
            # My 5.1.x Touch & 3.4 K3 seem to handle the 'extended' header fine for
            # legacy mobi files, too. But, since they still handle this one too, let's
            # try not to break old devices, and keep using the simple header ;).
            content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
        page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta

        if DEBUG:
            prints('APNX Content Header:', content_header)

        # Encode before measuring. The length fields have to describe the
        # bytes actually written, and the encoded length differs from the
        # character count as soon as the metadata contains non-ASCII text.
        content_header = as_bytes(content_header)
        page_header = as_bytes(page_header)

        parts = [
            struct.pack('>I', 65537),
            struct.pack('>I', 12 + len(content_header)),
            struct.pack('>I', len(content_header)),
            content_header,
            struct.pack('>H', 1),
            struct.pack('>H', len(page_header)),
            struct.pack('>H', len(pages)),
            struct.pack('>H', 32),
            page_header,
        ]

        # Write page values to APNX.
        parts.extend(struct.pack('>I', page) for page in pages)

        # Joining once avoids repeatedly copying the growing bytes object
        return b''.join(parts)
示例#10
0
def encode_thumbnail(thumbnail):
    '''
    Return a ``(width, height, base64_data)`` tuple for a thumbnail.

    ``thumbnail`` may already be a 3-item tuple/list, in which case only the
    third item is base64 encoded. Otherwise it is treated as raw image data
    and its dimensions are obtained from identify(). Returns None when
    ``thumbnail`` is None, when identify() fails or when it reports a negative
    dimension.
    '''
    from calibre.utils.imghdr import identify
    if thumbnail is None:
        return None
    if not isinstance(thumbnail, (tuple, list)):
        try:
            width, height = identify(as_bytes(thumbnail))[1:]
            unusable = width < 0 or height < 0
        except Exception:
            return None
        if unusable:
            return None
        thumbnail = (width, height, thumbnail)
    width, height, data = thumbnail[0], thumbnail[1], thumbnail[2]
    return (width, height, as_base64_unicode(data))
示例#11
0
 def kindle_update_booklist(self, bl, collections):
     '''
     Set ``device_collections`` on the books in ``bl`` from the JSON
     collections file at the path ``collections``.

     Each collection lists items whose last 40 characters are matched against
     the SHA-1 hex digest of '/mnt/us/' + book.lpath. The collection name is
     the part of the JSON key before the first '@'.
     '''
     with lopen(collections, 'rb') as src:
         raw = src.read()
     parsed = json.loads(raw)
     hash_to_names = {}
     for key, entry in parsed.items():
         collection_name = key.split('@')[0]
         for item in entry.get('items', []):
             hash_to_names.setdefault(item[-40:], set()).add(collection_name)
     if not hash_to_names:
         return
     for book in bl:
         digest = hashlib.sha1(as_bytes('/mnt/us/'+book.lpath)).hexdigest()
         if digest in hash_to_names:
             book.device_collections = sorted(hash_to_names[digest])
示例#12
0
 def kindle_update_booklist(self, bl, collections):
     '''
     Read the JSON collections file at ``collections`` and assign the sorted
     collection names to ``device_collections`` of every matching book in
     ``bl``.

     A book matches when the SHA-1 hex digest of '/mnt/us/' + book.lpath
     equals the last 40 characters of one of a collection's items.
     '''
     with lopen(collections, 'rb') as f:
         raw = f.read()
     path_map = {}
     for name, val in json.loads(raw).items():
         # Only the text before the first '@' is used as the collection name
         col = name.partition('@')[0]
         for entry in val.get('items', []):
             tail = entry[-40:]
             try:
                 path_map[tail].add(col)
             except KeyError:
                 path_map[tail] = {col}
     if path_map:
         for book in bl:
             full_path = '/mnt/us/' + book.lpath
             digest = hashlib.sha1(as_bytes(full_path)).hexdigest()
             names = path_map.get(digest)
             if names is not None:
                 book.device_collections = sorted(names)
示例#13
0
文件: apnx.py 项目: zwlistu/calibre
    def get_pages_pagebreak_tag(self, mobi_file_path):
        '''
        Determine pages based on the presence of
        <mbp:pagebreak>.

        Returns the end offset of every tag containing "pagebreak" in the
        lower-cased MOBI markup. Books whose header reports a non-zero
        encryption type are delegated to get_pages_fast() instead.
        '''
        # Get the MOBI html.
        reader = MobiReader(mobi_file_path, default_log)
        if reader.book_header.encryption_type != 0:
            # DRMed book
            return self.get_pages_fast(mobi_file_path)
        reader.extract_text()

        markup = as_bytes(reader.mobi_html.lower())
        return [match.end() for match in re.finditer(b'<[^>]*pagebreak[^>]*>', markup)]
示例#14
0
 def save_annotations(self, in_book_file=True):
     '''
     Write the serialized annotations of the current book into
     annotations_dir.

     When ``in_book_file`` is true, the book path ends with '.epub', the
     'save_annotations_in_ebook' session preference is enabled and the file
     is writable, the annotations are also stored inside the book.
     '''
     book = self.current_book_data
     if not book:
         return
     annots = as_bytes(serialize_annotations(book['annotations_map']))
     destination = os.path.join(annotations_dir, book['annotations_path_key'])
     with open(destination, 'wb') as f:
         f.write(annots)

     if not in_book_file:
         return
     if not book.get('pathtoebook', '').lower().endswith('.epub'):
         return
     if not get_session_pref('save_annotations_in_ebook', default=True):
         return
     path = book['pathtoebook']
     if os.access(path, os.W_OK):
         # The stat is taken before the EPUB is modified and is handed to
         # update_book() together with the annotations
         before_stat = os.stat(path)
         save_annots_to_epub(path, annots)
         update_book(path, before_stat,
                     {'calibre-book-annotations.json': annots})
示例#15
0
    def process_encryption(self, encfile, opf, log):
        '''
        De-obfuscate the fonts listed in the encryption file ``encfile``.

        Two keys are derived from ``opf``. The IDPF key is the SHA-1 digest of
        the raw unique identifier with whitespace removed. The Adobe key is
        the 16 raw bytes of the UUID identifier. Each referenced file that
        exists and has a usable key is passed to decrypt_font(), and its URI
        is recorded in self._encrypted_font_uris.

        Returns True when every EncryptionMethod uses one of the two known
        obfuscation algorithms. Returns False when another algorithm is found
        or when processing fails (the traceback is printed). ``log`` is not
        used here.
        '''
        from lxml import etree
        import uuid
        import hashlib
        idpf_key = opf.raw_unique_identifier
        if idpf_key:
            idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key)
            idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
        key = None
        for item in opf.identifier_iter():
            scheme = None
            for xkey in item.attrib.keys():
                if xkey.endswith('scheme'):
                    scheme = item.get(xkey)
            if (scheme and scheme.lower() == 'uuid') or \
                    (item.text and item.text.startswith('urn:uuid:')):
                try:
                    key = item.text.rpartition(':')[-1]
                    # uuid.UUID() needs a text hex string. Passing bytes
                    # raises on Python 3, which silently discarded the key.
                    key = uuid.UUID(key).bytes
                except Exception:
                    import traceback
                    traceback.print_exc()
                    key = None

        try:
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
                if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
                    return False
                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
                uri = cr.get('URI')
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
                if (tkey and os.path.exists(path)):
                    self._encrypted_font_uris.append(uri)
                    decrypt_font(tkey, path, algorithm)
            return True
        except Exception:
            # Best effort: report the failure and tell the caller it failed
            import traceback
            traceback.print_exc()
        return False
示例#16
0
    def process_encryption(self, encfile, opf, log):
        '''
        De-obfuscate the fonts listed in the encryption file ``encfile``.

        Two keys are derived from ``opf``. The IDPF key is the SHA-1 digest of
        the raw unique identifier with whitespace removed. The Adobe key is
        the 16 raw bytes of the UUID identifier. Each referenced file that
        exists and has a usable key is passed to decrypt_font(), and its URI
        is recorded in self._encrypted_font_uris.

        Returns True when every EncryptionMethod uses one of the two known
        obfuscation algorithms. Returns False when another algorithm is found
        or when processing fails (the traceback is printed). ``log`` is not
        used here.
        '''
        from lxml import etree
        import uuid
        import hashlib
        idpf_key = opf.raw_unique_identifier
        if idpf_key:
            idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key)
            idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
        key = None
        for item in opf.identifier_iter():
            scheme = None
            for xkey in item.attrib.keys():
                if xkey.endswith('scheme'):
                    scheme = item.get(xkey)
            if (scheme and scheme.lower() == 'uuid') or \
                    (item.text and item.text.startswith('urn:uuid:')):
                try:
                    key = item.text.rpartition(':')[-1]
                    # uuid.UUID() needs a text hex string. Passing bytes
                    # raises on Python 3, which silently discarded the key.
                    key = uuid.UUID(key).bytes
                except Exception:
                    import traceback
                    traceback.print_exc()
                    key = None

        try:
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
                if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
                    return False
                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
                uri = cr.get('URI')
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
                if (tkey and os.path.exists(path)):
                    self._encrypted_font_uris.append(uri)
                    decrypt_font(tkey, path, algorithm)
            return True
        except Exception:
            # Best effort: report the failure and tell the caller it failed
            import traceback
            traceback.print_exc()
        return False
示例#17
0
    def _launch_viewer(self, name=None, viewer='ebook-viewer', internal=True, calibre_book_data=None, open_at=None):
        '''
        Open ``name`` either in one of calibre's own viewers (``internal``
        true) or with the system's default application.

        For the internal viewer a command line is assembled and handed to the
        job manager. ``open_at`` and ``calibre_book_data`` (written as JSON to
        a temporary file) are only passed along when ``name`` is given. For
        the external case on Windows, an error dialog is returned instead when
        the file type is associated with calibre.exe itself. The busy cursor
        is shown while this runs and is always reset afterwards.
        '''
        self.gui.setCursor(Qt.BusyCursor)
        try:
            if not internal:
                if iswindows:
                    winutil = plugins['winutil'][0]
                    ext = name.rpartition('.')[-1]
                    if ext:
                        try:
                            prog = winutil.file_association(unicode_type('.' + ext))
                        except Exception:
                            prog = None
                        if prog and prog.lower().endswith('calibre.exe'):
                            name = os.path.basename(name)
                            return error_dialog(
                                self.gui, _('No associated program'), _(
                                    'Windows will try to open %s with calibre itself'
                                    ' resulting in a duplicate in your calibre library. You'
                                    ' should install some program capable of viewing this'
                                    ' file format and tell Windows to use that program to open'
                                    ' files of this type.') % name, show=True)

                open_local_file(name)
                time.sleep(2)  # User feedback
            else:
                cmdline = [viewer]
                if ismacos and 'ebook' in viewer:
                    cmdline.append('--raise-window')

                if name is not None:
                    cmdline.append(name)
                    if open_at is not None:
                        cmdline.append('--open-at=' + open_at)
                    if calibre_book_data is not None:
                        with PersistentTemporaryFile('.json') as ptf:
                            ptf.write(as_bytes(json.dumps(calibre_book_data)))
                            cmdline.append('--internal-book-data=' + ptf.name)
                self.gui.job_manager.launch_gui_app(viewer, kwargs={'args': cmdline})
        finally:
            self.gui.unsetCursor()
示例#18
0
 def __call__(self, src, options):
     '''
     Compile ``src`` with the in-page compiler and return the generated
     JavaScript.

     ``options`` is modified in place: 'basedir', 'write_name' and
     'keep_docstrings' are overwritten. The call blocks, spinning the loop
     until self.working becomes false. CompileFailure is raised when no
     result was produced. The second item of the result is dumped as JSON to
     cache_path.
     '''
     sentinel = object()
     self.compiler_result = sentinel
     self.errors = []
     self.working = True
     forced = (('basedir', '__stdlib__'), ('write_name', True), ('keep_docstrings', False))
     for key, value in forced:
         options[key] = value
     script = 'var js = window.compiler.compile({}, {}); [js, window.write_cache]'.format(
         json.dumps(src), json.dumps(options))
     self.runJavaScript(script, QWebEngineScript.ApplicationWorld,
                        self.compilation_done)
     while self.working:
         self.spin_loop()
     result = self.compiler_result
     if result is sentinel or result is None:
         message = 'Failed to compile rapydscript code with error: ' + '\n'.join(self.errors)
         raise CompileFailure(message)
     write_cache = result[1]
     with open(cache_path, 'wb') as f:
         f.write(as_bytes(json.dumps(write_cache)))
     return result[0]
示例#19
0
    def put_file(self, parent, name, stream, size, callback=None, replace=True):
        '''
        Upload ``stream`` as the file ``name`` inside ``parent`` and return
        the child object added to ``parent``.

        Raises ValueError when ``parent`` already has a folder called
        ``name``, or a file called ``name`` while ``replace`` is false. An
        existing file is deleted first when ``replace`` is true. Raises
        DeviceError when the device reports no result.
        '''
        clash = parent.folder_named(name)
        if clash is not None:
            raise ValueError('Cannot upload file, %s already has a folder named: %s'%(
                parent.full_path, clash.name))
        existing = parent.file_named(name)
        if existing is not None:
            if not replace:
                raise ValueError('Cannot upload file %s, it already exists'%(
                    existing.full_path,))
            self.delete_file_or_folder(existing)

        sid = parent.storage_id
        pid = parent.object_id
        # A parent whose object id equals its storage id is sent as 0xFFFFFFFF
        pid = 0xFFFFFFFF if pid == sid else pid
        ename = name if ispy3 else as_bytes(name)

        ans, errs = self.dev.put_file(sid, pid, ename, stream, size, callback)
        if ans is None:
            raise DeviceError('Failed to upload file named: %s to %s: %s'
                    %(name, parent.full_path, self.format_errorstack(errs)))
        return parent.add_child(ans)
示例#20
0
文件: input.py 项目: zyhong/calibre
    def __call__(self, stream, odir, log):
        '''
        Convert the ODT document in ``stream`` to ``index.xhtml`` plus a
        ``metadata.opf`` and return the absolute path of the OPF file.

        ``odir`` is created when missing and the work is done inside
        ``CurrentDir(odir)``; the output files are addressed by relative name.
        Missing title or authors are replaced with 'Unknown'. A failure in
        fix_markup() is logged and the unfiltered markup is used.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.odt import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator

        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            log('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            self.filter_load(stream, mi, log)
            html = self.xhtml()
            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag
            html = html.replace('<title></title>',
                                '<title>%s</title>' % (mi.title, ))
            try:
                html = self.fix_markup(html, log)
            except Exception:
                # Best effort only. A bare except here would also swallow
                # KeyboardInterrupt and SystemExit.
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(as_bytes(html))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(getcwd()), mi)
            opf.create_manifest([(os.path.abspath(f2), None)
                                 for f2 in walk(getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
示例#21
0
def mobile(ctx, rd):
    '''
    Render the mobile book list for the request ``rd``.

    The query parameters 'start' (clamped to at least 1) and 'num' (clamped
    to at least 0) select the page; a non-integer value raises
    HTTPBadRequest. 'search', 'sort' and 'order' select and order the books.
    If sorting by the requested field fails, 'date' is used instead.
    '''
    db, library_id, library_map, default_library = get_library_data(ctx, rd)
    query = rd.query
    try:
        start = max(1, int(query.get('start', 1)))
    except ValueError:
        raise HTTPBadRequest('start is not an integer')
    try:
        num = max(0, int(query.get('num', 25)))
    except ValueError:
        raise HTTPBadRequest('num is not an integer')
    search = query.get('search') or ''
    with db.safe_read_lock:
        book_ids = ctx.search(rd, db, search)
        total = len(book_ids)
        ascending = query.get('order', '').lower().strip() == 'ascending'
        sort_by = sanitize_sort_field_name(db.field_metadata,
                                           query.get('sort') or 'date')
        try:
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        except Exception:
            sort_by = 'date'
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        first = start - 1
        books = [db.get_metadata(book_id) for book_id in book_ids[first:first + num]]
    rd.outheaders['Last-Modified'] = http_date(
        timestampfromdt(db.last_modified()))
    if ascending:
        order = 'ascending'
    else:
        order = 'descending'
    q = {
        b'search': search.encode('utf-8'),
        b'order': order.encode('ascii'),
        b'sort': sort_by.encode('utf-8'),
        b'num': as_bytes(num),
        'library_id': library_id
    }
    url_base = ctx.url_for('/mobile') + '?' + urlencode(q)
    # Every library except the one currently being shown
    other_libraries = {k: v for k, v in iteritems(library_map) if k != library_id}
    return build_index(rd, books, num, search, sort_by, order, start, total,
                       url_base, db.field_metadata, ctx, other_libraries, library_id)
示例#22
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the PDF in ``stream`` to HTML in the current directory and
        return the path of the generated ``metadata.opf``.

        With ``options.new_pdf_engine`` set the work is delegated to
        convert_new(). Otherwise pdftohtml() is run, every other file in the
        directory is added to the manifest next to ``index.html``, and a
        ``toc`` attribute is added to the spine when ``toc.ncx`` exists and
        has a manifest id.
        '''
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(getcwd(), mi)

        extras = os.listdir(getcwd())
        extras.remove('index.html')
        manifest = [('index.html', None)] + [(entry, None) for entry in extras]
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine(['index.html'])
        log.debug('Rendering manifest...')
        with lopen('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
        if os.path.exists('toc.ncx'):
            ncxid = opf.manifest.id_for_path('toc.ncx')
            if ncxid:
                with lopen('metadata.opf', 'r+b') as f:
                    raw = f.read()
                    raw = raw.replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                    # Rewrite from the start; the new content is never shorter
                    f.seek(0)
                    f.write(raw)

        return os.path.join(getcwd(), 'metadata.opf')
示例#23
0
def compile_pyj(data, filename='<stdin>', beautify=True, private_scope=True, libdir=None, omit_baselib=False, js_version=5):
    '''
    Compile the source ``data`` (text or UTF-8 bytes) and return the result.

    When ok_to_import_webengine() is true the compiler is run in this
    process. Otherwise a calibre-debug child process is started, the source
    is sent to its stdin and its stdout is returned as text. A non-zero exit
    code of the child raises SystemExit with that code. ``libdir`` is not
    used here.
    '''
    if isinstance(data, bytes):
        data = data.decode('utf-8')
    options = dict(
        beautify=beautify,
        private_scope=private_scope,
        keep_baselib=not omit_baselib,
        filename=filename,
        js_version=js_version,
    )
    if ok_to_import_webengine():
        return compiler()(data, options)

    from calibre.debug import run_calibre_debug
    child = run_calibre_debug('-c', 'from calibre.utils.rapydscript import *; forked_compile()',
            json.dumps(options), stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout = child.communicate(as_bytes(data))[0]
    if child.wait() != 0:
        raise SystemExit(child.returncode)
    return as_unicode(stdout)
示例#24
0
    def _images(self, manifest, image_hrefs):
        '''
        Return a list of ``(header, data)`` pairs for the raster images in
        ``manifest`` whose href is a key of ``image_hrefs``.

        Header layout, 62 bytes in total:

        0-4   : 'PNG '. There must be a space after PNG.
        4-36  : Image name, exactly 32 bytes, NUL padded or truncated.
        36-58 : Unknown, written as NUL bytes.
        58-60 : Width.
        60-62 : Height.

        ``data`` is the image converted to palette mode, shrunk to fit
        300x300 and saved as PNG. Images are skipped when header plus data
        reach 65505 bytes; images that fail to process are logged and
        skipped.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES

        images = []
        for item in manifest:
            if item.media_type not in OEB_RASTER_IMAGES or item.href not in image_hrefs:
                continue
            try:
                im = Image.open(io.BytesIO(item.data)).convert('P')
                im.thumbnail((300, 300), Image.ANTIALIAS)

                buf = io.BytesIO()
                im.save(buf, 'PNG')
                data = buf.getvalue()
                name_field = as_bytes(image_hrefs[item.href]).ljust(32, b'\x00')[:32]

                header = (b'PNG ' + name_field).ljust(58, b'\x00')
                header += struct.pack('>HH', im.size[0], im.size[1])
                header = header.ljust(62, b'\x00')

                if len(data) + len(header) < 65505:
                    images.append((header, data))
            except Exception as e:
                self.log.error('Error: Could not include file %s becuase '
                               '%s.' % (item.href, e))

        return images
示例#25
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Run pdftohtml on ``stream`` in the current directory, write a
        ``metadata.opf`` describing the result and return its path.

        convert_new() is used instead when ``options.new_pdf_engine`` is set.
        All files produced next to ``index.html`` go into the manifest. When
        ``toc.ncx`` exists and has a manifest id, that id is added to the
        spine as its ``toc`` attribute.
        '''
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml

        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
        if options.new_pdf_engine:
            return self.convert_new(stream, accelerators)
        pdftohtml(os.getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwd(), mi)

        manifest = [('index.html', None)]
        produced = os.listdir(os.getcwd())
        produced.remove('index.html')
        manifest.extend((fname, None) for fname in produced)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine(['index.html'])
        log.debug('Rendering manifest...')
        with lopen('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

        ncxid = opf.manifest.id_for_path('toc.ncx') if os.path.exists('toc.ncx') else None
        if ncxid:
            with lopen('metadata.opf', 'r+b') as f:
                original = f.read()
                patched = original.replace(
                    b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                f.seek(0)
                f.write(patched)

        return os.path.join(os.getcwd(), 'metadata.opf')
示例#26
0
 def __call__(self, oeb, opts):
     '''
     Replace every ``data:image/...`` URI found in an <img src> of the spine
     items with a reference to the result of convert_image_data_uri().

     Payloads that are not valid base64 are logged as errors and skipped.
     Payloads whose image format cannot be detected are logged as warnings
     and skipped.
     '''
     from calibre.utils.imghdr import what
     self.log = oeb.log
     find_images = XPath('//h:img[@src]')
     for item in oeb.spine:
         root = item.data
         if not hasattr(root, 'xpath'):
             continue
         for img in find_images(root):
             src = img.get('src', '')
             if not src.startswith('data:'):
                 continue
             # Everything before the first comma is the header, the rest is
             # the payload
             header, _comma, payload = src.partition(',')
             if not (header.startswith('data:image/') and payload):
                 continue
             if ';base64' in header:
                 payload = re.sub(r'\s+', '', payload)
                 from polyglot.binary import from_base64_bytes
                 try:
                     payload = from_base64_bytes(payload)
                 except Exception:
                     self.log.error(
                         'Found invalid base64 encoded data URI, ignoring it'
                     )
                     continue
             else:
                 payload = urlunquote(payload)
             payload = as_bytes(payload)
             fmt = what(None, payload)
             if not fmt:
                 self.log.warn(
                     'Image encoded as data URL has unknown format, ignoring'
                 )
                 continue
             new_href = self.convert_image_data_uri(payload, fmt, oeb)
             img.set('src', item.relhref(new_href))
示例#27
0
    def convert(self, oeb, output_path, input_plugin, opts, log):
        '''
        Write the book ``oeb`` as an EPUB file at ``output_path``.

        The steps are: optionally add an inline TOC, flatten or uniquify
        filenames, apply the reader workarounds, rescale images, split large
        flows, set up the cover, make sure a TOC and a UUID identifier exist,
        render the book through the 'oeb' output plugin into a temporary
        directory and zip that directory into the EPUB container. Fonts
        listed in ``input_plugin.encrypted_fonts`` are re-encrypted and an
        encryption.xml is added. When ``opts.extract_to`` is set, the finished
        EPUB is also extracted there; anything already at that path is
        replaced.
        '''
        self.log, self.opts, self.oeb = log, opts, oeb

        if self.opts.epub_inline_toc:
            from calibre.ebooks.mobi.writer8.toc import TOCAdder
            opts.mobi_toc_at_start = not opts.epub_toc_at_end
            opts.mobi_passthrough = False
            opts.no_inline_toc = False
            TOCAdder(oeb,
                     opts,
                     replace_previous_inline_toc=True,
                     ignore_existing_toc=True)

        if self.opts.epub_flatten:
            from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
            FlatFilenames()(oeb, opts)
        else:
            from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
            UniqueFilenames()(oeb, opts)

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.upshift_markup()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages(check_colorspaces=True)(oeb, opts)

        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                      max_flow_size=self.opts.flow_size * 1024)
        split(self.oeb, self.opts)

        from calibre.ebooks.oeb.transforms.cover import CoverManager
        cm = CoverManager(
            no_default_cover=self.opts.no_default_epub_cover,
            no_svg_cover=self.opts.no_svg_cover,
            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
        cm(self.oeb, self.opts, self.log)

        self.workaround_sony_quirks()

        if self.oeb.toc.count() == 0:
            self.log.warn('This EPUB file has no Table of Contents. '
                          'Creating a default TOC')
            first = next(iter(self.oeb.spine))
            self.oeb.toc.add(_('Start'), first.href)

        from calibre.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
        uuid = None
        for x in identifiers:
            # An identifier without a scheme attribute yields None here, so
            # fall back to '' rather than calling .lower() on None
            scheme = x.get(OPF('scheme'), None) or ''
            if scheme.lower() == 'uuid' or unicode_type(
                    x).startswith('urn:uuid:'):
                uuid = unicode_type(x).split(':')[-1]
                break
        encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

        if uuid is None:
            self.log.warn('No UUID identifier found')
            from uuid import uuid4
            uuid = unicode_type(uuid4())
            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

        if encrypted_fonts and not uuid.startswith('urn:uuid:'):
            # Apparently ADE requires this value to start with urn:uuid:
            # for some absurd reason, or it will throw a hissy fit and refuse
            # to use the obfuscated fonts.
            for x in identifiers:
                if unicode_type(x) == uuid:
                    x.content = 'urn:uuid:' + uuid

        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            metadata_xml = None
            extra_entries = []
            if self.is_periodical:
                if self.opts.output_profile.epub_periodical_format == 'sony':
                    from calibre.ebooks.epub.periodical import sony_metadata
                    metadata_xml, atom_xml = sony_metadata(oeb)
                    extra_entries = [('atom.xml', 'application/atom+xml',
                                      atom_xml)]
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
            self.condense_ncx([
                os.path.join(tdir, x) for x in os.listdir(tdir)
                if x.endswith('.ncx')
            ][0])
            if self.opts.epub_version == '3':
                self.upgrade_to_epub3(tdir, opf)
            encryption = None
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
            with initialize_container(output_path,
                                      os.path.basename(opf),
                                      extra_entries=extra_entries) as epub:
                epub.add_dir(tdir)
                if encryption is not None:
                    epub.writestr('META-INF/encryption.xml',
                                  as_bytes(encryption))
                if metadata_xml is not None:
                    epub.writestr('META-INF/metadata.xml',
                                  metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                from calibre.utils.zipfile import ZipFile
                # Start from an empty directory: remove whatever is there
                if os.path.exists(opts.extract_to):
                    if os.path.isdir(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    else:
                        os.remove(opts.extract_to)
                os.mkdir(opts.extract_to)
                with ZipFile(output_path) as zf:
                    zf.extractall(path=opts.extract_to)
                self.log.info('EPUB extracted to', opts.extract_to)
示例#28
0
def convert_single_ebook(parent, db, book_ids, auto_conversion=False,  # {{{
        out_format=None, show_no_format_warning=True):
    '''
    Prepare one conversion job per book, configuring each book individually.

    For every id in ``book_ids`` a ``SingleConfig`` dialog is created. With
    ``auto_conversion`` the dialog is accepted programmatically, otherwise it
    is shown to the user. For each accepted dialog the input format is copied
    to a temporary file, an output temporary file is created and a job tuple
    is queued.

    :param parent: Parent widget passed to the dialogs.
    :param db: Library database used to read metadata and copy formats.
    :param book_ids: Sequence of book ids to convert.
    :param auto_conversion: If True, accept each dialog without showing it.
    :param out_format: Output format passed through to ``SingleConfig``.
    :param show_no_format_warning: If True, show a warning dialog listing the
        books for which ``NoSupportedInputFormats`` was raised.
    :return: ``(jobs, changed, bad)`` where ``jobs`` is a list of
        ``(func, args, description, OUTPUT_FORMAT, book_id, temp_files)``
        tuples, ``changed`` is True if at least one job was queued and ``bad``
        is a list of ``(book_id, available_formats)``. Returns
        ``(None, None, None)`` when ``book_ids`` is empty.
    '''
    changed = False
    jobs = []
    bad = []

    total = len(book_ids)
    if total == 0:
        return None, None, None

    for i, book_id in enumerate(book_ids):
        try:
            d = SingleConfig(parent, db, book_id, None, out_format)

            if auto_conversion:
                d.accept()
                result = QDialog.DialogCode.Accepted
            else:
                result = d.exec_()

            if result == QDialog.DialogCode.Accepted:
                mi = db.get_metadata(book_id, True)
                in_file = PersistentTemporaryFile('.'+d.input_format)
                with in_file:
                    input_fmt = db.original_fmt(book_id, d.input_format).lower()
                    same_fmt = input_fmt == d.output_format.lower()
                    db.copy_format_to(book_id, input_fmt, in_file,
                            index_is_id=True)

                # The output file is created up front (holding just the format
                # name) so that its path can be handed to the job.
                out_file = PersistentTemporaryFile('.' + d.output_format)
                out_file.write(as_bytes(d.output_format))
                out_file.close()
                temp_files = [in_file]

                try:
                    dtitle = unicode_type(mi.title)
                except Exception:
                    # Fall back to a printable representation of odd titles
                    # without swallowing KeyboardInterrupt/SystemExit.
                    dtitle = repr(mi.title)
                desc = _('Convert book %(num)d of %(total)d (%(title)s)') % \
                        {'num':i + 1, 'total':total, 'title':dtitle}

                recs = d.recommendations
                if d.opf_file is not None:
                    recs.append(('read_metadata_from_opf', d.opf_file.name,
                        OptionRecommendation.HIGH))
                    temp_files.append(d.opf_file)
                if d.cover_file is not None:
                    recs.append(('cover', d.cover_file.name,
                        OptionRecommendation.HIGH))
                    temp_files.append(d.cover_file)
                args = [in_file.name, out_file.name, recs]
                temp_files.append(out_file)
                # Extra behaviours are encoded into the function name as
                # 'gui_convert_override:flag1;flag2'.
                func = 'gui_convert_override'
                parts = []
                if not auto_conversion and d.manually_fine_tune_toc:
                    parts.append('manually_fine_tune_toc')
                if same_fmt:
                    parts.append('same_fmt')
                if parts:
                    func += ':%s'%(';'.join(parts))
                jobs.append((func, args, desc, d.output_format.upper(), book_id, temp_files))

                changed = True
                d.break_cycles()
        except NoSupportedInputFormats as nsif:
            bad.append((book_id, nsif.available_formats))

    if bad and show_no_format_warning:
        if len(bad) == 1 and not bad[0][1]:
            title = db.title(bad[0][0], True)
            warning_dialog(parent, _('Could not convert'), '<p>'+ _(
                'Could not convert <b>%s</b> as it has no e-book files. If you '
                'think it should have files, but calibre is not finding '
                'them, that is most likely because you moved the book\'s '
                'files around outside of calibre. You will need to find those files '
                'and re-add them to calibre.')%title, show=True)
        else:
            res = []
            for bad_id, available_formats in bad:
                title = db.title(bad_id, True)
                if available_formats:
                    msg = _('No supported formats (Available formats: %s)')%(
                        ', '.join(available_formats))
                else:
                    msg = _('This book has no actual e-book files')
                res.append('%s - %s'%(title, msg))

            msg = '%s' % '\n'.join(res)
            warning_dialog(parent, _('Could not convert some books'),
                (
                    _('Could not convert the book because no supported source format was found')
                    if len(res) == 1 else
                    _('Could not convert {num} of {tot} books, because no supported source formats were found.')
                ).format(num=len(res), tot=total),
                msg).exec_()

    return jobs, changed, bad
示例#29
0
def pickle_binary_string(data):
    '''
    Serialize ``data`` as a pickle protocol 2 stream holding one BINSTRING.

    The stream is: PROTO opcode, protocol number 2, BINSTRING opcode, the
    payload length as a little-endian signed 32-bit integer, the payload and
    finally the STOP opcode.
    '''
    # Maintains compatibility with python's pickle module protocol version 2
    from struct import pack
    payload = as_bytes(data)
    length_field = pack(b'<i', len(payload))
    return b''.join((b'\x80', b'\x02', b'T', length_field, payload, b'.'))
示例#30
0
    def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
        '''
        Generate an APNX page-map file for a MOBI book and write it to disk.

        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count, otherwise a count will be estimated
        using the parser selected by ``method``.

        :param mobi_file_path: Path of the MOBI file to read.
        :param apnx_path: Path the APNX data is written to.
        :param method: ``'accurate'`` or ``'pagebreak'``. Any other value
            (including the default None) ends up using the fast parser.
        :param page_count: If non-zero, produce exactly this many pages.
        :raises Exception: If the file does not identify as BOOKMOBI or no
            page mapping could be generated.
        '''
        import uuid
        apnx_meta = {
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'asin': '',
            'cdetype': 'EBOK',
            'format': 'MOBI_7',
            'acr': ''
        }

        with lopen(mobi_file_path, 'rb') as mf:
            ident = PdbHeaderReader(mf).identity()
            if as_bytes(ident) != b'BOOKMOBI':
                # Check that this is really a MOBI file.
                raise Exception(
                    _('Not a valid MOBI file. Reports identity of %s') % ident)
            apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(),
                                          errors='replace')

        # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
        # The defaults set in apnx_meta above are only overridden when the
        # header actually provides a value.
        with lopen(mobi_file_path, 'rb') as mf:
            mh = MetadataHeader(mf, default_log)
            if mh.mobi_version == 8:
                apnx_meta['format'] = 'MOBI_8'
            if mh.exth is not None:
                if mh.exth.cdetype:
                    apnx_meta['cdetype'] = str(mh.exth.cdetype)
                if mh.exth.uuid:
                    apnx_meta['asin'] = str(mh.exth.uuid)

        # Get the pages depending on the chosen parser
        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        else:
            try:
                if method == 'accurate':
                    pages = self.get_pages_accurate(mobi_file_path)
                elif method == 'pagebreak':
                    pages = self.get_pages_pagebreak_tag(mobi_file_path)
                    if not pages:
                        pages = self.get_pages_accurate(mobi_file_path)
                else:
                    raise Exception(
                        '%r is not a valid apnx generation method' % method)
            except Exception:
                # Fall back to the fast parser if we can't
                # use the accurate one. Typically this is
                # due to the file having DRM. KeyboardInterrupt and
                # SystemExit are deliberately not caught here.
                pages = self.get_pages_fast(mobi_file_path)

        if not pages:
            pages = self.get_pages_fast(mobi_file_path)
        if not pages:
            raise Exception(_('Could not generate page mapping.'))

        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages, apnx_meta)

        # Write the APNX.
        with lopen(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)
            fsync(apnxf)
示例#31
0
 def bin4(num):
     ''' Return the binary digits of num, left-padded with zeros to at least four characters, as bytes '''
     digits = bin(num)[2:]
     return as_bytes(digits.rjust(4, '0'))
示例#32
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object; its title and authors are substituted into
        the generated HTML and it is serialized into the OPF.
    :param path: Destination file path of the new book.
    :param fmt: One of ``valid_empty_formats``. ``txt`` and ``docx`` are
        handled specially; ``azw3`` is built via ``opf_to_azw3``; anything
        else is written as an EPUB style ZIP container.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    :raises ValueError: If ``fmt`` is not in ``valid_empty_formats``.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        # A text "book" is just the title (or an empty file if there is none)
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        # The Plumber is only used as a source of conversion options; the
        # file names given to it are placeholders.
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    # Use the first non-empty language element of the OPF, falling back to
    # 'und' when there is none.
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (HTML file + NCX) and a spine referencing the HTML file
    # to the OPF tree, at child positions 1 and 2 respectively.
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    # container.xml pointing at the OPF file, as UTF-8 bytes
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    # Fill the language, title and authors placeholders of the bundled
    # template with XML-escaped values.
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
            '_TITLE_', prepare_string_for_xml(mi.title)).replace(
                '_AUTHORS_',
                prepare_string_for_xml(authors_to_string(
                    mi.authors))).encode('utf-8')
    # Round-trip the HTML through the parser to pretty print it
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    # The NCX must be created while opf is still a tree; opf is replaced by
    # its serialized bytes just below.
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang),
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf,
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    if fmt == 'azw3':
        # Write the three files into a temporary directory (which is also
        # made the current directory) and build the AZW3 from them.
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            # The mimetype entry is written first and stored uncompressed
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
示例#33
0
def path_key(path):
    ''' Return the hexadecimal SHA-256 digest of path (converted to bytes) '''
    hasher = sha256()
    hasher.update(as_bytes(path))
    return hasher.hexdigest()
示例#34
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert an RTF file to XHTML plus an OPF in the current directory.

        The RTF is converted to XML, pictures (if any were extracted) are
        mapped into the XML, the XML is transformed to XHTML with the bundled
        XSLT stylesheet and written to ``index.xhtml``. Finally metadata is
        read from the stream and ``metadata.opf`` is written.

        :param stream: Open RTF file; ``stream.name`` is used as its path.
        :param options: Conversion options, stored as ``self.opts``.
        :param file_ext: Unused here.
        :param log: Logger, stored as ``self.log``.
        :param accelerators: Unused here.
        :return: Absolute path of the generated ``metadata.opf``.
        :raises ValueError: If the RTF contains invalid/unsupported codes.
        '''
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            self.log.exception('Unable to parse RTF')
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # imap must exist even when no picture directory was produced, as it
        # is consulted for every rtf:pict element below.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                # Best effort: continue the conversion without images
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        # Replace picture numbers with the names of the extracted images
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = {('calibre', 'inline-class') : inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = u'index.xhtml'
        with open(html, 'wb') as f:
            res = as_bytes(transform.tostring(result))
            # clean multiple \n
            res = re.sub(b'\n+', b'\n', res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(getcwd(), mi)
        opf.create_manifest([(u'index.xhtml', None)])
        opf.create_spine([u'index.xhtml'])
        # Close the OPF file deterministically instead of leaking the handle
        with open(u'metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath(u'metadata.opf')
示例#35
0
def atomic_write(base, name, content):
    '''
    Write content (converted to bytes) to the file name inside base.

    The data is first written to a sibling file with a ``.tmp`` suffix which
    is then renamed over the destination with ``atomic_rename``.
    '''
    destination = os.path.join(base, name)
    staging = destination + '.tmp'
    with lopen(staging, 'wb') as out:
        out.write(as_bytes(content))
    atomic_rename(staging, destination)
示例#36
0
 def get_css(self, oeb_book):
     '''
     Return the text of every stylesheet in the book's manifest as bytes.

     Each manifest item whose media type is text/css contributes its
     cssText followed by a blank line; the result is empty if there are
     no stylesheets.
     '''
     return b''.join(
         as_bytes(entry.data.cssText) + b'\n\n'
         for entry in oeb_book.manifest
         if entry.media_type == 'text/css'
     )
示例#37
0
def mobile(ctx, rd):
    '''
    Serve one page of the mobile book list.

    Reads ``start``, ``num``, ``search``, ``order`` and ``sort`` from the
    request query, searches and sorts the library under its read lock and
    renders the selected slice of books with ``build_index``.

    :raises HTTPBadRequest: If ``start`` or ``num`` is not an integer.
    '''
    db, library_id, library_map, default_library = get_library_data(ctx, rd)
    try:
        start = max(1, int(rd.query.get('start', 1)))
    except ValueError:
        raise HTTPBadRequest('start is not an integer')
    try:
        num = max(0, int(rd.query.get('num', 25)))
    except ValueError:
        raise HTTPBadRequest('num is not an integer')
    search = rd.query.get('search') or ''
    with db.safe_read_lock:
        book_ids = ctx.search(rd, db, search)
        total = len(book_ids)
        requested_order = rd.query.get('order', '')
        ascending = requested_order.lower().strip() == 'ascending'
        sort_by = sanitize_sort_field_name(db.field_metadata, rd.query.get('sort') or 'date')
        try:
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        except Exception:
            # Sorting on the requested field failed, sort by date instead
            sort_by = 'date'
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        # start is 1-based
        first = start - 1
        books = []
        for book_id in book_ids[first:first + num]:
            books.append(db.get_metadata(book_id))
    rd.outheaders['Last-Modified'] = http_date(timestampfromdt(db.last_modified()))
    if ascending:
        order = 'ascending'
    else:
        order = 'descending'
    # Query string used as the base for the links generated in the page
    q = {}
    q[b'search'] = search.encode('utf-8')
    q[b'order'] = order.encode('ascii')
    q[b'sort'] = sort_by.encode('utf-8')
    q[b'num'] = as_bytes(num)
    q['library_id'] = library_id
    url_base = ctx.url_for('/mobile') + '?' + urlencode(q)
    # All libraries except the current one
    lm = dict((k, v) for k, v in iteritems(library_map) if k != library_id)
    return build_index(rd, books, num, search, sort_by, order, start, total, url_base, db.field_metadata, ctx, lm, library_id)
示例#38
0
文件: utils.py 项目: j-howell/calibre
def encode_string(raw):
    '''
    Return raw as bytes, prefixed with a single byte holding its length.

    The length must fit into one byte; longer input raises ValueError.
    '''
    payload = bytearray(as_bytes(raw))
    prefix = bytearray((len(payload),))
    return bytes(prefix + payload)
示例#39
0
def encode_string(raw):
    '''
    Encode raw as a length-prefixed byte string.

    The first byte of the result is the number of bytes that follow, so
    input longer than 255 bytes raises ValueError.
    '''
    body = bytearray(as_bytes(raw))
    size = len(body)
    body[0:0] = bytearray((size,))
    return bytes(body)
示例#40
0
    def requestStarted(self, rq):
        '''
        Answer a request made by the viewer via the custom URL scheme.

        Only GET requests for an allowed host and the ``FAKE_PROTOCOL`` scheme
        are served. Depending on the request path this replies with book
        content (``book/...``), the book manifest, the reader background
        image, MathJax files (``mathjax/...``) or the viewer HTML (empty
        path). Every other request is failed.

        :param rq: The request job; it is either replied to via
            ``send_reply`` or failed.
        '''
        if bytes(rq.requestMethod()) != b'GET':
            return self.fail_request(rq, rq.RequestDenied)
        url = rq.requestUrl()
        host = url.host()
        if host not in self.allowed_hosts or url.scheme() != FAKE_PROTOCOL:
            return self.fail_request(rq)
        name = url.path()[1:]
        # The sandbox host may only be used to load book content
        if host == SANDBOX_HOST and not name.startswith('book/'):
            return self.fail_request(rq)
        if name.startswith('book/'):
            name = name.partition('/')[2]
            if name == '__index__':
                send_reply(rq, 'text/html', b'<div>\xa0</div>')
                return
            elif name == '__popup__':
                send_reply(
                    rq, 'text/html',
                    b'<div id="calibre-viewer-footnote-iframe">\xa0</div>')
                return
            try:
                data, mime_type = get_data(name)
                if data is None:
                    rq.fail(rq.UrlNotFound)
                    return
                data = as_bytes(data)
                mime_type = {
                    # Prevent warning in console about mimetype of fonts
                    'application/vnd.ms-opentype': 'application/x-font-ttf',
                    'application/x-font-truetype': 'application/x-font-ttf',
                    'application/font-sfnt': 'application/x-font-ttf',
                }.get(mime_type, mime_type)
                send_reply(rq, mime_type, data)
            except Exception:
                import traceback
                traceback.print_exc()
                return self.fail_request(rq, rq.RequestFailed)
        elif name == 'manifest':
            data = b'[' + set_book_path.manifest + b',' + set_book_path.metadata + b']'
            send_reply(rq, set_book_path.manifest_mime, data)
        elif name == 'reader-background':
            mt, data = background_image()
            if data:
                send_reply(rq, mt, data)
            else:
                rq.fail(rq.UrlNotFound)
        elif name.startswith('mathjax/'):
            from calibre.gui2.viewer.mathjax import monkeypatch_mathjax
            if name == 'mathjax/manifest.json':
                # The manifest is serialized once and cached on the instance
                if self.mathjax_manifest is None:
                    import json
                    from calibre.srv.books import get_mathjax_manifest
                    self.mathjax_manifest = as_bytes(
                        json.dumps(get_mathjax_manifest()['files']))
                send_reply(rq, 'application/json', self.mathjax_manifest)
                return
            path = os.path.abspath(os.path.join(self.mathjax_dir, '..', name))
            # Refuse paths that resolve outside the MathJax directory. Such
            # requests must be failed explicitly, otherwise they would never
            # receive any answer.
            if not path.startswith(self.mathjax_dir):
                return self.fail_request(rq)
            mt = guess_type(name)
            try:
                with lopen(path, 'rb') as f:
                    raw = f.read()
            except EnvironmentError as err:
                prints(
                    "Failed to get mathjax file: {} with error: {}".format(
                        name, err))
                return self.fail_request(rq, rq.RequestFailed)
            if 'MathJax.js' in name:
                raw = monkeypatch_mathjax(
                    raw.decode('utf-8')).encode('utf-8')

            send_reply(rq, mt, raw)
        elif not name:
            send_reply(rq, 'text/html', viewer_html())
        else:
            return self.fail_request(rq)
示例#41
0
def do_list(dbctx,
            fields,
            afields,
            sort_by,
            ascending,
            search_text,
            line_width,
            separator,
            prefix,
            limit,
            for_machine=False):
    '''
    Run the ``list`` command and print the result to standard output.

    With ``for_machine`` the data is written as indented JSON. Otherwise a
    table is printed: a green header line followed by one or more wrapped
    lines per book, with columns sized to fit ``line_width`` (or the
    terminal width when ``line_width`` is negative, defaulting to 80).

    ``afields`` and ``prefix`` are accepted but not used here.

    :raises SystemExit: With the command's answer as exit argument, if the
        answer cannot be indexed by field name.
    '''
    if sort_by is None:
        ascending = True
    ans = dbctx.run('list', fields, sort_by, ascending, search_text, limit)
    try:
        book_ids = ans['book_ids']
        data = ans['data']
        metadata = ans['metadata']
    except TypeError:
        raise SystemExit(ans)
    # The id column always comes first
    fields = list(ans['fields'])
    if 'id' in fields:
        fields.remove('id')
    fields.insert(0, 'id')
    stringify(data, metadata, for_machine)
    if for_machine:
        machine_rows = list(as_machine_data(book_ids, data, metadata))
        raw = json.dumps(machine_rows, indent=2, sort_keys=True)
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        getattr(sys.stdout, 'buffer', sys.stdout).write(raw)
        return
    from calibre.utils.terminal import ColoredStream, geometry

    output_table = prepare_output_table(fields, book_ids, data, metadata)

    # Widest cell of every column
    column_count = len(fields)
    widths = [0] * column_count
    for record in output_table:
        for col in range(column_count):
            widths[col] = max(widths[col], str_width(record[col]))

    screen_width = line_width if not line_width < 0 else geometry()[0]
    if not screen_width:
        screen_width = 80
    field_width = screen_width // len(fields)
    base_widths = [min(w + 1, field_width) for w in widths]

    # Hand out the remaining screen width to the first column that is still
    # narrower than its content, until nothing is left or nothing needs it.
    while sum(base_widths) < screen_width:
        for col in range(len(widths)):
            if base_widths[col] < widths[col]:
                base_widths[col] += min(screen_width - sum(base_widths),
                                        widths[col] - base_widths[col])
                break
        else:
            break

    widths = list(base_widths)
    header = ''.join(
        '%-*s%s' % (width - len(separator), title, separator)
        for width, title in zip(widths, fields))
    with ColoredStream(sys.stdout, fg='green'):
        print(header, flush=True)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    linesep = as_bytes(os.linesep)

    wrappers = [
        TextWrapper(x - 1).wrap if x > 1 else lambda y: y for x in widths
    ]

    for record in output_table:
        cells = [wrappers[col](record[col]) for col in range(len(fields))]
        line_count = max(map(len, cells))
        last_col = len(cells) - 1
        for row in range(line_count):
            for col, cell in enumerate(cells):
                ft = cell[row] if row < len(cell) else ''
                stdout.write(ft.encode('utf-8'))
                if col < last_col:
                    # Pad the cell to the column width, then the separator
                    filler = ('%*s' % (widths[col] - str_width(ft) - 1, ''))
                    stdout.write((filler + separator).encode('utf-8'))
            stdout.write(linesep)
示例#42
0
def NAVCATALOG_ENTRY(url_for, updated, title, description, query):
    '''
    Build the feed entry element for a navigation catalog.

    The entry links to ``/opds/navcatalog`` with the hex encoded query and
    its id is derived from the SHA-1 digest of that link.
    '''
    href = url_for('/opds/navcatalog', which=as_hex_unicode(query))
    digest = hashlib.sha1(as_bytes(href)).hexdigest()
    entry_id = 'calibre-navcatalog:' + digest
    return E.entry(
        TITLE(title),
        ID(entry_id),
        UPDATED(updated),
        E.content(description, type='text'),
        NAVLINK(href=href),
    )
示例#43
0
文件: mobi6.py 项目: j-howell/calibre
 def bin4(num):
     ''' Binary representation of num as bytes, zero-padded on the left to a minimum of four digits '''
     bits = bin(num)[2:]
     missing = 4 - len(bits)
     padded = '0' * missing + bits
     return as_bytes(padded)
示例#44
0
 def get_css(self, oeb_book):
     '''
     Collect the CSS of the book as a single bytes object.

     The cssText of every manifest item with media type text/css is
     converted to bytes and terminated with two newlines.
     '''
     sheets = [
         as_bytes(item.data.cssText)
         for item in oeb_book.manifest
         if item.media_type == 'text/css'
     ]
     return b''.join(sheet + b'\n\n' for sheet in sheets)
示例#45
0
文件: utils.py 项目: j-howell/calibre
 def fast_now_strftime(fmt):
     '''
     Format the current local time according to fmt and return text.

     time.strftime() only works with the native str type: on Python 3 it
     rejects a bytes format and already returns text. The format is
     therefore converted to the native str type if needed and the result is
     decoded only when it is actually bytes.

     :param fmt: strftime format, as text or as mbcs encoded bytes.
     :return: The formatted time as text.
     '''
     if not isinstance(fmt, str):
         if isinstance(fmt, bytes):
             fmt = fmt.decode('mbcs', 'replace')
         else:
             fmt = fmt.encode('mbcs')
     ans = time.strftime(fmt)
     if isinstance(ans, bytes):
         ans = ans.decode('mbcs', 'replace')
     return ans
示例#46
0
def save_metadata(metadata, f):
    '''
    Replace the contents of the open file f with metadata as JSON.

    The file is rewound and truncated, then the metadata is written as
    JSON (indented by two spaces) converted to bytes.
    '''
    f.seek(0)
    f.truncate()
    serialized = json.dumps(metadata, indent=2)
    f.write(as_bytes(serialized))
示例#47
0
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Convert an RTF file to XHTML plus an OPF in the current directory.

        The RTF is converted to XML, pictures (if any were extracted) are
        mapped into the XML, the XML is transformed to XHTML with the bundled
        XSLT stylesheet and written to ``index.xhtml``. Finally metadata is
        read from the stream and ``metadata.opf`` is written.

        :param stream: Open RTF file; ``stream.name`` is used as its path.
        :param options: Conversion options, stored as ``self.opts``.
        :param file_ext: Unused here.
        :param log: Logger, stored as ``self.log``.
        :param accelerators: Unused here.
        :return: Absolute path of the generated ``metadata.opf``.
        :raises ValueError: If the RTF contains invalid/unsupported codes.
        '''
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        from calibre.utils.xml_parse import safe_xml_fromstring
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            self.log.exception('Unable to parse RTF')
            raise ValueError(
                _('This RTF file has a feature calibre does not '
                  'support. Convert it to HTML first and then try it.\n%s') %
                e)

        # imap must exist even when no picture directory was produced, as it
        # is consulted for every rtf:pict element below.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                # Best effort: continue the conversion without images
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        doc = safe_xml_fromstring(xml)
        border_styles = self.convert_borders(doc)
        # Replace picture numbers with the names of the extracted images
        for pict in doc.xpath(
                '//rtf:pict[@num]',
                namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True),
                                       recover=False)
        extensions = {('calibre', 'inline-class'): inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = as_bytes(transform.tostring(result))
            # clean multiple \n
            res = re.sub(b'\n+', b'\n', res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Close the OPF file deterministically instead of leaking the handle
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
示例#48
0
    def get_pages_accurate(self, mobi_file_path):
        '''
        Estimate page positions by scanning the uncompressed book markup.

        This is slower and more resource intensive than the fast method.
        The model is a paperback page with lines of at most 70 characters:
        every opening <p> tag starts a new line and, inside a paragraph,
        every 70th character outside of markup starts another one. The
        position of every 32nd line is reported as the start of a page.

        Books with a non-zero encryption type (DRM) cannot be parsed and
        are handed to get_pages_fast() instead.

        This could be made more accurate by treating
        <div class="mbp_pagebreak" /> as a new page marker and <br>
        elements as empty lines.
        '''
        # Get the MOBI html.
        reader = MobiReader(mobi_file_path, default_log)
        if reader.book_header.encryption_type != 0:
            # DRMed book
            return self.get_pages_fast(mobi_file_path)
        reader.extract_text()

        SLASH, P, LT, GT = ord('/'), ord('p'), ord('<'), ord('>')

        # Parser state
        inside_tag = False
        inside_para = False
        expect_p = False     # the previous byte(s) were '<' or '</'
        is_closing = False   # a '/' directly followed the '<'
        chars_in_line = 0

        # Start offset of every line found
        line_starts = []

        # Everything is done in a single pass over the bytes, as that is
        # much faster than repeatedly calling re or string functions which
        # each scan the text again.
        #
        # Lowercasing is fine because the text is not modified, only byte
        # values and their positions in the stream matter.
        markup = bytearray(as_bytes(reader.mobi_html.lower()))
        for offset, byte in enumerate(markup):
            # Directly after '<' (or '</'): is this a p tag?
            if expect_p:
                if byte == SLASH:
                    is_closing = True
                    continue
                if byte == P:
                    inside_para = not is_closing
                    if inside_para:
                        # The line starts at the '<' of the tag
                        line_starts.append(offset - 2)
                expect_p = False
                is_closing = False
                continue

            if byte == LT:
                inside_tag = True
                expect_p = True
                continue
            if byte == GT:
                inside_tag = False
                expect_p = False
                continue

            if inside_para and not inside_tag:
                chars_in_line += 1
                if chars_in_line == 70:
                    line_starts.append(offset)
                    chars_in_line = 0

        # Every 32nd line starts a new page
        return line_starts[::32]
示例#49
0
    def do_book(self):
        '''
        Queue the conversion job for the next book, then reschedule itself.

        Once all books have been handled this returns the result of
        ``self.do_queue()``. Otherwise the input format of the next book is
        copied to a temporary file, the conversion recommendations are
        assembled (input format defaults, optionally the saved per-book
        settings, then the user's settings) and a job tuple is appended to
        ``self.jobs``. Books without a supported input format are recorded in
        ``self.bad``. The next call is scheduled via a zero length timer.
        '''
        if self.i >= len(self.book_ids):
            return self.do_queue()
        book_id = self.book_ids[self.i]
        self.i += 1

        try:
            input_format = get_input_format_for_book(self.db, book_id, None)[0]
            input_fmt = self.db.original_fmt(book_id, input_format).lower()
            same_fmt = input_fmt == self.output_format.lower()
            mi, opf_file = create_opf_file(self.db, book_id)
            in_file = PersistentTemporaryFile('.'+input_format)
            with in_file:
                self.db.copy_format_to(book_id, input_fmt, in_file,
                        index_is_id=True)

            # The output file is created up front (holding just the format
            # name) so that its path can be handed to the job.
            out_file = PersistentTemporaryFile('.' + self.output_format)
            out_file.write(as_bytes(self.output_format))
            out_file.close()
            temp_files = [in_file]

            # Later sources override earlier ones
            combined_recs = GuiRecommendations()
            default_recs = bulk_defaults_for_input_format(input_format)
            for key in default_recs:
                combined_recs[key] = default_recs[key]
            if self.use_saved_single_settings:
                specific_recs = load_specifics(self.db, book_id)
                for key in specific_recs:
                    combined_recs[key] = specific_recs[key]
            for item in self.user_recs:
                combined_recs[item[0]] = item[1]
            save_specifics(self.db, book_id, combined_recs)
            lrecs = list(combined_recs.to_recommendations())
            from calibre.customize.ui import plugin_for_output_format
            op = plugin_for_output_format(self.output_format)
            if op and op.recommendations:
                # Options the output plugin has its own recommendation for
                # are raised to HIGH level.
                prec = {x[0] for x in op.recommendations}
                for idx, rec in enumerate(lrecs):
                    if rec[0] in prec:
                        lrecs[idx] = (rec[0], rec[1], OptionRecommendation.HIGH)

            cover_file = create_cover_file(self.db, book_id)

            if opf_file is not None:
                lrecs.append(('read_metadata_from_opf', opf_file.name,
                    OptionRecommendation.HIGH))
                temp_files.append(opf_file)
            if cover_file is not None:
                lrecs.append(('cover', cover_file.name,
                    OptionRecommendation.HIGH))
                temp_files.append(cover_file)

            # debug_pipeline is never passed on to bulk conversion jobs
            lrecs = [x for x in lrecs if x[0] != 'debug_pipeline']
            try:
                dtitle = unicode_type(mi.title)
            except Exception:
                # Fall back to a printable representation of odd titles
                # without swallowing KeyboardInterrupt/SystemExit.
                dtitle = repr(mi.title)
            if len(dtitle) > 50:
                dtitle = dtitle[:50].rpartition(' ')[0]+'...'
            self.setLabelText(_('Queueing ')+dtitle)
            desc = _('Convert book %(num)d of %(tot)d (%(title)s)') % dict(
                    num=self.i, tot=len(self.book_ids), title=dtitle)

            args = [in_file.name, out_file.name, lrecs]
            temp_files.append(out_file)
            func = 'gui_convert_override'
            if same_fmt:
                func += ':same_fmt'
            self.jobs.append((func, args, desc, self.output_format.upper(), book_id, temp_files))

            self.changed = True
            self.setValue(self.i)
        except NoSupportedInputFormats:
            self.bad.append(book_id)
        QTimer.singleShot(0, self.do_book)
示例#50
0
def process_exploded_book(book_fmt,
                          opfpath,
                          input_fmt,
                          tdir,
                          render_manager,
                          log=None,
                          book_hash=None,
                          save_bookmark_data=False,
                          book_metadata=None,
                          virtualize_resources=True):
    '''
    Render an already exploded book in ``tdir`` and write its manifest.

    The files that need processing (stylesheets, HTML documents and SVG
    images) are handed to ``render_manager``; the per-file results are merged
    into a single render-data dictionary which is serialized as
    ``calibre-book-manifest.json`` in the container root.

    :param book_fmt: Stored verbatim under ``book_format`` in the manifest.
    :param opfpath: Path of the OPF file, passed to the container and to the
        render workers.
    :param input_fmt: Input format name; used to look up the input plugin and
        passed (lower-cased) to ``create_cover_page``.
    :param tdir: Directory containing the exploded book.
    :param render_manager: Object providing ``launch_workers(names, container)``
        and callable as ``render_manager(names, args, container)``; the call
        must yield ``(link_to_map, html_data, virtualized_names)`` triples.
    :param log: Logger for the container; falls back to ``default_log``.
    :param book_hash: Stored verbatim under ``book_hash`` in the manifest.
    :param save_bookmark_data: If true, the contents of
        ``META-INF/calibre_bookmarks.txt`` (when present) are read and returned.
    :param book_metadata: Passed through to ``create_cover_page``.
    :param virtualize_resources: Forwarded to the render workers.
    :return: ``(container, bookmark_data)``; ``bookmark_data`` is ``None``
        unless bookmarks were requested and found.
    '''
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # Sort key: HTML documents first, then ascending file size. The
        # stated intent is that workers which get large files or stylesheets
        # end up with fewer names; the actual distribution of names is
        # decided by render_manager.
        size = os.path.getsize(container.name_path_map[name])
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(
            tuple(n for n, mt in iteritems(container.mime_map)
                  if needs_work(mt)), container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    ncx_mime_type = guess_type('a.ncx')
    excluded_names = {
        name
        for name, mt in iteritems(container.mime_map)
        if name == container.opf_name or mt == ncx_mime_type
        or name.startswith('META-INF/') or name == 'mimetype'
        or not container.has_name_and_is_not_empty(name)
    }
    raster_cover_name, titlepage_name = create_cover_page(
        container, input_fmt.lower(), is_comic, book_metadata)

    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        # No usable ToC in the book: build one from the heading tags instead
        toc = from_xpaths(container,
                          ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]

    book_render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    # This list is deliberately recomputed rather than reused from the
    # launch_workers() call above: create_cover_page() has run in between
    # and may have changed the container.
    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names,
        (tdir, opfpath, virtualize_resources, book_render_data['link_uid'],
         container.data_for_clone()), container)
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        # Union the per-key values of src into dest
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        # Build the manifest entry for one file. For HTML files this also
        # accumulates the book wide totals in book_render_data.
        mt = (container.mime_map.get(name)
              or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            data = html_data[name]
            ans['length'] = l = data['length']
            book_render_data['total_length'] += l
            if name in spineq:
                book_render_data['spine_length'] += l
            ans['has_maths'] = hm = data['has_maths']
            if hm:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = data['anchor_map']
        return ans

    book_render_data['files'] = {
        name: manifest_data(name)
        for name in set(container.name_path_map) - excluded_names
    }
    container.commit()

    for name in excluded_names:
        os.remove(container.name_path_map[name])

    for name, amap in iteritems(ltm):
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'),
               'wb') as f:
        f.write(data)

    return container, bookmark_data
示例#51
0
    def __init__(self, type_, data, length):
        '''
        Store one typed metadata record and decode its payload where possible.

        :param type_: Numeric record type. It is translated to a human
            readable ``self.name``; unknown types use ``repr(type_)``.
        :param data: Raw record payload (bytes). Integer valued records are
            unpacked to an int, records 209 and 300 are converted to their
            hexadecimal representation, everything else is kept as is.
        :param length: Record length, used to pick the integer width:
            9 -> 1 byte, 10 -> 2 bytes, anything else -> 4 bytes
            (presumably the payload size plus an 8 byte record header --
            TODO confirm against the caller).
        :raises struct.error: If an integer valued record's payload does not
            have the size implied by ``length``.
        '''
        from binascii import hexlify

        self.type = type_
        self.data = data
        self.length = length
        self.name = {
                1   : 'Drm Server Id',
                2   : 'Drm Commerce Id',
                3   : 'Drm Ebookbase Book Id',
                100 : 'Creator',
                101 : 'Publisher',
                102 : 'Imprint',
                103 : 'Description',
                104 : 'ISBN',
                105 : 'Subject',
                106 : 'Published',
                107 : 'Review',
                108 : 'Contributor',
                109 : 'Rights',
                110 : 'SubjectCode',
                111 : 'Type',
                112 : 'Source',
                113 : 'ASIN',
                114 : 'versionNumber',
                115 : 'sample',
                116 : 'StartOffset',
                117 : 'Adult',
                118 : 'Price',
                119 : 'Currency',
                121 : 'KF8_Boundary_Section',
                122 : 'fixed-layout',
                123 : 'book-type',
                124 : 'orientation-lock',
                125 : 'KF8_Count_of_Resources_Fonts_Images',
                126 : 'original-resolution',
                127 : 'zero-gutter',
                128 : 'zero-margin',
                129 : 'KF8_Masthead/Cover_Image',
                131 : 'KF8_Unidentified_Count',
                132 : 'RegionMagnification',
                200 : 'DictShortName',
                201 : 'CoverOffset',
                202 : 'ThumbOffset',
                203 : 'Fake Cover',
                204 : 'Creator Software',
                205 : 'Creator Major Version',  # '>I'
                206 : 'Creator Minor Version',  # '>I'
                207 : 'Creator Build Number',  # '>I'
                208 : 'Watermark',
                209 : 'Tamper Proof Keys [hex]',
                300 : 'Font Signature [hex]',
                301 : 'Clipping Limit [3xx]',  # percentage '>B'
                401 : 'Clipping Limit',  # percentage '>B'
                402 : 'Publisher Limit',
                404 : 'Text to Speech Disabled',  # '>B' 1 - TTS disabled 0 - TTS enabled
                501 : 'CDE Type',  # 4 chars (PDOC, EBOK, MAGZ, ...)
                502 : 'last_update_time',
                503 : 'Updated Title',
                504 : 'ASIN [5xx]',
                508 : 'Unknown Title Furigana?',
                517 : 'Unknown Creator Furigana?',
                522 : 'Unknown Publisher Furigana?',
                524 : 'Language',
                525 : 'primary-writing-mode',
                527 : 'page-progression-direction',
                528 : 'Override Kindle fonts',
                534 : 'Input Source Type',
                535 : 'Kindlegen Build-Rev Number',
                536 : 'Container Info',  # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?)
                538 : 'Container Resolution',
                539 : 'Container Mimetype',
                543 : 'Container id',  # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER
        }.get(self.type, repr(self.type))

        # The names below must match the table above exactly, otherwise the
        # record is silently left undecoded (type 301 is 'Clipping Limit [3xx]').
        if (self.name in {'sample', 'StartOffset', 'CoverOffset', 'ThumbOffset', 'Fake Cover',
                'Creator Software', 'Creator Major Version', 'Creator Minor Version',
                'Creator Build Number', 'Clipping Limit [3xx]', 'Clipping Limit',
                'Publisher Limit', 'Text to Speech Disabled'} or
                self.type in {121, 125, 131}):
            # Big-endian unsigned integer whose width depends on the length
            if self.length == 9:
                self.data, = struct.unpack(b'>B', self.data)
            elif self.length == 10:
                self.data, = struct.unpack(b'>H', self.data)
            else:
                self.data, = struct.unpack(b'>L', self.data)
        elif self.type in {209, 300}:
            # bytes has no 'hex' codec on Python 3; hexlify() returns the
            # hexadecimal representation as bytes on both Python 2 and 3.
            self.data = hexlify(self.data)