def main(args=sys.argv):
    """Command-line entry point: print a book file's metadata and optionally change it.

    ``args[1]`` is the path of the book file; its extension (lower-cased,
    without the dot) selects the metadata reader.  If any option other than
    ``to_opf`` / ``get_cover`` was supplied, the metadata is written back to
    the file and the re-read result is printed as well.  ``opts.to_opf`` and
    ``opts.get_cover`` additionally export an OPF file and the cover image.

    Returns 1 when no file was specified, 0 otherwise.
    """
    def indent(text):
        # Prefix every line of *text* with a tab.
        return '\t'+'\n\t'.join(text.split('\n'))

    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1

    path = args[1]
    extension = os.path.splitext(path)[1]
    stream_type = extension.replace('.', '').lower()

    # Every option except the two export-only ones counts as a request to
    # modify the file.
    export_only = ('to_opf', 'get_cover')
    trying_to_set = any(
        getattr(opts, pref.name) is not None
        for pref in config().option_set.preferences
        if pref.name not in export_only
    )

    with open(path, 'rb') as stream:
        mi = get_metadata(stream, stream_type, force_read_metadata=True)

    if trying_to_set:
        prints(_('Original metadata')+'::')
    metadata = unicode_type(mi)
    if trying_to_set:
        metadata = indent(metadata)
    prints(metadata, safe_encode=True)

    if trying_to_set:
        with open(path, 'r+b') as stream:
            do_set_metadata(opts, mi, stream, stream_type)
            stream.seek(0)
            stream.flush()
            lrf = None
            if stream_type == 'lrf' and opts.lrf_bookid is not None:
                lrf = LRFMetaFile(stream)
                lrf.book_id = opts.lrf_bookid
            mi = get_metadata(stream, stream_type, force_read_metadata=True)
            prints('\n' + _('Changed metadata') + '::')
            metadata = indent(unicode_type(mi))
            prints(metadata, safe_encode=True)
            if lrf is not None:
                prints('\tBookID:', lrf.book_id)

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(getcwd(), mi)
        with open(opts.to_opf, 'wb') as opf_file:
            opf.render(opf_file)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if not (mi.cover_data and mi.cover_data[1]):
            prints(_('No cover found'), file=sys.stderr)
        else:
            with open(opts.get_cover, 'wb') as cover_file:
                cover_file.write(mi.cover_data[1])
                prints(_('Cover saved to'), cover_file.name)

    return 0
def main(args=sys.argv):
    """Command-line entry point: print a book file's metadata and optionally change it.

    ``args[1]`` is the path of the book file; its extension (lower-cased,
    without the dot) selects the metadata reader.  If any option other than
    ``to_opf`` / ``get_cover`` was supplied, the metadata is written back to
    the file and the re-read result is printed as well.  ``opts.to_opf`` and
    ``opts.get_cover`` additionally export an OPF file and the cover image.

    Returns 1 when no file was specified, 0 otherwise.
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1

    path = args[1]
    stream_type = os.path.splitext(path)[1].replace('.', '').lower()

    # Every option except the two export-only ones counts as a request to
    # modify the file.
    trying_to_set = any(
        getattr(opts, pref.name) is not None
        for pref in config().option_set.preferences
        if pref.name not in ('to_opf', 'get_cover')
    )

    # Ask for write access only when something will be written, so that
    # read-only files can still be inspected, and let the context manager
    # close the handle on every exit path (it used to be leaked).
    with open(path, 'r+b' if trying_to_set else 'rb') as stream:
        mi = get_metadata(stream, stream_type, force_read_metadata=True)
        if trying_to_set:
            prints(_('Original metadata') + '::')
        metadata = unicode(mi)
        if trying_to_set:
            metadata = '\t' + '\n\t'.join(metadata.split('\n'))
        prints(metadata, safe_encode=True)

        if trying_to_set:
            stream.seek(0)
            do_set_metadata(opts, mi, stream, stream_type)
            stream.seek(0)
            stream.flush()
            lrf = None
            if stream_type == 'lrf' and opts.lrf_bookid is not None:
                lrf = LRFMetaFile(stream)
                lrf.book_id = opts.lrf_bookid
            mi = get_metadata(stream, stream_type, force_read_metadata=True)
            prints('\n' + _('Changed metadata') + '::')
            metadata = unicode(mi)
            metadata = '\t' + '\n\t'.join(metadata.split('\n'))
            prints(metadata, safe_encode=True)
            if lrf is not None:
                prints('\tBookID:', lrf.book_id)

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(os.getcwdu(), mi)
        with open(opts.to_opf, 'wb') as f:
            opf.render(f)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if mi.cover_data and mi.cover_data[1]:
            with open(opts.get_cover, 'wb') as f:
                f.write(mi.cover_data[1])
                prints(_('Cover saved to'), f.name)
        else:
            prints(_('No cover found'), file=sys.stderr)

    return 0
def do_show_metadata(db, id, as_opf):
    """Print the metadata of book *id* from *db* to standard output.

    With a true *as_opf* the metadata is rendered through ``OPFCreator`` onto
    ``sys.stdout``; otherwise its unicode form is printed.

    Raises ValueError when *db* reports that *id* does not exist.
    """
    known = db.has_id(id)
    if not known:
        raise ValueError('Id #%d is not present in database.'%id)

    mi = db.get_metadata(id, index_is_id=True)
    if not as_opf:
        prints(unicode(mi))
        return
    creator = OPFCreator(os.getcwdu(), mi)
    creator.render(sys.stdout)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a PML/PMLZ stream to HTML pages plus ``metadata.opf`` and ``toc.ncx``.

    For ``file_ext == 'pmlz'`` the archive is unpacked into a temporary
    directory and every ``*.pml`` member becomes one HTML page in the current
    directory; otherwise *stream* itself is processed into ``index.html``.

    Returns the path of the generated ``metadata.opf`` in the current directory.
    """
    from calibre.ebooks.metadata.toc import TOC
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.utils.zipfile import ZipFile

    self.options = options
    self.log = log
    pages, images = [], []
    toc = TOC()

    if file_ext != 'pmlz':
        toc = self.process_pml(stream, 'index.html')
        pages.append('index.html')
        if hasattr(stream, 'name'):
            source_dir = os.path.abspath(os.path.dirname(stream.name))
            images = self.get_images(stream, source_dir)
    else:
        log.debug('De-compressing content to temporary directory...')
        with TemporaryDirectory('_unpmlz') as tdir:
            archive = ZipFile(stream)
            archive.extractall(tdir)
            for pml in glob.glob(os.path.join(tdir, '*.pml')):
                stem = os.path.splitext(os.path.basename(pml))[0]
                html_name = stem + '.html'
                html_path = os.path.join(os.getcwd(), html_name)
                pages.append(html_name)
                log.debug('Processing PML item %s...' % pml)
                toc += self.process_pml(pml, html_path)
            images = self.get_images(stream, tdir, True)

    # We want pages to be ordered alphabetically.
    pages.sort()
    manifest_items = [(item, None) for item in pages + images]

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Reading metadata from input file...')
    mi = get_metadata(stream, 'pml')
    if 'images/cover.png' in images:
        mi.cover = 'images/cover.png'

    opf = OPFCreator(os.getcwd(), mi)
    log.debug('Generating manifest...')
    opf.create_manifest(manifest_items)
    opf.create_spine(pages)
    opf.set_toc(toc)
    with lopen('metadata.opf', 'wb') as opffile, lopen('toc.ncx', 'wb') as tocfile:
        opf.render(opffile, tocfile, 'toc.ncx')

    return os.path.join(os.getcwd(), 'metadata.opf')
def write(self, doc):
    """Write the converted document into ``self.dest_dir``.

    Produces ``index.html``, ``docx.css`` (only when CSS was generated),
    ``metadata.opf`` and ``toc.ncx``; an empty ``toc.ncx`` is deleted again.

    Returns the path of the written ``metadata.opf``.
    """
    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    index_path = os.path.join(self.dest_dir, 'index.html')
    with lopen(index_path, 'wb') as index_file:
        index_file.write(raw)

    css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
    if css:
        css_path = os.path.join(self.dest_dir, 'docx.css')
        with lopen(css_path, 'wb') as css_file:
            css_file.write(css.encode('utf-8'))

    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    # Re-label HTML manifest entries with whatever type 'a.xhtml' maps to.
    for entry in opf.manifest:
        if entry.media_type != 'text/html':
            continue
        entry.media_type = guess_type('a.xhtml')[0]
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)

    def process_guide(E, guide):
        # Add a guide reference to the table of contents when an anchor is known.
        if self.toc_anchor is None:
            return
        guide.append(E.reference(
            href='index.html#' + self.toc_anchor, title=_('Table of Contents'), type='toc'))

    toc_file = os.path.join(self.dest_dir, 'toc.ncx')
    opf_path = os.path.join(self.dest_dir, 'metadata.opf')
    with lopen(opf_path, 'wb') as of, open(toc_file, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
    # Nothing was written to the NCX: do not leave an empty file behind.
    if os.path.getsize(toc_file) == 0:
        os.remove(toc_file)
    return os.path.join(self.dest_dir, 'metadata.opf')
def write(self, doc):
    """Write the converted document into ``self.dest_dir``.

    Produces ``index.html``, ``docx.css`` (only when CSS was generated),
    ``metadata.opf`` and ``toc.ncx``; an empty ``toc.ncx`` is deleted again.

    Returns the path of the written ``metadata.opf``.
    """
    def in_dest(name):
        # Path of *name* inside the destination directory.
        return os.path.join(self.dest_dir, name)

    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    with open(in_dest('index.html'), 'wb') as out:
        out.write(raw)

    css = self.styles.generate_css(self.dest_dir, self.docx)
    if css:
        with open(in_dest('docx.css'), 'wb') as out:
            out.write(css.encode('utf-8'))

    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    # Re-label HTML manifest entries with whatever type 'a.xhtml' maps to.
    for manifest_item in opf.manifest:
        if manifest_item.media_type == 'text/html':
            manifest_item.media_type = guess_type('a.xhtml')[0]
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)

    toc_file = in_dest('toc.ncx')
    with open(in_dest('metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    # Nothing was written to the NCX: do not leave an empty file behind.
    if os.path.getsize(toc_file) == 0:
        os.remove(toc_file)
    return in_dest('metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Turn a PML or PMLZ input into HTML pages, an OPF and an NCX in the cwd.

    A ``pmlz`` input is extracted to a temporary directory and each ``*.pml``
    file found there is converted to a same-named ``.html`` page; any other
    input is converted directly into ``index.html``.

    Returns the path of ``metadata.opf`` inside the current directory.
    """
    from calibre.ebooks.metadata.toc import TOC
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.utils.zipfile import ZipFile

    self.options = options
    self.log = log
    pages, images = [], []
    toc = TOC()

    if file_ext == 'pmlz':
        log.debug('De-compressing content to temporary directory...')
        with TemporaryDirectory('_unpmlz') as tdir:
            ZipFile(stream).extractall(tdir)
            pml_files = glob.glob(os.path.join(tdir, '*.pml'))
            for pml in pml_files:
                base = os.path.basename(pml)
                html_name = os.path.splitext(base)[0]+'.html'
                html_path = os.path.join(getcwd(), html_name)
                pages.append(html_name)
                log.debug('Processing PML item %s...' % pml)
                page_toc = self.process_pml(pml, html_path)
                toc += page_toc
            images = self.get_images(stream, tdir, True)
    else:
        toc = self.process_pml(stream, 'index.html')
        pages.append('index.html')
        if hasattr(stream, 'name'):
            images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))

    # We want pages to be ordered alphabetically.
    pages.sort()

    manifest_items = []
    manifest_items.extend((name, None) for name in pages+images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Reading metadata from input file...')
    mi = get_metadata(stream, 'pml')
    cover_name = 'images/cover.png'
    if cover_name in images:
        mi.cover = cover_name

    opf = OPFCreator(getcwd(), mi)
    log.debug('Generating manifest...')
    opf.create_manifest(manifest_items)
    opf.create_spine(pages)
    opf.set_toc(toc)
    with lopen('metadata.opf', 'wb') as opffile:
        with lopen('toc.ncx', 'wb') as tocfile:
            opf.render(opffile, tocfile, 'toc.ncx')

    return os.path.join(getcwd(), 'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a PDF to HTML in the current directory and write ``metadata.opf``.

    Delegates to ``self.convert_new`` when ``options.new_pdf_engine`` is set.
    Otherwise ``pdftohtml`` is run on ``stream.name`` and every file found in
    the current directory afterwards is listed in the OPF manifest, with
    ``index.html`` as the only spine item.

    Returns the path of the generated ``metadata.opf``.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)
    pdftohtml(os.getcwdu(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(os.getcwdu(), mi)

    # index.html goes first; everything else in the directory follows it.
    manifest = [(u'index.html', None)]
    images = os.listdir(os.getcwdu())
    images.remove('index.html')
    manifest.extend((name, None) for name in images)

    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine([u'index.html'])
    log.debug('Rendering manifest...')
    with open(u'metadata.opf', 'wb') as opffile:
        opf.render(opffile)

    return os.path.join(os.getcwdu(), u'metadata.opf')
def main(opts, args, dbctx):
    """Show the metadata of the book whose id is ``args[0]``.

    The metadata is fetched with ``dbctx.run('show_metadata', book_id)``.
    With ``opts.as_opf`` it is rendered as OPF onto ``sys.stdout``; otherwise
    its unicode form is printed.

    Returns 0 on success.  Raises SystemExit when no id was given or when the
    lookup returns ``None``.
    """
    if len(args) < 1:
        raise SystemExit(_('You must specify an id'))
    book_id = int(args[0])
    mi = dbctx.run('show_metadata', book_id)
    if mi is None:
        # Format the requested book id.  The previous code interpolated the
        # builtin ``id`` function, which made '%d' raise TypeError instead of
        # reporting the missing book.
        raise SystemExit('Id #%d is not present in database.' % book_id)
    if opts.as_opf:
        mi = OPFCreator(os.getcwdu(), mi)
        mi.render(sys.stdout)
    else:
        prints(unicode(mi))

    return 0
def main(opts, args, dbctx):
    """Show the metadata of the book whose id is ``args[0]``.

    The metadata is fetched with ``dbctx.run('show_metadata', book_id)``.
    With ``opts.as_opf`` it is rendered as OPF; otherwise its string form is
    printed.

    Returns 0 on success.  Raises SystemExit when no id was given or when the
    lookup returns ``None``.
    """
    if len(args) < 1:
        raise SystemExit(_('You must specify an id'))

    book_id = int(args[0])
    mi = dbctx.run('show_metadata', book_id)
    if mi is None:
        raise SystemExit(f'Id #{book_id} is not present in database.')

    if not opts.as_opf:
        prints(str(mi))
        return 0

    # Prefer the underlying binary stream when stdout exposes one
    # (presumably because render() emits bytes -- confirm against OPFCreator).
    target = getattr(sys.stdout, 'buffer', sys.stdout)
    creator = OPFCreator(os.getcwd(), mi)
    creator.render(target)
    return 0
def convert(self, stream, options, file_ext, log, accelerators):
    """Run the external latex2mobi jar on ``stream.name`` and describe its output in an OPF.

    The jar is asked for markup-only output in the current directory; the
    resulting ``<input name>-markup`` directory becomes the OPF manifest and
    its ``latex2mobi.html`` the only spine item.

    Returns the path of the written ``metadata.opf``.
    """
    log.debug('Enter convert() ...')
    dest_dir = os.getcwdu()  # note: temp dir from calibre process
    log.debug('dest_dir: ' + dest_dir)
    mi = None

    # call latex2mobi with markup output only
    from subprocess import check_output, STDOUT, CalledProcessError
    args = [
        self.java_exec, '-jar',
        os.path.join(self.plugin_dir, JAR_FILENAME),
        '-i', stream.name, '-n', '-o', dest_dir,
    ]
    from calibre_plugins.latexformulas_input.config import prefs
    # Look the preference up once and compare against None by identity.
    pandoc_exec = prefs['pandoc_exec']
    if pandoc_exec is not None and pandoc_exec != '':
        args.extend(['-p', pandoc_exec])

    try:
        log.debug(check_output(args, stderr=STDOUT))
    except CalledProcessError as e:
        # NOTE(review): a failed run is only logged and the conversion carries
        # on -- confirm that this best-effort behaviour is intended.
        log.debug(e.returncode)
        log.debug(e.cmd)
        log.debug(e.output)

    opf = OPFCreator(dest_dir, mi)
    markup_dir = dest_dir + os.path.sep + os.path.basename(stream.name) + '-markup'
    log.debug('Markup-dir: ' + markup_dir)
    log.debug('CreateManifestFromFilesIn()')
    opf.create_manifest_from_files_in([markup_dir])
    # Replace the HTML/CSS media types with whatever guess_type() reports.
    for item in opf.manifest:
        if item.media_type == 'text/html':
            log.debug('Item ' + str(item) + ' is of type text/html')
            item.media_type = guess_type('a.html')[0]
            log.debug('Guess type result: ' + item.media_type)
        if item.media_type == 'text/css':
            log.debug('Item ' + str(item) + ' is of type text/css')
            item.media_type = guess_type('a.css')[0]
            log.debug('Guess type result: ' + item.media_type)

    log.debug('Create_spine()')
    opf.create_spine(
        [os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])
    output_path = os.path.join(dest_dir, 'metadata.opf')
    with open(output_path, 'wb') as of:
        opf.render(of)
    log('Exit convert() ...')
    return output_path
def create_opf(self, output_dir, images, toc):
    """Write ``metadata.opf`` and ``toc.ncx`` into *output_dir*.

    The manifest lists ``index.html`` followed by every name in *images*
    under ``images/``; ``cover.png``, when present in *images*, is recorded
    as the cover on ``self.mi``.

    Returns the path of the written ``metadata.opf``.
    """
    with CurrentDir(output_dir):
        if 'cover.png' in images:
            self.mi.cover = os.path.join('images', 'cover.png')

        opf = OPFCreator(output_dir, self.mi)
        manifest = [('index.html', None)]
        manifest.extend((os.path.join('images', name), None) for name in images)
        opf.create_manifest(manifest)
        opf.create_spine(['index.html'])
        opf.set_toc(toc)

        with open('metadata.opf', 'wb') as opffile, open('toc.ncx', 'wb') as tocfile:
            opf.render(opffile, tocfile, 'toc.ncx')

    return os.path.join(output_dir, 'metadata.opf')
def write(self):
    """Write ``index.html``, optional ``docx.css``, ``metadata.opf`` and ``toc.ncx``.

    All files go into ``self.dest_dir``.  Returns the path of the written
    ``metadata.opf``.
    """
    toc = self.create_toc()
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    index_path = os.path.join(self.dest_dir, 'index.html')
    with open(index_path, 'wb') as index_file:
        index_file.write(raw)

    css = self.styles.generate_css(self.dest_dir, self.docx)
    if css:
        css_path = os.path.join(self.dest_dir, 'docx.css')
        with open(css_path, 'wb') as css_file:
            css_file.write(css.encode('utf-8'))

    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    opf.create_spine(['index.html'])

    opf_path = os.path.join(self.dest_dir, 'metadata.opf')
    ncx_path = os.path.join(self.dest_dir, 'toc.ncx')
    with open(opf_path, 'wb') as of, open(ncx_path, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    return os.path.join(self.dest_dir, 'metadata.opf')
def create_opf(self, output_dir, pages, images):
    """Write ``metadata.opf`` into *output_dir*.

    The manifest lists *pages* followed by *images*; the spine is *pages*.
    Returns the path of the written file.
    """
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)
        manifest = [(entry, None) for entry in pages + images]
        opf.create_manifest(manifest)
        opf.create_spine(pages)
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

    return os.path.join(output_dir, 'metadata.opf')
def create_opf(self, output_dir, images):
    """Write ``metadata.opf`` into *output_dir*.

    The manifest lists ``index.html`` followed by every name in *images*
    joined onto ``images/``; ``index.html`` is the only spine item.
    Returns the path of the written file.
    """
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)
        image_entries = [(os.path.join('images/', name), None) for name in images]
        manifest = [('index.html', None)] + image_entries
        opf.create_manifest(manifest)
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

    return os.path.join(output_dir, 'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a LaTeX input by calling the bundled latex2mobi jar, then build an OPF.

    The jar writes markup into ``<cwd>/<input name>-markup``.  That directory
    is scanned for the manifest and ``latex2mobi.html`` inside it becomes the
    single spine entry.

    Returns the path of the ``metadata.opf`` written to the current directory.
    """
    log.debug('Enter convert() ...')
    dest_dir = os.getcwdu()  # note: temp dir from calibre process
    log.debug('dest_dir: ' + dest_dir)
    mi = None

    # call latex2mobi with markup output only
    from subprocess import check_output, STDOUT, CalledProcessError
    jar_path = os.path.join(self.plugin_dir, JAR_FILENAME)
    args = [self.java_exec, '-jar', jar_path, '-i', stream.name, '-n', '-o', dest_dir]

    from calibre_plugins.latexformulas_input.config import prefs
    # Identity comparison with None (instead of ``!= None``) and a single
    # lookup of the preference.
    pandoc = prefs['pandoc_exec']
    if pandoc is not None and pandoc != '':
        args.append('-p')
        args.append(pandoc)

    try:
        log.debug(check_output(args, stderr=STDOUT))
    except CalledProcessError as e:
        # NOTE(review): the failure is logged at debug level only and the
        # conversion continues -- verify this is deliberate.
        log.debug(e.returncode)
        log.debug(e.cmd)
        log.debug(e.output)

    opf = OPFCreator(dest_dir, mi)
    markup_dir = dest_dir + os.path.sep + os.path.basename(stream.name) + '-markup'
    log.debug('Markup-dir: ' + markup_dir)
    log.debug('CreateManifestFromFilesIn()')
    opf.create_manifest_from_files_in([markup_dir])
    # Replace the HTML/CSS media types with whatever guess_type() reports.
    for item in opf.manifest:
        if item.media_type == 'text/html':
            log.debug('Item ' + str(item) + ' is of type text/html')
            item.media_type = guess_type('a.html')[0]
            log.debug('Guess type result: ' + item.media_type)
        if item.media_type == 'text/css':
            log.debug('Item ' + str(item) + ' is of type text/css')
            item.media_type = guess_type('a.css')[0]
            log.debug('Guess type result: ' + item.media_type)

    log.debug('Create_spine()')
    opf.create_spine([os.path.basename(markup_dir) + os.path.sep + 'latex2mobi.html'])
    output_path = os.path.join(dest_dir, 'metadata.opf')
    with open(output_path, 'wb') as of:
        opf.render(of)
    log('Exit convert() ...')
    return output_path
def create_opf(self, output_dir, pages, images):
    """Render an OPF for *pages* and *images* to ``metadata.opf`` in *output_dir*.

    Every page and image becomes a manifest entry; only *pages* form the
    spine.  Returns the path of the written file.
    """
    opf_path = os.path.join(output_dir, 'metadata.opf')
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)
        manifest = []
        manifest.extend((name, None) for name in pages+images)
        opf.create_manifest(manifest)
        opf.create_spine(pages)
        with open('metadata.opf', 'wb') as out:
            opf.render(out)
    return opf_path
def write(self):
    """Emit the HTML, the stylesheet (if any), the OPF and the NCX into ``self.dest_dir``.

    Returns the path of ``metadata.opf``.
    """
    def dest(name):
        # Path of *name* inside the destination directory.
        return os.path.join(self.dest_dir, name)

    toc = self.create_toc()
    raw = html.tostring(self.html, encoding="utf-8", doctype="<!DOCTYPE html>")
    with open(dest("index.html"), "wb") as out:
        out.write(raw)

    css = self.styles.generate_css(self.dest_dir, self.docx)
    if css:
        encoded = css.encode("utf-8")
        with open(dest("docx.css"), "wb") as out:
            out.write(encoded)

    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    opf.create_spine(["index.html"])
    with open(dest("metadata.opf"), "wb") as of, open(dest("toc.ncx"), "wb") as ncx:
        opf.render(of, ncx, "toc.ncx")
    return dest("metadata.opf")
def write(self, doc):
    """Write ``index.html``, optional ``docx.css``, ``metadata.opf`` and ``toc.ncx``.

    All files go into ``self.dest_dir``; ``self.cover_image``, when set, is
    registered as the cover in the OPF guide.

    Returns the path of the written ``metadata.opf``.
    """
    toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map)
    raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
    html_path = os.path.join(self.dest_dir, 'index.html')
    with open(html_path, 'wb') as html_file:
        html_file.write(raw)

    css = self.styles.generate_css(self.dest_dir, self.docx)
    if css:
        with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as css_file:
            css_file.write(css.encode('utf-8'))

    opf = OPFCreator(self.dest_dir, self.mi)
    opf.toc = toc
    opf.create_manifest_from_files_in([self.dest_dir])
    opf.create_spine(['index.html'])
    if self.cover_image is not None:
        opf.guide.set_cover(self.cover_image)

    opf_path = os.path.join(self.dest_dir, 'metadata.opf')
    ncx_path = os.path.join(self.dest_dir, 'toc.ncx')
    with open(opf_path, 'wb') as of, open(ncx_path, 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    return os.path.join(self.dest_dir, 'metadata.opf')
def __call__(self, stream, odir, log):
    """Extract an ODT *stream* into *odir* as ``index.xhtml`` plus ``metadata.opf``.

    *odir* is created if missing and is the working directory for the whole
    extraction.  Missing title/authors default to the translated 'Unknown'.

    Returns the absolute path of the written ``metadata.opf``.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>', '<title>%s</title>' % (mi.title, ))
        try:
            html = self.fix_markup(html, log)
        except Exception:
            # Best effort: keep the unfiltered markup.  ``Exception`` rather
            # than a bare except, so Ctrl-C and SystemExit still propagate.
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(as_bytes(html))
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        opf = OPFCreator(os.path.abspath(getcwd()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in walk(getcwd())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a PDF to HTML in the current directory and write ``metadata.opf``.

    Delegates to ``self.convert_new`` when ``options.new_pdf_engine`` is set.
    Otherwise ``pdftohtml`` is run on ``stream.name`` and every file in the
    current directory is listed in the manifest.  If a ``toc.ncx`` exists and
    has a manifest id, the written OPF is patched so that ``<spine`` carries a
    ``toc`` attribute with that id.

    Returns the path of the generated ``metadata.opf``.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)
    pdftohtml(os.getcwd(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(os.getcwd(), mi)

    manifest = [('index.html', None)]
    images = os.listdir(os.getcwd())
    images.remove('index.html')
    manifest.extend((name, None) for name in images)

    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine(['index.html'])
    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as opffile:
        opf.render(opffile)

    ncxid = opf.manifest.id_for_path('toc.ncx') if os.path.exists('toc.ncx') else None
    if ncxid:
        # Rewrite the OPF in place; the patched text is never shorter than the
        # original, so no truncation is required.
        with lopen('metadata.opf', 'r+b') as f:
            patched = f.read().replace(
                b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
            f.seek(0)
            f.write(patched)

    return os.path.join(os.getcwd(), 'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Run ``pdftohtml`` on the input PDF and describe the result in ``metadata.opf``.

    When ``options.new_pdf_engine`` is set the work is handed to
    ``self.convert_new`` instead.  All files found in the current directory
    after the conversion go into the manifest, and a ``toc.ncx`` that has a
    manifest id is referenced from the ``<spine`` tag of the written OPF.

    Returns the path of ``metadata.opf`` in the current directory.
    """
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml

    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)
    pdftohtml(getcwd(), stream.name, options.no_images)

    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(getcwd(), mi)

    images = os.listdir(getcwd())
    images.remove('index.html')
    manifest = [('index.html', None)] + [(image, None) for image in images]

    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine(['index.html'])
    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as opffile:
        opf.render(opffile)

    if os.path.exists('toc.ncx'):
        ncxid = opf.manifest.id_for_path('toc.ncx')
        if ncxid:
            spine_with_toc = b'<spine toc="%s"' % as_bytes(ncxid)
            with lopen('metadata.opf', 'r+b') as opf_rw:
                raw = opf_rw.read().replace(b'<spine', spine_with_toc)
                opf_rw.seek(0)
                opf_rw.write(raw)

    return os.path.join(getcwd(), 'metadata.opf')
def __call__(self, stream, odir, log):
    """Extract an ODT *stream* into *odir* as ``index.xhtml`` plus ``metadata.opf``.

    *odir* is created if missing and is the working directory for the whole
    extraction.  Missing title/authors default to the translated 'Unknown'.

    Returns the absolute path of the written ``metadata.opf``.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>','<title>%s</title>'%(mi.title,))
        try:
            html = self.fix_markup(html, log)
        except Exception:
            # Best effort: keep the unfiltered markup.  ``Exception`` rather
            # than a bare except, so Ctrl-C and SystemExit still propagate.
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(html.encode('utf-8'))
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in walk(os.getcwdu())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an RTF file to ``index.xhtml`` plus ``metadata.opf`` in the cwd.

    The RTF is first turned into XML (``self.generate_xml``), embedded
    pictures are extracted when a ``*_rtf_pict_dir/picts.rtf`` file exists,
    and the XML is transformed to XHTML with the ``templates/rtf.xsl``
    stylesheet.  Missing title/authors default to the translated 'Unknown'.

    Returns the absolute path of the written ``metadata.opf``.
    Raises ValueError when the RTF uses a feature the XML converter rejects.
    """
    from lxml import etree
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
    from calibre.ebooks.rtf.input import InlineClass

    self.opts = options
    self.log = log
    self.log('Converting RTF to XML...')
    try:
        xml = self.generate_xml(stream.name)
    except RtfInvalidCodeException as e:
        raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

    # Bind the image map unconditionally: it is consulted below for every
    # <pict> element, and used to be undefined (NameError) whenever no
    # picture directory had been produced.
    imap = {}
    d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
    if d:
        try:
            imap = self.extract_images(d[0])
        except Exception:
            # Best effort: carry on without images.
            self.log.exception('Failed to extract images...')

    self.log('Parsing XML...')
    parser = etree.XMLParser(recover=True, no_network=True)
    doc = etree.fromstring(xml, parser=parser)
    border_styles = self.convert_borders(doc)
    # Point each numbered picture at the name of its extracted image file.
    for pict in doc.xpath('//rtf:pict[@num]',
            namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
        num = int(pict.get('num'))
        name = imap.get(num, None)
        if name is not None:
            pict.set('num', name)

    self.log('Converting XML to HTML...')
    inline_class = InlineClass(self.log)
    styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
    extensions = {('calibre', 'inline-class') : inline_class}
    transform = etree.XSLT(styledoc, extensions=extensions)
    result = transform(doc)
    html = u'index.xhtml'
    with open(html, 'wb') as f:
        res = transform.tostring(result)
        # Collapse runs of newlines into a single newline.
        res = re.sub('\n+', '\n', res)
        f.write(res)
    self.write_inline_css(inline_class, border_styles)

    stream.seek(0)
    mi = get_metadata(stream, 'rtf')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    opf = OPFCreator(os.getcwdu(), mi)
    opf.create_manifest([(u'index.xhtml', None)])
    opf.create_spine([u'index.xhtml'])
    # Close the OPF file deterministically instead of leaking the handle.
    with open(u'metadata.opf', 'wb') as opf_file:
        opf.render(opf_file)
    return os.path.abspath(u'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an FB2 stream to ``index.xhtml``, ``inline-styles.css`` and ``metadata.opf``.

    The XML is parsed (with a recovering parser as fallback), embedded
    stylesheets are collected into one CSS text, embedded content is
    extracted and the document is transformed with ``templates/fb2.xsl``.
    Missing title/authors default to the translated 'Unknown'.

    Returns the path of ``metadata.opf`` in the current directory.
    Raises ValueError when the input cannot be parsed as XML at all.
    """
    from lxml import etree
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
    from calibre.ebooks.chardet import xml_to_unicode

    NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
    self.log = log
    log.debug('Parsing XML...')
    raw = stream.read().replace('\0', '')
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            assume_utf8=True, resolve_entities=True)[0]
    try:
        doc = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        try:
            doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            if doc is None:
                raise Exception('parse failed')
        except Exception:
            # Last resort: escape bare ampersands.  The previous replacement
            # ('& ' -> '&') left them unescaped, so it could not make the
            # input any more parseable.
            doc = etree.fromstring(raw.replace('& ', '&amp; '),
                    parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')

    stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
    css = ''.join(
        etree.tostring(s, encoding=unicode, method='text', with_tail=False) + '\n\n'
        for s in stylesheets)
    if css:
        import cssutils, logging
        parser = cssutils.CSSParser(fetcher=None,
                log=logging.getLogger('calibre.css'))

        XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
        text = XHTML_CSS_NAMESPACE + css
        log.debug('Parsing stylesheet...')
        stylesheet = parser.parseString(text)
        stylesheet.namespaces['h'] = XHTML_NS
        css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
        css = re.sub(r'name\s*=\s*', 'class=', css)
    self.extract_embedded_content(doc)

    log.debug('Converting XML to HTML...')
    with open(P('templates/fb2.xsl'), 'rb') as xsl_file:
        ss = xsl_file.read()
    if options.no_inline_fb2_toc:
        log('Disabling generation of inline FB2 TOC')
        ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                re.DOTALL).sub('', ss)

    styledoc = etree.fromstring(ss)
    transform = etree.XSLT(styledoc)
    result = transform(doc)
    # Map image references through self.binary_map; unknown ones are kept.
    for img in result.xpath('//img[@src]'):
        src = img.get('src')
        img.set('src', self.binary_map.get(src, src))
    index = transform.tostring(result)
    # Write through context managers so the handles are closed immediately.
    with open(u'index.xhtml', 'wb') as index_file:
        index_file.write(index)
    with open(u'inline-styles.css', 'wb') as css_file:
        css_file.write(css)

    stream.seek(0)
    mi = get_metadata(stream, 'fb2')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    cpath = None
    if mi.cover_data and mi.cover_data[1]:
        with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
            f.write(mi.cover_data[1])
        cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
    else:
        # Fall back to the first coverpage image that carries an href.
        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                cpath = os.path.abspath(href)
                break

    opf = OPFCreator(os.getcwdu(), mi)
    # Guess the type of each listed file itself.  This used to call
    # guess_type(f), where ``f`` was either the cover file object or an
    # undefined name.
    entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir(u'.')]
    opf.create_manifest(entries)
    opf.create_spine([u'index.xhtml'])
    if cpath:
        opf.guide.set_cover(cpath)
    with open(u'metadata.opf', 'wb') as f:
        opf.render(f)
    return os.path.join(os.getcwdu(), u'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an FB2 stream to ``index.xhtml``, ``inline-styles.css`` and ``metadata.opf``.

    The XML is parsed (with a recovering parser as fallback) and passed
    through ``ensure_namespace``; embedded stylesheets are collected into one
    CSS text, embedded content is extracted and the document is transformed
    with ``templates/fb2.xsl``.  Citation anchors without an href are then
    linked to their matching note anchors.  Missing title/authors default to
    the translated 'Unknown'.

    Returns the path of ``metadata.opf`` in the current directory.
    Raises ValueError when the input cannot be parsed as XML at all.
    """
    from lxml import etree
    from calibre.ebooks.metadata.fb2 import ensure_namespace
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
    from calibre.ebooks.chardet import xml_to_unicode

    self.log = log
    log.debug('Parsing XML...')
    raw = stream.read().replace('\0', '')
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            assume_utf8=True, resolve_entities=True)[0]
    try:
        doc = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        try:
            doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            if doc is None:
                raise Exception('parse failed')
        except Exception:
            # Last resort: escape bare ampersands.  The previous replacement
            # ('& ' -> '&') left them unescaped, so it could not make the
            # input any more parseable.
            doc = etree.fromstring(raw.replace('& ', '&amp; '),
                    parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
    doc = ensure_namespace(doc)
    try:
        fb_ns = doc.nsmap[doc.prefix]
    except Exception:
        fb_ns = FB2NS

    NAMESPACES = {'f': fb_ns, 'l': XLINK_NS}
    stylesheets = doc.xpath(
        '//*[local-name() = "stylesheet" and @type="text/css"]')
    css = ''.join(
        etree.tostring(s, encoding=str, method='text', with_tail=False) + '\n\n'
        for s in stylesheets)
    if css:
        import cssutils, logging
        parser = cssutils.CSSParser(fetcher=None,
                log=logging.getLogger('calibre.css'))

        XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
        text = XHTML_CSS_NAMESPACE + css
        log.debug('Parsing stylesheet...')
        stylesheet = parser.parseString(text)
        stylesheet.namespaces['h'] = XHTML_NS
        css = str(stylesheet.cssText).replace('h|style', 'h|span')
        css = re.sub(r'name\s*=\s*', 'class=', css)
    self.extract_embedded_content(doc)

    log.debug('Converting XML to HTML...')
    with open(P('templates/fb2.xsl'), 'rb') as xsl_file:
        ss = xsl_file.read()
    ss = ss.replace("__FB_NS__", fb_ns)
    if options.no_inline_fb2_toc:
        log('Disabling generation of inline FB2 TOC')
        ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                re.DOTALL).sub('', ss)

    styledoc = etree.fromstring(ss)
    transform = etree.XSLT(styledoc)
    result = transform(doc)

    # Handle links of type note and cite
    notes = {
        a.get('href')[1:]: a
        for a in result.xpath('//a[@link_note and @href]')
        if a.get('href').startswith('#')
    }
    cites = {
        a.get('link_cite'): a
        for a in result.xpath('//a[@link_cite]')
        if not a.get('href', '')
    }
    all_ids = set(result.xpath('//*/@id'))
    for cite, a in cites.items():
        note = notes.get(cite, None)
        # NOTE(review): truth-testing an lxml element is false when it has no
        # children -- confirm that ``note is not None`` is not what is meant.
        if note:
            # Pick the first unused 'citeN' id for notes that have none.
            c = 1
            while 'cite%d' % c in all_ids:
                c += 1
            if not note.get('id', None):
                note.set('id', 'cite%d' % c)
                all_ids.add(note.get('id'))
            a.set('href', '#%s' % note.get('id'))
    # The helper attributes are not wanted in the final markup.
    for x in result.xpath('//*[@link_note or @link_cite]'):
        x.attrib.pop('link_note', None)
        x.attrib.pop('link_cite', None)

    # Map image references through self.binary_map; unknown ones are kept.
    for img in result.xpath('//img[@src]'):
        src = img.get('src')
        img.set('src', self.binary_map.get(src, src))
    index = transform.tostring(result)
    # Write through context managers so the handles are closed immediately.
    with open('index.xhtml', 'wb') as index_file:
        index_file.write(index)
    with open('inline-styles.css', 'wb') as css_file:
        css_file.write(css)

    stream.seek(0)
    mi = get_metadata(stream, 'fb2')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    cpath = None
    if mi.cover_data and mi.cover_data[1]:
        with open('fb2_cover_calibre_mi.jpg', 'wb') as f:
            f.write(mi.cover_data[1])
        cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
    else:
        # Fall back to the first coverpage image that carries an href.
        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href' % XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                cpath = os.path.abspath(href)
                break

    opf = OPFCreator(os.getcwd(), mi)
    entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir('.')]
    opf.create_manifest(entries)
    opf.create_spine(['index.xhtml'])
    if cpath:
        opf.guide.set_cover(cpath)
    with open('metadata.opf', 'wb') as f:
        opf.render(f)
    return os.path.join(os.getcwd(), 'metadata.opf')
def create_opf(self, htmlfile, guide=None, root=None):
    """Build an OPFCreator (manifest, spine, guide, ToC) for ``htmlfile``.

    :param htmlfile: Path of the generated HTML file; its directory is the
        OPF base path and the location of the ``images`` folder.
    :param guide: Optional guide passed to ``opf.create_guide``. When it
        contains a ``toc`` reference, an inline ToC is read from ``root``.
    :param root: Parsed HTML tree searched for the inline ToC. It is
        dereferenced whenever a ``toc`` guide reference is found.
    :return: ``(opf, ncx_manifest_entry)``; the second item is
        ``'toc.ncx'`` when a ``toc`` guide reference exists, else ``None``.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        # Guess the first image and drop the guess if it is not on disk.
        # NOTE(review): the whitespace-collapsed source does not show the
        # nesting of this existence check; it is placed under the ``else``
        # because it validates the guessed default path. Confirm.
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                                           *opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        if os.path.exists(cover):
            # Copy the cover to a fixed name, replacing any previous copy.
            ncover = u'images'+os.sep+u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'),
                (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    for i in getattr(self, 'image_names', []):
        manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])
    toc = None
    if guide is not None:
        opf.create_guide(guide)
        # The last 'toc' reference in the guide wins.
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        # Raw string avoids the invalid '\w' escape; compiled once rather
        # than on every anchor.
        scheme_pat = re.compile(r'\w+://')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    # Links are only collected after the ToC anchor.
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    # Skip links that start with a URL scheme.
                    if href and scheme_pat.match(href) is None:
                        try:
                            text = u' '.join([t.strip() for t in
                                              x.xpath('descendant::text()')])
                        except Exception:
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        item = tocobj.add_item(toc.partition('#')[0],
                                               href[1:], text)
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # Stop at the first page break once an entry has been added.
                if reached and found and \
                        x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry
def write_opf(self, guide, toc, spine, resource_map):
    """Write ``metadata.opf`` and ``toc.ncx`` into the working directory.

    :param guide: Guide assigned to the OPF; also searched for a ``toc``
        reference when ``toc`` has fewer than two items.
    :param toc: Table of contents passed to ``opf.set_toc``. It is replaced
        by an inline ToC when it has fewer than two items and one can be
        read.
    :param spine: Spine passed to ``opf.create_spine``.
    :param resource_map: Sequence indexed by ``self.cover_offset`` to find
        the cover.
    :return: The string ``"metadata.opf"``.
    """
    mi = self.header.exth.mi
    if self.cover_offset is not None and self.cover_offset < len(resource_map):
        mi.cover = resource_map[self.cover_offset]

    if len(list(toc)) < 2:
        self.log.warn("KF8 has no metadata Table of Contents")

        for ref in guide:
            if ref.type == "toc":
                href = ref.href()
                href, frag = urldefrag(href)
                if os.path.exists(href.replace("/", os.sep)):
                    try:
                        toc = self.read_inline_toc(href, frag)
                    except Exception:
                        # Best effort: keep the existing ToC, but do not
                        # trap KeyboardInterrupt/SystemExit.
                        self.log.exception("Failed to read inline ToC")

    opf = OPFCreator(os.getcwdu(), mi)
    opf.guide = guide

    def exclude(path):
        return os.path.basename(path) == "debug-raw.html"

    opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
    # Relabel manifest entries detected as text/html.
    for entry in opf.manifest:
        if entry.mime_type == "text/html":
            entry.mime_type = "application/xhtml+xml"
    opf.create_spine(spine)
    opf.set_toc(toc)
    ppd = self.resc_data.get("page-progression-direction", None)
    if ppd:
        opf.page_progression_direction = ppd

    with open("metadata.opf", "wb") as of, open("toc.ncx", "wb") as ncx:
        opf.render(of, ncx, "toc.ncx")
    return "metadata.opf"
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an FB2 document to XHTML plus an OPF in the working directory.

    The FB2 data obtained via ``get_fb2_data(stream)`` is transformed with
    the ``templates/fb2.xsl`` stylesheet. ``index.xhtml``,
    ``inline-styles.css``, ``metadata.opf`` and (when the metadata carries
    cover data) ``fb2_cover_calibre_mi.jpg`` are written to the current
    working directory.

    :param stream: Seekable file-like object with the FB2 data; it is
        rewound before the metadata is read.
    :param options: Conversion options; only ``no_inline_fb2_toc`` is
        consulted here.
    :param file_ext: Not used by this method.
    :param log: Logger; also stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Path of ``metadata.opf`` joined onto the current working
        directory.
    :raises ValueError: If no document could be parsed from the input.
    """
    from lxml import etree
    from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
    from calibre.ebooks.chardet import xml_to_unicode

    self.log = log
    log.debug('Parsing XML...')
    raw = get_fb2_data(stream)[0]
    raw = raw.replace(b'\0', b'')
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
                         assume_utf8=True, resolve_entities=True)[0]
    try:
        doc = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        try:
            doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            if doc is None:
                raise Exception('parse failed')
        except Exception:
            # Last resort: escape bare ampersands ('& ') and retry. The
            # replacement has to be the entity; merely dropping the space
            # would leave the '&' unescaped and just as unparseable.
            doc = etree.fromstring(raw.replace('& ', '&amp; '),
                                   parser=RECOVER_PARSER)
    if doc is None:
        raise ValueError('The FB2 file is not valid XML')
    doc = ensure_namespace(doc)
    try:
        fb_ns = doc.nsmap[doc.prefix]
    except Exception:
        fb_ns = FB2NS

    NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
    stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
    # Concatenate the text of every embedded stylesheet, each followed by
    # a blank line.
    css = ''.join(
        etree.tostring(s, encoding='unicode', method='text',
                       with_tail=False) + '\n\n'
        for s in stylesheets)
    if css:
        import css_parser
        import logging
        parser = css_parser.CSSParser(fetcher=None,
                                      log=logging.getLogger('calibre.css'))

        XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
        text = XHTML_CSS_NAMESPACE + css
        log.debug('Parsing stylesheet...')
        stylesheet = parser.parseString(text)
        stylesheet.namespaces['h'] = XHTML_NS
        css = stylesheet.cssText
        if isinstance(css, bytes):
            css = css.decode('utf-8', 'replace')
        css = css.replace('h|style', 'h|span')
        css = re.sub(r'name\s*=\s*', 'class=', css)
    self.extract_embedded_content(doc)
    log.debug('Converting XML to HTML...')
    with open(P('templates/fb2.xsl'), 'rb') as f:
        ss = f.read().decode('utf-8')
    ss = ss.replace("__FB_NS__", fb_ns)
    if options.no_inline_fb2_toc:
        log('Disabling generation of inline FB2 TOC')
        ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                        re.DOTALL).sub('', ss)

    styledoc = etree.fromstring(ss)

    transform = etree.XSLT(styledoc)
    result = transform(doc)

    # Handle links of type note and cite
    notes = {a.get('href')[1:]: a
             for a in result.xpath('//a[@link_note and @href]')
             if a.get('href').startswith('#')}
    cites = {a.get('link_cite'): a
             for a in result.xpath('//a[@link_cite]')
             if not a.get('href', '')}
    all_ids = {x for x in result.xpath('//*/@id')}
    for cite, a in iteritems(cites):
        note = notes.get(cite, None)
        # NOTE(review): truthiness of an lxml element reflects whether it
        # has children, not whether it exists; confirm that
        # ``note is not None`` is not what was intended here.
        if note:
            # Pick the first 'citeN' id not already present in the output.
            c = 1
            while 'cite%d' % c in all_ids:
                c += 1
            if not note.get('id', None):
                note.set('id', 'cite%d' % c)
                all_ids.add(note.get('id'))
            a.set('href', '#%s' % note.get('id'))
    # The helper attributes are only needed for the linking pass above.
    for x in result.xpath('//*[@link_note or @link_cite]'):
        x.attrib.pop('link_note', None)
        x.attrib.pop('link_cite', None)

    # Rewrite image sources through self.binary_map (presumably filled in
    # by extract_embedded_content; verify), leaving unknown ones untouched.
    for img in result.xpath('//img[@src]'):
        src = img.get('src')
        img.set('src', self.binary_map.get(src, src))
    index = transform.tostring(result)
    with open(u'index.xhtml', 'wb') as f:
        f.write(index.encode('utf-8'))
    with open(u'inline-styles.css', 'wb') as f:
        f.write(css.encode('utf-8'))
    stream.seek(0)
    mi = get_metadata(stream, 'fb2')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    cpath = None
    if mi.cover_data and mi.cover_data[1]:
        with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
            f.write(mi.cover_data[1])
        cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
    else:
        # No cover in the metadata: use the first coverpage image that
        # carries an href.
        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                cpath = os.path.abspath(href)
                break

    opf = OPFCreator(getcwd(), mi)
    entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir(u'.')]
    opf.create_manifest(entries)
    opf.create_spine([u'index.xhtml'])
    if cpath:
        opf.guide.set_cover(cpath)
    with open(u'metadata.opf', 'wb') as f:
        opf.render(f)
    return os.path.join(getcwd(), u'metadata.opf')
def write_opf(self, guide, toc, spine, resource_map):
    """Write ``metadata.opf`` and ``toc.ncx`` into the working directory.

    When ``self.for_tweak`` is set, ``debug-raw.html`` is deleted first,
    and so is the content of ``images`` if it holds only ``.unknown``
    files.

    :param guide: Guide assigned to the OPF; also searched for a ``toc``
        reference when ``toc`` has fewer than two items.
    :param toc: Table of contents passed to ``opf.set_toc``. It is replaced
        by an inline ToC when it has fewer than two items and one can be
        read.
    :param spine: Spine passed to ``opf.create_spine``.
    :param resource_map: Sequence indexed by ``self.cover_offset`` to find
        the cover.
    :return: The string ``"metadata.opf"``.
    """
    mi = self.header.exth.mi
    if self.cover_offset is not None and self.cover_offset < len(resource_map):
        mi.cover = resource_map[self.cover_offset]

    if len(list(toc)) < 2:
        self.log.warn("KF8 has no metadata Table of Contents")

        for ref in guide:
            if ref.type == "toc":
                href = ref.href()
                href, frag = urldefrag(href)
                if os.path.exists(href.replace("/", os.sep)):
                    try:
                        toc = self.read_inline_toc(href, frag)
                    except Exception:
                        # Best effort: keep the existing ToC, but do not
                        # trap KeyboardInterrupt/SystemExit.
                        self.log.exception("Failed to read inline ToC")

    opf = OPFCreator(os.getcwdu(), mi)
    opf.guide = guide

    def exclude(path):
        return os.path.basename(path) == "debug-raw.html"

    # If there are no images then the azw3 input plugin dumps all
    # binary records as .unknown images, remove them
    if self.for_tweak and os.path.exists("images") and os.path.isdir("images"):
        files = os.listdir("images")
        unknown = [x for x in files if x.endswith(".unknown")]
        if len(files) == len(unknown):
            for f in files:
                os.remove(os.path.join("images", f))

    if self.for_tweak:
        try:
            os.remove("debug-raw.html")
        except OSError:
            # The debug dump is optional; it may simply not exist.
            pass

    opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
    # Relabel manifest entries detected as text/html.
    for entry in opf.manifest:
        if entry.mime_type == "text/html":
            entry.mime_type = "application/xhtml+xml"
    opf.create_spine(spine)
    opf.set_toc(toc)
    ppd = getattr(self.header.exth, "page_progression_direction", None)
    if ppd in {"ltr", "rtl", "default"}:
        opf.page_progression_direction = ppd

    with open("metadata.opf", "wb") as of, open("toc.ncx", "wb") as ncx:
        opf.render(of, ncx, "toc.ncx")
    return "metadata.opf"
def write_opf(self, guide, toc, spine, resource_map):
    """Write ``metadata.opf`` and ``toc.ncx`` into the working directory.

    :param guide: Guide assigned to the OPF; also searched for a ``toc``
        reference when ``toc`` has fewer than two items.
    :param toc: Table of contents passed to ``opf.set_toc``. It is replaced
        by an inline ToC when it has fewer than two items and one can be
        read.
    :param spine: Spine passed to ``opf.create_spine``.
    :param resource_map: Sequence indexed by ``self.cover_offset`` to find
        the cover.
    :return: The string ``'metadata.opf'``.
    """
    mi = self.header.exth.mi
    if (self.cover_offset is not None and
            self.cover_offset < len(resource_map)):
        mi.cover = resource_map[self.cover_offset]

    if len(list(toc)) < 2:
        self.log.warn('KF8 has no metadata Table of Contents')

        for ref in guide:
            if ref.type == 'toc':
                href = ref.href()
                href, frag = urldefrag(href)
                if os.path.exists(href.replace('/', os.sep)):
                    try:
                        toc = self.read_inline_toc(href, frag)
                    except Exception:
                        # Best effort: keep the existing ToC, but do not
                        # trap KeyboardInterrupt/SystemExit.
                        self.log.exception('Failed to read inline ToC')

    opf = OPFCreator(os.getcwdu(), mi)
    opf.guide = guide

    def exclude(path):
        return os.path.basename(path) == 'debug-raw.html'

    opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
    opf.create_spine(spine)
    opf.set_toc(toc)

    with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    return 'metadata.opf'
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection) into an OPF with a ToC.

    Pages are extracted with ``self.get_pages`` and wrapped with
    ``self.create_wrappers``. ``metadata.opf`` and ``toc.ncx`` are written
    to the current working directory.

    :param stream: Open input file; it is closed here and only its
        ``name`` is used afterwards.
    :param opts: Conversion options, stored on ``self.opts``.
        ``dont_add_comic_pages_to_toc`` is read for multi-comic input.
    :param file_ext: ``'cbc'`` selects collection handling; any other
        value treats the input as a single comic.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written ``metadata.opf``.
    :raises ValueError: If no comic yielded any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # Each comic of a collection gets its own directory; a single
        # comic is unpacked into the working directory.
        cdir = u'comic_%d' % (i + 1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    mi = MetaInformation(
        os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
    opf = OPFCreator(os.getcwdu(), mi)
    entries = []

    def href(x):
        # Single comic: bare file name. Collection: '<comic dir>/<file>'.
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
                   [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)
    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)
    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page') + ' %d' % (i + 1),
                         play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                                  _('Page') + ' %d' % (i + 1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Close both outputs deterministically so they are flushed before the
    # path is handed back to the caller.
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')
def convert(self, stream, opts, file_ext, log, accelerators):
    """Turn a comic (or a ``cbc`` collection) into an OPF package with a ToC.

    Pages come from ``self.get_pages``. With ``self.for_viewer`` set, each
    comic gets a single wrapper from ``self.create_viewer_wrapper`` and ToC
    entries point at ``#page_N`` fragments in it; otherwise
    ``self.create_wrappers`` supplies the wrappers and each gets a ToC
    entry. ``metadata.opf`` and ``toc.ncx`` are written to the current
    working directory.

    :param stream: Open input file; closed here, only ``name`` is used
        afterwards.
    :param opts: Conversion options, stored on ``self.opts``.
        ``dont_add_comic_pages_to_toc`` is read for multi-comic input.
    :param file_ext: ``'cbc'`` selects collection handling.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written ``metadata.opf``.
    :raises ValueError: If no comic yielded any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    num_pages_per_comic = []
    for idx, (title, fname) in enumerate(comics_):
        # Collections unpack into per-comic directories, a lone comic
        # into the working directory itself.
        if len(comics_) > 1:
            cdir = 'comic_%d' % (idx + 1)
        else:
            cdir = '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if pages:
            num_pages_per_comic.append(len(pages))
            if self.for_viewer:
                wrappers = [self.create_viewer_wrapper(pages, cdir)]
            else:
                wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    book_title = os.path.basename(stream.name).rpartition('.')[0]
    mi = MetaInformation(book_title, [_('Unknown')])
    opf = OPFCreator(os.getcwd(), mi)

    def href(x):
        # One comic: bare file name. Several: '<comic dir>/<file>'.
        if len(comics) != 1:
            return '/'.join(x.split(os.sep)[-2:])
        return os.path.basename(x)

    entries = []
    cover_href = None
    for _title, pages, wrappers in comics:
        page_entries = [(href(p), None) for p in pages]
        entries += [(href(w), None) for w in wrappers] + page_entries
        # The first page of the first comic that has pages is the cover.
        if cover_href is None and page_entries:
            cover_href = page_entries[0][0]
    opf.create_manifest(entries)

    spine = [href(w) for comic in comics for w in comic[2]]
    self._images = [p for comic in comics for p in comic[1]]
    opf.create_spine(spine)

    if self.for_viewer and cover_href:
        if os.path.isabs(cover_href):
            cover_href = os.path.relpath(cover_href).replace(os.sep, '/')
        opf.guide.set_cover(cover_href)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        if self.for_viewer:
            wrapper_page_href = href(wrappers[0])
            for page_no in range(1, num_pages_per_comic[0] + 1):
                target = '{}#page_{}'.format(wrapper_page_href, page_no)
                toc.add_item(target, None, _('Page') + ' %d' % page_no,
                             play_order=page_no - 1)
        else:
            for page_no, wrapper in enumerate(wrappers, 1):
                toc.add_item(href(wrapper), None,
                             _('Page') + ' %d' % page_no,
                             play_order=page_no - 1)
    else:
        po = 0
        for num_pages, comic in zip(num_pages_per_comic, comics):
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if opts.dont_add_comic_pages_to_toc:
                continue
            if self.for_viewer:
                wrapper_page_href = href(wrappers[0])
                for page_no in range(1, num_pages + 1):
                    target = '{}#page_{}'.format(wrapper_page_href, page_no)
                    stoc.add_item(target, None,
                                  _('Page') + ' %d' % page_no, play_order=po)
                    po += 1
            else:
                for page_no, wrapper in enumerate(wrappers, 1):
                    stoc.add_item(href(wrapper), None,
                                  _('Page') + ' %d' % page_no, play_order=po)
                    po += 1
    opf.set_toc(toc)

    with open('metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
def opf_writer(path, opf_name, manifest, spine, mi):
    """Render an OPF built from ``manifest`` and ``spine`` to ``path/opf_name``.

    :param path: Base directory handed to ``OPFCreator``; the OPF file is
        created inside it.
    :param opf_name: File name of the OPF to write.
    :param manifest: Manifest passed to ``create_manifest``.
    :param spine: Spine passed to ``create_spine``.
    :param mi: Metadata object handed to ``OPFCreator``.
    """
    creator = OPFCreator(path, mi)
    creator.create_manifest(manifest)
    creator.create_spine(spine)
    target = os.path.join(path, opf_name)
    with open(target, 'wb') as out:
        creator.render(out)
def write_opf(self, guide, toc, spine, resource_map):
    """Write ``metadata.opf`` and ``toc.ncx`` into the working directory.

    When ``self.for_tweak`` is set, ``debug-raw.html`` is deleted first,
    and so is the content of ``images`` if it holds only ``.unknown``
    files.

    :param guide: Guide assigned to the OPF; also searched for a ``toc``
        reference when ``toc`` has fewer than two items.
    :param toc: Table of contents passed to ``opf.set_toc``. It is replaced
        by an inline ToC when it has fewer than two items and one can be
        read.
    :param spine: Spine passed to ``opf.create_spine``.
    :param resource_map: Sequence indexed by ``self.cover_offset`` to find
        the cover.
    :return: The string ``'metadata.opf'``.
    """
    mi = self.header.exth.mi
    if (self.cover_offset is not None and
            self.cover_offset < len(resource_map)):
        mi.cover = resource_map[self.cover_offset]

    if len(list(toc)) < 2:
        self.log.warn('KF8 has no metadata Table of Contents')

        for ref in guide:
            if ref.type == 'toc':
                href = ref.href()
                href, frag = urldefrag(href)
                if os.path.exists(href.replace('/', os.sep)):
                    try:
                        toc = self.read_inline_toc(href, frag)
                    except Exception:
                        # Best effort: keep the existing ToC, but do not
                        # trap KeyboardInterrupt/SystemExit.
                        self.log.exception('Failed to read inline ToC')

    opf = OPFCreator(os.getcwdu(), mi)
    opf.guide = guide

    def exclude(path):
        return os.path.basename(path) == 'debug-raw.html'

    # If there are no images then the azw3 input plugin dumps all
    # binary records as .unknown images, remove them
    if self.for_tweak and os.path.exists('images') and os.path.isdir(
            'images'):
        files = os.listdir('images')
        unknown = [x for x in files if x.endswith('.unknown')]
        if len(files) == len(unknown):
            for f in files:
                os.remove(os.path.join('images', f))

    if self.for_tweak:
        try:
            os.remove('debug-raw.html')
        except OSError:
            # The debug dump is optional; it may simply not exist.
            pass

    opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
    # Relabel manifest entries detected as text/html.
    for entry in opf.manifest:
        if entry.mime_type == 'text/html':
            entry.mime_type = 'application/xhtml+xml'
    opf.create_spine(spine)
    opf.set_toc(toc)
    ppd = getattr(self.header.exth, 'page_progression_direction', None)
    if ppd in {'ltr', 'rtl', 'default'}:
        opf.page_progression_direction = ppd

    with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    return 'metadata.opf'
def create_opf(self, htmlfile, guide=None, root=None):
    """Build an OPFCreator (manifest, spine, guide, ToC) for ``htmlfile``.

    :param htmlfile: Path of the generated HTML file; its directory is the
        OPF base path and the location of the ``images`` folder.
    :param guide: Optional guide passed to ``opf.create_guide``. When it
        contains a ``toc`` reference, an inline ToC is read from ``root``.
    :param root: Parsed HTML tree searched for the inline ToC. It is
        dereferenced whenever a ``toc`` guide reference is found.
    :return: ``(opf, ncx_manifest_entry)``; the second item is
        ``'toc.ncx'`` when a ``toc`` guide reference exists, else ``None``.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        # Guess the first image and drop the guess if it is not on disk.
        # NOTE(review): the whitespace-collapsed source does not show the
        # nesting of this existence check; it is placed under the ``else``
        # because it validates the guessed default path. Confirm.
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                                           *opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        if os.path.exists(cover):
            # Copy the cover to a fixed name, replacing any previous copy.
            ncover = u'images'+os.sep+u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'),
                (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    for i in getattr(self, 'image_names', []):
        manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])
    toc = None
    if guide is not None:
        opf.create_guide(guide)
        # The last 'toc' reference in the guide wins.
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        # Compiled once instead of on every anchor in the loop below.
        scheme_pat = re.compile(r'\w+://')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    # Links are only collected after the ToC anchor.
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    # Skip links that start with a URL scheme.
                    if href and scheme_pat.match(href) is None:
                        try:
                            text = u' '.join([t.strip() for t in
                                              x.xpath('descendant::text()')])
                        except Exception:
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        item = tocobj.add_item(toc.partition('#')[0],
                                               href[1:], text)
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # Stop at the first page break once an entry has been added.
                if reached and found and \
                        x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection) into an OPF with a ToC.

    Pages are extracted with ``self.get_pages`` and wrapped with
    ``self.create_wrappers``. ``metadata.opf`` and ``toc.ncx`` are written
    to the current working directory.

    :param stream: Open input file; it is closed here and only its
        ``name`` is used afterwards.
    :param opts: Conversion options, stored on ``self.opts``.
        ``dont_add_comic_pages_to_toc`` is read for multi-comic input.
    :param file_ext: ``'cbc'`` selects collection handling; any other
        value treats the input as a single comic.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written ``metadata.opf``.
    :raises ValueError: If no comic yielded any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log= opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()
    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # Each comic of a collection gets its own directory; a single
        # comic is unpacked into the working directory.
        cdir = u'comic_%d'%(i+1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s'%stream.name)

    mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
                         [_('Unknown')])
    opf = OPFCreator(os.getcwdu(), mi)
    entries = []

    def href(x):
        # Single comic: bare file name. Collection: '<comic dir>/<file>'.
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
                   [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)
    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)
    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                         play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                                  _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
    opf.set_toc(toc)
    # Close both outputs deterministically so they are flushed before the
    # path is handed back to the caller.
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an RTF file to XHTML plus an OPF in the working directory.

    The RTF is turned into XML by ``self.generate_xml`` and then
    transformed with the ``templates/rtf.xsl`` stylesheet.
    ``index.xhtml`` and ``metadata.opf`` are written to the current
    working directory; ``self.write_inline_css`` is called for the styles.

    :param stream: Seekable input file; ``stream.name`` is handed to the
        RTF parser and the stream is rewound to read the metadata.
    :param options: Conversion options, stored on ``self.opts``.
    :param file_ext: Not used by this method.
    :param log: Logger, stored on ``self.log``.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written ``metadata.opf``.
    :raises ValueError: If the RTF parser raises
        ``RtfInvalidCodeException``.
    """
    from lxml import etree
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
    from calibre.ebooks.rtf.input import InlineClass
    from calibre.utils.xml_parse import safe_xml_fromstring

    self.opts = options
    self.log = log
    self.log('Converting RTF to XML...')
    try:
        xml = self.generate_xml(stream.name)
    except RtfInvalidCodeException as e:
        self.log.exception('Unable to parse RTF')
        raise ValueError(
            _('This RTF file has a feature calibre does not '
              'support. Convert it to HTML first and then try it.\n%s') % e)

    # Always bind imap: the pict loop below reads it even when no picture
    # directory was produced, which used to raise NameError.
    imap = {}
    d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
    if d:
        try:
            imap = self.extract_images(d[0])
        except Exception:
            # Best effort: carry on without images.
            self.log.exception('Failed to extract images...')

    self.log('Parsing XML...')
    doc = safe_xml_fromstring(xml)
    border_styles = self.convert_borders(doc)
    # Replace picture numbers with the mapped names where one exists.
    for pict in doc.xpath(
            '//rtf:pict[@num]',
            namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
        num = int(pict.get('num'))
        name = imap.get(num, None)
        if name is not None:
            pict.set('num', name)

    self.log('Converting XML to HTML...')
    inline_class = InlineClass(self.log)
    styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True),
                                   recover=False)
    extensions = {('calibre', 'inline-class'): inline_class}
    transform = etree.XSLT(styledoc, extensions=extensions)
    result = transform(doc)
    html = 'index.xhtml'
    with open(html, 'wb') as f:
        res = as_bytes(transform.tostring(result))
        # clean multiple \n
        res = re.sub(b'\n+', b'\n', res)
        f.write(res)
    self.write_inline_css(inline_class, border_styles)
    stream.seek(0)
    mi = get_metadata(stream, 'rtf')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    opf = OPFCreator(os.getcwd(), mi)
    opf.create_manifest([('index.xhtml', None)])
    opf.create_spine(['index.xhtml'])
    # Close the OPF deterministically so it is flushed before returning.
    with open('metadata.opf', 'wb') as opf_file:
        opf.render(opf_file)
    return os.path.abspath('metadata.opf')
def write_opf(self, guide, toc, spine, resource_map):
    """Write ``metadata.opf`` and ``toc.ncx`` into the working directory.

    When ``self.for_tweak`` is set, ``debug-raw.html`` is deleted first,
    and so is the content of ``images`` if it holds only ``.unknown``
    files.

    :param guide: Guide assigned to the OPF; also searched for a ``toc``
        reference when ``toc`` has fewer than two items.
    :param toc: Table of contents passed to ``opf.set_toc``. It is replaced
        by an inline ToC when it has fewer than two items and one can be
        read.
    :param spine: Spine passed to ``opf.create_spine``.
    :param resource_map: Sequence indexed by ``self.cover_offset`` to find
        the cover.
    :return: The string ``'metadata.opf'``.
    """
    mi = self.header.exth.mi
    if (self.cover_offset is not None and
            self.cover_offset < len(resource_map)):
        mi.cover = resource_map[self.cover_offset]

    if len(list(toc)) < 2:
        self.log.warn('KF8 has no metadata Table of Contents')

        for ref in guide:
            if ref.type == 'toc':
                href = ref.href()
                href, frag = urldefrag(href)
                if os.path.exists(href.replace('/', os.sep)):
                    try:
                        toc = self.read_inline_toc(href, frag)
                    except Exception:
                        # Best effort: keep the existing ToC, but do not
                        # trap KeyboardInterrupt/SystemExit.
                        self.log.exception('Failed to read inline ToC')

    opf = OPFCreator(os.getcwdu(), mi)
    opf.guide = guide

    def exclude(path):
        return os.path.basename(path) == 'debug-raw.html'

    # If there are no images then the azw3 input plugin dumps all
    # binary records as .unknown images, remove them
    if self.for_tweak and os.path.exists('images') and os.path.isdir('images'):
        files = os.listdir('images')
        unknown = [x for x in files if x.endswith('.unknown')]
        if len(files) == len(unknown):
            for f in files:
                os.remove(os.path.join('images', f))

    if self.for_tweak:
        try:
            os.remove('debug-raw.html')
        except OSError:
            # The debug dump is optional; it may simply not exist.
            pass

    opf.create_manifest_from_files_in([os.getcwdu()], exclude=exclude)
    # Relabel manifest entries detected as text/html.
    for entry in opf.manifest:
        if entry.mime_type == 'text/html':
            entry.mime_type = 'application/xhtml+xml'
    opf.create_spine(spine)
    opf.set_toc(toc)
    ppd = getattr(self.header.exth, 'page_progression_direction', None)
    if ppd in {'ltr', 'rtl', 'default'}:
        opf.page_progression_direction = ppd

    with open('metadata.opf', 'wb') as of, open('toc.ncx', 'wb') as ncx:
        opf.render(of, ncx, 'toc.ncx')
    return 'metadata.opf'