def structure_toc(entries):
    """Build a nested TOC from a flat sequence of entries, using indentation.

    Every entry must provide ``indent``, ``anchor`` and ``text`` attributes.
    The distinct ``indent`` values are sorted and an entry's nesting level is
    the position of its indent in that sorted list. Each entry is attached to
    the most recently added entry of a shallower level, or to the root when
    there is none.

    When more than six distinct indent values occur, no nesting is attempted
    and all entries are added directly to the root.

    Returns the newly created root ``TOC``.
    """
    indent_vals = sorted({x.indent for x in entries})
    # last_found[level] is the latest node added at that level (or None)
    last_found = [None for i in indent_vals]
    newtoc = TOC()

    if len(indent_vals) > 6:
        for x in entries:
            newtoc.add_item('index.html', x.anchor, x.text)
        return newtoc

    def find_parent(level):
        # Closest shallower level that currently has a node wins
        candidates = last_found[:level]
        for x in reversed(candidates):
            if x is not None:
                return x
        return newtoc

    for item in entries:
        level = indent_vals.index(item.indent)
        parent = find_parent(level)
        last_found[level] = parent.add_item(
            'index.html', item.anchor, item.text)
        # Deeper levels belong to the previous branch, forget them.
        # range() is used because xrange() does not exist on Python 3.
        for i in range(level+1, len(last_found)):
            last_found[i] = None
    return newtoc
def structure_toc(entries):
    """Turn a flat list of entries into a TOC tree based on their indents.

    Entries need ``indent``, ``anchor`` and ``text`` attributes. The rank of
    an entry's indent among all distinct indents is its depth; an entry is
    added under the latest node of any shallower depth, falling back to the
    root. With more than six distinct indents the result is a flat TOC.

    Returns the root ``TOC`` that was created.
    """
    distinct_indents = sorted({entry.indent for entry in entries})
    # newest[d] holds the node most recently added at depth d, if any
    newest = [None] * len(distinct_indents)
    root = TOC()

    # Too many levels to nest sensibly: list everything at the top
    if len(distinct_indents) > 6:
        for entry in entries:
            root.add_item('index.html', entry.anchor, entry.text)
        return root

    for entry in entries:
        depth = distinct_indents.index(entry.indent)
        shallower = reversed(newest[:depth])
        parent = next(
            (node for node in shallower if node is not None), root)
        newest[depth] = parent.add_item(
            'index.html', entry.anchor, entry.text)
        # Anything deeper was part of the previous branch
        newest[depth + 1:] = [None] * (len(newest) - depth - 1)
    return root
def _parse_toc(self, ul, basedir=getattr(os, 'getcwdu', os.getcwd)()):
    """Recursively convert a ``ul`` element into a ``TOC`` tree.

    For every direct ``li`` child, the ``Local`` param supplies the target
    (split into href and fragment at the first ``#``) and the ``Name`` param
    supplies the title, which is passed through ``self._deentity``. A nested
    ``ul`` inside an ``li`` is parsed recursively and appended as a child TOC.

    ``self._playorder`` is incremented once for the TOC created here and once
    for every item added.

    :param ul: the list element to parse; called as ``ul('li', ...)``.
    :param basedir: passed to ``TOC`` as ``base_path``. The default is the
        working directory at the time this function is defined, not at the
        time it is called.
    :return: the ``TOC`` built for this list.
    """
    toc = TOC(play_order=self._playorder, base_path=basedir, text='')
    self._playorder += 1
    for li in ul('li', recursive=False):
        href = li.object('param', {'name': 'Local'})[0]['value']
        # Split on the first '#' only: unpacking the result of a plain
        # split('#') raises ValueError if the target has several '#'.
        href, sep, frag = href.partition('#')
        if not sep:
            frag = None
        name = self._deentity(
            li.object('param', {'name': 'Name'})[0]['value'])
        toc.add_item(href, frag, name, play_order=self._playorder)
        self._playorder += 1
        if li.ul:
            # Hand basedir down so nested TOCs share the caller's base path
            child = self._parse_toc(li.ul, basedir)
            child.parent = toc
            toc.append(child)
    return toc
def _parse_toc(self, ul, basedir=getcwd()):
    """Recursively build a ``TOC`` from a ``ul`` element.

    Each direct ``li`` child contributes one item: its ``Local`` param is the
    target (href plus optional fragment after the first ``#``) and its
    ``Name`` param, run through ``self._deentity``, is the title. If the
    ``li`` contains a ``ul``, that list is parsed recursively and appended to
    the TOC returned here.

    ``self._playorder`` is advanced by one for the TOC itself and by one for
    each item added.

    :param ul: the list element to parse; called as ``ul('li', ...)``.
    :param basedir: used as the ``base_path`` of the created ``TOC``. Note
        that the default is evaluated once, when the function is defined.
    :return: the ``TOC`` for this list.
    """
    toc = TOC(play_order=self._playorder, base_path=basedir, text='')
    self._playorder += 1
    for li in ul('li', recursive=False):
        href = li.object('param', {'name': 'Local'})[0]['value']
        if '#' in href:
            # maxsplit=1 keeps the two-value unpacking safe even when the
            # fragment itself contains further '#' characters
            href, frag = href.split('#', 1)
        else:
            frag = None
        name = self._deentity(
            li.object('param', {'name': 'Name'})[0]['value'])
        toc.add_item(href, frag, name, play_order=self._playorder)
        self._playorder += 1
        if li.ul:
            # Propagate basedir so sub-TOCs use the same base path
            child = self._parse_toc(li.ul, basedir)
            child.parent = toc
            toc.append(child)
    return toc
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic (or a ``cbc`` collection of comics) to OPF + NCX.

    Pages are obtained with ``self.get_pages`` and wrapped with
    ``self.create_wrappers``; a manifest, spine and table of contents are
    then written to ``metadata.opf`` and ``toc.ncx`` in the current working
    directory. ``stream`` is closed by this method. ``accelerators`` is not
    used.

    :param stream: open input file; only ``stream.name`` is read here,
        except for ``cbc`` input where the stream is handed to
        ``self.get_comics_from_collection``.
    :param opts: conversion options; ``dont_add_comic_pages_to_toc`` is
        consulted when there is more than one comic.
    :param file_ext: input extension; ``'cbc'`` selects collection handling.
    :param log: stored on ``self.log``.
    :return: absolute path of the written ``metadata.opf``.
    :raises ValueError: if no comic yielded any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # A single comic is unpacked in place, several get a dir each
        cdir = u'comic_%d' % (i+1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    mi = MetaInformation(
        os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
    # os.getcwdu only exists on Python 2; fall back to os.getcwd elsewhere
    opf = OPFCreator(getattr(os, 'getcwdu', os.getcwd)(), mi)
    entries = []

    def href(x):
        # With one comic files sit in the cwd, otherwise keep the
        # containing directory name as part of the href
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
            [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)

    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page')+' %d' % (i+1),
                         play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None, _('Page')+' %d' % (i+1),
                                  play_order=po)
                    po += 1
    opf.set_toc(toc)

    # Context managers guarantee both files are flushed and closed, even
    # if rendering raises
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')
def convert(self, stream, opts, file_ext, log, accelerators):
    """Turn a comic, or a ``cbc`` collection of comics, into OPF + NCX files.

    Pages come from ``self.get_pages``. When ``self.for_viewer`` is set each
    comic gets a single wrapper from ``self.create_viewer_wrapper`` and TOC
    entries point at ``#page_N`` fragments inside it; otherwise
    ``self.create_wrappers`` supplies the wrappers and each one gets its own
    TOC entry. ``metadata.opf`` and ``toc.ncx`` are written to the current
    working directory. ``stream`` is closed here and ``accelerators`` is
    unused.

    :return: absolute path of ``metadata.opf``.
    :raises ValueError: when none of the comics produced any pages.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    num_pages_per_comic = []
    for index, (title, fname) in enumerate(comics_):
        # One directory per comic, unless there is only a single comic
        if len(comics_) > 1:
            cdir = 'comic_%d' % (index + 1)
        else:
            cdir = '.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        page_files = self.get_pages(fname, cdir)
        if not page_files:
            continue
        num_pages_per_comic.append(len(page_files))
        if self.for_viewer:
            wrapper_files = [self.create_viewer_wrapper(page_files, cdir)]
        else:
            wrapper_files = self.create_wrappers(page_files)
        comics.append((title, page_files, wrapper_files))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    book_title = os.path.basename(stream.name).rpartition('.')[0]
    mi = MetaInformation(book_title, [_('Unknown')])
    opf = OPFCreator(os.getcwd(), mi)

    def href(x):
        # Bare file name for a lone comic, "dir/file" otherwise
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    def page_targets(wrapper_files, page_count):
        # TOC targets for the pages of one comic, in page order
        if self.for_viewer:
            wrapper_page_href = href(wrapper_files[0])
            return ['{}#page_{}'.format(wrapper_page_href, number)
                    for number in range(1, page_count + 1)]
        return [href(w) for w in wrapper_files]

    entries = []
    cover_href = None
    for title, page_files, wrapper_files in comics:
        page_hrefs = [href(p) for p in page_files]
        entries.extend((href(w), None) for w in wrapper_files)
        entries.extend((p, None) for p in page_hrefs)
        # The first page of the first comic that has pages is the cover
        if cover_href is None and page_hrefs:
            cover_href = page_hrefs[0]
    opf.create_manifest(entries)

    spine = [href(w) for comic in comics for w in comic[2]]
    self._images = [p for comic in comics for p in comic[1]]
    opf.create_spine(spine)

    if self.for_viewer and cover_href:
        if os.path.isabs(cover_href):
            cover_href = os.path.relpath(cover_href).replace(os.sep, '/')
        opf.guide.set_cover(cover_href)

    toc = TOC()
    if len(comics) == 1:
        targets = page_targets(comics[0][2], num_pages_per_comic[0])
        for i, target in enumerate(targets):
            toc.add_item(target, None, _('Page') + ' %d' % (i + 1),
                         play_order=i)
    else:
        po = 0
        for num_pages, comic in zip(num_pages_per_comic, comics):
            po += 1
            wrapper_files = comic[2]
            stoc = toc.add_item(href(wrapper_files[0]), None, comic[0],
                                play_order=po)
            if opts.dont_add_comic_pages_to_toc:
                continue
            targets = page_targets(wrapper_files, num_pages)
            for i, target in enumerate(targets):
                stoc.add_item(target, None, _('Page') + ' %d' % (i + 1),
                              play_order=po)
                po += 1
    opf.set_toc(toc)

    with open('metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, 'toc.ncx')
    return os.path.abspath('metadata.opf')
def get_toc(self):
    '''
    Build the TOC tree from ``self.toc``.

    The TOC can have up to 5 levels, 0 - 4 inclusive. Each item is added at
    the depth given by its level. If that depth is invalid (the item would
    not have a valid parent) it is added to the next valid level above the
    specified one. Any level other than '0' - '3' is treated as level 4.
    '''
    # Root object every item ends up under.
    n_toc = TOC()
    level_names = ('0', '1', '2', '3')
    # anchors[d] is the node that items of depth d + 1 are attached to.
    anchors = [None, None, None, None]

    for level, (href, ident, text) in self.toc:
        depth = level_names.index(level) if level in level_names else 4
        if depth == 0:
            # A new top level item starts a fresh branch.
            anchors = [n_toc.add_item(href, ident, text), None, None, None]
            continue

        slot = depth - 1
        if anchors[slot] is None:
            # No valid parent: adopt the closest shallower node, or the
            # root, and remember it as the parent for this depth.
            anchors[slot] = next(
                (node for node in reversed(anchors[:slot])
                 if node is not None),
                n_toc)
        added = anchors[slot].add_item(href, ident, text)
        if depth < 4:
            # Level 4 items never become parents themselves.
            anchors[depth] = added
            anchors[depth + 1:] = [None] * (3 - depth)
    return n_toc
def get_toc(self):
    '''
    Create the TOC tree described by ``self.toc``.

    Up to 5 levels, 0 - 4 inclusive, are supported and every item is placed
    at the depth its level asks for. When that depth has no valid parent the
    item goes to the next valid level above it. Levels other than u'0' -
    u'3' count as level 4.
    '''
    # All items are added somewhere below this object.
    root = TOC()
    level_names = (u'0', u'1', u'2', u'3')
    # parents[d] is where items of depth d + 1 get attached.
    parents = [None] * 4

    def resolve(slot):
        # Nearest existing node at this slot or any shallower one.
        for idx in range(slot, -1, -1):
            if parents[idx] is not None:
                return parents[idx]
        return root

    for level, (href, id_, text) in self.toc:
        if level in level_names:
            depth = level_names.index(level)
        else:
            depth = 4

        if depth:
            # The chosen fallback stays in place for later siblings.
            parents[depth - 1] = resolve(depth - 1)
            holder = parents[depth - 1]
        else:
            holder = root

        node = holder.add_item(href, id_, text)
        if depth < 4:
            parents[depth] = node
            # Deeper nodes belonged to the previous branch.
            for deeper in range(depth + 1, 4):
                parents[deeper] = None
    return root
def create_opf(self, htmlfile, guide=None, root=None):
    """Create the OPF for ``htmlfile`` and, if the guide names one, its TOC.

    Metadata comes from ``self.book_header.exth.mi`` when present, otherwise
    ``self.embedded_mi``, otherwise a minimal record built from the header
    title. A cover that exists on disk is copied to
    ``images/calibre_cover.jpg``. The manifest lists the HTML file,
    ``styles.css``, every name in ``self.image_names`` (if that attribute
    exists) and the copied cover.

    If ``guide`` contains a reference of type ``toc``, the links that follow
    the referenced element in ``root`` are collected into a TOC, which is
    passed through ``self.structure_toc`` and set on the OPF.

    :param htmlfile: path of the HTML file; its directory is the OPF base.
    :param guide: optional guide handed to ``opf.create_guide``.
    :param root: parsed document tree; required whenever the guide has a
        ``toc`` reference, since ``root.xpath`` and ``root.iter`` are called.
    :return: ``(opf, ncx_manifest_entry)`` where the second value is
        ``'toc.ncx'`` if the guide has a ``toc`` reference, else ``None``.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (
            self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        # Guess the first image, but only if it is actually there
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                                           *opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        if os.path.exists(cover):
            ncover = u'images'+os.sep+u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'),
                (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    for i in getattr(self, 'image_names', []):
        manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])
    toc = None
    if guide is not None:
        opf.create_guide(guide)
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        # Links starting with a URL scheme are external and are skipped
        scheme_pat = re.compile('\\w+://')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    if href and scheme_pat.match(href) is None:
                        try:
                            text = u' '.join(
                                [t.strip() for t in
                                 x.xpath('descendant::text()')])
                        except Exception:
                            # Best effort: keep the link with an empty
                            # title, but let KeyboardInterrupt and
                            # SystemExit propagate
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        # href[1:] drops the first character, presumably
                        # the leading '#' of an in-document link
                        item = tocobj.add_item(
                            toc.partition('#')[0], href[1:], text)
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # The first page break after at least one link ends the TOC
                if reached and found and \
                        x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry
def create_opf(self, htmlfile, guide=None, root=None):
    """Build the OPF for ``htmlfile`` plus a TOC taken from the guide target.

    Metadata is ``self.book_header.exth.mi`` if that attribute exists, else
    ``self.embedded_mi``, else a minimal record made from the header title.
    If the chosen cover exists on disk it is copied to
    ``images/calibre_cover.jpg`` and that copy becomes the OPF cover. The
    manifest contains the HTML file, ``styles.css``, the files named in
    ``self.image_names`` (when present) and the copied cover.

    When ``guide`` has a reference of type ``toc``, the ``a`` elements found
    after the referenced element in ``root`` are turned into TOC items; the
    result goes through ``self.structure_toc`` before being set on the OPF.

    :param htmlfile: path of the HTML file; its directory is the OPF base.
    :param guide: optional guide passed to ``opf.create_guide``.
    :param root: parsed document tree. It must be given if the guide has a
        ``toc`` reference, because ``root.xpath``/``root.iter`` are called.
    :return: ``(opf, ncx_manifest_entry)``; the entry is ``'toc.ncx'`` when
        the guide has a ``toc`` reference and ``None`` otherwise.
    """
    mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
    if mi is None:
        mi = MetaInformation(self.book_header.title, [_('Unknown')])
    opf = OPFCreator(os.path.dirname(htmlfile), mi)
    if hasattr(self.book_header.exth, 'cover_offset'):
        opf.cover = u'images/%05d.jpg' % (
            self.book_header.exth.cover_offset + 1)
    elif mi.cover is not None:
        opf.cover = mi.cover
    else:
        # Fall back to the first image, provided the file exists
        opf.cover = u'images/%05d.jpg' % 1
        if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                                           *opf.cover.split('/'))):
            opf.cover = None

    cover = opf.cover
    cover_copied = None
    if cover is not None:
        cover = cover.replace('/', os.sep)
        if os.path.exists(cover):
            ncover = u'images'+os.sep+u'calibre_cover.jpg'
            if os.path.exists(ncover):
                os.remove(ncover)
            shutil.copyfile(cover, ncover)
            cover_copied = os.path.abspath(ncover)
            opf.cover = ncover.replace(os.sep, '/')

    manifest = [(htmlfile, 'application/xhtml+xml'),
                (os.path.abspath(u'styles.css'), 'text/css')]
    bp = os.path.dirname(htmlfile)
    for i in getattr(self, 'image_names', []):
        path = os.path.join(bp, 'images', i)
        manifest.append((path, 'image/jpeg'))
    if cover_copied is not None:
        manifest.append((cover_copied, 'image/jpeg'))

    opf.create_manifest(manifest)
    opf.create_spine([os.path.basename(htmlfile)])
    toc = None
    if guide is not None:
        opf.create_guide(guide)
        for ref in opf.guide:
            if ref.type.lower() == 'toc':
                toc = ref.href()

    ncx_manifest_entry = None
    if toc:
        ncx_manifest_entry = 'toc.ncx'
        elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
        tocobj = None
        ent_pat = re.compile(r'&(\S+?);')
        if elems:
            tocobj = TOC()
            found = False
            reached = False
            for x in root.iter():
                if x == elems[-1]:
                    reached = True
                    continue
                if reached and x.tag == 'a':
                    href = x.get('href', '')
                    # Raw string: '\w' in a normal string literal is an
                    # invalid escape sequence. Links that begin with a URL
                    # scheme are skipped.
                    if href and re.match(r'\w+://', href) is None:
                        try:
                            text = u' '.join(
                                [t.strip() for t in
                                 x.xpath('descendant::text()')])
                        except Exception:
                            # Keep going with an empty title, without
                            # swallowing KeyboardInterrupt/SystemExit
                            text = ''
                        text = ent_pat.sub(entity_to_unicode, text)
                        # href[1:] removes the first character, presumably
                        # the '#' of an in-document link
                        item = tocobj.add_item(
                            toc.partition('#')[0], href[1:], text)
                        item.left_space = int(self.get_left_whitespace(x))
                        found = True
                # Stop at the first page break once a link has been seen
                if reached and found and \
                        x.get('class', None) == 'mbp_pagebreak':
                    break
        if tocobj is not None:
            tocobj = self.structure_toc(tocobj)
            opf.set_toc(tocobj)

    return opf, ncx_manifest_entry
def convert(self, stream, opts, file_ext, log, accelerators):
    """Convert a comic, or a ``cbc`` collection of comics, to OPF + NCX.

    For each comic the pages returned by ``self.get_pages`` are wrapped via
    ``self.create_wrappers``. The manifest, spine and TOC built from them
    are rendered to ``metadata.opf`` and ``toc.ncx`` in the current working
    directory. The method closes ``stream``; ``accelerators`` is not used.

    :param stream: open input file. Its ``name`` is used for the title and
        error message; for ``cbc`` input the stream itself is passed to
        ``self.get_comics_from_collection``.
    :param opts: conversion options; ``dont_add_comic_pages_to_toc`` is read
        only when several comics are present.
    :param file_ext: input extension, ``'cbc'`` meaning a collection.
    :param log: stored as ``self.log``.
    :return: absolute path of ``metadata.opf``.
    :raises ValueError: if no pages were found in any comic.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.toc import TOC

    self.opts, self.log = opts, log
    if file_ext == 'cbc':
        comics_ = self.get_comics_from_collection(stream)
    else:
        comics_ = [['Comic', os.path.abspath(stream.name)]]
    stream.close()

    comics = []
    for i, x in enumerate(comics_):
        title, fname = x
        # Only collections get one sub-directory per comic
        cdir = u'comic_%d' % (i + 1) if len(comics_) > 1 else u'.'
        cdir = os.path.abspath(cdir)
        if not os.path.exists(cdir):
            os.makedirs(cdir)
        pages = self.get_pages(fname, cdir)
        if not pages:
            continue
        wrappers = self.create_wrappers(pages)
        comics.append((title, pages, wrappers))

    if not comics:
        raise ValueError('No comic pages found in %s' % stream.name)

    mi = MetaInformation(
        os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
    # os.getcwdu is Python 2 only; use os.getcwd where it is missing
    getcwd = getattr(os, 'getcwdu', os.getcwd)
    opf = OPFCreator(getcwd(), mi)
    entries = []

    def href(x):
        # A lone comic lives in the cwd; otherwise keep "dir/file"
        if len(comics) == 1:
            return os.path.basename(x)
        return '/'.join(x.split(os.sep)[-2:])

    for comic in comics:
        pages, wrappers = comic[1:]
        entries += [(w, None) for w in map(href, wrappers)] + \
            [(x, None) for x in map(href, pages)]
    opf.create_manifest(entries)

    spine = []
    for comic in comics:
        spine.extend(map(href, comic[2]))
    self._images = []
    for comic in comics:
        self._images.extend(comic[1])
    opf.create_spine(spine)

    toc = TOC()
    if len(comics) == 1:
        wrappers = comics[0][2]
        for i, x in enumerate(wrappers):
            toc.add_item(href(x), None, _('Page') + ' %d' % (i + 1),
                         play_order=i)
    else:
        po = 0
        for comic in comics:
            po += 1
            wrappers = comic[2]
            stoc = toc.add_item(href(wrappers[0]), None, comic[0],
                                play_order=po)
            if not opts.dont_add_comic_pages_to_toc:
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                                  _('Page') + ' %d' % (i + 1),
                                  play_order=po)
                    po += 1
    opf.set_toc(toc)

    # Close both output files deterministically, also when render() fails
    with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
        opf.render(m, n, u'toc.ncx')
    return os.path.abspath(u'metadata.opf')