Exemplo n.º 1
0
def structure_toc(entries):
    """Turn a flat sequence of TOC entries into a nested ``TOC``.

    Every entry must expose ``indent``, ``anchor`` and ``text``. The
    distinct ``indent`` values are sorted and an entry's nesting level is
    the rank of its ``indent`` in that sorted list. An entry becomes a child
    of the most recently added entry at the nearest shallower level, or of
    the root when no such entry exists.

    If there are more than six distinct indent values the result is a flat
    TOC instead. All items point at 'index.html'.

    Returns the newly built ``TOC``.
    """
    indent_vals = sorted({x.indent for x in entries})
    # last_found[k] is the latest item added at level k, or None.
    last_found = [None for i in indent_vals]
    newtoc = TOC()

    if len(indent_vals) > 6:
        # Too many distinct indents to treat as levels: keep the TOC flat.
        for x in entries:
            newtoc.add_item('index.html', x.anchor, x.text)
        return newtoc

    def find_parent(level):
        # Nearest still-open item at a shallower level, else the root.
        candidates = last_found[:level]
        for x in reversed(candidates):
            if x is not None:
                return x
        return newtoc

    for item in entries:
        level = indent_vals.index(item.indent)
        parent = find_parent(level)
        last_found[level] = parent.add_item('index.html', item.anchor,
                    item.text)
        # A new item at this level closes every deeper level.
        # ``range`` (not ``xrange``) so this also runs on Python 3.
        for i in range(level+1, len(last_found)):
            last_found[i] = None

    return newtoc
Exemplo n.º 2
0
def structure_toc(entries):
    """Build a nested ``TOC`` from flat entries, using indentation as depth.

    Entries need ``indent``, ``anchor`` and ``text`` attributes. The rank of
    an entry's ``indent`` among the sorted distinct indents is its level;
    each entry is attached to the latest entry of the nearest shallower
    level, falling back to the root. With more than six distinct indents
    every entry is added directly to the root. All items target
    'index.html'.
    """
    levels = sorted({entry.indent for entry in entries})
    depth_count = len(levels)
    toc_root = TOC()

    if depth_count > 6:
        # Flat fallback when there are too many indent values.
        for entry in entries:
            toc_root.add_item('index.html', entry.anchor, entry.text)
        return toc_root

    # open_nodes[k] holds the most recent item at level k (None if closed).
    open_nodes = [None] * depth_count

    for entry in entries:
        depth = levels.index(entry.indent)

        # Walk upwards from the level just above until an open node is found.
        parent = toc_root
        for candidate in reversed(open_nodes[:depth]):
            if candidate is not None:
                parent = candidate
                break

        open_nodes[depth] = parent.add_item(
            'index.html', entry.anchor, entry.text)
        # Everything deeper than the new item is no longer a valid parent.
        open_nodes[depth + 1:] = [None] * (depth_count - depth - 1)

    return toc_root
Exemplo n.º 3
0
 def _parse_toc(self, ul, basedir=os.getcwdu()):
     """Recursively convert a list node into a ``TOC`` tree.

     ``ul`` is called as ``ul('li', recursive=False)`` to obtain its direct
     ``li`` children (presumably a BeautifulSoup tag -- confirm). For each
     child the 'Local' param supplies the href, optionally followed by
     ``#fragment``, and the 'Name' param supplies the title, which is
     passed through ``self._deentity``. ``self._playorder`` is incremented
     once for the TOC node itself and once per item.

     NOTE: the ``basedir`` default is evaluated once, when the function is
     defined, not on every call.
     NOTE(review): nested lists are parsed without forwarding ``basedir``,
     so children use the default -- confirm this is intended.

     Returns the ``TOC`` built for ``ul``.
     """
     toc = TOC(play_order=self._playorder, base_path=basedir, text='')
     self._playorder += 1
     for li in ul('li', recursive=False):
         href = li.object('param', {'name': 'Local'})[0]['value']
         # partition() splits at the first '#' only, so an href that
         # contains several '#' no longer fails with a ValueError on
         # unpacking; everything after the first '#' is the fragment.
         href, sep, frag = href.partition('#')
         if not sep:
             frag = None
         name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
         toc.add_item(href, frag, name, play_order=self._playorder)
         self._playorder += 1
         if li.ul:
             child = self._parse_toc(li.ul)
             child.parent = toc
             toc.append(child)
     return toc
Exemplo n.º 4
0
 def _parse_toc(self, ul, basedir=getcwd()):
     """Recursively convert a list node into a ``TOC`` tree.

     The direct ``li`` children of ``ul`` are read via
     ``ul('li', recursive=False)`` (presumably a BeautifulSoup tag --
     confirm). Each child's 'Local' param gives the href, with an optional
     ``#fragment``, and its 'Name' param gives the title after
     ``self._deentity``. ``self._playorder`` advances by one for the TOC
     node and by one for every item added.

     NOTE: the ``basedir`` default is computed once at definition time.
     NOTE(review): the recursive call does not pass ``basedir`` on, so
     nested TOCs get the default -- confirm this is intended.

     Returns the ``TOC`` built for ``ul``.
     """
     toc = TOC(play_order=self._playorder, base_path=basedir, text='')
     self._playorder += 1
     for li in ul('li', recursive=False):
         href = li.object('param', {'name': 'Local'})[0]['value']
         # Split at the first '#' only: unpacking href.split('#') raised
         # ValueError for hrefs with more than one '#'.
         href, sep, frag = href.partition('#')
         if not sep:
             frag = None
         name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
         toc.add_item(href, frag, name, play_order=self._playorder)
         self._playorder += 1
         if li.ul:
             child = self._parse_toc(li.ul)
             child.parent = toc
             toc.append(child)
     return toc
Exemplo n.º 5
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        """Process the comic(s) in ``stream`` and write an OPF and NCX.

        A 'cbc' input is expanded with ``get_comics_from_collection``; any
        other extension is treated as a single comic titled 'Comic'. Each
        comic is handed to ``get_pages`` together with a working directory:
        the current directory for a single input, ``comic_<n>`` otherwise.
        Comics for which ``get_pages`` returns nothing are skipped.

        Side effects: closes ``stream``; sets ``self.opts``, ``self.log``
        and ``self._images``; creates missing ``comic_<n>`` directories;
        writes ``metadata.opf`` and ``toc.ncx`` to the current directory.

        ``accelerators`` is not used by this method.

        Returns the absolute path of ``metadata.opf``.
        Raises ValueError if no comic yielded any pages.
        """
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        # Each kept comic is stored as (title, pages, wrappers).
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = u'comic_%d'%(i+1) if len(comics_) > 1 else u'.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.getcwdu(), mi)
        entries = []

        def href(x):
            # A single comic lives in the current directory, so the file
            # name suffices; otherwise keep the last directory component.
            if len(comics) == 1:
                return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    # The first page shares the play order of its comic
                    # entry because ``po`` is only advanced after each page.
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x), None,
                                _('Page')+' %d'%(i+1), play_order=po)
                        po += 1
        opf.set_toc(toc)
        # Context managers make sure both output files are flushed and
        # closed even if rendering fails; previously they were never closed.
        with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
            opf.render(m, n, u'toc.ncx')
        return os.path.abspath(u'metadata.opf')
Exemplo n.º 6
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        """Process the comic(s) in ``stream`` and write an OPF and NCX.

        'cbc' inputs are expanded through ``get_comics_from_collection``;
        anything else is one comic titled 'Comic'. Every comic is passed to
        ``get_pages`` with a working directory ('.' for a single input,
        ``comic_<n>`` otherwise) and dropped if no pages come back. When
        ``self.for_viewer`` is set, a single wrapper from
        ``create_viewer_wrapper`` is used per comic, TOC entries point at
        ``#page_<n>`` anchors inside it, and the first page of the first
        comic is registered as the cover in the OPF guide.

        Side effects: closes ``stream``; sets ``self.opts``, ``self.log``
        and ``self._images``; creates missing ``comic_<n>`` directories;
        writes ``metadata.opf`` and ``toc.ncx`` to the current directory.

        ``accelerators`` is not used by this method.

        Returns the absolute path of ``metadata.opf``.
        Raises ValueError if no comic yielded any pages.
        """
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts = opts
        self.log = log

        if file_ext == 'cbc':
            sources = self.get_comics_from_collection(stream)
        else:
            sources = [['Comic', os.path.abspath(stream.name)]]
        stream.close()

        # Parallel lists: comics[k] is (title, pages, wrappers) and
        # page_counts[k] is the number of pages of that same comic.
        comics = []
        page_counts = []
        for number, (title, fname) in enumerate(sources, 1):
            if len(sources) > 1:
                cdir = 'comic_%d' % number
            else:
                cdir = '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            page_counts.append(len(pages))
            if self.for_viewer:
                wrappers = [self.create_viewer_wrapper(pages, cdir)]
            else:
                wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s' % stream.name)

        book_title = os.path.basename(stream.name).rpartition('.')[0]
        mi = MetaInformation(book_title, [_('Unknown')])
        opf = OPFCreator(os.getcwd(), mi)

        def href(path):
            # With several comics keep the containing directory as well.
            if len(comics) != 1:
                return '/'.join(path.split(os.sep)[-2:])
            return os.path.basename(path)

        def page_targets(wrappers, num_pages):
            # Link targets for the per-page TOC entries of one comic.
            if self.for_viewer:
                base = href(wrappers[0])
                return ['{}#page_{}'.format(base, k + 1)
                        for k in range(num_pages)]
            return [href(w) for w in wrappers]

        entries = []
        cover_href = None
        for title, pages, wrappers in comics:
            page_entries = [(href(p), None) for p in pages]
            entries.extend((href(w), None) for w in wrappers)
            entries.extend(page_entries)
            if cover_href is None and page_entries:
                cover_href = page_entries[0][0]
        opf.create_manifest(entries)

        spine = [href(w) for comic in comics for w in comic[2]]
        self._images = [page for comic in comics for page in comic[1]]
        opf.create_spine(spine)

        if self.for_viewer and cover_href:
            if os.path.isabs(cover_href):
                cover_href = os.path.relpath(cover_href).replace(os.sep, '/')
            opf.guide.set_cover(cover_href)

        toc = TOC()
        if len(comics) == 1:
            targets = page_targets(comics[0][2], page_counts[0])
            for i, target in enumerate(targets):
                toc.add_item(target, None, _('Page') + ' %d' % (i + 1),
                             play_order=i)
        else:
            po = 0
            for num_pages, (title, pages, wrappers) in zip(page_counts, comics):
                po += 1
                stoc = toc.add_item(href(wrappers[0]), None, title,
                                    play_order=po)
                if opts.dont_add_comic_pages_to_toc:
                    continue
                targets = page_targets(wrappers, num_pages)
                for page_no, target in enumerate(targets, 1):
                    stoc.add_item(target, None, _('Page') + ' %d' % page_no,
                                  play_order=po)
                    po += 1

        opf.set_toc(toc)
        with open('metadata.opf', 'wb') as opf_file, \
                open('toc.ncx', 'wb') as ncx_file:
            opf.render(opf_file, ncx_file, 'toc.ncx')
        return os.path.abspath('metadata.opf')
Exemplo n.º 7
0
    def get_toc(self):
        '''
        Build a nested TOC from ``self.toc``.

        ``self.toc`` yields ``(level, (href, id, text))`` tuples where
        level is one of the strings '0' to '3'; any other value is
        treated as level 4, the deepest level.

        Each item is attached to the most recently added item of the
        nearest shallower level. When no shallower item is available
        the item is attached to the root TOC instead.
        '''
        root = TOC()
        # open_nodes[k] is the latest item added at level k (levels 0-3);
        # level 4 items never become parents, so they are not tracked.
        open_nodes = [None, None, None, None]
        named_levels = ('0', '1', '2', '3')
        deepest = len(named_levels)

        for level, (href, anchor, text) in self.toc:
            depth = deepest
            for position, name in enumerate(named_levels):
                if level == name:
                    depth = position
                    break

            # Nearest tracked ancestor, falling back to the root.
            parent = root
            for node in reversed(open_nodes[:depth]):
                if node is not None:
                    parent = node
                    break

            added = parent.add_item(href, anchor, text)
            if depth < deepest:
                open_nodes[depth] = added
                # A new item at this level invalidates all deeper ones.
                for deeper in range(depth + 1, deepest):
                    open_nodes[deeper] = None

        return root
Exemplo n.º 8
0
    def get_toc(self):
        '''
        Convert ``self.toc`` into a TOC tree with up to 5 levels (0 - 4).

        ``self.toc`` yields ``(level, (href, id, text))`` tuples. Levels
        u'0' to u'3' are taken literally; everything else counts as
        level 4.

        An item whose level has no valid parent is added to the next
        valid level above it, ultimately to the base TOC object.
        '''
        n_toc = TOC()
        level_names = [u'0', u'1', u'2', u'3']
        # Latest node seen at each of the levels 0-3, None when closed.
        tracked = [None] * len(level_names)

        def depth_of(level):
            # Unknown levels are handled as level 4.
            if level in level_names:
                return level_names.index(level)
            return len(level_names)

        def parent_for(depth):
            # Deepest tracked node above ``depth``; the base TOC otherwise.
            usable = [node for node in tracked[:depth] if node is not None]
            return usable[-1] if usable else n_toc

        for level, (href, item_id, text) in self.toc:
            depth = depth_of(level)
            node = parent_for(depth).add_item(href, item_id, text)
            if depth == len(tracked):
                # Level 4 items are leaves and are not tracked.
                continue
            tracked[depth] = node
            # Sub-levels of a previous sibling must not be reused.
            tracked[depth + 1:] = [None] * (len(tracked) - depth - 1)

        return n_toc
Exemplo n.º 9
0
    def create_opf(self, htmlfile, guide=None, root=None):
        """Create the OPF (manifest, spine, guide and TOC) for ``htmlfile``.

        Metadata comes from ``self.book_header.exth.mi`` when present, else
        ``self.embedded_mi``, else a minimal record built from the header
        title. The cover is taken from the EXTH cover offset, then from the
        metadata, then from 'images/00001.jpg' if that file exists next to
        ``htmlfile``. An existing cover file is copied to
        'images/calibre_cover.jpg' (relative to the current directory).

        If ``guide`` contains a reference of type 'toc', the ``a`` elements
        that follow the referenced element in ``root`` are collected into a
        TOC, stopping at the first 'mbp_pagebreak' element after at least
        one link was found. ``root`` must therefore not be None in that
        case, since ``root.xpath`` and ``root.iter`` are called on it.

        Returns ``(opf, ncx_manifest_entry)`` where ``ncx_manifest_entry``
        is 'toc.ncx' if the guide had a toc reference and None otherwise.
        """
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
        if mi is None:
            mi = MetaInformation(self.book_header.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
            opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
            # Last resort: assume the first image is the cover, if present.
            opf.cover = u'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                * opf.cover.split('/'))):
                opf.cover = None

        cover = opf.cover
        cover_copied = None
        if cover is not None:
            cover = cover.replace('/', os.sep)
            if os.path.exists(cover):
                ncover = u'images'+os.sep+u'calibre_cover.jpg'
                if os.path.exists(ncover):
                    os.remove(ncover)
                shutil.copyfile(cover, ncover)
                cover_copied = os.path.abspath(ncover)
                opf.cover = ncover.replace(os.sep, '/')

        manifest = [(htmlfile, 'application/xhtml+xml'),
            (os.path.abspath(u'styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        for i in getattr(self, 'image_names', []):
            manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
        if cover_copied is not None:
            manifest.append((cover_copied, 'image/jpeg'))

        opf.create_manifest(manifest)
        opf.create_spine([os.path.basename(htmlfile)])
        toc = None
        if guide is not None:
            opf.create_guide(guide)
            # If several guide entries have type 'toc', the last one wins.
            for ref in opf.guide:
                if ref.type.lower() == 'toc':
                    toc = ref.href()

        ncx_manifest_entry = None
        if toc:
            ncx_manifest_entry = 'toc.ncx'
            toc_path, toc_sep, toc_anchor = toc.partition('#')
            elems = root.xpath('//*[@id="%s"]' % toc_anchor)
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            # Links carrying a URL scheme (e.g. http://) are not TOC entries.
            scheme_pat = re.compile(r'\w+://')
            if elems:
                tocobj = TOC()
                found = False
                reached = False
                for x in root.iter():
                    if x == elems[-1]:
                        reached = True
                        continue
                    if reached and x.tag == 'a':
                        href = x.get('href', '')
                        if href and scheme_pat.match(href) is None:
                            try:
                                text = u' '.join([t.strip() for t in
                                    x.xpath('descendant::text()')])
                            except Exception:
                                # Best effort: keep the entry with an empty
                                # title, but let KeyboardInterrupt and
                                # SystemExit propagate.
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            # href[1:] drops the first character of the
                            # link (presumably a leading '#' -- confirm).
                            item = tocobj.add_item(toc_path, href[1:],
                                text)
                            item.left_space = int(self.get_left_whitespace(x))
                            found = True
                    if reached and found and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
                opf.set_toc(tocobj)

        return opf, ncx_manifest_entry
Exemplo n.º 10
0
    def create_opf(self, htmlfile, guide=None, root=None):
        """Create the OPF (manifest, spine, guide and TOC) for ``htmlfile``.

        Metadata is read from ``self.book_header.exth.mi`` if it exists,
        otherwise from ``self.embedded_mi``, otherwise a minimal record is
        built from the header title. The cover is chosen from the EXTH cover
        offset, then the metadata, then 'images/00001.jpg' when that file
        exists beside ``htmlfile``. A cover file that exists is copied to
        'images/calibre_cover.jpg' (relative to the current directory).

        When ``guide`` has a reference of type 'toc', the ``a`` elements
        after the referenced element in ``root`` are turned into TOC items
        until the first 'mbp_pagebreak' element following at least one
        link. ``root`` must not be None in that case because ``root.xpath``
        and ``root.iter`` are called on it.

        Returns ``(opf, ncx_manifest_entry)``; ``ncx_manifest_entry`` is
        'toc.ncx' if the guide had a toc reference and None otherwise.
        """
        mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
        if mi is None:
            mi = MetaInformation(self.book_header.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
            opf.cover = u'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
            # Fall back to the first image, but only if it actually exists.
            opf.cover = u'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
                * opf.cover.split('/'))):
                opf.cover = None

        cover = opf.cover
        cover_copied = None
        if cover is not None:
            cover = cover.replace('/', os.sep)
            if os.path.exists(cover):
                ncover = u'images'+os.sep+u'calibre_cover.jpg'
                if os.path.exists(ncover):
                    os.remove(ncover)
                shutil.copyfile(cover, ncover)
                cover_copied = os.path.abspath(ncover)
                opf.cover = ncover.replace(os.sep, '/')

        manifest = [(htmlfile, 'application/xhtml+xml'),
            (os.path.abspath(u'styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        for i in getattr(self, 'image_names', []):
            manifest.append((os.path.join(bp, 'images', i), 'image/jpeg'))
        if cover_copied is not None:
            manifest.append((cover_copied, 'image/jpeg'))

        opf.create_manifest(manifest)
        opf.create_spine([os.path.basename(htmlfile)])
        toc = None
        if guide is not None:
            opf.create_guide(guide)
            # The last guide reference of type 'toc' is the one used.
            for ref in opf.guide:
                if ref.type.lower() == 'toc':
                    toc = ref.href()

        ncx_manifest_entry = None
        if toc:
            ncx_manifest_entry = 'toc.ncx'
            toc_path, toc_sep, toc_anchor = toc.partition('#')
            elems = root.xpath('//*[@id="%s"]' % toc_anchor)
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            # Raw string: '\w' in a normal literal is an invalid escape
            # sequence. Compiled once instead of on every link.
            scheme_pat = re.compile(r'\w+://')
            if elems:
                tocobj = TOC()
                found = False
                reached = False
                for x in root.iter():
                    if x == elems[-1]:
                        reached = True
                        continue
                    if reached and x.tag == 'a':
                        href = x.get('href', '')
                        # Skip links that carry a URL scheme (e.g. http://).
                        if href and scheme_pat.match(href) is None:
                            try:
                                text = u' '.join([t.strip() for t in
                                    x.xpath('descendant::text()')])
                            except Exception:
                                # Best effort: use an empty title, without
                                # swallowing KeyboardInterrupt/SystemExit.
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            # href[1:] drops the first character of the
                            # link (presumably a leading '#' -- confirm).
                            item = tocobj.add_item(toc_path, href[1:],
                                text)
                            item.left_space = int(self.get_left_whitespace(x))
                            found = True
                    if reached and found and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
                opf.set_toc(tocobj)

        return opf, ncx_manifest_entry
Exemplo n.º 11
0
    def convert(self, stream, opts, file_ext, log, accelerators):
        """Process the comic(s) in ``stream`` and write an OPF and NCX.

        A 'cbc' input is expanded with ``get_comics_from_collection``; every
        other extension is handled as one comic titled 'Comic'. Each comic
        is passed to ``get_pages`` with a working directory (the current
        directory for a single input, ``comic_<n>`` otherwise); comics
        without pages are skipped.

        Side effects: closes ``stream``; sets ``self.opts``, ``self.log``
        and ``self._images``; creates missing ``comic_<n>`` directories;
        writes ``metadata.opf`` and ``toc.ncx`` to the current directory.

        ``accelerators`` is not used by this method.

        Returns the absolute path of ``metadata.opf``.
        Raises ValueError if no comic yielded any pages.
        """
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log = opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        # Each kept comic is stored as (title, pages, wrappers).
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = u'comic_%d' % (i + 1) if len(comics_) > 1 else u'.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s' % stream.name)

        mi = MetaInformation(
            os.path.basename(stream.name).rpartition('.')[0], [_('Unknown')])
        opf = OPFCreator(os.getcwdu(), mi)
        entries = []

        def href(x):
            # One comic: files sit in the current directory. Several comics:
            # keep the ``comic_<n>`` directory in the link.
            if len(comics) == 1:
                return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x),
                             None,
                             _('Page') + ' %d' % (i + 1),
                             play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                                    None,
                                    comic[0],
                                    play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    # ``po`` advances only after each page, so the first
                    # page has the same play order as its comic entry.
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x),
                                      None,
                                      _('Page') + ' %d' % (i + 1),
                                      play_order=po)
                        po += 1
        opf.set_toc(toc)
        # Use context managers so both output files are closed (and thus
        # flushed) even when rendering raises; they used to be left open.
        with open(u'metadata.opf', 'wb') as m, open('toc.ncx', 'wb') as n:
            opf.render(m, n, u'toc.ncx')
        return os.path.abspath(u'metadata.opf')