Пример #1
0
def parse_outline(raw, output_dir):
    from lxml import etree
    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
    raw = clean_xml_chars(
        xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0])
    outline = etree.fromstring(raw,
                               parser=RECOVER_PARSER).xpath('(//outline)[1]')
    if outline:
        from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
        outline = outline[0]
        toc = TOC()
        count = [0]

        def process_node(node, toc):
            for child in node.iterchildren('*'):
                if child.tag == 'outline':
                    parent = toc.children[-1] if toc.children else toc
                    process_node(child, parent)
                else:
                    if child.text:
                        page = child.get('page', '1')
                        toc.add(child.text, 'index.html', 'p' + page)
                        count[0] += 1

        process_node(outline, toc)
        if count[0] > 2:
            root = create_ncx(toc, (lambda x: x), 'pdftohtml', 'en',
                              'pdftohtml')
            with open(os.path.join(output_dir, 'toc.ncx'), 'wb') as f:
                f.write(
                    etree.tostring(root,
                                   pretty_print=True,
                                   with_tail=False,
                                   encoding='utf-8',
                                   xml_declaration=True))
Пример #2
0
def parse_outline(raw, output_dir):
    from lxml import etree
    from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
    raw = clean_xml_chars(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0])
    outline = etree.fromstring(raw, parser=RECOVER_PARSER).xpath('(//outline)[1]')
    if outline:
        from calibre.ebooks.oeb.polish.toc import TOC, create_ncx
        outline = outline[0]
        toc = TOC()
        count = [0]

        def process_node(node, toc):
            for child in node.iterdescendants('*'):
                if child.tag == 'outline':
                    parent = toc.children[-1] if toc.children else toc
                    process_node(child, parent)
                else:
                    page = child.get('page', '1')
                    toc.add(child.text, 'index.html', page)
                    count[0] += 1
        process_node(outline, toc)
        if count[0] > 2:
            root = create_ncx(toc, (lambda x:x), 'pdftohtml', 'en', 'pdftohtml')
            with open(os.path.join(output_dir, 'toc.ncx'), 'wb') as f:
                f.write(etree.tostring(root, pretty_print=True, with_tail=False, encoding='utf-8', xml_declaration=True))
Пример #3
0
def create_toc(mi, opf, html_name, lang):
    uuid = ''
    for u in opf.xpath('//*[@id="uuid_id"]'):
        uuid = u.text
    toc = TOC()
    toc.add(_('Start'), html_name)
    return create_ncx(toc, lambda x: x, mi.title, lang, uuid)
Пример #4
0
def create_toc(mi, opf, html_name, lang):
    uuid = ''
    for u in opf.xpath('//*[@id="uuid_id"]'):
        uuid = u.text
    toc = TOC()
    toc.add(_('Start'), html_name)
    return create_ncx(toc, lambda x:x, mi.title, lang, uuid)