Exemplo n.º 1
0
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    """Render the book described by ``opf_path`` into a single PDF document.

    The book is loaded into a ``Container``, its markup is transformed in
    place, every margin file is rendered to PDF through a ``RenderManager``,
    and the per-file PDFs are appended into one document that is then
    post-processed (links, outline, headers/footers, font and image
    de-duplication, optional page offset, cover and metadata).

    :param opf_path: Path handed to ``Container`` to load the book.
    :param opts: Options object. This function reads ``pdf_hyphenate``,
        ``pdf_add_toc``, ``pdf_mark_links``, ``pdf_odd_even_offset`` and
        ``uncompressed_pdf`` from it and also passes it on to several helpers.
    :param metadata: Passed to ``PDFMetadata``. The PDF metadata is only
        updated when this is not ``None``.
    :param output_path: Where to write the PDF. When ``None`` the serialized
        PDF data is returned instead of being written to disk.
    :param log: Logger, called directly for informational messages and passed
        on to helpers.
    :param cover_data: When truthy, passed to ``add_cover``.
    :param report_progress: Callable invoked as
        ``report_progress(fraction, message)`` at several stages.
    :return: The serialized PDF data when ``output_path`` is ``None``,
        otherwise ``None``.
    :raises SystemExit: If the render result for a margin file is not
        ``bytes``; the result itself is used as the exit argument.
    """
    # Load the book and apply the in-place markup transformations.
    container = Container(opf_path, log)
    fix_markup(container)
    report_progress(0.05, _('Parsed all content for markup transformation'))
    if opts.pdf_hyphenate:
        # Imported here so the hyphenation module is only loaded on demand.
        from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens
        add_soft_hyphens(container)
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)

    name_anchor_map = make_anchors_unique(container, log)
    # Materialized as a tuple because it is iterated twice below.
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    # Truthy only when a TOC object exists and has at least one entry.
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    pdf_doc = None
    anchor_locations = {}
    # Queue one render job per margin file and render them in a single call.
    jobs = []
    for margin_file in margin_files:
        jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout))
    results = manager.convert_html_files(jobs, settle_time=1, has_maths=has_maths)
    num_pages = 0
    # One entry per page of the merged document, in page order.
    page_margins_map = []
    for margin_file in margin_files:
        name = margin_file.name
        data = results[name]
        # Anything other than bytes aborts the conversion.
        # NOTE(review): presumably a non-bytes result is an error description
        # from the renderer - confirm against RenderManager.
        if not isinstance(data, bytes):
            raise SystemExit(data)
        doc = data_as_pdf_doc(data)
        # num_pages + 1 is the position this document's first page will have
        # in the merged PDF when counting pages from 1.
        anchor_locations.update(get_anchor_locations(name, doc, num_pages + 1, links_page_uuid, log))
        doc_pages = doc.page_count()
        page_margins_map.extend(repeat(resolve_margins(margin_file.margins, page_layout), doc_pages))
        num_pages += doc_pages

        # The first rendered document becomes the merged PDF; later ones are
        # appended to it.
        # NOTE(review): pdf_doc stays None if margin_files is empty, and the
        # code below uses it unconditionally - confirm that cannot happen.
        if pdf_doc is None:
            pdf_doc = doc
        else:
            pdf_doc.append(doc)

    page_number_display_map = get_page_number_display_map(manager, opts, num_pages, log)

    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            # Build a printable TOC page, render it separately and append it
            # after the book content.
            tocname = create_skeleton(container)
            root = container.parsed(tocname)
            add_pagenum_toc(root, toc, opts, page_number_display_map)
            container.commit()
            jobs = [job_for_name(container, tocname, None, page_layout)]
            results = manager.convert_html_files(jobs, settle_time=1)
            # NOTE(review): unlike the loop above, this result is not checked
            # for being bytes before it is parsed.
            tocdoc = data_as_pdf_doc(results[tocname])
            page_margins_map.extend(repeat(resolve_margins(None, page_layout), tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
    # Same condition as has_toc: add a PDF outline only for a non-empty TOC.
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(
        manager, opts, pdf_doc, container,
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, toc if has_toc else None)

    # Clean-up passes; each one logs only when it actually removed something.
    num_removed = remove_unused_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'unused fonts')

    merge_fonts(pdf_doc, log)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')

    num_removed = pdf_doc.dedup_images()
    if num_removed:
        log('Removed', num_removed, 'duplicate images')

    if opts.pdf_odd_even_offset:
        # Page index 0 is skipped; the offset is applied from index 1 onwards.
        for i in range(1, pdf_doc.page_count()):
            margins = page_margins_map[i]
            # The direction of the shift alternates with the page index.
            mult = -1 if i % 2 else 1
            val = opts.pdf_odd_even_offset
            # Shift only when the offset is smaller than both the left and
            # the right margin of this page.
            if abs(val) < min(margins.left, margins.right):
                box = list(pdf_doc.get_page_box("CropBox", i))
                # Only the first CropBox component is adjusted
                # (presumably the left x coordinate - TODO confirm).
                box[0] += val * mult
                pdf_doc.set_page_box("CropBox", i, *box)

    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    # Serialize, then either hand the data back or write it to output_path.
    pdf_data = pdf_doc.write()
    if output_path is None:
        return pdf_data
    with open(output_path, 'wb') as f:
        f.write(pdf_data)
Exemplo n.º 2
0
def polish_one(ebook, opts, report, customization=None):
    """Run every polishing action enabled in ``opts`` against ``ebook``.

    Each enabled action announces itself through ``report`` with a
    ``### `` heading, followed by whatever the action itself reports.

    :param ebook: The book object handed to every polishing helper.
    :param opts: Options object; each truthy attribute (``opf``, ``cover``,
        ``jacket``, ``remove_jacket``, ``smarten_punctuation``, ``embed``,
        ``subset``, ``remove_unused_css``, ``compress_images``,
        ``upgrade_book``, ``remove_soft_hyphens``, ``add_soft_hyphens``)
        enables the corresponding action.
    :param report: Callable taking a single message string.
    :param customization: Mapping of fine-tuning flags. A copy of
        ``CUSTOMIZATION`` is used when this is ``None`` or empty.
    :return: ``True`` if any action marked the book as changed, else ``False``.
    """
    def rt(heading):
        # Section titles are reported as a '### ' line after a blank line.
        return report('\n### ' + heading)

    jacket = None
    changed = False
    if not customization:
        customization = CUSTOMIZATION.copy()

    # Only the existence of a first font matters, so stop after one item.
    has_subsettable_fonts = any(True for _font in iter_subsettable_fonts(ebook))

    # Font statistics are needed both for subsetting and for embedding.
    needs_stats = (opts.subset and has_subsettable_fonts) or opts.embed
    if needs_stats:
        stats = StatsCollector(ebook, do_embed=opts.embed)

    if opts.opf:
        changed = True
        rt(_('Updating metadata'))
        update_metadata(ebook, opts.opf)
        # Remember the jacket so the jacket step below does not redo the work.
        jacket = find_existing_jacket(ebook)
        if jacket is not None:
            replace_jacket(ebook, jacket)
            report(_('Updated metadata jacket'))
        report(_('Metadata updated\n'))

    if opts.cover:
        changed = True
        rt(_('Setting cover'))
        set_cover(ebook, opts.cover, report)
        report('')

    if opts.jacket:
        changed = True
        rt(_('Inserting metadata jacket'))
        # A jacket found during the metadata step has already been replaced;
        # otherwise add_or_replace_jacket tells us whether one existed.
        was_replaced = jacket is not None or add_or_replace_jacket(ebook)
        report(
            _('Existing metadata jacket replaced') if was_replaced
            else _('Metadata jacket inserted'))
        report('')

    if opts.remove_jacket:
        rt(_('Removing metadata jacket'))
        if not remove_jacket(ebook):
            report(_('No metadata jacket found'))
        else:
            report(_('Metadata jacket removed'))
            changed = True
        report('')

    if opts.smarten_punctuation:
        rt(_('Smartening punctuation'))
        punctuation_changed = smarten_punctuation(ebook, report)
        if punctuation_changed:
            changed = True
        report('')

    if opts.embed:
        rt(_('Embedding referenced fonts'))
        fonts_embedded = embed_all_fonts(ebook, stats, report)
        if fonts_embedded:
            # Newly embedded fonts become candidates for subsetting below.
            changed = has_subsettable_fonts = True
        report('')

    if opts.subset:
        if not has_subsettable_fonts:
            rt(_('No embedded fonts to subset'))
        else:
            rt(_('Subsetting embedded fonts'))
            if subset_all_fonts(ebook, stats.font_stats, report):
                changed = True
        report('')

    if opts.remove_unused_css:
        rt(_('Removing unused CSS rules'))
        css_options = {
            'remove_unused_classes': customization['remove_unused_classes'],
            'merge_rules': customization['merge_identical_selectors'],
            'merge_rules_with_identical_properties': customization[
                'merge_rules_with_identical_properties'],
            'remove_unreferenced_sheets': customization[
                'remove_unreferenced_sheets'],
        }
        if remove_unused_css(ebook, report, **css_options):
            changed = True
        report('')

    if opts.compress_images:
        rt(_('Losslessly compressing images'))
        compression_result = compress_images(ebook, report)
        # Only the first element of the result signals a change.
        if compression_result[0]:
            changed = True
        report('')

    if opts.upgrade_book:
        rt(_('Upgrading book, if possible'))
        was_upgraded = upgrade_book(
            ebook, report, remove_ncx=customization['remove_ncx'])
        if was_upgraded:
            changed = True
        report('')

    # Removing and adding soft hyphens are mutually exclusive; removal wins.
    if opts.remove_soft_hyphens:
        rt(_('Removing soft hyphens'))
        remove_soft_hyphens(ebook, report)
        changed = True
    elif opts.add_soft_hyphens:
        rt(_('Adding soft hyphens'))
        add_soft_hyphens(ebook, report)
        changed = True

    return changed