def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    """Convert the book described by ``opf_path`` into a single PDF document.

    The pipeline runs in four stages, reporting progress after each:

    1. Markup transformation: the maths script, fullscreen-image fix, anchor
       de-duplication, margin files and links are applied to the container,
       which is then committed.
    2. Rendering: every margin file is rendered through a ``RenderManager``
       and the per-file PDF documents are appended into one document.
    3. Post-processing: optional table-of-contents pages, link fixing, PDF
       outline, headers/footers, font and image de-duplication.
    4. Finalisation: optional cover, metadata, optional uncompression and
       serialisation.

    :param opf_path: passed to ``Container`` to open the book.
    :param opts: conversion options; this function reads ``pdf_add_toc``,
        ``pdf_mark_links`` and ``uncompressed_pdf`` and forwards the object
        to the helpers it calls.
    :param metadata: wrapped in ``PDFMetadata``; the PDF metadata is only
        updated when this is not ``None``.
    :param output_path: when not ``None``, the PDF bytes are written to this
        path instead of being returned.
    :param log: logger, called as ``log(*parts)`` and forwarded to helpers.
    :param cover_data: when truthy, handed to ``add_cover``.
    :param report_progress: callable invoked as
        ``report_progress(fraction, message)``.
    :return: the PDF bytes when ``output_path`` is ``None``, otherwise
        ``None``.
    :raises SystemExit: when the renderer's result for a file is not
        ``bytes``; that result is used as the exit payload.
    """
    container = Container(opf_path, log)
    report_progress(0.05, _('Parsed all content for markup transformation'))

    # --- Stage 1: markup transformation -------------------------------
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)
    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    # --- Stage 2: render every margin file and stitch the results ------
    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    render_jobs = [
        job_for_name(container, mf.name, mf.margins, page_layout)
        for mf in margin_files
    ]
    rendered = manager.convert_html_files(render_jobs, settle_time=1, has_maths=has_maths)

    pdf_doc = None
    anchor_locations = {}
    page_margins_map = []
    num_pages = 0
    for mf in margin_files:
        name = mf.name
        payload = rendered[name]
        if not isinstance(payload, bytes):
            # Anything other than bytes is treated as a fatal render result.
            raise SystemExit(payload)
        doc = data_as_pdf_doc(payload)
        # num_pages counts the pages accumulated from the previous files.
        anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
        doc_pages = doc.page_count()
        file_margins = resolve_margins(mf.margins, page_layout)
        # One margins entry per page of this file.
        page_margins_map.extend(repeat(file_margins, doc_pages))
        num_pages += doc_pages

        if pdf_doc is not None:
            pdf_doc.append(doc)
        else:
            pdf_doc = doc

    page_number_display_map = get_page_number_display_map(manager, opts, num_pages, log)

    # --- Stage 3: table of contents, links, headers/footers, cleanup ---
    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            # Render a separate ToC document and append it after the content.
            tocname = create_skeleton(container)
            toc_root = container.parsed(tocname)
            add_pagenum_toc(toc_root, toc, opts, page_number_display_map)
            container.commit()
            toc_jobs = [job_for_name(container, tocname, None, page_layout)]
            toc_rendered = manager.convert_html_files(toc_jobs, settle_time=1)
            tocdoc = data_as_pdf_doc(toc_rendered[tocname])
            toc_margins = resolve_margins(None, page_layout)
            page_margins_map.extend(repeat(toc_margins, tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    header_footer_toc = toc if has_toc else None
    add_header_footer(
        manager, opts, pdf_doc, container,
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, header_footer_toc)

    merge_fonts(pdf_doc)
    glyphs_removed = dedup_type3_fonts(pdf_doc)
    if glyphs_removed:
        log('Removed', glyphs_removed, 'duplicated Type3 glyphs')

    fonts_removed = remove_unused_fonts(pdf_doc)
    if fonts_removed:
        log('Removed', fonts_removed, 'unused fonts')

    images_removed = pdf_doc.dedup_images()
    if images_removed:
        log('Removed', images_removed, 'duplicate images')

    # --- Stage 4: cover, metadata, serialisation -----------------------
    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is not None:
        with open(output_path, 'wb') as out:
            out.write(pdf_data)
        return None
    return pdf_data
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    """Convert the book described by ``opf_path`` into a single PDF document.

    The pipeline runs in four stages, reporting progress after each:

    1. Markup transformation: ``fix_markup``, optional soft hyphenation,
       the maths script, fullscreen-image fix, anchor de-duplication, margin
       files and links are applied to the container, which is then
       committed.
    2. Rendering: every margin file is rendered through a ``RenderManager``
       and the per-file PDF documents are appended into one document.
    3. Post-processing: optional table-of-contents pages, link fixing, PDF
       outline, headers/footers, font merging/subsetting, image
       de-duplication and the optional odd/even page offset.
    4. Finalisation: optional cover, metadata, optional uncompression and
       serialisation.

    :param opf_path: passed to ``Container`` to open the book.
    :param opts: conversion options; this function reads ``pdf_hyphenate``,
        ``pdf_add_toc``, ``pdf_mark_links``, ``pdf_odd_even_offset`` and
        ``uncompressed_pdf`` and forwards the object to the helpers it calls.
    :param metadata: wrapped in ``PDFMetadata``; the PDF metadata is only
        updated when this is not ``None``.
    :param output_path: when not ``None``, the PDF bytes are written to this
        path instead of being returned.
    :param log: logger, called as ``log(*parts)`` and forwarded to helpers.
    :param cover_data: when truthy, handed to ``add_cover``.
    :param report_progress: callable invoked as
        ``report_progress(fraction, message)``.
    :return: the PDF bytes when ``output_path`` is ``None``, otherwise
        ``None``.
    :raises SystemExit: when the renderer's result for a file is not
        ``bytes``; that result is used as the exit payload.
    """
    container = Container(opf_path, log)
    fix_markup(container)
    report_progress(0.05, _('Parsed all content for markup transformation'))

    # --- Stage 1: markup transformation -------------------------------
    if opts.pdf_hyphenate:
        # Imported lazily: only needed when hyphenation is requested.
        from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens
        add_soft_hyphens(container)
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)
    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    # --- Stage 2: render every margin file and stitch the results ------
    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    render_jobs = [
        job_for_name(container, mf.name, mf.margins, page_layout)
        for mf in margin_files
    ]
    rendered = manager.convert_html_files(render_jobs, settle_time=1, has_maths=has_maths)

    pdf_doc = None
    anchor_locations = {}
    page_margins_map = []
    num_pages = 0
    for mf in margin_files:
        name = mf.name
        payload = rendered[name]
        if not isinstance(payload, bytes):
            # Anything other than bytes is treated as a fatal render result.
            raise SystemExit(payload)
        doc = data_as_pdf_doc(payload)
        # num_pages counts the pages accumulated from the previous files.
        anchor_locations.update(
            get_anchor_locations(name, doc, num_pages + 1, links_page_uuid, log))
        doc_pages = doc.page_count()
        file_margins = resolve_margins(mf.margins, page_layout)
        # One margins entry per page of this file.
        page_margins_map.extend(repeat(file_margins, doc_pages))
        num_pages += doc_pages

        if pdf_doc is not None:
            pdf_doc.append(doc)
        else:
            pdf_doc = doc

    page_number_display_map = get_page_number_display_map(
        manager, opts, num_pages, log)

    # --- Stage 3: table of contents, links, headers/footers, cleanup ---
    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            # Render a separate ToC document and append it after the content.
            tocname = create_skeleton(container)
            toc_root = container.parsed(tocname)
            add_pagenum_toc(toc_root, toc, opts, page_number_display_map)
            container.commit()
            toc_jobs = [job_for_name(container, tocname, None, page_layout)]
            toc_rendered = manager.convert_html_files(toc_jobs, settle_time=1)
            tocdoc = data_as_pdf_doc(toc_rendered[tocname])
            toc_margins = resolve_margins(None, page_layout)
            page_margins_map.extend(repeat(toc_margins, tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links, log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    header_footer_toc = toc if has_toc else None
    add_header_footer(
        manager, opts, pdf_doc, container,
        page_number_display_map, page_layout, page_margins_map,
        pdf_metadata, report_progress, header_footer_toc)

    merge_fonts(pdf_doc, log)
    glyphs_removed = dedup_type3_fonts(pdf_doc)
    if glyphs_removed:
        log('Removed', glyphs_removed, 'duplicated Type3 glyphs')

    fonts_removed = remove_unused_fonts(pdf_doc)
    if fonts_removed:
        log('Removed', fonts_removed, 'unused fonts')

    # Needed because of https://bugreports.qt.io/browse/QTBUG-88976
    subset_fonts(pdf_doc, log)

    images_removed = pdf_doc.dedup_images()
    if images_removed:
        log('Removed', images_removed, 'duplicate images')

    odd_even_offset = opts.pdf_odd_even_offset
    if odd_even_offset:
        # Shift the left edge of each page's CropBox in alternating
        # directions: odd indices by -offset, even indices by +offset.
        # Index 0 is not touched (the range starts at 1).
        for page_index in range(1, pdf_doc.page_count()):
            page_margins = page_margins_map[page_index]
            direction = -1 if page_index % 2 else 1
            # Only shift when the offset is smaller than both side margins.
            if abs(odd_even_offset) < min(page_margins.left, page_margins.right):
                crop_box = list(pdf_doc.get_page_box("CropBox", page_index))
                crop_box[0] += odd_even_offset * direction
                pdf_doc.set_page_box("CropBox", page_index, *crop_box)

    # --- Stage 4: cover, metadata, serialisation -----------------------
    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is not None:
        with open(output_path, 'wb') as out:
            out.write(pdf_data)
        return None
    return pdf_data