def run_checks(container):
    """Run every book check against *container* and return the errors found.

    Each file listed in ``container.mime_map`` is read in full and grouped by
    MIME type (XML, HTML, stylesheets, raster images). The HTML-size,
    XML-parsing and raster-image checkers run first through ``run_checkers``;
    if any error they report has a ``level`` above ``WARN``, the errors
    collected so far are returned immediately and the remaining checks are
    skipped.

    :param container: book container exposing ``mime_map``, ``open()`` and
        ``parsed()``
    :return: list of the error objects produced by the checkers
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # .items() rather than .iteritems(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3.
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # Not a type any of the content checkers look at.
            continue
        # NOTE(review): the handle returned by container.open() is never
        # closed explicitly -- confirm whether it can be used in a ``with``.
        items.append((name, mt, container.open(name, "rb").read()))

    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # Stop early when anything above WARN level has been reported.
    if any(err.level > WARN for err in errors):
        return errors

    # cssutils is not thread safe, so CSS is parsed directly in this loop
    # (presumably run_checkers runs its jobs concurrently -- confirm).
    for name, mt, raw in stylesheets:
        if not raw:
            errors.append(EmptyFile(name))
            continue
        errors.extend(check_css_parsing(name, raw))

    for name, mt, raw in html_items + xml_items:
        errors.extend(check_encoding_declarations(name, container))

    for name, mt, raw in html_items:
        if not raw:
            continue
        root = container.parsed(name)
        # CSS embedded in <style> elements.
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get("type", "text/css") == "text/css" and style.text:
                errors.extend(check_css_parsing(
                    name, style.text, line_offset=style.sourceline - 1))
        # CSS declarations in style="..." attributes.
        for elem in root.xpath("//*[@style]"):
            declaration = elem.get("style")
            if declaration:
                errors.extend(check_css_parsing(
                    name, declaration, line_offset=elem.sourceline - 1,
                    is_declaration=True))

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_filenames(container)
    errors += check_ids(container)
    errors += check_markup(container)
    errors += check_opf(container)

    return errors
def run_checks(container):
    """Check the book in *container* and return the list of errors found.

    Files from ``container.mime_map`` are read and sorted into XML, HTML,
    stylesheet and raster-image groups by MIME type. The checkers dispatched
    through ``run_checkers`` run first; when one of their errors has a
    ``level`` greater than ``WARN`` the function returns at once with the
    errors gathered so far. Otherwise the CSS, encoding-declaration and
    whole-container checks are run as well.

    :param container: book container exposing ``mime_map``, ``open()`` and
        ``parsed()``
    :return: list of the error objects produced by the checkers
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # dict.iteritems() is Python 2 only; .items() works on Python 2 and 3.
    for name, mt in container.mime_map.items():
        items = None
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        if items is not None:
            # NOTE(review): the object returned by container.open() is not
            # closed here -- confirm whether that is intentional.
            items.append((name, mt, container.open(name, 'rb').read()))

    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # Bail out before the remaining checks if anything above WARN was found.
    for err in errors:
        if err.level > WARN:
            return errors

    # cssutils is not thread safe
    for name, mt, raw in stylesheets:
        if raw:
            errors.extend(check_css_parsing(name, raw))
        else:
            errors.append(EmptyFile(name))

    for name, mt, raw in html_items + xml_items:
        errors.extend(check_encoding_declarations(name, container))

    for name, mt, raw in html_items:
        if not raw:
            continue
        root = container.parsed(name)
        # <style> elements holding CSS text.
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(check_css_parsing(
                    name, style.text, line_offset=style.sourceline - 1))
        # Inline style="..." attributes, parsed as bare declarations.
        for elem in root.xpath('//*[@style]'):
            inline_css = elem.get('style')
            if inline_css:
                errors.extend(check_css_parsing(
                    name, inline_css, line_offset=elem.sourceline - 1,
                    is_declaration=True))

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_ids(container)
    errors += check_filenames(container)
    errors += check_markup(container)
    errors += check_opf(container)

    return errors
def fix_opf(self, container):
    """Clean up the OPF of *container* for EPUB 3 output.

    Adds every OEB document missing from the spine, drops repeated spine
    entries, removes the ``<guide>`` and ``<meta name="cover">`` elements,
    marks the cover image and the ``epub-cover.xhtml`` title page through
    their ``properties`` attribute, declares the ``calibre:`` prefix on the
    package element, removes files that ``check_links`` reports as
    unreferenced, and finally pretty prints the OPF and marks it dirty.

    :param container: book container whose OPF is modified in place
    """
    in_spine = set()
    for spine_name, _linear in container.spine_names:
        in_spine.add(spine_name)
    spine = container.opf_xpath('//opf:spine')[0]
    # Reverse of manifest_id_map: file name -> manifest id
    id_for_name = dict(
        (fname, item_id)
        for item_id, fname in iteritems(container.manifest_id_map))

    # Add unreferenced text files to the spine
    for fname, mime in iteritems(container.mime_map):
        if mime not in OEB_DOCS or fname in in_spine:
            continue
        in_spine.add(fname)
        itemref = spine.makeelement(OPF('itemref'), idref=id_for_name[fname])
        container.insert_into_xml(spine, itemref)

    # Remove duplicate entries from spine
    already_listed = set()
    for itemref, fname, _linear in container.spine_iter:
        if fname in already_listed:
            container.remove_from_xml(itemref)
        else:
            already_listed.add(fname)

    # Remove the <guide> which is not needed in EPUB 3
    for guide in container.opf_xpath('//*[local-name()="guide"]'):
        parent = guide.getparent()
        parent.remove(guide)

    # Ensure that the cover-image property is set
    cover_name = '_static/' + self.config.epub_cover[0]
    cover_id = id_for_name[cover_name]
    cover_query = '//opf:item[@id="{}"]'.format(cover_id)
    for cover_item in container.opf_xpath(cover_query):
        cover_item.set('properties', 'cover-image')
    for title_page in container.opf_xpath(
            '//opf:item[@href="epub-cover.xhtml"]'):
        title_page.set('properties', 'svg calibre:title-page')
    # Append the calibre prefix mapping to any prefixes already declared
    for package in container.opf_xpath('//opf:package'):
        declared = package.get('prefix')
        if declared:
            declared = declared + ' '
        else:
            declared = ''
        package.set('prefix', declared + 'calibre: https://calibre-ebook.com')

    # Remove any <meta cover> tag as it is not needed in epub 3
    for cover_meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        parent = cover_meta.getparent()
        parent.remove(cover_meta)

    # Remove unreferenced files
    for problem in check_links(container):
        if problem.__class__ is not UnreferencedResource:
            continue
        container.remove_item(problem.name)

    # Pretty print the OPF
    opf_root = container.parsed(container.opf_name)
    pretty_opf(opf_root)
    container.dirty(container.opf_name)
def run_checks(container):
    """Run the parsing, CSS, link and font checks on *container*.

    Files from ``container.mime_map`` are read and grouped by MIME type into
    XML, HTML, stylesheet and raster-image lists, which are then handed to
    the matching checkers.

    :param container: book container exposing ``mime_map``, ``open()`` and
        ``parsed()``
    :return: list of the error objects produced by the checkers
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # .items() rather than .iteritems(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3.
    for name, mt in container.mime_map.items():
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        else:
            # Not a type any of the content checkers look at.
            continue
        items.append((name, mt, container.open(name, 'rb').read()))

    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # cssutils is not thread safe
    for name, mt, raw in stylesheets:
        errors.extend(check_css_parsing(name, raw))

    for name, mt, raw in html_items:
        root = container.parsed(name)
        for style in root.xpath('//*[local-name()="style"]'):
            # An empty <style/> has text None; skip it instead of handing
            # None to the CSS parser.
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(check_css_parsing(
                    name, style.text, line_offset=style.sourceline - 1))
        # Inline style="..." attributes, parsed as bare declarations.
        for elem in root.xpath('//*[@style]'):
            declaration = elem.get('style')
            if declaration:
                errors.extend(check_css_parsing(
                    name, declaration, line_offset=elem.sourceline - 1,
                    is_declaration=True))

    errors += check_links(container)
    errors += check_fonts(container)

    return errors
def run_checks(container):
    """Run the book checks on *container* and return the errors found.

    Files from ``container.mime_map`` are read and grouped by MIME type into
    XML, HTML, stylesheet and raster-image lists. Those lists go through the
    size, parsing and image checkers, then CSS from stylesheets, ``<style>``
    elements and ``style`` attributes is checked, followed by the
    container-wide checks.

    :param container: book container exposing ``mime_map``, ``open()`` and
        ``parsed()``
    :return: list of the error objects produced by the checkers
    """
    errors = []

    # Check parsing
    xml_items, html_items, raster_images, stylesheets = [], [], [], []
    # dict.iteritems() is Python 2 only; .items() works on Python 2 and 3.
    for name, mt in container.mime_map.items():
        items = None
        if mt in XML_TYPES:
            items = xml_items
        elif mt in OEB_DOCS:
            items = html_items
        elif mt in OEB_STYLES:
            items = stylesheets
        elif is_raster_image(mt):
            items = raster_images
        if items is not None:
            items.append((name, mt, container.open(name, 'rb').read()))

    errors.extend(run_checkers(check_html_size, html_items))
    errors.extend(run_checkers(check_xml_parsing, xml_items))
    errors.extend(run_checkers(check_xml_parsing, html_items))
    errors.extend(run_checkers(check_raster_images, raster_images))

    # cssutils is not thread safe
    for name, mt, raw in stylesheets:
        errors.extend(check_css_parsing(name, raw))

    for name, mt, raw in html_items:
        root = container.parsed(name)
        # <style> elements holding CSS text.
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                errors.extend(check_css_parsing(
                    name, style.text, line_offset=style.sourceline - 1))
        # Inline style="..." attributes, parsed as bare declarations.
        for elem in root.xpath('//*[@style]'):
            inline_css = elem.get('style')
            if inline_css:
                errors.extend(check_css_parsing(
                    name, inline_css, line_offset=elem.sourceline - 1,
                    is_declaration=True))

    errors += check_mimetypes(container)
    errors += check_links(container) + check_link_destinations(container)
    errors += check_fonts(container)
    errors += check_filenames(container)
    errors += check_ids(container)
    errors += check_opf(container)

    return errors
def fix_opf(self, container):
    """Repair and tidy the OPF of *container*.

    Adds every OEB document missing from the spine, drops repeated spine
    entries, points ``<meta name="cover">`` at the manifest id of the cover
    file, appends a ``dc:description`` element, removes ``search.html`` and
    any file ``check_links`` reports as unreferenced, then pretty prints the
    OPF and marks it dirty.

    :param container: book container whose OPF is modified in place
    :raises KeyError: if a file name (including the cover,
        ``'_static/' + self.config.epub_cover[0]``) has no entry in the
        manifest id map
    """
    spine_names = {n for n, _linear in container.spine_names}
    spine = container.opf_xpath('//opf:spine')[0]
    # Reverse of manifest_id_map: file name -> manifest id.
    # .items() rather than .iteritems(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3.
    rmap = {v: k for k, v in container.manifest_id_map.items()}

    # Add unreferenced text files to the spine
    for name, mt in container.mime_map.items():
        if mt in OEB_DOCS and name not in spine_names:
            spine_names.add(name)
            container.insert_into_xml(
                spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

    # Remove duplicate entries from spine
    seen = set()
    for item, name, _linear in container.spine_iter:
        if name in seen:
            container.remove_from_xml(item)
        seen.add(name)

    # Ensure that the meta cover tag is correct
    cover_id = rmap['_static/' + self.config.epub_cover[0]]
    for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        meta.set('content', cover_id)

    # Add description metadata
    metadata = container.opf_xpath('//opf:metadata')[0]
    container.insert_into_xml(
        metadata, metadata.makeelement(DC('description')))
    # Assumes insert_into_xml placed the new element last -- TODO confirm.
    metadata[-1].text = 'Comprehensive documentation for calibre'

    # Remove search.html since it is useless in EPUB
    container.remove_item('search.html')

    # Remove unreferenced files
    for error in check_links(container):
        # Exact class match: subclasses of UnreferencedResource are kept.
        if error.__class__ is UnreferencedResource:
            container.remove_item(error.name)

    # Pretty print the OPF
    pretty_opf(container.parsed(container.opf_name))
    container.dirty(container.opf_name)
def fix_opf(self, container):
    """Fix up the OPF of *container* in place.

    The steps, in order: add OEB documents that are not yet in the spine,
    remove repeated spine entries, set the ``content`` of
    ``<meta name="cover">`` to the cover's manifest id, append a
    ``dc:description`` element, remove ``search.html``, remove files that
    ``check_links`` flags as unreferenced, and pretty print the OPF before
    marking it dirty.

    :param container: book container whose OPF is modified in place
    :raises KeyError: if a looked-up file name, such as the cover at
        ``'_static/' + self.config.epub_cover[0]``, is absent from the
        manifest id map
    """
    spine_names = {n for n, _linear in container.spine_names}
    spine = container.opf_xpath('//opf:spine')[0]
    # File name -> manifest id. dict.iteritems() is Python 2 only, so use
    # .items(), which works on both Python 2 and 3.
    rmap = {v: k for k, v in container.manifest_id_map.items()}

    # Add unreferenced text files to the spine
    for name, mt in container.mime_map.items():
        if mt not in OEB_DOCS or name in spine_names:
            continue
        spine_names.add(name)
        container.insert_into_xml(
            spine, spine.makeelement(OPF('itemref'), idref=rmap[name]))

    # Remove duplicate entries from spine
    seen = set()
    for item, name, _linear in container.spine_iter:
        if name in seen:
            container.remove_from_xml(item)
        seen.add(name)

    # Ensure that the meta cover tag is correct
    cover_id = rmap['_static/' + self.config.epub_cover[0]]
    for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        meta.set('content', cover_id)

    # Add description metadata
    metadata = container.opf_xpath('//opf:metadata')[0]
    container.insert_into_xml(
        metadata, metadata.makeelement(DC('description')))
    # Presumably the element just inserted is the last child -- verify
    # against insert_into_xml.
    metadata[-1].text = 'Comprehensive documentation for calibre'

    # Remove search.html since it is useless in EPUB
    container.remove_item('search.html')

    # Remove unreferenced files
    for error in check_links(container):
        # Identity test on the class, so subclasses are not removed.
        if error.__class__ is UnreferencedResource:
            container.remove_item(error.name)

    # Pretty print the OPF
    pretty_opf(container.parsed(container.opf_name))
    container.dirty(container.opf_name)
def fix_opf(self, container):
    """Rework the OPF of *container* for EPUB 3.

    Appends missing OEB documents to the spine, removes repeated spine
    entries, deletes the ``<guide>`` and ``<meta name="cover">`` elements,
    sets ``properties="cover-image"`` on the cover's manifest item, removes
    files that ``check_links`` reports as unreferenced, then pretty prints
    the OPF and marks it dirty.

    :param container: book container whose OPF is modified in place
    """
    known_spine = set(entry for entry, _linear in container.spine_names)
    spine = container.opf_xpath('//opf:spine')[0]
    # Inverted manifest_id_map: file name -> manifest id
    manifest_ids = {}
    for item_id, path in iteritems(container.manifest_id_map):
        manifest_ids[path] = item_id

    # Add unreferenced text files to the spine
    for path, media_type in iteritems(container.mime_map):
        if media_type in OEB_DOCS:
            if path not in known_spine:
                known_spine.add(path)
                new_ref = spine.makeelement(
                    OPF('itemref'), idref=manifest_ids[path])
                container.insert_into_xml(spine, new_ref)

    # Remove duplicate entries from spine
    encountered = set()
    for spine_item, path, _linear in container.spine_iter:
        if path in encountered:
            container.remove_from_xml(spine_item)
            continue
        encountered.add(path)

    # Remove the <guide> which is not needed in EPUB 3
    for guide_elem in container.opf_xpath('//*[local-name()="guide"]'):
        guide_elem.getparent().remove(guide_elem)

    # Ensure that the cover-image property is set
    cover_path = '_static/' + self.config.epub_cover[0]
    cover_id = manifest_ids[cover_path]
    cover_items = container.opf_xpath(
        '//opf:item[@id="{}"]'.format(cover_id))
    for cover_item in cover_items:
        cover_item.set('properties', 'cover-image')

    # Remove any <meta cover> tag as it is not needed in epub 3
    for cover_meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        cover_meta.getparent().remove(cover_meta)

    # Remove unreferenced files
    for issue in check_links(container):
        if issue.__class__ is not UnreferencedResource:
            continue
        container.remove_item(issue.name)

    # Pretty print the OPF
    opf_name = container.opf_name
    pretty_opf(container.parsed(opf_name))
    container.dirty(container.opf_name)