def _read_opf(self):
    """
    Read the OPF document from the book's container and return its cleaned
    root element.

    The raw content is decoded, every match of ``XMLDECL_RE`` is removed and
    the legacy OEB 1.0 package namespace URI is replaced with ``OPF1_NS``
    before parsing. If strict parsing fails, increasingly aggressive repairs
    are tried in order, each logging a warning when it is the one that
    succeeds:

    1. clean the text with ``clean_xml_chars`` and ``xml_replace_entities``;
    2. drop the ``<tours>`` section and declare the ``dc`` prefix on
       ``<dc-metadata>``;
    3. parse with ``RECOVER_PARSER``.

    :return: the result of ``self._clean_opf`` applied to the parsed root.
    :raises OEBError: if the root element's namespace is not empty,
        ``OPF1_NS`` or ``OPF2_NS``.
    """
    text = self.oeb.decode(self.oeb.container.read(None))
    text = XMLDECL_RE.sub('', text)
    # Normalise the old OEB 1.0 package namespace (with any number of
    # trailing slashes) to the canonical OPF 1 namespace.
    text = re.sub(
        r'http://openebook.org/namespaces/oeb-package/1.0(/*)', OPF1_NS, text)
    try:
        root = etree.fromstring(text)
    except etree.XMLSyntaxError:
        # Repair 1: strip bad characters and resolve named entities.
        text = xml_replace_entities(clean_xml_chars(text), encoding=None)
        try:
            root = etree.fromstring(text)
            self.logger.warn('OPF contains invalid HTML named entities')
        except etree.XMLSyntaxError:
            # Repair 2: remove the <tours> section entirely and give
            # <dc-metadata> an explicit declaration of the dc prefix.
            text = re.sub(r'(?is)<tours>.+</tours>', '', text)
            text = text.replace(
                '<dc-metadata>',
                '<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
            try:
                root = etree.fromstring(text)
                self.logger.warn('OPF contains invalid tours section')
            except etree.XMLSyntaxError:
                # Repair 3 (last resort): hand the text to the recovering
                # parser; imported lazily as it is only needed here.
                from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
                root = etree.fromstring(text, parser=RECOVER_PARSER)
                self.logger.warn('OPF contains invalid markup, trying to parse it anyway')
    ns = namespace(root.tag)
    if ns not in ('', OPF1_NS, OPF2_NS):
        raise OEBError('Invalid namespace %r for OPF document' % ns)
    return self._clean_opf(root)
def __call__(self, oeb, path):
    """
    Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
    at :param:`path`.

    If :param:`path` ends in ``.opf`` (case-insensitive), its base name is
    used as the file name of the OPF document and its directory as the
    output folder. The output folder is created, including any missing
    parent folders, if it does not already exist.

    Every manifest item is written first, followed by the metadata
    documents produced by ``oeb.to_opf1()`` or ``oeb.to_opf2()`` depending
    on the first character of ``self.version``.

    :raises OEBError: if ``self.version`` does not denote OPF version 1 or
        2. This is checked before anything is written to disk.
    """
    version = int(self.version[0])
    # Fail fast: reject an unsupported version before creating the folder
    # or writing manifest items, so no partial output is left behind.
    if version not in (1, 2):
        raise OEBError("Unrecognized OPF version %r" % self.version)

    opfname = None
    if os.path.splitext(path)[1].lower() == '.opf':
        opfname = os.path.basename(path)
        # A bare "book.opf" has an empty dirname; fall back to the current
        # directory instead of trying to create a folder named ''.
        path = os.path.dirname(path) or os.curdir
    # makedirs creates missing parents, and exist_ok avoids both the
    # explicit isdir() check and the race between that check and creation.
    os.makedirs(path, exist_ok=True)

    output = DirContainer(path, oeb.log)
    for item in oeb.manifest.values():
        output.write(item.href, item.bytes_representation)

    if version == 1:
        metadata = oeb.to_opf1()
    else:
        metadata = oeb.to_opf2(page_map=self.page_map)
    pretty_print = self.pretty_print
    for mime, (href, data) in metadata.items():
        # Honour the OPF file name requested by the caller, if any.
        if opfname and mime == OPF_MIME:
            href = opfname
        output.write(href, xml2str(data, pretty_print=pretty_print))
def _spine_from_opf(self, opf):
    """
    Populate the book's spine from the ``<spine>`` element of ``opf``.

    Each ``itemref`` whose ``idref`` is present in the manifest is added to
    the spine, together with its ``linear`` attribute, when either:

    * its media type (lower-cased) is in ``OEB_DOCS`` and its data has an
      ``xpath`` attribute, or
    * its data has a tag ending in ``}html``, in which case the item's
      media type is changed to ``XHTML_MIME`` first.

    Any other itemref is skipped with a warning. After the extra spine
    items have been added via ``self._spine_add_extra()``, a
    ``page-progression-direction`` of ``ltr`` or ``rtl`` on the spine
    element is copied onto the spine.

    :raises OEBError: if no item ended up in the spine.
    """
    spine = self.oeb.spine
    manifest = self.oeb.manifest
    for itemref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
        ref_id = itemref.get('idref')
        if ref_id not in manifest.ids:
            self.logger.warn('Spine item %r not found' % ref_id)
            continue
        item = manifest.ids[ref_id]

        usable = (item.media_type.lower() in OEB_DOCS
                  and hasattr(item.data, 'xpath'))
        if (not usable and hasattr(item.data, 'tag') and item.data.tag
                and item.data.tag.endswith('}html')):
            # The root element is <html> despite the declared media type:
            # treat the item as XHTML.
            item.media_type = XHTML_MIME
            usable = True

        if not usable:
            self.oeb.log.warn('The item %s is not a XML document.'
                              ' Removing it from spine.' % item.href)
            continue
        spine.add(item, itemref.get('linear'))

    if len(spine) == 0:
        raise OEBError("Spine is empty")
    self._spine_add_extra()

    directions = xpath(opf, '/o2:package/o2:spine/@page-progression-direction')
    for direction in directions:
        if direction in {'ltr', 'rtl'}:
            spine.page_progression_direction = direction
def _spine_from_opf(self, opf):
    """
    Populate the book's spine from the ``<spine>`` element of ``opf``.

    Each ``itemref`` is looked up in the manifest by its ``idref``. An
    unknown id is skipped with a warning. A known item is added to the
    spine, together with the itemref's ``linear`` attribute, only if its
    media type (lower-cased) is in ``OEB_DOCS`` and its data has an
    ``xpath`` attribute; otherwise it is left out with a warning. Finally
    ``self._spine_add_extra()`` is called.

    :raises OEBError: if no item ended up in the spine.
    """
    spine = self.oeb.spine
    manifest = self.oeb.manifest
    for itemref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
        ref_id = itemref.get('idref')
        if ref_id not in manifest.ids:
            self.logger.warn(u'Spine item %r not found' % ref_id)
            continue

        item = manifest.ids[ref_id]
        is_document = (item.media_type.lower() in OEB_DOCS
                       and hasattr(item.data, 'xpath'))
        if not is_document:
            self.oeb.log.warn('The item %s is not a XML document.'
                              ' Removing it from spine.' % item.href)
            continue

        spine.add(item, itemref.get('linear'))

    if len(spine) == 0:
        raise OEBError("Spine is empty")
    self._spine_add_extra()