def _parseData(self):
    """Populate the parsed OPF state on ``self`` from the tag stream.

    Consumes ``self._opf_tag_iter()``, which yields
    ``(prefix, tname, tattr, tcontent)`` tuples, and stores each recognised
    tag:

    * ``package``   -> ``self.package`` as ``(version, unique-id, other attrs)``
    * ``metadata``  -> ``self.metadata_attr``
    * ``meta`` / ``link`` / ``dc:*`` -> appended to ``self.metadata``; a
      ``name="cover"`` entry also sets ``self.cover_id``
    * manifest ``item`` -> the ``manifest_id_to_*`` / ``href_to_manifest_id`` maps
    * ``spine`` / ``itemref`` -> ``self.spine_ppd`` and ``self.spine``
    * guide ``reference`` -> ``self.guide``
    * bindings ``mediaType`` -> ``self.bindings`` (stored but ignored for now)

    The attribute dicts are consumed with ``pop``, so the dicts handed out by
    the iterator are mutated.
    """
    auto_id_seq = 0
    for prefix, tname, tattr, tcontent in self._opf_tag_iter():
        if self._debug:
            print(" Parsing OPF: ", prefix, tname, tattr, tcontent)

        if tname == "package":
            version = tattr.pop("version", "2.0")
            unique_id = tattr.pop("unique-identifier", "bookid")
            self.package = (version, unique_id, tattr)

        elif tname == "metadata":
            self.metadata_attr = tattr

        # NOTE: "and" binds tighter than "or": meta/link are accepted under
        # any parent, only dc:* is restricted to a metadata prefix.
        elif tname in ("meta", "link") or (
                tname.startswith("dc:") and "metadata" in prefix):
            self.metadata.append((tname, tattr, tcontent))
            if tattr.get("name", "") == "cover":
                self.cover_id = tattr.get("content", None)

        elif tname == "item" and prefix.endswith("manifest"):
            # the counter advances for every item, whether or not the
            # generated fallback id ends up being used
            generated_id = "xid%03d" % auto_id_seq
            auto_id_seq += 1
            item_id = tattr.pop("id", generated_id)
            raw_href = tattr.pop("href", '')
            media_type = tattr.pop("media-type", '')
            if media_type == "text/html":
                media_type = "application/xhtml+xml"
            item_href = unquoteurl(raw_href)
            item_props = tattr.pop("properties", None)
            self.manifest_id_to_href[item_id] = item_href
            self.manifest_id_to_mime[item_id] = media_type
            self.href_to_manifest_id[item_href] = item_id
            self.manifest_id_to_properties[item_id] = item_props

        elif tname == "spine":
            if tattr is not None:
                self.spine_ppd = tattr.get("page-progression-direction", None)

        elif tname == "itemref" and prefix.endswith("spine"):
            target = tattr.pop("idref", "")
            linear_flag = tattr.pop("linear", None)
            ref_props = tattr.pop("properties", None)
            self.spine.append((target, linear_flag, ref_props))

        elif tname == "reference" and prefix.endswith("guide"):
            ref_type = tattr.pop("type", '')
            ref_title = tattr.pop("title", '')
            ref_href = unquoteurl(tattr.pop("href", ''))
            self.guide.append((ref_type, ref_title, ref_href))

        elif tname in ("mediaType", "mediatype") and prefix.endswith("bindings"):
            bound_mime = tattr.pop("media-type", "")
            bound_handler = tattr.pop("handler", "")
            self.bindings.append((bound_mime, bound_handler))
def readotherfile(self, book_href):
    """Return the contents of a non-manifest ("other") file of the book.

    ``book_href`` is passed through ``unicode_str`` and ``unquoteurl`` and
    the result is used as the lookup key.

    Returns the raw bytes read from disk, converted with ``unicode_str`` when
    the file extension maps to a mime type listed in ``TEXT_MIMETYPES``. For
    a modified ``OEBPS/content.opf`` the result of ``self.build_opf()`` is
    returned instead.

    Raises ``WrapperException`` when the key is ``None``, belongs to the
    manifest (use ``readfile``), is unknown, or the file is missing on disk.
    """
    href_key = unquoteurl(unicode_str(book_href))
    if href_key is None:
        raise WrapperException('None is not a valid book href')
    if href_key not in self.other and href_key in self.id_to_href:
        raise WrapperException('Incorrect interface routine - use readfile')

    # special case: a modified opf is rebuilt rather than read from disk
    if href_key == "OEBPS/content.opf" and href_key in self.modified:
        return self.build_opf()

    rel_path = self.book_href_to_filepath.get(href_key, None)
    if rel_path is None:
        raise WrapperException('Book href does not exist')

    # added or modified files live under outdir, everything else under ebook_root
    root = self.ebook_root
    if href_key in self.added or href_key in self.modified:
        root = self.outdir
    full_path = os.path.join(root, rel_path)
    if not unipath.exists(full_path):
        raise WrapperException('File Does Not Exist')

    suffix = os.path.splitext(os.path.basename(full_path))[1].lower()
    mime = ext_mime_map.get(suffix, "")
    with open(full_path, 'rb') as fp:
        payload = fp.read()
    if mime in TEXT_MIMETYPES:
        payload = unicode_str(payload)
    return payload
def getmime(self, href):
    """Return the mime type registered for the extension of ``href``.

    ``href`` is passed through ``unicode_str`` and ``unquoteurl``; the
    lower-cased extension of its basename is then looked up in
    ``ext_mime_map``. An unknown extension yields the empty string.
    """
    plain_href = unquoteurl(unicode_str(href))
    suffix = os.path.splitext(os.path.basename(plain_href))[1]
    return ext_mime_map.get(suffix.lower(), "")
def readotherfile(self, book_href):
    """Read a file that is tracked as "other" (not part of the manifest).

    The href is passed through ``unicode_str`` and ``unquoteurl`` before
    any lookup. A modified ``OEBPS/content.opf`` is served from
    ``self.build_opf()``; every other file is read from disk in binary mode
    and converted with ``unicode_str`` only when its extension maps to a
    mime type in ``TEXT_MIMETYPES``.

    Raises ``WrapperException`` for a ``None`` href, a manifest href (use
    ``readfile``), an unknown href, or a file missing on disk.
    """
    key = unicode_str(book_href)
    key = unquoteurl(key)
    if key is None:
        raise WrapperException('None is not a valid book href')
    if key in self.id_to_href and key not in self.other:
        raise WrapperException(
            'Incorrect interface routine - use readfile')

    # the opf is regenerated once it has been modified
    opf_was_modified = key == "OEBPS/content.opf" and key in self.modified
    if opf_was_modified:
        return self.build_opf()

    relative = self.book_href_to_filepath.get(key, None)
    if relative is None:
        raise WrapperException('Book href does not exist')

    # files touched in this session are stored in outdir
    base_dir = self.ebook_root
    if key in self.added or key in self.modified:
        base_dir = self.outdir
    target = os.path.join(base_dir, relative)
    if not unipath.exists(target):
        raise WrapperException('File Does Not Exist')

    name = os.path.basename(target)
    extension = os.path.splitext(name)[1].lower()
    mime = ext_mime_map.get(extension, "")
    with open(target, 'rb') as handle:
        content = handle.read()
    return unicode_str(content) if mime in TEXT_MIMETYPES else content
def deleteotherfile(self, book_href):
    """Delete a non-manifest ("other") file from the book.

    ``book_href`` is passed through ``unicode_str`` and ``unquoteurl`` and
    the result is used as the lookup key.

    A file that was added or modified in this session is removed from
    ``self.outdir``. A file that was only added in this session is simply
    forgotten; any other file is recorded in ``self.deleted`` as
    ``('other', key, book_href)``. The key is dropped from ``self.other``,
    ``self.modified`` and ``self.book_href_to_filepath``.

    Raises ``WrapperException`` when the key is ``None``, belongs to the
    manifest (use ``deletefile``), is unknown, or is in ``PROTECTED_FILES``.
    """
    href_key = unquoteurl(unicode_str(book_href))
    if href_key is None:
        # message aligned with readotherfile (was 'book hrefbook href')
        raise WrapperException('None is not a valid book href')
    if href_key not in self.other and href_key in self.id_to_href:
        raise WrapperException(
            'Incorrect interface routine - use deletefile')
    rel_path = self.book_href_to_filepath.get(href_key, None)
    if rel_path is None:
        raise WrapperException('Book href does not exist')
    if href_key in PROTECTED_FILES:
        raise WrapperException('attempt to delete protected file')

    record_deletion = True
    # if file was added or modified delete file from outdir
    if href_key in self.added or href_key in self.modified:
        out_path = os.path.join(self.outdir, rel_path)
        if unipath.exists(out_path) and unipath.isfile(out_path):
            os.remove(out_path)
        if href_key in self.added:
            # never part of the original book, so nothing to report as deleted
            self.added.remove(href_key)
            record_deletion = False

    # always drop the href from the bookkeeping collections
    if href_key in self.other:
        self.other.remove(href_key)
    if href_key in self.modified:
        del self.modified[href_key]
    if record_deletion:
        self.deleted.append(('other', href_key, book_href))
    del self.book_href_to_filepath[href_key]
def deleteotherfile(self, book_href):
    """Remove an "other" (non-manifest) file from the book.

    The href is passed through ``unicode_str`` and ``unquoteurl`` first.
    Files added or modified in this session are deleted from
    ``self.outdir``. Unless the file was only added in this session, the
    deletion is recorded in ``self.deleted`` as ``('other', key, book_href)``.
    The key is then removed from ``self.other``, ``self.modified`` and
    ``self.book_href_to_filepath``.

    Raises ``WrapperException`` for a ``None`` href, a manifest href (use
    ``deletefile``), an unknown href, or a member of ``PROTECTED_FILES``.
    """
    key = unicode_str(book_href)
    key = unquoteurl(key)
    if key is None:
        # wording fixed to match readotherfile (was 'book hrefbook href')
        raise WrapperException('None is not a valid book href')
    if key in self.id_to_href and key not in self.other:
        raise WrapperException('Incorrect interface routine - use deletefile')
    relative = self.book_href_to_filepath.get(key, None)
    if relative is None:
        raise WrapperException('Book href does not exist')
    if key in PROTECTED_FILES:
        raise WrapperException('attempt to delete protected file')

    was_added = key in self.added
    # if file was added or modified delete file from outdir
    if was_added or key in self.modified:
        target = os.path.join(self.outdir, relative)
        if unipath.exists(target) and unipath.isfile(target):
            os.remove(target)
        if was_added:
            self.added.remove(key)

    # bookkeeping is cleaned up for every deleted href
    if key in self.other:
        self.other.remove(key)
    if key in self.modified:
        del self.modified[key]
    # a file that only ever existed in this session is not reported as deleted
    if not was_added:
        self.deleted.append(('other', key, book_href))
    del self.book_href_to_filepath[key]
def _parseData(self):
    """Fill the OPF-derived attributes of ``self`` from the tag stream.

    Each ``(prefix, tname, tattr, tcontent)`` tuple yielded by
    ``self._opf_tag_iter()`` is dispatched on its tag name:

    * ``package`` / ``metadata`` -> ``self.package_tag`` / ``self.metadata_tag``
      as ``[tname, tattr]``
    * ``meta`` or ``dc:*`` -> appended to ``self.metadata`` as a list; a
      ``name="cover"`` entry also sets ``self.cover_id``
    * manifest ``item`` -> id/href/mime lookup maps (``text/html`` is stored
      as ``application/xhtml+xml``)
    * ``spine`` / ``itemref`` -> ``self.spine_ppd`` and ``(idref, linear)``
      tuples in ``self.spine``
    * guide ``reference`` -> ``(type, title, href)`` tuples in ``self.guide``
    """
    for prefix, tname, tattr, tcontent in self._opf_tag_iter():
        if self._debug:
            print(" Parsing OPF: ", prefix, tname, tattr, tcontent)

        if tname == "package":
            self.package_tag = [tname, tattr]

        elif tname == "metadata":
            self.metadata_tag = [tname, tattr]

        # "and" binds tighter than "or": meta is accepted under any parent
        elif tname == "meta" or (
                tname.startswith("dc:") and "metadata" in prefix):
            self.metadata.append([tname, tattr, tcontent])
            if tattr.get("name", "") == "cover":
                self.cover_id = tattr.get("content", None)

        elif tname == "item" and prefix.endswith("manifest"):
            item_id = tattr.pop("id", '')
            raw_href = tattr.pop("href", '')
            media_type = tattr.pop("media-type", '')
            if media_type == "text/html":
                media_type = "application/xhtml+xml"
            item_href = unquoteurl(raw_href)
            self.manifest_id_to_href[item_id] = item_href
            self.manifest_id_to_mime[item_id] = media_type
            self.href_to_manifest_id[item_href] = item_id

        elif tname == "spine":
            if tattr is not None:
                self.spine_ppd = tattr.get("page-progression-direction", None)

        elif tname == "itemref" and prefix.endswith("spine"):
            # NOTE: per-itemref attributes other than idref/linear (EPUB 3
            # page properties) are not captured here.
            target = tattr.pop("idref", None)
            linear_flag = tattr.pop("linear", None)
            self.spine.append((target, linear_flag))

        elif tname == "reference" and prefix.endswith("guide"):
            ref_type = tattr.pop("type", '')
            ref_title = tattr.pop("title", '')
            ref_href = unquoteurl(tattr.pop("href", ''))
            self.guide.append((ref_type, ref_title, ref_href))
def _parseData(self):
    """Fill the OPF-derived attributes of ``self`` from the tag stream.

    Each ``(prefix, tname, tattr, tcontent)`` tuple yielded by
    ``self._opf_tag_iter()`` is handled by tag name:

    * ``package`` / ``metadata`` -> ``self.package_tag`` / ``self.metadata_tag``
    * ``meta`` or ``dc:*`` -> appended to ``self.metadata``; a
      ``name="cover"`` entry also sets ``self.cover_id``
    * manifest ``item`` -> id/href/mime lookup maps
    * ``spine`` -> ``self.spine_ppd`` from ``page-progression-direction``
    * spine ``itemref`` -> ``(idref, linear)`` tuples in ``self.spine``
    * guide ``reference`` -> ``(type, title, href)`` tuples in ``self.guide``
    """
    for prefix, tname, tattr, tcontent in self._opf_tag_iter():
        if self._debug:
            print(" Parsing OPF: ", prefix, tname, tattr, tcontent)

        # package
        if tname == "package":
            self.package_tag = [tname, tattr]

        # metadata
        elif tname == "metadata":
            self.metadata_tag = [tname, tattr]

        # "and" binds tighter than "or": meta is accepted under any parent
        elif tname == "meta" or (
                tname.startswith("dc:") and "metadata" in prefix):
            self.metadata.append([tname, tattr, tcontent])
            if tattr.get("name", "") == "cover":
                self.cover_id = tattr.get("content", None)

        # manifest
        elif tname == "item" and prefix.endswith("manifest"):
            item_id = tattr.pop("id", '')
            item_href = unquoteurl(tattr.pop("href", ''))
            media_type = tattr.pop("media-type", '')
            self.manifest_id_to_href[item_id] = item_href
            self.manifest_id_to_mime[item_id] = media_type
            self.href_to_manifest_id[item_href] = item_id

        # spine
        elif tname == "spine":
            if tattr is not None:
                # attribute name was misspelled ("progession"), which made
                # spine_ppd always None
                self.spine_ppd = tattr.get("page-progression-direction", None)

        elif tname == "itemref" and prefix.endswith("spine"):
            # NOTE: per-itemref attributes other than idref/linear (EPUB 3
            # page properties) are not captured here.
            target = tattr.pop("idref", None)
            linear_flag = tattr.pop("linear", None)
            self.spine.append((target, linear_flag))

        # guide
        elif tname == "reference" and prefix.endswith("guide"):
            ref_type = tattr.pop("type", '')
            ref_title = tattr.pop("title", '')
            ref_href = unquoteurl(tattr.pop("href", ''))
            self.guide.append((ref_type, ref_title, ref_href))
def setguide(self, new_guide):
    """Replace the guide with the validated entries of ``new_guide``.

    ``new_guide`` is an iterable of ``(type, title, href)`` triples. Each
    field is passed through ``unicode_str`` (the href also through
    ``unquoteurl``). A type that is not in ``_guide_types`` is given the
    ``other.`` prefix unless it already has it. A ``None`` title becomes
    ``'title missing'``.

    The guide and the modified-opf marker are only updated after every
    entry has been validated.

    Raises ``WrapperException`` when an href (ignoring any ``#fragment``)
    is not present in ``self.href_to_id``.
    """
    validated = []
    for (ref_type, ref_title, ref_href) in new_guide:
        ref_type = unicode_str(ref_type)
        ref_title = unicode_str(ref_title)
        ref_href = unquoteurl(unicode_str(ref_href))
        # Only add the prefix once: a guide read back from an opf may
        # already contain "other.xxx" types, which must not turn into
        # "other.other.xxx".
        if ref_type not in _guide_types and not ref_type.startswith("other."):
            ref_type = "other." + ref_type
        if ref_title is None:
            ref_title = 'title missing'
        # validate against the manifest without the fragment identifier
        target = ref_href.split('#')[0]
        if target not in self.href_to_id:
            raise WrapperException('guide href not in manifest')
        validated.append((ref_type, ref_title, ref_href))
    self.guide = validated
    self.modified['OEBPS/content.opf'] = 'file'
def writeotherfile(self, book_href, data):
    """Overwrite an existing non-manifest ("other") file with ``data``.

    ``book_href`` is passed through ``unicode_str`` and ``unquoteurl``.
    The file is written below ``self.outdir`` (missing parent directories
    are created) and the href is flagged in ``self.modified``. Text data
    (``text_type``) is converted with ``utf8_str`` before writing.

    Raises ``WrapperException`` when the href belongs to the manifest (use
    ``writefile``), is unknown, or is in ``PROTECTED_FILES``.
    """
    href_key = unquoteurl(unicode_str(book_href))
    if href_key in self.id_to_href:
        raise WrapperException('Incorrect interface routine - use writefile')
    rel_path = self.id_to_filepath.get(href_key, None)
    if rel_path is None:
        raise WrapperException('book href does not exist')
    if href_key in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')

    out_path = os.path.join(self.outdir, rel_path)
    parent_dir = os.path.dirname(out_path)
    if not unipath.exists(parent_dir):
        os.makedirs(parent_dir)

    payload = utf8_str(data) if isinstance(data, text_type) else data
    with open(out_path, 'wb') as fp:
        fp.write(payload)
    self.modified[href_key] = 'file'
def addotherfile(self, book_href, data):
    """Add a new non-manifest ("other") file to the book.

    ``book_href`` is passed through ``unicode_str`` and ``unquoteurl``;
    its ``/`` separators are mapped to ``os.sep`` to form the path below
    ``self.outdir``. Text data (``text_type``) is converted with
    ``utf8_str`` before being written. On success the href is appended to
    ``self.other`` and ``self.added`` and registered in
    ``self.id_to_filepath``.

    Raises ``WrapperException`` when the href is already in ``self.other``
    or a file already exists at the target path.
    """
    href_key = unquoteurl(unicode_str(book_href))
    if href_key in self.other:
        # spelling of the message fixed (was 'unquie')
        raise WrapperException('book href must be unique')

    desired_path = href_key.replace("/", os.sep)
    out_path = os.path.join(self.outdir, desired_path)
    if unipath.isfile(out_path):
        raise WrapperException('desired path already exists')

    parent_dir = os.path.dirname(out_path)
    if not unipath.exists(parent_dir):
        os.makedirs(pathof(parent_dir))

    payload = utf8_str(data) if isinstance(data, text_type) else data
    with open(pathof(out_path), 'wb') as fp:
        fp.write(payload)

    self.other.append(href_key)
    self.added.append(href_key)
    self.id_to_filepath[href_key] = desired_path
def addotherfile(self, book_href, data) : id = unicode_str(book_href) id = unquoteurl(id) if id in self.other: raise WrapperException('book href must be unquie') desired_path = id.replace("/",os.sep) filepath = os.path.join(self.outdir,desired_path) if unipath.isfile(filepath): raise WrapperException('desired path already exists') base = os.path.dirname(filepath) if not unipath.exists(base): os.makedirs(pathof(base)) if isinstance(data, text_type): data = utf8_str(data) with open(pathof(filepath),'wb')as fp: fp.write(data) self.other.append(id) self.added.append(id) self.id_to_filepath[id] = desired_path
def map_href_to_id(self, href, ow):
    """Return the id that ``self.href_to_id`` holds for ``href``.

    ``href`` is passed through ``unicode_str`` and ``unquoteurl`` before
    the lookup; ``ow`` is returned when no entry exists.
    """
    lookup_key = unquoteurl(unicode_str(href))
    return self.href_to_id.get(lookup_key, ow)
def map_href_to_id(self, href, ow):
    """Translate an href into its id, falling back to ``ow``.

    The href is passed through ``unicode_str`` and then ``unquoteurl``;
    the result is looked up in ``self.href_to_id``.
    """
    return self.href_to_id.get(unquoteurl(unicode_str(href)), ow)
def _parseData(self):
    """Populate the parsed OPF state on ``self`` from the tag stream.

    Consumes ``self._opf_tag_iter()``, which yields
    ``(prefix, tname, tattr, tcontent)`` tuples, and stores each recognised
    tag:

    * ``package``   -> ``self.package`` as ``(version, unique-id, other attrs)``
    * ``metadata``  -> ``self.metadata_attr``
    * ``meta`` / ``link`` / ``dc:*`` -> appended to ``self.metadata``; a
      ``name="cover"`` entry also sets ``self.cover_id``
    * manifest ``item`` -> the ``manifest_id_to_*`` / ``href_to_manifest_id`` maps
    * ``spine`` / ``itemref`` -> ``self.spine_ppd`` and ``self.spine``
    * guide ``reference`` -> ``self.guide``
    * bindings ``mediaType`` -> ``self.bindings`` (stored but ignored for now)

    The attribute dicts are consumed with ``pop``, so the dicts handed out by
    the iterator are mutated.
    """
    item_seq = 0
    for prefix, tname, tattr, tcontent in self._opf_tag_iter():
        if self._debug:
            print(" Parsing OPF: ", prefix, tname, tattr, tcontent)

        # package
        if tname == "package":
            version = tattr.pop("version", "2.0")
            unique_id = tattr.pop("unique-identifier", "bookid")
            self.package = (version, unique_id, tattr)
            continue

        # metadata
        if tname == "metadata":
            self.metadata_attr = tattr
            continue

        # "and" binds tighter than "or": meta/link are accepted under any
        # parent, only dc:* is restricted to a metadata prefix
        if tname in ("meta", "link") or (
                tname.startswith("dc:") and "metadata" in prefix):
            self.metadata.append((tname, tattr, tcontent))
            if tattr.get("name", "") == "cover":
                self.cover_id = tattr.get("content", None)
            continue

        # manifest
        if tname == "item" and prefix.endswith("manifest"):
            # the counter advances for every item, whether or not the
            # generated fallback id ends up being used
            fallback_id = "xid%03d" % item_seq
            item_seq += 1
            item_id = tattr.pop("id", fallback_id)
            raw_href = tattr.pop("href", '')
            media_type = tattr.pop("media-type", '')
            if media_type == "text/html":
                media_type = "application/xhtml+xml"
            item_href = unquoteurl(raw_href)
            item_props = tattr.pop("properties", None)
            self.manifest_id_to_href[item_id] = item_href
            self.manifest_id_to_mime[item_id] = media_type
            self.href_to_manifest_id[item_href] = item_id
            self.manifest_id_to_properties[item_id] = item_props
            continue

        # spine
        if tname == "spine":
            if tattr is not None:
                self.spine_ppd = tattr.get("page-progression-direction", None)
            continue

        if tname == "itemref" and prefix.endswith("spine"):
            target = tattr.pop("idref", "")
            linear_flag = tattr.pop("linear", None)
            ref_props = tattr.pop("properties", None)
            self.spine.append((target, linear_flag, ref_props))
            continue

        # guide
        if tname == "reference" and prefix.endswith("guide"):
            ref_type = tattr.pop("type", '')
            ref_title = tattr.pop("title", '')
            ref_href = unquoteurl(tattr.pop("href", ''))
            self.guide.append((ref_type, ref_title, ref_href))
            continue

        # bindings (stored but ignored for now)
        # The child element of <bindings> is <mediaType>; the former plural
        # spelling ("mediaTypes") never matched, so bindings were dropped.
        if tname in ("mediaType", "mediatype") and prefix.endswith("bindings"):
            bound_mime = tattr.pop("media-type", "")
            bound_handler = tattr.pop("handler", "")
            self.bindings.append((bound_mime, bound_handler))
            continue