def parseExceptionsFile(filename):
    """Read the apostrophe exception file and return its non-empty lines.

    The encoding is picked by checking for a UTF-8 BOM and otherwise by
    trial-decoding the whole file with a short list of candidate encodings.
    Trailing whitespace is stripped from every line and empty lines are
    dropped.  If the final read fails, a message is printed and an empty
    list is returned.
    """
    safename = utf8_str(filename)
    path = pathof(safename)
    words_list = []

    # A few leading bytes are enough to detect a byte order mark.
    snippet = min(32, os.path.getsize(path))
    with open(path, 'rb') as rawfile:
        raw = rawfile.read(snippet)

    if raw.startswith(codecs.BOM_UTF8):
        enc = 'utf-8-sig'
    else:
        # The comma after 'utf-16' used to be missing, which fused two
        # names into the unknown codec 'utf-16windows-1252'.
        encodings = ['utf-8', 'utf-16', 'windows-1252', 'windows-1250']
        for e in encodings:
            try:
                with file_open(path, 'r', encoding=e) as fh:
                    fh.readlines()
            except UnicodeError:
                # UnicodeError (not only UnicodeDecodeError): the utf-16
                # stream decoder raises the base class when there is no BOM.
                print('Got unicode error with %s , trying different encoding' % e)
            else:
                break
        # When every candidate failed this is simply the last one tried;
        # the read below then reports the problem.
        enc = e

    try:
        with file_open(path, 'r', encoding=enc) as fd:
            words_list = [line.rstrip() for line in fd]
        words_list = [_f for _f in words_list if _f]
        print('Parsing apostrophe exception file %s' % filename)
    except Exception:
        # Best effort: a broken exception file must not stop the caller.
        print('Error parsing apostrophe exception file %s: ignoring' % filename)
        words_list = []
    return words_list
def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd):
    """Extract the HD image stored in CRES section i.

    The first 12 bytes of data are skipped before the image type is
    sniffed.  With use_hd the image is written into files.imgdir under the
    name found at rscnames[rsc_ptr]; otherwise it is written to
    files.hdimgdir under a generated HDimage name.  None is appended to
    rscnames in every case.

    Returns (rscnames, rsc_ptr + 1).
    """
    global DUMP
    payload = data[12:]
    kind = get_image_type(None, payload)

    if kind is not None:
        if not use_hd:
            imgname = "HDimage%05d.%s" % (i-beg+1, kind)
            imgdest = files.hdimgdir
        else:
            # overwrite corresponding lower res image with hd version
            imgname = rscnames[rsc_ptr]
            imgdest = files.imgdir
        print("Extracting HD image: {0:s} from section {1:d}".format(imgname,i-beg+1))
        target = os.path.join(imgdest, imgname)
        with open(pathof(target), 'wb') as out:
            out.write(payload)
        rscnames.append(None)
        sect.setsectiondescription(i,"Optional HD Image {0:s}".format(imgname))
        return rscnames, rsc_ptr + 1

    # Not a recognised image: describe it and optionally dump the raw bytes.
    print("Warning: CRES Section %s does not contain a recognised resource" % i)
    rscnames.append(None)
    sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s" % describe(payload[0:4]))
    if DUMP:
        fname = "unknown%05d.dat" % i
        dump_path = os.path.join(files.outdir, fname)
        with open(pathof(dump_path), 'wb') as out:
            out.write(payload)
        sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s extracting as %s" % (describe(payload[0:4]), fname))
    return rscnames, rsc_ptr + 1
def copy_book_contents_to(self, destdir):
    """Write every file known to this container below destdir.

    Files listed in self.id_to_filepath are read with self.readfile and
    files listed in self.book_href_to_filepath with self.readotherfile.
    Each is written to its relative path under destdir; missing parent
    folders are created and text data is converted with utf8_str first.

    Raises:
        WrapperException: destdir is None or is not an existing directory.
    """
    destdir = unicode_str(destdir)
    if destdir is None or not unipath.isdir(destdir):
        raise WrapperException('destination directory does not exist')

    def _store(rpath, data):
        # Write one file below destdir, creating parent folders on demand.
        filepath = os.path.join(destdir, rpath)
        base = os.path.dirname(filepath)
        if not unipath.exists(base):
            os.makedirs(base)
        if isinstance(data, text_type):
            data = utf8_str(data)
        with open(pathof(filepath), 'wb') as fp:
            fp.write(data)

    for file_id, rpath in self.id_to_filepath.items():
        _store(rpath, self.readfile(file_id))
    for book_href, rpath in self.book_href_to_filepath.items():
        _store(rpath, self.readotherfile(book_href))
def copy_book_contents_to(self, destdir):
    """Copy the current contents of the book into the folder destdir.

    Entries of self.id_to_filepath are fetched through self.readfile and
    entries of self.book_href_to_filepath through self.readotherfile.  Every
    file keeps its relative path; parent folders are created when missing
    and text data is passed through utf8_str before it is written.

    Raises:
        WrapperException: destdir is None or is not an existing directory.
    """
    destdir = unicode_str(destdir)
    if destdir is None or not unipath.isdir(destdir):
        raise WrapperException('destination directory does not exist')

    def _write_under_destdir(rpath, data):
        # Shared by both loops below so the write logic exists only once.
        filepath = os.path.join(destdir, rpath)
        base = os.path.dirname(filepath)
        if not unipath.exists(base):
            os.makedirs(base)
        if isinstance(data, text_type):
            data = utf8_str(data)
        with open(pathof(filepath), 'wb') as fp:
            fp.write(data)

    for key, rpath in self.id_to_filepath.items():
        _write_under_destdir(rpath, self.readfile(key))
    for key, rpath in self.book_href_to_filepath.items():
        _write_under_destdir(rpath, self.readotherfile(key))
def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset):
    """Write section i to files.imgdir as an image and record its name.

    The file is called cover… when i equals beg + cover_offset and image…
    otherwise.  Data that is not a recognised image only gets a section
    description (and a raw dump when DUMP is set); None is appended to
    rscnames in that case.

    Returns (rscnames, rsc_ptr); rsc_ptr is set to i - beg when it was -1
    and an image was written.
    """
    global DUMP
    kind = get_image_type(None, data)

    if kind is not None:
        is_cover = cover_offset is not None and i == beg + cover_offset
        template = "cover%05d.%s" if is_cover else "image%05d.%s"
        imgname = template % (i-beg+1, kind)
        print("Extracting image: {0:s} from section {1:d}".format(imgname,i-beg+1))
        target = os.path.join(files.imgdir, imgname)
        with open(pathof(target), 'wb') as out:
            out.write(data)
        rscnames.append(imgname)
        sect.setsectiondescription(i,"Image {0:s}".format(imgname))
        if rsc_ptr == -1:
            rsc_ptr = i - beg
        return rscnames, rsc_ptr

    print("Warning: Section %s does not contain a recognised resource" % i)
    rscnames.append(None)
    sect.setsectiondescription(i,"Mysterious Section, first four bytes %s" % describe(data[0:4]))
    if DUMP:
        fname = "unknown%05d.dat" % i
        dump_path = os.path.join(files.outdir, fname)
        with open(pathof(dump_path), 'wb') as out:
            out.write(data)
        sect.setsectiondescription(i,"Mysterious Section, first four bytes %s extracting as %s" % (describe(data[0:4]), fname))
    return rscnames, rsc_ptr
def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
    """Initialise the wrapper state for the unpacked book at ebook_root.

    Manifest, spine, guide and metadata information is copied from the
    parsed opf object op when one is given.  The ebook directory is then
    walked to record where every file lives and which files are not in the
    manifest.
    """
    self._debug = debug
    self.ebook_root = pathof(ebook_root)
    # plugins and plugin containers can get name and user plugin dir
    self.plugin_dir = pathof(plugin_dir)
    self.plugin_name = plugin_name
    self.outdir = pathof(outdir)

    # dictionaries used to map opf manifest information
    self.id_to_href = {}
    self.id_to_mime = {}
    self.href_to_id = {}
    self.spine_ppd = None
    self.spine = []
    self.guide = []
    self.package_tag = ''
    self.metadataxml = ''
    self.op = op
    if op is not None:
        # copy in data from parsing of initial opf
        self.opfname = op.opfname
        self.id_to_href = op.get_manifest_id_to_href_dict().copy()
        self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
        self.href_to_id = op.get_href_to_manifest_id_dict().copy()
        self.spine_ppd = op.get_spine_ppd()
        self.spine = op.get_spine()
        self.guide = op.get_guide()
        self.package_tag = op.get_package_tag()
        self.metadataxml = op.get_metadataxml()

    # non-manifest files and bookkeeping of changes
    self.other = []
    self.id_to_filepath = {}
    self.modified = {}
    self.added = []
    self.deleted = []

    # walk the ebook directory tree: manifested files are keyed by their
    # manifest id, everything else by its book href
    for filepath in unipath.walk(ebook_root):
        # OS X file names and paths use NFD form. The EPUB spec requires
        # all text including filenames to be in NFC form.
        book_href = unicodedata.normalize('NFC', filepath.replace(os.sep, "/"))
        key = None
        if book_href.startswith('OEBPS/'):
            key = self.href_to_id.get(book_href[6:], None)
        if key is None:
            self.other.append(book_href)
            key = book_href
        self.id_to_filepath[key] = filepath
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the folder ebook_path into the epub archive epub_filepath.

    The 'mimetype' file is written first and uncompressed; every other file
    returned by unipath.walk is added deflated.

    Raises:
        Exception: ebook_path contains no 'mimetype' file.
    """
    # The context manager closes the archive even when the mimetype check
    # or one of the writes raises; it used to be left open in that case.
    with zipfile.ZipFile(pathof(epub_filepath), 'w') as outzip:
        files = unipath.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')), pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for name in files:
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
def writeXHTML(self):
    """Build the cover page and write it, UTF-8 encoded, into files.k8text.

    A file of the same name that already exists is reported and removed
    before the new one is written.
    """
    files = self.files
    page_name = self.cover_page
    markup = self.buildXHTML()
    target = os.path.join(files.k8text, page_name)
    if os.path.exists(pathof(target)):
        print('Warning: {:s} already exists.'.format(page_name))
        os.remove(pathof(target))
    with open(pathof(target), 'wb') as out:
        out.write(markup.encode('utf-8'))
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Create the epub archive epub_filepath from the folder ebook_path.

    'mimetype' goes in first and is stored uncompressed; all remaining
    files returned by unipath.walk are added with deflate compression.

    Raises:
        Exception: ebook_path contains no 'mimetype' file.
    """
    # Using the archive as a context manager guarantees it is closed on
    # every exit path, including the missing-mimetype error.
    with zipfile.ZipFile(pathof(epub_filepath), 'w') as outzip:
        files = unipath.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')), pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for member in files:
            source = os.path.join(ebook_path, member)
            outzip.write(pathof(source), pathof(member), zipfile.ZIP_DEFLATED)
def writeOPF(self, has_obfuscated_fonts=False):
    """Build the opf and write it UTF-8 encoded.

    For a K8 book the epub opf is written into files.k8oebps and
    self.BookId is returned; otherwise the mobi7 opf is written as
    'content.opf' into files.mobi7dir and 0 is returned.
    """
    is_k8 = self.isK8
    if is_k8:
        data = self.buildEPUBOPF(has_obfuscated_fonts)
        outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
    else:
        data = self.buildMobi7OPF()
        outopf = os.path.join(self.files.mobi7dir, 'content.opf')

    with open(pathof(outopf), 'wb') as out:
        out.write(data.encode('utf-8'))

    if is_k8:
        return self.BookId
    return 0
def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
    """Set up the container for the unpacked book found at ebook_root.

    When op (the parsed opf) is given, its manifest, spine, guide, package
    tag and metadata are copied in.  Afterwards all files below ebook_root
    are mapped to their on-disk path, keyed by manifest id where one exists
    and by book href otherwise.
    """
    self._debug = debug
    self.ebook_root = pathof(ebook_root)
    # plugins and plugin containers can get name and user plugin dir
    self.plugin_dir = pathof(plugin_dir)
    self.plugin_name = plugin_name
    self.outdir = pathof(outdir)

    # dictionaries used to map opf manifest information
    self.id_to_href = {}
    self.id_to_mime = {}
    self.href_to_id = {}
    self.spine_ppd = None
    self.spine = []
    self.guide = []
    self.package_tag = ""
    self.metadataxml = ""
    self.op = op
    if op is not None:
        # copy in data from parsing of initial opf
        self.opfname = op.opfname
        self.id_to_href = op.get_manifest_id_to_href_dict().copy()
        self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
        self.href_to_id = op.get_href_to_manifest_id_dict().copy()
        self.spine_ppd = op.get_spine_ppd()
        self.spine = op.get_spine()
        self.guide = op.get_guide()
        self.package_tag = op.get_package_tag()
        self.metadataxml = op.get_metadataxml()

    self.other = []  # hrefs of files that are not in the manifest
    self.id_to_filepath = {}
    self.modified = {}
    self.added = []
    self.deleted = []

    prefix = "OEBPS/"
    for filepath in unipath.walk(ebook_root):
        # OS X file names and paths use NFD form. The EPUB spec requires
        # all text including filenames to be in NFC form.
        book_href = unicodedata.normalize("NFC", filepath.replace(os.sep, "/"))
        # if the file is in the manifest use its manifest id as key
        manifest_id = None
        if book_href.startswith(prefix):
            manifest_id = self.href_to_id.get(book_href[6:], None)
        if manifest_id is not None:
            self.id_to_filepath[manifest_id] = filepath
            continue
        self.other.append(book_href)
        self.id_to_filepath[book_href] = filepath
def quitApp(self):
    """Store the dialog state in the global prefs and close the window."""
    global prefs
    gui = self.gui_prefs

    gui['educateQuotes'] = 1 if self.edu_quotes.get() == 'q' else 0
    gui['dashes'] = self.dashBox.current()
    gui['educateEllipses'] = 1 if self.edu_ellipses.get() == 'e' else 0
    gui['useFile'] = self.use_file.get()

    custom_path = self.cust_file_path.get()
    gui['useFilePath'] = pathof(custom_path) if len(custom_path) else ''

    gui['useUnicodeChars'] = self.unicodevar.get()
    self.misc_prefs['windowGeometry'] = self.parent.geometry()

    # copy preferences settings groups pack to global dict
    prefs['gui_selections'] = self.gui_prefs
    prefs['miscellaneous_settings'] = self.misc_prefs
    prefs['update_settings'] = self.update_prefs

    self.parent.destroy()
    self.quit()
def deletefile(self, id):
    """Remove the manifest file with the given id from the book.

    A copy in outdir (present when the file was added or modified earlier)
    is deleted from disk.  The id is then dropped from the manifest
    dictionaries and from the spine, and the opf is marked as modified.  Ids
    that were added earlier are not recorded in self.deleted.

    Raises:
        WrapperException: no file path is known for id.
    """
    id = unicode_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # The guard used to test `id`, which can never be None here when an id
    # was passed, so unknown ids slipped through and failed half way.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')

    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if unipath.exists(filepath) and unipath.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            self.added.remove(id)
            add_to_deleted = False
        if id in self.modified:
            del self.modified[id]

    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]

    # remove from spine
    new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
    if len(new_spine) != len(self.spine):
        # setspine is a method; the bare name raised NameError.
        self.setspine(new_spine)

    if add_to_deleted:
        self.deleted.append(id)
        self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def writeNAV(self, ncx_data, guidetext, metadata):
    """Build the navigation document and write it into files.k8text.

    The first 'Title' and 'Language' entries of metadata are handed to
    buildNAV; the result is written UTF-8 encoded as self.navname.
    """
    title = metadata.get('Title')[0]
    language = metadata.get('Language')[0]
    markup = self.buildNAV(ncx_data, guidetext, title, language)
    target = os.path.join(self.files.k8text, self.navname)
    with open(pathof(target), 'wb') as out:
        out.write(markup.encode('utf-8'))
def deletefile(self, id):
    """Delete the file registered under manifest id from the book.

    If the file was added or modified earlier, its copy in outdir is
    removed.  The id is taken out of the manifest dictionaries and the
    spine, and unless it was an added file it is recorded in self.deleted
    and the opf is flagged as modified.

    Raises:
        WrapperException: no file path is known for id.
    """
    id = unicode_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # Test the lookup result, not `id`: the old check never fired for an
    # unknown id and the method failed later with a partly updated state.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')

    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if unipath.exists(filepath) and unipath.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            self.added.remove(id)
            add_to_deleted = False
        if id in self.modified:
            del self.modified[id]

    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]

    # remove from spine
    remaining = [(sid, linear) for sid, linear in self.spine if sid != id]
    if len(remaining) != len(self.spine):
        # Call the method on self; the unqualified name was undefined.
        self.setspine(remaining)

    if add_to_deleted:
        self.deleted.append(id)
        self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def zipUpDir(self, myzip, tdir, localname, compress_type=zipfile.ZIP_DEFLATED):
    """Recursively add the folder tdir/localname to the open archive myzip.

    Archive member names are relative to tdir.  compress_type is applied to
    every file, including files in sub-directories.
    """
    currentdir = tdir
    if localname != "":
        currentdir = os.path.join(currentdir, localname)
    for entry in unipath.listdir(currentdir):
        localfilePath = os.path.join(localname, entry)
        realfilePath = os.path.join(currentdir, entry)
        if unipath.isfile(realfilePath):
            myzip.write(pathof(realfilePath), pathof(localfilePath), compress_type)
        elif unipath.isdir(realfilePath):
            # Pass compress_type on; the recursion used to fall back to the
            # default and ignore the caller's choice below the top level.
            self.zipUpDir(myzip, tdir, localfilePath, compress_type)
def write_opf(self):
    """Write the rebuilt opf to outdir/OEBPS/<opfname>.

    Does nothing when no opf was parsed (self.op is None).
    """
    if self.op is None:
        return
    target = pathof(os.path.join(self.outdir, 'OEBPS', self.opfname))
    folder = os.path.dirname(target)
    if not unipath.exists(folder):
        os.makedirs(folder)
    with open(target, 'wb') as out:
        # built after the file is opened, as before
        out.write(utf8_str(self.build_opf()))
def addotherfile(self, book_href, data) :
    """Add a new non-manifest file to the book under book_href.

    The data is written below outdir (text is converted with utf8_str) and
    the href is registered in self.other, self.added and
    self.id_to_filepath.

    Raises:
        WrapperException: the href is already known, or the target file
            already exists in outdir.
    """
    href = unicode_str(book_href)
    if href in self.other:
        raise WrapperException('book href must be unquie')

    relative_path = href.replace("/",os.sep)
    target = os.path.join(self.outdir,relative_path)
    if unipath.isfile(target):
        raise WrapperException('desired path already exists')

    folder = os.path.dirname(target)
    if not unipath.exists(folder):
        os.makedirs(pathof(folder))

    payload = utf8_str(data) if isinstance(data, text_type) else data
    with open(pathof(target),'wb') as out:
        out.write(payload)

    self.other.append(href)
    self.added.append(href)
    self.id_to_filepath[href] = relative_path
def write_opf(self):
    """Serialise the current opf into outdir/OEBPS/<opfname>.

    Nothing is written when self.op is None.
    """
    if self.op is None:
        return
    opf_path = pathof(os.path.join(self.outdir, 'OEBPS', self.opfname))
    parent = os.path.dirname(opf_path)
    if not unipath.exists(parent):
        os.makedirs(parent)
    with open(opf_path, 'wb') as handle:
        # the opf is built only once the output file is open
        handle.write(utf8_str(self.build_opf()))
def processSRCS(i, files, rscnames, sect, data):
    """Save the kindlegen source archive found in section i.

    Everything after the first 16 bytes of data is written to
    files.outdir/KINDLEGENSRC_FILENAME.  None is appended to rscnames.

    Returns rscnames.
    """
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    target = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    with open(pathof(target), 'wb') as out:
        out.write(data[16:])
    rscnames.append(None)
    sect.setsectiondescription(i,"Zipped Source Files")
    return rscnames
def processCMET(i, files, rscnames, sect, data):
    """Save the kindlegen build log found in section i.

    Everything after the first 10 bytes of data is written to
    files.outdir/KINDLEGENLOG_FILENAME.  None is appended to rscnames.

    Returns rscnames.
    """
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    target = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    with open(pathof(target), 'wb') as out:
        out.write(data[10:])
    rscnames.append(None)
    sect.setsectiondescription(i,"Kindlegen log")
    return rscnames
def processPrintReplica(metadata, files, rscnames, mh):
    """Unpack a Print Replica book and write an opf for it.

    The raw markup holds a list of tables, each listing sections by offset
    and length.  The first section of a table is written as a .pdf (and,
    when azw2zip_cfg.isOutputPdf() is true, also to a second path one level
    above files.outdir); all other sections are written as .data files.
    Errors during unpacking are printed and do not stop the opf from being
    written.
    """
    global DUMP
    global WRITE_RAW_DATA
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        raw_path = os.path.join(files.outdir,files.getInputFileBasename() + '.rawpr')
        with open(pathof(raw_path),'wb') as out:
            out.write(rawML)

    fileinfo = []
    print("Print Replica ebook detected")
    try:
        table_count, = struct.unpack_from(b'>L', rawML, 0x04)
        # the (offset, length) index starts right after the table counts
        index_pos = 8 + 4*table_count
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in range(table_count):
            section_count, = struct.unpack_from(b'>L', rawML, 0x08 + 4*i)
            for j in range(section_count):
                offset, length, = struct.unpack_from(b'>LL', rawML, index_pos)
                index_pos += 8
                pdf_fpath = u''
                if j != 0:
                    entry_name = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.%03d.data' % ((i+1),j)))
                else:
                    if azw2zip_cfg.isOutputPdf():
                        pdf_fpath = os.path.join(files.outdir, '..', azw2zip_cfg.makeOutputFileName(metadata) + ('.%03d.pdf' % (i+1)))
                    entry_name = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.pdf' % (i+1)))
                with open(pathof(entry_name), 'wb') as out:
                    out.write(rawML[offset:(offset+length)])
                if pdf_fpath:
                    with open(pathof(pdf_fpath), 'wb') as out:
                        out.write(rawML[offset:(offset+length)])
    except Exception as e:
        print('Error processing Print Replica: ' + str(e))

    fileinfo.append([None,'', files.getInputFileBasename() + '.pdf'])
    usedmap = dict((name, 'used') for name in rscnames if name is not None)
    opf = OPFProcessor(files, metadata, fileinfo, rscnames, False, mh, usedmap)
    opf.writeOPF()
def write_opf(self):
    """Write the rebuilt opf below outdir at the location self.opfbookpath.

    Nothing is written when self.op is None.
    """
    if self.op is None:
        return
    # book paths use '/', convert to the platform separator first
    relative = self.opfbookpath.replace('/', os.sep)
    target = pathof(os.path.join(self.outdir, relative))
    folder = os.path.dirname(target)
    if not unipath.exists(folder):
        os.makedirs(folder)
    with open(target, 'wb') as out:
        # built after the file is opened, as before
        out.write(utf8_str(self.build_opf()))
def writeK8NCX(self, ncx_data, metadata):
    """Build the K8 ncx and write it as toc.ncx into files.k8oebps.

    Uses the first 'Title', 'UniqueID' and 'Language' entries of metadata
    and sets self.isNCX.
    """
    self.isNCX = True
    print("Write K8 ncx")
    title = metadata['Title'][0]
    unique_id = metadata['UniqueID'][0]
    language = metadata.get('Language')[0]
    xml = self.buildK8NCX(ncx_data, title, unique_id, language)
    target = os.path.join(self.files.k8oebps, 'toc.ncx')
    with open(pathof(target), 'wb') as out:
        out.write(xml.encode('utf-8'))
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe (and with DUMP set, extract) all sections not yet described.

    Sections from mh.start up to the end of the file are examined; for the
    first part of a combination file the range stops at K8Boundary.
    Termination markers only get a description.  INDX and unrecognised
    sections are additionally written to files.outdir when DUMP is set.
    """
    global DUMP
    global TERMINATION_INDICATOR1
    global TERMINATION_INDICATOR2
    global TERMINATION_INDICATOR3
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in range(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        magic = data[0:4]
        fname = None  # stays None for sections that are never dumped
        if magic == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif magic == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif magic == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif magic == b"INDX":
            # Section data is bytes: comparing with the str "INDX" never
            # matched on Python 3, so INDX sections were reported as
            # mysterious.
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
        if fname is not None and DUMP:
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as f:
                f.write(data)
            print("Extracting %s: %s from section %d" % (description, fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or of raw image data, or None.

    imghdr does the detection; 'jpeg' is reported as 'jpg'.  When imghdr
    does not recognise the data, it is still treated as 'jpg' if it starts
    with the JPEG magic bytes and ends (ignoring trailing nulls) with the
    JPEG end marker.
    """
    detected = unicode_str(imghdr.what(pathof(imgname), imgdata))
    if detected == "jpeg":
        return "jpg"
    if detected is not None:
        return detected

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are
    # some with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] != b'\xFF\xD8':
        return None

    # Find the end of the data without trailing null bytes
    end = len(imgdata)
    while imgdata[end - 1:end] == b'\x00':
        end -= 1
    # Be extra safe, check the trailing bytes, too.
    if imgdata[end - 2:end] == b'\xFF\xD9':
        return "jpg"
    return None
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub at path_to_epub into destdir.

    Parent folders are created as needed.  Directory entries only create
    their folder.

    Raises:
        ValueError: a member name would be written outside destdir.
    """
    root = os.path.abspath(destdir)
    prefix = root if root.endswith(os.sep) else root + os.sep
    # Both handles are closed on every exit path; the file used to stay
    # open when reading or writing a member failed.
    with open(pathof(path_to_epub), 'rb') as f:
        with ZipFile(f) as sz:
            for name in sz.namelist():
                data = sz.read(name)
                relname = name.replace("/", os.sep)
                filepath = os.path.join(destdir, relname)
                # The archive is untrusted input: refuse absolute names and
                # '..' components that escape destdir (zip-slip).
                target = os.path.abspath(filepath)
                if target != root and not target.startswith(prefix):
                    raise ValueError('archive member escapes destination: %r' % name)
                basedir = os.path.dirname(filepath)
                if not os.path.isdir(basedir):
                    os.makedirs(basedir)
                if name.endswith("/"):
                    # directory entry: there is no file content to write
                    continue
                with open(filepath, 'wb') as fp:
                    fp.write(data)
def unzip_epub_to_dir(path_to_epub, destdir):
    """Unpack the epub archive path_to_epub below the folder destdir.

    Missing parent folders are created.  Entries that denote a directory
    create that directory and nothing else.

    Raises:
        ValueError: a member name would be written outside destdir.
    """
    dest_root = os.path.abspath(destdir)
    if dest_root.endswith(os.sep):
        dest_prefix = dest_root
    else:
        dest_prefix = dest_root + os.sep
    # Context managers close the file and the archive even on errors; the
    # explicit close at the end was skipped whenever something raised.
    with open(pathof(path_to_epub), 'rb') as f:
        with ZipFile(f) as sz:
            for name in sz.namelist():
                data = sz.read(name)
                filepath = os.path.join(destdir, name.replace("/", os.sep))
                # Member names come from the archive and cannot be trusted:
                # reject anything that resolves outside destdir (zip-slip).
                resolved = os.path.abspath(filepath)
                if resolved != dest_root and not resolved.startswith(dest_prefix):
                    raise ValueError('archive member escapes destination: %r' % name)
                basedir = os.path.dirname(filepath)
                if not os.path.isdir(basedir):
                    os.makedirs(basedir)
                if name.endswith("/"):
                    # directory entry: opening it for writing would fail
                    continue
                with open(filepath, 'wb') as fp:
                    fp.write(data)
def writefile(self, id, data):
    """Write new content for the manifest file id into outdir.

    Data for '+xml' media types and any text data is converted with
    utf8_str before writing.  The id is marked as modified.

    Raises:
        WrapperException: no file path is known for id.
    """
    id = unicode_str(id)
    relative = self.id_to_filepath.get(id, None)
    if relative is None:
        raise WrapperException('Id does not exist in manifest')
    mime = self.id_to_mime.get(id,'')

    target = os.path.join(self.outdir, relative)
    folder = os.path.dirname(target)
    if not unipath.exists(folder):
        os.makedirs(pathof(folder))

    needs_encoding = mime.endswith('+xml') or isinstance(data, text_type)
    payload = utf8_str(data) if needs_encoding else data
    with open(target,'wb') as out:
        out.write(payload)
    self.modified[id] = 'file'
def writeNCX(self, metadata):
    """Build the mobi7 ncx and write it as toc.ncx into files.mobi7dir.

    The ncx refers to 'book.html' and uses the first 'Title', 'UniqueID'
    and 'Language' entries of metadata.  Sets self.isNCX.
    """
    self.isNCX = True
    print("Write ncx")
    htmlname = 'book.html'
    title = metadata['Title'][0]
    unique_id = metadata['UniqueID'][0]
    language = metadata.get('Language')[0]
    xml = self.buildNCX(htmlname, title, unique_id, language)
    # write the ncx file
    target = os.path.join(self.files.mobi7dir, 'toc.ncx')
    with open(pathof(target), 'wb') as out:
        out.write(xml.encode('utf-8'))
def fileChooser(self):
    """Let the user pick an exception file and show it in the path entry.

    The dialog starts in misc_prefs['lastDir'].  After a selection the
    read-only entry is updated with the normalised path and the folder of
    the chosen file is remembered as the new lastDir.
    """
    file_opt = {
        'parent': None,
        'title': 'Select exception file',
        'defaultextension': '.txt',
        'initialdir': unicode_str(self.misc_prefs['lastDir'], 'utf-8'),
        'multiple': False,
        'filetypes': [('Text Files', '.txt'), ('All files', '.*')],
    }
    inpath = tkinter_filedialog.askopenfilename(**file_opt)
    if not len(inpath):
        return
    entry = self.cust_file_path
    # the entry is read-only, so unlock it while its text is replaced
    entry.config(state="normal")
    entry.delete(0, tkinter_constants.END)
    entry.insert(0, os.path.normpath(inpath))
    self.misc_prefs['lastDir'] = pathof(os.path.dirname(inpath))
    entry.config(state="readonly")
def writefile(self, id, data):
    """Replace the content of the manifest file id with data.

    The file is written below outdir; '+xml' media types and text data go
    through utf8_str first.  Afterwards the id is flagged as modified.

    Raises:
        WrapperException: no file path is known for id.
    """
    id = unicode_str(id)
    known_path = self.id_to_filepath.get(id, None)
    if known_path is None:
        raise WrapperException('Id does not exist in manifest')
    mime = self.id_to_mime.get(id, '')

    out_path = os.path.join(self.outdir, known_path)
    parent = os.path.dirname(out_path)
    if not unipath.exists(parent):
        os.makedirs(pathof(parent))

    if mime.endswith('+xml'):
        data = utf8_str(data)
    elif isinstance(data, text_type):
        data = utf8_str(data)
    with open(out_path, 'wb') as handle:
        handle.write(data)
    self.modified[id] = 'file'
def fileChooser(self):
    """Open a file dialog for the exception file and store the choice.

    Starts in misc_prefs['lastDir'].  When a file is chosen, its normalised
    path replaces the text of the read-only path entry and its folder
    becomes the new lastDir.
    """
    start_dir = unicode_str(self.misc_prefs['lastDir'], 'utf-8')
    chosen = tkinter_filedialog.askopenfilename(
        parent=None,
        title='Select exception file',
        defaultextension='.txt',
        initialdir=start_dir,
        multiple=False,
        filetypes=[('Text Files', '.txt'), ('All files', '.*')],
    )
    if len(chosen):
        # unlock the read-only entry just long enough to swap its text
        self.cust_file_path.config(state="normal")
        self.cust_file_path.delete(0, tkinter_constants.END)
        self.cust_file_path.insert(0, os.path.normpath(chosen))
        self.misc_prefs['lastDir'] = pathof(os.path.dirname(chosen))
        self.cust_file_path.config(state="readonly")
def deletefile(self, id):
    """Remove the manifest file with the given id from the book.

    A copy in outdir (present when the file was added or modified earlier)
    is deleted.  The id is dropped from all manifest dictionaries and from
    the spine, and the opf is flagged as modified.  Ids that were added
    earlier are not recorded in self.deleted.

    Raises:
        WrapperException: id is not in the manifest, no file path is known
            for it, or it is the ncx of an epub2 book.
    """
    id = unicode_str(id)
    if id not in self.id_to_href:
        raise WrapperException('Id does not exist in manifest')
    filepath = self.id_to_filepath.get(id, None)
    # Test the lookup result; the old check on `id` could never fire here.
    if filepath is None:
        raise WrapperException('Id does not exist in manifest')
    if self.epub_version.startswith("2") and id == self.gettocid():
        raise WrapperException('Can not add or remove an ncx under epub2')

    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if unipath.exists(filepath) and unipath.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            self.added.remove(id)
            add_to_deleted = False
        if id in self.modified:
            del self.modified[id]

    # remove from manifest
    href = self.id_to_href[id]
    bookpath = self.id_to_bookpath[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.id_to_props[id]
    del self.id_to_fall[id]
    del self.id_to_over[id]
    del self.id_to_bookpath[id]
    del self.href_to_id[href]
    del self.bookpath_to_id[bookpath]

    # remove from spine
    new_spine = [(sid, linear, properties)
                 for sid, linear, properties in self.spine if sid != id]
    if len(new_spine) != len(self.spine):
        self.setspine_epub3(new_spine)

    if add_to_deleted:
        self.deleted.append(('manifest', id, bookpath))
        self.modified[self.opfbookpath] = 'file'
    del self.id_to_filepath[id]
def processRESC(i, files, rscnames, sect, data, k8resc):
    """Parse the K8 RESC section i and return the resulting processor.

    With DUMP set the raw section is also written to files.outdir.  The
    incoming k8resc value is ignored and replaced by a new K8RESCProcessor
    built from data[16:].  None is appended to rscnames.

    Returns (rscnames, k8resc).
    """
    global DUMP
    if DUMP:
        rescname = "RESC%05d.dat" % i
        print("Extracting Resource: ", rescname)
        outrsc = os.path.join(files.outdir, rescname)
        with open(pathof(outrsc), 'wb') as f:
            f.write(data)
    # parse the spine and metadata from RESC
    # NOTE: this used to sit in `if True: ... else:` (a disabled
    # try/except) whose else branch could never run; parse errors
    # propagate to the caller exactly as before.
    k8resc = K8RESCProcessor(data[16:], DUMP)
    rscnames.append(None)
    sect.setsectiondescription(i,"K8 RESC section")
    return rscnames, k8resc
def __init__(self, filename):
    """Load the file and read its section table.

    The first 78 bytes are kept as palmheader; the section count is the
    big-endian 16 bit value at offset 76.  Offsets and attributes of all
    sections are unpacked from offset 78 on, and a final pseudo entry holds
    the file length so that every section has an end offset.
    """
    with open(pathof(filename), 'rb') as stream:
        self.data = stream.read()

    header = self.data[:78]
    self.palmheader = header
    self.palmname = self.data[:32]
    self.ident = header[0x3C:0x3C + 8]
    self.num_sections, = struct.unpack_from(b'>H', header, 76)
    self.filelength = len(self.data)

    # the table alternates offset, attributes for each section
    table_format = bstr('>%dL' % (self.num_sections * 2))
    table = struct.unpack_from(table_format, self.data, 78) + (self.filelength, 0)
    self.sectionoffsets = table[::2]
    self.sectionattributes = table[1::2]

    descriptions = [""] * (self.num_sections + 1)
    descriptions[-1] = "File Length Only"
    self.sectiondescriptions = descriptions
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font stored in section i into files.imgdir.

    The header after the first 4 bytes holds five big-endian 32 bit values:
    usize, flags, data start, xor key length and xor key start.  Flag
    0x0002 means the first 1040 bytes are xor-obfuscated with the key, flag
    0x0001 means the data is zlib compressed.  The extension is chosen from
    the first four bytes of the decoded font.  If the header cannot be
    unpacked, the raw section is written with the extension '.failed'.

    Returns (rscnames, obfuscate_data, rsc_ptr); rsc_ptr is set to i - beg
    when it was -1.
    """
    fontname = "font%05d" % i
    ext = '.dat'
    font_error = False
    font_data = data
    try:
        usize, fflags, dstart, xor_len, xor_start = struct.unpack_from(b'>LLLLL',data,4)
    except struct.error:
        # Only a malformed/short header is expected here; a bare except
        # would also have hidden KeyboardInterrupt and programming errors.
        print("Failed to extract font: {0:s} from section {1:d}".format(fontname,i))
        font_error = True
        ext = '.failed'
    if not font_error:
        print("Extracting font:", fontname)
        font_data = data[dstart:]
        extent = min(len(font_data), 1040)
        if fflags & 0x0002:
            # obfuscated so need to de-obfuscate the first 1040 bytes
            key = bytearray(data[xor_start: xor_start+ xor_len])
            buf = bytearray(font_data)
            for n in range(extent):
                buf[n] ^= key[n%xor_len]
            font_data = bytes(buf)
        if fflags & 0x0001:
            # ZLIB compressed data
            font_data = zlib.decompress(font_data)
        hdr = font_data[0:4]
        if hdr in (b'\0\1\0\0', b'true', b'ttcf'):
            ext = '.ttf'
        elif hdr == b'OTTO':
            ext = '.otf'
        else:
            print("Warning: unknown font header %s" % hexlify(hdr))
        if ext in ('.ttf', '.otf') and (fflags & 0x0002):
            obfuscate_data.append(fontname + ext)
    fontname += ext
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as f:
        f.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i,"Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
def __init__(self, opf_path, opf_bookpath, debug=False):
    """Read the opf at opf_path as UTF-8 and parse it.

    opf_bookpath is the location of the opf inside the book; its starting
    directory is kept as opf_dir.  All result containers are created empty
    and then filled by _parseData().
    """
    self._debug = debug
    opf_path = pathof(opf_path)
    self.opfname = os.path.basename(opf_path)
    self.opf_bookpath = opf_bookpath
    self.opf_dir = startingDir(opf_bookpath)

    self.opf = None
    with open(opf_path, 'rb') as stream:
        self.opf = stream.read().decode('utf-8')
    self.opos = 0

    self.package = None
    self.metadata_attr = None
    self.metadata = []
    self.cover_id = None

    # let downstream invert any invertable dictionaries when needed
    self.manifest_id_to_href = {}
    self.manifest_id_to_bookpath = {}

    # create non-invertable dictionaries
    self.manifest_id_to_mime = {}
    self.manifest_id_to_properties = {}
    self.manifest_id_to_fallback = {}
    self.manifest_id_to_overlay = {}

    # spine and guide
    self.spine = []
    self.spine_ppd = None
    self.guide = []
    self.bindings = []

    # determine folder structure
    self.group_folder = {"epub": ['META-INF'], "opf": [self.opf_dir]}
    self.group_count = {"epub": [1], "opf": [1]}

    self._parseData()
def __init__(self, opf_path, debug = False):
    """Read the opf at opf_path as UTF-8 and parse it.

    All result containers start out empty and are filled by _parseData().
    """
    self._debug = debug
    opf_path = pathof(opf_path)
    self.opfname = os.path.basename(opf_path)

    self.opf = None
    with open(opf_path,'rb') as stream:
        raw = stream.read()
        self.opf = raw.decode('utf-8')
    self.opos = 0

    # package and metadata
    self.package = None
    self.metadata_attr = None
    self.metadata = []
    self.cover_id = None

    # manifest lookups
    self.manifest_id_to_href = {}
    self.manifest_id_to_mime = {}
    self.href_to_manifest_id = {}
    self.manifest_id_to_properties = {}

    # spine, guide and bindings
    self.spine = []
    self.spine_ppd = None
    self.guide = []
    self.bindings = []

    self._parseData()
def __init__(self, opf_path, debug = False):
    """Load the opf file at opf_path (UTF-8) and parse it.

    The package and metadata tags are kept as two-element lists that start
    as [None, None]; every other container starts empty.  _parseData()
    fills them in.
    """
    self._debug = debug
    opf_path = pathof(opf_path)
    self.opfname = os.path.basename(opf_path)

    self.opf = None
    with open(opf_path,'rb') as source:
        self.opf = source.read().decode('utf-8')
    self.opos = 0

    # package and metadata
    self.package_tag = [None, None]
    self.metadata_tag = [None, None]
    self.metadata = []
    self.cover_id = None

    # manifest lookups
    self.manifest_id_to_href = {}
    self.manifest_id_to_mime = {}
    self.href_to_manifest_id = {}

    # spine and guide
    self.spine_ppd = None
    self.spine = []
    self.guide = []

    self._parseData()
def run(bk):
    """Plugin entry point: import a Kindle book chosen by the user.

    Loads the plugin preferences (filling in defaults), runs the update
    check, asks for an input file and unpacks it in a temporary directory.
    The resulting epub (or the embedded kindlegen source) is handed to the
    book container as 'dummy.epub'.

    Returns 0 on success or when no file was selected, and -1 when the
    book is Topaz, encrypted or Print Replica, or when the opf of a
    mobi-only book could not be tweaked.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'azw3_epub_version' not in prefs:
        prefs['azw3_epub_version'] = "2"  # A, F, 2 or 3
    if 'use_hd_images' not in prefs:
        prefs['use_hd_images'] = True
    if 'use_src_from_dual_mobi' not in prefs:
        prefs['use_src_from_dual_mobi'] = True
    if 'asin_for_kindlegen_plugin' not in prefs:
        prefs['asin_for_kindlegen_plugin'] = False
    if 'preserve_kindleunpack_meta' not in prefs:
        prefs['preserve_kindleunpack_meta'] = False
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=7))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
    update_available, online_version, time = chk.update_info()
    # update preferences with latest date/time/version
    prefs['last_time_checked'] = time
    if online_version is not None:
        prefs['last_online_version'] = online_version
    if update_available:
        title = 'Plugin Update Available'
        msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
        update_msgbox(title, msg)

    if _DEBUG_:
        print('Python sys.path', sys.path)
        print('Default AZW3 epub version:', prefs['azw3_epub_version'])

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('No input file selected!')
        bk.savePrefs(prefs)
        return 0

    print ('Path to Kindlebook {0}'.format(inpath))
    # imported here rather than at module level
    from mobi_stuff import mobiProcessor, topaz
    if topaz(inpath):
        print('Kindlebook is in Topaz format: can\'t open!')
        bk.savePrefs(prefs)
        return -1

    mobionly = False
    mp = mobiProcessor(inpath, prefs['azw3_epub_version'], prefs['use_hd_images'])
    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))
    if mp.isEncrypted:
        print('Kindlebook is encrypted: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    if mp.isPrintReplica:
        print('Kindlebook is a Print Replica: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    # neither a combo file nor KF8: only the old mobi format is available
    if not mp.isComboFile and not mp.isKF8:
        mobionly = True

    with make_temp_directory() as temp_dir:
        # TWEAK says whether the opf gets rewritten before import
        TWEAK = True
        asin = None
        if not mobionly:
            epub, opf, src = mp.unpackEPUB(temp_dir)
            if src is not None and isEPUB(src) and prefs['use_src_from_dual_mobi']:
                print ('Using included kindlegen sources.')
                epub = src
            else:
                # If user requested no tweaks through preferences, use standard epub from KindleUnpack
                if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                    TWEAK = False
                elif prefs['asin_for_kindlegen_plugin']:
                    if opf is not None:
                        # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                        asin = get_asin(opf)
                        if asin is not None:
                            asin = unicode_str(asin)
                    else:
                        TWEAK = False
                if TWEAK:
                    # Modify the opf with the requested tweaks and build a new epub
                    if tweak_opf(opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                        os.remove(epub)
                        with temp_epub_handle(delete=False) as new_epub:
                            epub_zip_up_book_contents(os.path.join(temp_dir,'mobi8'), new_epub)
                        epub = new_epub
        else:
            from quickepub import QuickEpub
            mobidir, mobi_html, mobi_opf, mobiBaseName = mp.unpackMOBI(temp_dir)
            if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                TWEAK = False
            elif prefs['asin_for_kindlegen_plugin']:
                if mobi_opf is not None:
                    # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                    asin = get_asin(mobi_opf)
                    if asin is not None:
                        asin = unicode_str(asin)
                else:
                    TWEAK = False
            if TWEAK:
                if not tweak_opf(mobi_opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                    # NOTE(review): unlike the other early exits, prefs are
                    # not saved on this path - confirm that is intended.
                    print('OPF manipulation failed!')
                    return -1
            qe = QuickEpub(mobidir, mobi_html, mobi_opf)
            epub = qe.makeEPUB()

        # Save prefs to json
        bk.savePrefs(prefs)
        print ('Path to epub or src {0}'.format(epub))
        with file_open(epub,'rb')as fp:
            data = fp.read()
        bk.addotherfile('dummy.epub', data)

    return 0
def run(bk):
    """Plugin entry point: add a user-selected iBooks XML file to META-INF.

    Fills in missing preference defaults, optionally runs the update check,
    refuses to continue if META-INF/XMLFILE is already among the book's
    unmanifested files, then asks the user for a file and adds its bytes to
    the book as 'META-INF/<XMLFILE>'.

    Returns 0 on success, when the file is already present, or when the
    selection is canceled; returns -1 if the chosen file cannot be read.
    Preferences are saved on every exit path.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'check_for_updates' not in prefs:
        prefs['check_for_updates'] = True
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=delta+1))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    if prefs['check_for_updates']:
        chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
        update_available, online_version, checked_at = chk.update_info()
        # update preferences with latest date/time/version
        prefs['last_time_checked'] = checked_at
        if online_version is not None:
            prefs['last_online_version'] = online_version
        if update_available:
            title = 'Plugin Update Available'
            msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
            show_msgbox(title, msg, 'info')

    # Book-relative path the XML file will occupy inside the epub.
    xml_href = 'META-INF/{}'.format(XMLFILE)

    if xml_href in bk._w.other:
        title = 'File Already Present!'
        msg = 'The {} file is already present. Please delete it before trying to add another'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        # Persist the defaults and update-check results gathered above, as
        # the other exit paths do.
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Python sys.path: {}\n'.format(sys.path))

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('iBooks XML file selection canceled!')
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Path to XML file: {}\n'.format(inpath))

    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))

    # Save prefs to json
    bk.savePrefs(prefs)

    try:
        with file_open(inpath, 'rb') as fp:
            data = fp.read()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed and misreported as a read error.
        title = 'Unexpected error!'
        msg = 'Error reading the {} file. Perhaps it is corrupt or missing?'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        return -1

    if _DEBUG_:
        print('Internal epub href: META-INF/{}\n'.format(XMLFILE))

    bk.addotherfile(xml_href, data)

    return 0
def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug = False):
    """Set up the wrapper state for one ebook and one plugin run.

    ebook_root  -- root directory of the unpacked ebook
    outdir      -- output directory; it must contain a 'sigil.cfg' file
    op          -- parser for the initial OPF (or None), whose manifest,
                   spine, guide and metadata are copied into this object
    plugin_dir  -- user plugin directory
    plugin_name -- name of the running plugin
    debug       -- debug flag, stored as self._debug
    """
    self._debug = debug
    self.ebook_root = pathof(ebook_root)
    # plugins and plugin containers can get name and user plugin dir
    self.plugin_dir = pathof(plugin_dir)
    self.plugin_name = plugin_name
    self.outdir = pathof(outdir)

    # Sigil configuration info is passed in through outdir/sigil.cfg:
    # line 1 is the app dir, line 2 the user support dir, and any
    # remaining lines are kept as the selection list.
    self.appdir = None
    self.usrsupdir = None
    self.selected = []
    with open(os.path.join(self.outdir, 'sigil.cfg'), 'rb') as cfg_file:
        cfg_text = cfg_file.read().decode('utf-8')
    cfg_lines = cfg_text.replace("\r", "").split("\n")
    if len(cfg_lines) >= 2:
        self.appdir = cfg_lines[0]
        self.usrsupdir = cfg_lines[1]
        self.selected = cfg_lines[2:]
    os.environ['SigilGumboLibPath'] = self.get_gumbo_path()

    # dictionaries used to map opf manifest information
    self.id_to_href = {}
    self.id_to_mime = {}
    self.href_to_id = {}
    self.id_to_props = {}
    self.id_to_fall = {}
    self.id_to_over = {}
    self.spine_ppd = None
    self.spine = []
    self.guide = []
    self.package_tag = None
    self.epub_version = None
    self.metadataxml = ''
    self.op = op
    if op is not None:
        # copy in data from parsing of initial opf
        self.opfname = op.opfname
        self.id_to_href = op.get_manifest_id_to_href_dict().copy()
        self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
        self.href_to_id = op.get_href_to_manifest_id_dict().copy()
        self.id_to_props = op.get_manifest_id_to_properties_dict().copy()
        self.id_to_fall = op.get_manifest_id_to_fallback_dict().copy()
        self.id_to_over = op.get_manifest_id_to_overlay_dict().copy()
        self.spine_ppd = op.get_spine_ppd()
        self.spine = op.get_spine()
        self.guide = op.get_guide()
        self.package_tag = op.get_package_tag()
        self.epub_version = op.get_epub_version()
        self.metadataxml = op.get_metadataxml()

    # non-manifest file information and change tracking
    self.other = []
    self.id_to_filepath = {}
    self.modified = {}
    self.added = []
    self.deleted = []

    # Walk the ebook directory tree, recording the on-disk path of every
    # file: keyed by manifest id when the file is in the manifest,
    # otherwise keyed by its book href and listed in self.other.
    for filepath in unipath.walk(ebook_root):
        # OS X file names and paths use NFD form. The EPUB spec requires
        # all text including filenames to be in NFC form.
        book_href = unicodedata.normalize('NFC', filepath.replace(os.sep, "/"))
        manifest_id = None
        if book_href.startswith('OEBPS/'):
            # manifest hrefs are looked up without the 'OEBPS/' prefix
            manifest_id = self.href_to_id.get(book_href[6:], None)
        if manifest_id is not None:
            self.id_to_filepath[manifest_id] = filepath
            continue
        self.other.append(book_href)
        self.id_to_filepath[book_href] = filepath