def parseExceptionsFile(filename):
    """Load the list of apostrophe exception words from a text file.

    The encoding is chosen by looking for a UTF-8 BOM in the first bytes of
    the file; when there is none, each candidate encoding is tried in turn
    until the whole file decodes.  If every candidate fails, the last one
    tried is used for the final read (which then reports the parse error).

    Args:
        filename: Path of the exception file.

    Returns:
        The non-empty lines of the file with trailing whitespace removed,
        or an empty list if the file could not be parsed.

    Raises:
        OSError: If the file cannot be stat'ed or opened for the initial
            BOM sniff (this happens before the best-effort parsing starts).
    """
    safename = utf8_str(filename)
    words_list = []
    snippet = min(32, os.path.getsize(pathof(safename)))
    with open(pathof(safename), 'rb') as raw_file:
        raw = raw_file.read(snippet)

    if raw.startswith(codecs.BOM_UTF8):
        enc = 'utf-8-sig'
    else:
        # The comma after 'utf-16' matters: without it Python concatenates
        # the adjacent literals into the bogus codec name
        # 'utf-16windows-1252', whose LookupError would abort the probe.
        encodings = ['utf-8', 'utf-16', 'windows-1252', 'windows-1250']
        for e in encodings:
            try:
                with file_open(pathof(safename), 'r', encoding=e) as fh:
                    fh.readlines()
            except UnicodeError:
                # UnicodeError (not just UnicodeDecodeError): the UTF-16
                # decoder raises the base class for input lacking a BOM.
                print('Got unicode error with %s , trying different encoding' % e)
            else:
                break
        enc = e

    try:
        with file_open(pathof(safename), 'r', encoding=enc) as fd:
            words_list = [line.rstrip() for line in fd]
        words_list = [_f for _f in words_list if _f]
        print('Parsing apostrophe exception file %s' % filename)
    except Exception:
        # Best effort: an unreadable exception file is ignored, not fatal.
        print('Error parsing apostrophe exception file %s: ignoring' % filename)
        words_list = []
    return words_list
def __init__(self, filename, out_enc):
    """Read the markup file and initialise the converter state.

    The file is first decoded with ``out_enc`` when one is given.  If that
    fails, or no encoding was supplied, 'utf-8' and then 'windows-1252' are
    tried and the first one that decodes the file is used.

    Args:
        filename: Path of the markup file to convert.
        out_enc: Encoding name to try first, or None to guess.

    Raises:
        UnicodeDecodeError: If none of the attempted encodings can decode
            the file.
    """
    self.base_css_rules = 'blockquote { margin: 0em 0em 0em 1.25em }\n'
    self.base_css_rules += 'p { margin: 0em }\n'
    self.base_css_rules += '.bold { font-weight: bold }\n'
    self.base_css_rules += '.italic { font-style: italic }\n'
    self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n'
    self.tag_css_rules = {}
    self.tag_css_rule_cnt = 0
    self.path = []
    self.filename = filename

    markup = None
    last_error = None
    if out_enc is not None:
        try:
            with file_open(self.filename, 'r', encoding=out_enc) as fp:
                markup = fp.read()
        except UnicodeDecodeError as err:
            last_error = err

    if markup is None:
        # Declared encoding missing or wrong: fall back to guessing.
        for enc in ('utf-8', 'windows-1252'):
            try:
                with file_open(self.filename, 'r', encoding=enc) as fp:
                    markup = fp.read()
            except UnicodeDecodeError as err:
                last_error = err
                continue
            print('Guessing markup character encoding')
            break

    if markup is None:
        # Every candidate failed, so last_error is necessarily set here.
        raise last_error
    self.wipml = markup

    self.pos = 0
    self.opfname = self.filename.rsplit('.', 1)[0] + '.opf'
    self.opos = 0
    self.meta = ''
    self.cssname = os.path.join(os.path.dirname(self.filename), 'styles.css')
    self.current_font_size = 3
    self.font_history = []
def makeEPUB(self):
    """Build the EPUB archive from the unpacked files and return its path.

    Steps, in order: convert the markup file to XHTML (pretty-printed via
    gumbo) and write it back in place; write the stylesheet (normalised
    with cssutils when available); rewrite the OPF so that manifest items
    use the 'application/xhtml+xml' media type and the stylesheet is listed;
    finally zip everything into ``self.epubname``.

    Returns:
        The path of the EPUB file that was written (``self.epubname``).
    """
    out_enc = find_output_encoding(self.opffile)
    print('Markup encoded as:', out_enc)
    ml2html = MobiMLConverter(self.htmlfile, out_enc)
    xhtmlstr, css, cssname = ml2html.processml()
    soup = gumbo_bs4.parse(xhtmlstr)
    xhtmlstr = soup.prettyprint_xhtml()
    # Files are closed explicitly so their contents are on disk before
    # they are added to the archive below.
    with file_open(self.htmlfile, 'wb') as fp:
        fp.write(xhtmlstr.encode('utf-8'))

    if has_cssutils:
        sheet = cssutils.parseString(css)
        cssutils.ser.prefs.indent = 2 * ' '
        cssutils.ser.prefs.indentClosingBrace = False
        cssutils.ser.prefs.omitLastSemicolon = False
        css = unicode_str(sheet.cssText)
    with file_open(cssname, 'wb') as fp:
        fp.write(css.encode('utf-8'))

    opf_lines = []
    with file_open(self.opffile, 'r', encoding='utf-8') as fp:
        for line in fp:
            if line.startswith('<item'):
                # str.replace is a no-op when the text is absent, so no
                # membership test is needed (str.find() returns -1, which
                # is truthy, so it must not be used as a boolean).
                line = line.replace('text/x-oeb1-document', 'application/xhtml+xml')
                line = line.replace('text/html', 'application/xhtml+xml')
            opf_lines.append(line)
            if line.startswith('<manifest>'):
                opf_lines.append('<item id="css_file" media-type="text/css" href="styles.css" />\n')
    with file_open(self.opffile, 'wb') as fp:
        fp.write(''.join(opf_lines).encode('utf-8'))

    outzip = zipfile.ZipFile(self.epubname, 'w')
    try:
        # add the mimetype file uncompressed
        mimetype = 'application/epub+zip'
        fileout = os.path.join(self.outdir, 'mimetype')
        with file_open(fileout, 'wb') as fp:
            fp.write(mimetype.encode('utf-8'))
        nzinfo = ZipInfo('mimetype', compress_type=zipfile.ZIP_STORED)
        outzip.writestr(nzinfo, mimetype)
        self.zipUpDir(outzip, self.outdir, 'META-INF')
        images_dir = os.path.join(self.outdir, 'Images')
        if os.path.exists(images_dir):
            self.removeThumbnailImage(images_dir)
            self.zipUpDir(outzip, self.outdir, 'Images')
        outzip.write(self.htmlfile, os.path.basename(self.htmlfile), zipfile.ZIP_DEFLATED)
        outzip.write(self.opffile, os.path.basename(self.opffile), zipfile.ZIP_DEFLATED)
        outzip.write(cssname, 'styles.css', zipfile.ZIP_DEFLATED)
        ncx_path = os.path.join(self.outdir, 'toc.ncx')
        if os.path.exists(ncx_path):
            outzip.write(ncx_path, 'toc.ncx', zipfile.ZIP_DEFLATED)
    finally:
        # Always release the archive handle, even if a member fails.
        outzip.close()
    return self.epubname
def __init__(self, outdir, htmlfile, opffile):
    """Record the input paths and write META-INF/container.xml.

    Args:
        outdir: Directory holding the unpacked book; the EPUB is written
            here as 'new.epub' and 'META-INF' is created inside it.
        htmlfile: Path of the markup file to package.
        opffile: Path of the OPF file; its base name is recorded as the
            rootfile in container.xml.
    """
    self.outdir, self.htmlfile, self.opffile = outdir, htmlfile, opffile
    self.epubname = os.path.join(outdir, 'new.epub')
    self.metainf = os.path.join(outdir, 'META-INF')
    if not os.path.exists(self.metainf):
        os.mkdir(self.metainf)
    container = '<?xml version="1.0" encoding="UTF-8"?>\n'
    container += '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
    container += ' <rootfiles>\n'
    container += '<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>'.format(os.path.basename(self.opffile))
    container += ' </rootfiles>\n</container>\n'
    fileout = os.path.join(self.metainf, 'container.xml')
    # Close the handle explicitly so the file is complete on disk before
    # META-INF is zipped later.
    with file_open(fileout, 'wb') as fp:
        fp.write(container.encode('utf-8'))
def __init__(self, outdir, htmlfile, opffile):
    """Store the working paths and create the EPUB container descriptor.

    Args:
        outdir: Directory with the unpacked book files.  'new.epub' and
            the 'META-INF' folder are placed inside it.
        htmlfile: Path of the markup file that will go into the EPUB.
        opffile: Path of the OPF package file; container.xml points at
            its base name.
    """
    self.outdir, self.htmlfile, self.opffile = outdir, htmlfile, opffile
    self.epubname = os.path.join(outdir, 'new.epub')
    self.metainf = os.path.join(outdir, 'META-INF')
    if not os.path.exists(self.metainf):
        os.mkdir(self.metainf)

    opf_basename = os.path.basename(self.opffile)
    container = '<?xml version="1.0" encoding="UTF-8"?>\n'
    container += '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
    container += ' <rootfiles>\n'
    container += '<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>'.format(opf_basename)
    container += ' </rootfiles>\n</container>\n'

    fileout = os.path.join(self.metainf, 'container.xml')
    # Use a context manager so the descriptor is flushed and closed
    # instead of relying on garbage collection of the file object.
    with file_open(fileout, 'wb') as fp:
        fp.write(container.encode('utf-8'))
def makeEPUB(self):
    """Build the EPUB archive from the unpacked files and return its path.

    Steps, in order: convert the markup file to XHTML and write it back in
    place; write the stylesheet (normalised with cssutils when available);
    rewrite the OPF so that manifest items use the 'application/xhtml+xml'
    media type and the stylesheet is listed; finally zip everything into
    ``self.epubname``.

    Returns:
        The path of the EPUB file that was written (``self.epubname``).
    """
    out_enc = find_output_encoding(self.opffile)
    print('Markup encoded as:', out_enc)
    ml2html = MobiMLConverter(self.htmlfile, out_enc)
    xhtmlstr, css, cssname = ml2html.processml()
    # Files are closed explicitly so their contents are on disk before
    # they are added to the archive below.
    with file_open(self.htmlfile, 'wb') as fp:
        fp.write(xhtmlstr.encode('utf-8'))

    if has_cssutils:
        sheet = cssutils.parseString(css)
        cssutils.ser.prefs.indent = 2 * ' '
        cssutils.ser.prefs.indentClosingBrace = False
        cssutils.ser.prefs.omitLastSemicolon = False
        css = unicode_str(sheet.cssText)
    with file_open(cssname, 'wb') as fp:
        fp.write(css.encode('utf-8'))

    opf_lines = []
    with file_open(self.opffile, 'r', encoding='utf-8') as fp:
        for line in fp:
            if line.startswith('<item'):
                # str.replace is a no-op when the text is absent, so no
                # membership test is needed (str.find() returns -1, which
                # is truthy, so it must not be used as a boolean).
                line = line.replace('text/x-oeb1-document', 'application/xhtml+xml')
                line = line.replace('text/html', 'application/xhtml+xml')
            opf_lines.append(line)
            if line.startswith('<manifest>'):
                opf_lines.append('<item id="css_file" media-type="text/css" href="styles.css" />\n')
    with file_open(self.opffile, 'wb') as fp:
        fp.write(''.join(opf_lines).encode('utf-8'))

    outzip = zipfile.ZipFile(self.epubname, 'w')
    try:
        # add the mimetype file uncompressed
        mimetype = 'application/epub+zip'
        fileout = os.path.join(self.outdir, 'mimetype')
        with file_open(fileout, 'wb') as fp:
            fp.write(mimetype.encode('utf-8'))
        nzinfo = ZipInfo('mimetype', compress_type=zipfile.ZIP_STORED)
        outzip.writestr(nzinfo, mimetype)
        self.zipUpDir(outzip, self.outdir, 'META-INF')
        if os.path.exists(os.path.join(self.outdir, 'Images')):
            self.zipUpDir(outzip, self.outdir, 'Images')
        outzip.write(self.htmlfile, os.path.basename(self.htmlfile), zipfile.ZIP_DEFLATED)
        outzip.write(self.opffile, os.path.basename(self.opffile), zipfile.ZIP_DEFLATED)
        outzip.write(cssname, 'styles.css', zipfile.ZIP_DEFLATED)
        ncx_path = os.path.join(self.outdir, 'toc.ncx')
        if os.path.exists(ncx_path):
            outzip.write(ncx_path, 'toc.ncx', zipfile.ZIP_DEFLATED)
    finally:
        # Always release the archive handle, even if a member fails.
        outzip.close()
    return self.epubname
def __init__(self, filename, out_enc):
    """Read the markup file and initialise the converter state.

    The file is first decoded with ``out_enc`` when one is given.  If that
    fails, or no encoding was supplied, 'utf-8' and then 'windows-1252' are
    tried and the first one that decodes the file is used.

    Args:
        filename: Path of the markup file to convert.
        out_enc: Encoding name to try first, or None to guess.

    Raises:
        UnicodeDecodeError: If none of the attempted encodings can decode
            the file.
    """
    self.base_css_rules = 'blockquote { margin: 0em 0em 0em 1.25em }\n'
    self.base_css_rules += 'p { margin: 0em }\n'
    self.base_css_rules += '.bold { font-weight: bold }\n'
    self.base_css_rules += '.italic { font-style: italic }\n'
    self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n'
    self.tag_css_rules = {}
    self.tag_css_rule_cnt = 0
    self.path = []
    self.filename = filename

    markup = None
    last_error = None
    if out_enc is not None:
        try:
            with file_open(self.filename, 'r', encoding=out_enc) as fp:
                markup = fp.read()
        except UnicodeDecodeError as err:
            last_error = err

    if markup is None:
        # Declared encoding missing or wrong: fall back to guessing.
        for enc in ('utf-8', 'windows-1252'):
            try:
                with file_open(self.filename, 'r', encoding=enc) as fp:
                    markup = fp.read()
            except UnicodeDecodeError as err:
                last_error = err
                continue
            print('Guessing markup character encoding')
            break

    if markup is None:
        # Every candidate failed, so last_error is necessarily set here.
        raise last_error
    self.wipml = markup

    self.pos = 0
    self.opfname = self.filename.rsplit('.', 1)[0] + '.opf'
    self.opos = 0
    self.meta = ''
    self.cssname = os.path.join(os.path.dirname(self.filename), 'styles.css')
    self.current_font_size = 3
    self.font_history = []
def run(bk):
    """Plugin entry point: import a Kindlebook into the editor as an epub.

    Loads (and defaults) the plugin preferences, checks for a plugin
    update, lets the user pick a Kindlebook, unpacks it to an epub
    (optionally tweaking the OPF) and hands the resulting file to ``bk``
    as 'dummy.epub'.

    Returns 0 on success or when no input file was selected, and -1 when
    the book cannot be opened (Topaz, encrypted, Print Replica) or the OPF
    manipulation of a mobi-only book fails.
    """
    global prefs
    global GUI
    # Pick the GUI toolkit from the launcher version reported by bk.
    if bk.launcher_version() >= 20170115:
        GUI = 'pyqt'
    else:
        GUI = 'tkinter'
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'azw3_epub_version' not in prefs:
        prefs['azw3_epub_version'] = "2"  # A, F, 2 or 3
    if 'use_hd_images' not in prefs:
        prefs['use_hd_images'] = True
    if 'use_src_from_dual_mobi' not in prefs:
        prefs['use_src_from_dual_mobi'] = True
    if 'asin_for_kindlegen_plugin' not in prefs:
        prefs['asin_for_kindlegen_plugin'] = False
    if 'preserve_kindleunpack_meta' not in prefs:
        prefs['preserve_kindleunpack_meta'] = False
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=7))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
    update_available, online_version, time = chk.update_info()
    # update preferences with latest date/time/version
    prefs['last_time_checked'] = time
    if online_version is not None:
        prefs['last_online_version'] = online_version
    if update_available:
        title = 'Plugin Update Available'
        msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
        # update_msgbox(title, msg)
        update_msgbox(title, msg, bk, GUI)

    if _DEBUG_:
        print('Python sys.path', sys.path)
        print('Default AZW3 epub version:', prefs['azw3_epub_version'])

    # inpath = fileChooser()
    inpath = fileChooser(prefs['use_file_path'], bk, GUI)
    if inpath == '' or not os.path.exists(inpath):
        print('No input file selected!')
        bk.savePrefs(prefs)
        return 0

    print('Path to Kindlebook {0}'.format(inpath))
    from mobi_stuff import mobiProcessor, topaz
    if topaz(inpath):
        print('Kindlebook is in Topaz format: can\'t open!')
        bk.savePrefs(prefs)
        return -1

    mobionly = False
    mp = mobiProcessor(inpath, prefs['azw3_epub_version'], prefs['use_hd_images'])
    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))
    if mp.isEncrypted:
        print('Kindlebook is encrypted: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    if mp.isPrintReplica:
        print('Kindlebook is a Print Replica: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    # Neither a combo file nor KF8: take the mobi-only (QuickEpub) route.
    if not mp.isComboFile and not mp.isKF8:
        mobionly = True

    with make_temp_directory() as temp_dir:
        TWEAK = True
        asin = None
        if not mobionly:
            epub, opf, src = mp.unpackEPUB(temp_dir)
            if src is not None and isEPUB(src) and prefs['use_src_from_dual_mobi']:
                print('Using included kindlegen sources.')
                epub = src
            else:
                # If user requested no tweaks through preferences, use standard epub from KindleUnpack
                if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                    TWEAK = False
                elif prefs['asin_for_kindlegen_plugin']:
                    if opf is not None:
                        # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                        asin = get_asin(opf)
                        if asin is not None:
                            asin = unicode_str(asin)
                    else:
                        TWEAK = False
                if TWEAK:
                    # Modify the opf with the requested tweaks and build a new epub
                    # NOTE(review): if tweak_opf() returns a falsy value the
                    # untweaked epub is used without any message -- confirm intended.
                    if tweak_opf(opf, asin, epub_version=prefs['azw3_epub_version'], preserve_comments=prefs['preserve_kindleunpack_meta']):
                        os.remove(epub)
                        with temp_epub_handle(delete=False) as new_epub:
                            epub_zip_up_book_contents(os.path.join(temp_dir,'mobi8'), new_epub)
                        epub = new_epub
        else:
            from quickepub import QuickEpub
            mobidir, mobi_html, mobi_opf, mobiBaseName = mp.unpackMOBI(temp_dir)
            if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                TWEAK = False
            elif prefs['asin_for_kindlegen_plugin']:
                if mobi_opf is not None:
                    # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                    asin = get_asin(mobi_opf)
                    if asin is not None:
                        asin = unicode_str(asin)
                else:
                    TWEAK = False
            if TWEAK:
                if not tweak_opf(mobi_opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                    # NOTE(review): unlike the other early exits, this one
                    # returns without calling bk.savePrefs(prefs) -- confirm.
                    print('OPF manipulation failed!')
                    return -1
            qe = QuickEpub(mobidir, mobi_html, mobi_opf)
            epub = qe.makeEPUB()

        # Save prefs to json
        bk.savePrefs(prefs)
        print('Path to epub or src {0}'.format(epub))
        # Read the result while still inside the temp-directory context.
        with file_open(epub,'rb')as fp:
            data = fp.read()
        bk.addotherfile('dummy.epub', data)

    return 0
def run(bk):
    """Plugin entry point: add a user-selected XML file to the book's META-INF.

    Loads (and defaults) the plugin preferences, optionally checks for a
    plugin update, refuses to continue when 'META-INF/<XMLFILE>' is already
    present in the book, then asks the user for a file and adds its bytes
    to the book under that name.  Preferences are saved on every exit path
    that is reached after they have been modified.

    Args:
        bk: The book container object handed to the plugin by its host.

    Returns:
        0 on success, when the target file already exists, or when the
        file selection is cancelled; -1 when the chosen file cannot be read.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'check_for_updates' not in prefs:
        prefs['check_for_updates'] = True
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=delta+1))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    if prefs['check_for_updates']:
        chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
        update_available, online_version, checked_time = chk.update_info()
        # update preferences with latest date/time/version
        prefs['last_time_checked'] = checked_time
        if online_version is not None:
            prefs['last_online_version'] = online_version
        if update_available:
            title = 'Plugin Update Available'
            msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
            show_msgbox(title, msg, 'info')

    internal_href = 'META-INF/{}'.format(XMLFILE)
    if internal_href in bk._w.other:
        title = 'File Already Present!'
        msg = 'The {} file is already present. Please delete it before trying to add another'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        # Persist the update-check bookkeeping like every other exit does.
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Python sys.path: {}\n'.format(sys.path))

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('iBooks XML file selection canceled!')
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Path to XML file: {}\n'.format(inpath))

    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))

    # Save prefs to json
    bk.savePrefs(prefs)

    try:
        with file_open(inpath, 'rb') as fp:
            data = fp.read()
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        title = 'Unexpected error!'
        msg = 'Error reading the {} file. Perhaps it is corrupt or missing?'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        return -1

    if _DEBUG_:
        print('Internal epub href: META-INF/{}\n'.format(XMLFILE))

    bk.addotherfile(internal_href, data)
    return 0
def run(bk):
    """Plugin entry point: add a user-selected XML file to the book's META-INF.

    Loads (and defaults) the plugin preferences, optionally checks for a
    plugin update, refuses to continue when 'META-INF/<XMLFILE>' is already
    present in the book, then asks the user for a file and adds its bytes
    to the book under that name.  Preferences are saved on every exit path
    that is reached after they have been modified.

    Args:
        bk: The book container object handed to the plugin by its host.

    Returns:
        0 on success, when the target file already exists, or when the
        file selection is cancelled; -1 when the chosen file cannot be read.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'check_for_updates' not in prefs:
        prefs['check_for_updates'] = True
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=delta + 1))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    if prefs['check_for_updates']:
        chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
        update_available, online_version, checked_time = chk.update_info()
        # update preferences with latest date/time/version
        prefs['last_time_checked'] = checked_time
        if online_version is not None:
            prefs['last_online_version'] = online_version
        if update_available:
            title = 'Plugin Update Available'
            msg = 'Version {} of the {} plugin is now available.'.format(
                online_version, bk._w.plugin_name)
            show_msgbox(title, msg, 'info')

    internal_href = 'META-INF/{}'.format(XMLFILE)
    if internal_href in bk._w.other:
        title = 'File Already Present!'
        msg = 'The {} file is already present. Please delete it before trying to add another'.format(
            XMLFILE)
        show_msgbox(title, msg, 'error')
        # Persist the update-check bookkeeping like every other exit does.
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Python sys.path: {}\n'.format(sys.path))

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('iBooks XML file selection canceled!')
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Path to XML file: {}\n'.format(inpath))

    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))

    # Save prefs to json
    bk.savePrefs(prefs)

    try:
        with file_open(inpath, 'rb') as fp:
            data = fp.read()
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        title = 'Unexpected error!'
        msg = 'Error reading the {} file. Perhaps it is corrupt or missing?'.format(
            XMLFILE)
        show_msgbox(title, msg, 'error')
        return -1

    if _DEBUG_:
        print('Internal epub href: META-INF/{}\n'.format(XMLFILE))

    bk.addotherfile(internal_href, data)
    return 0
def run(bk):
    """Plugin entry point: import a Kindlebook into the editor as an epub.

    Loads (and defaults) the plugin preferences, checks for a plugin
    update, lets the user pick a Kindlebook, unpacks it to an epub
    (optionally tweaking the OPF) and hands the resulting file to ``bk``
    as 'dummy.epub'.

    Returns 0 on success or when no input file was selected, and -1 when
    the book cannot be opened (Topaz, encrypted, Print Replica) or the OPF
    manipulation of a mobi-only book fails.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'azw3_epub_version' not in prefs:
        prefs['azw3_epub_version'] = "2"  # A, F, 2 or 3
    if 'use_hd_images' not in prefs:
        prefs['use_hd_images'] = True
    if 'use_src_from_dual_mobi' not in prefs:
        prefs['use_src_from_dual_mobi'] = True
    if 'asin_for_kindlegen_plugin' not in prefs:
        prefs['asin_for_kindlegen_plugin'] = False
    if 'preserve_kindleunpack_meta' not in prefs:
        prefs['preserve_kindleunpack_meta'] = False
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=7))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
    update_available, online_version, time = chk.update_info()
    # update preferences with latest date/time/version
    prefs['last_time_checked'] = time
    if online_version is not None:
        prefs['last_online_version'] = online_version
    if update_available:
        title = 'Plugin Update Available'
        msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
        update_msgbox(title, msg)

    if _DEBUG_:
        print('Python sys.path', sys.path)
        print('Default AZW3 epub version:', prefs['azw3_epub_version'])

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('No input file selected!')
        bk.savePrefs(prefs)
        return 0

    print ('Path to Kindlebook {0}'.format(inpath))
    from mobi_stuff import mobiProcessor, topaz
    if topaz(inpath):
        print('Kindlebook is in Topaz format: can\'t open!')
        bk.savePrefs(prefs)
        return -1

    mobionly = False
    mp = mobiProcessor(inpath, prefs['azw3_epub_version'], prefs['use_hd_images'])
    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))
    if mp.isEncrypted:
        print('Kindlebook is encrypted: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    if mp.isPrintReplica:
        print('Kindlebook is a Print Replica: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    # Neither a combo file nor KF8: take the mobi-only (QuickEpub) route.
    if not mp.isComboFile and not mp.isKF8:
        mobionly = True

    with make_temp_directory() as temp_dir:
        TWEAK = True
        asin = None
        if not mobionly:
            epub, opf, src = mp.unpackEPUB(temp_dir)
            if src is not None and isEPUB(src) and prefs['use_src_from_dual_mobi']:
                print ('Using included kindlegen sources.')
                epub = src
            else:
                # If user requested no tweaks through preferences, use standard epub from KindleUnpack
                if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                    TWEAK = False
                elif prefs['asin_for_kindlegen_plugin']:
                    if opf is not None:
                        # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                        asin = get_asin(opf)
                        if asin is not None:
                            asin = unicode_str(asin)
                    else:
                        TWEAK = False
                if TWEAK:
                    # Modify the opf with the requested tweaks and build a new epub
                    # NOTE(review): if tweak_opf() returns a falsy value the
                    # untweaked epub is used without any message -- confirm intended.
                    if tweak_opf(opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                        os.remove(epub)
                        with temp_epub_handle(delete=False) as new_epub:
                            epub_zip_up_book_contents(os.path.join(temp_dir,'mobi8'), new_epub)
                        epub = new_epub
        else:
            from quickepub import QuickEpub
            mobidir, mobi_html, mobi_opf, mobiBaseName = mp.unpackMOBI(temp_dir)
            if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                TWEAK = False
            elif prefs['asin_for_kindlegen_plugin']:
                if mobi_opf is not None:
                    # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                    asin = get_asin(mobi_opf)
                    if asin is not None:
                        asin = unicode_str(asin)
                else:
                    TWEAK = False
            if TWEAK:
                if not tweak_opf(mobi_opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                    # NOTE(review): unlike the other early exits, this one
                    # returns without calling bk.savePrefs(prefs) -- confirm.
                    print('OPF manipulation failed!')
                    return -1
            qe = QuickEpub(mobidir, mobi_html, mobi_opf)
            epub = qe.makeEPUB()

        # Save prefs to json
        bk.savePrefs(prefs)
        print ('Path to epub or src {0}'.format(epub))
        # Read the result while still inside the temp-directory context.
        with file_open(epub,'rb')as fp:
            data = fp.read()
        bk.addotherfile('dummy.epub', data)

    return 0