def zip_resume(chemin, epub):
    job.log('\t rezippe le résumé')
    part = epub.rpartition('-')
    if part[1]:
        epubresume = part[0] + '%Resume% ' + part[1] + part[2][0:-5] + '.epub'
    else:
        epubresume = part[2][0:-5] + '_%Resume%.epub'
    zepub = zipfile.ZipFile(epubresume, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=True)
    # the uncompressed mimetype must be the first entry of the archive
    zepub.write(os.path.join(chemin, "mimetype"), arcname="mimetype", compress_type=zipfile.ZIP_STORED)
    # then every other file
    exclude_files = ['.DS_Store', 'mimetype']
    for root, _dirs, files in os.walk(chemin):
        for fn in files:
            if fn in exclude_files:
                continue
            absfn = os.path.join(root, fn)
            zfn = os.path.relpath(absfn, chemin).replace(os.sep, '/')
            zepub.write(absfn, zfn)
    zepub.close()
    try:
        shutil.rmtree(chemin)
    except OSError:
        pass
    return

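# Illustration only (not called anywhere): the rpartition('-') rule above turns
# 'Auteur - Titre.epub' into 'Auteur %Resume% - Titre.epub', and a name without
# a dash into 'Titre_%Resume%.epub'. A standalone sketch of just that naming logic:
def _exemple_nom_resume(epub):
    # mirrors zip_resume()'s naming branch, for illustration
    part = epub.rpartition('-')
    if part[1]:
        return part[0] + '%Resume% ' + part[1] + part[2][0:-5] + '.epub'
    return part[2][0:-5] + '_%Resume%.epub'
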
def modif_tatoo_html(monepub):
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        path = os.path.join(monepub.chemin, monepub.content_path) + '\\' + item.href
        path = os.path.normpath(urllib.parse.unquote(path))
        if item.href.endswith(('html', 'htm', 'opf')):
            data = fonctions.parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            try:
                # remove XML comments
                for elem in data.iter(tag=etree.Comment):
                    if elem.text:
                        job.logtat('suppression des commentaires html :' + str(elem.text))
                        tail = elem.tail
                        parent = elem.getparent()
                        parent.remove(elem)
                        parent.text = tail
                para = XPath('.//h:p')
                # remove paragraphs containing an e-mail address
                for child in para(data):
                    if child.text is not None and re.search('[@].*[.][a-z]{2,3}', child.text) and len(child.text) < 100:
                        job.logtat('suppression du paragraphe : ' + str(child.text))
                        child.getparent().remove(child)
                # rewrite the watermark notice
                for child in para(data):
                    children = child.getchildren()
                    if (child.text is not None) and re.search('filigrane', child.text) and re.search('e-book', child.text):
                        job.logtat('modification de ces paragraphes : "' + str(child.text) + str(children[0].tail) + '" dans ' + os.path.basename(path))
                        child.text = 'Cet e-book contenait un filigrane (watermark) et une identification qui ont été supprimés pour votre agrément'
                        children[0].tail = 'par PersonnaLiseur'
                # remove base64-embedded watermark images
                waterm = XPath('.//h:img')
                for src in waterm(data):
                    for value in src.values():
                        if 'base64' in value:
                            job.logtat('suppression du watermark dans ' + os.path.basename(path))
                            div = src.getparent()
                            div.getparent().remove(div)
                # clear body id attributes
                body_id = XPath('.//h:body')
                for body in body_id(data):
                    if body.get('id'):
                        job.logtat('suppression du body id "' + body.get('id') + '" dans ' + os.path.basename(path))
                        body.set('id', '')
            except Exception:
                job.log('html non modifiés')
                return
            data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
            # remove comments that survive outside the parsed tree
            with open(path, mode='r', encoding='utf-8') as f:
                texte = f.read()
            if re.search(r"<!--[\d\D]*?-->", texte, re.DOTALL):
                job.logtat('suppression des commentaires hors html dans ' + os.path.basename(path))
                # note: re.sub's third positional argument is count, not flags;
                # the [\d\D] class already matches newlines, so no flag is needed
                texte = re.sub(r"<!--[\d\D]*?-->", '', texte)
                with open(path, mode='w', encoding='utf-8') as f:
                    f.write(texte)
    return

def modifie_titlepage(monepub, options_texte):
    '''Update the cover page's viewBox, if present, to match the new
    dimensions of the cover image.'''
    if not monepub.pagedecouv:
        return
    with open(monepub.pagedecouv, mode='r', encoding='utf-8') as f:
        if 'viewBox' not in f.read():
            return
    job.log('\t puis on rectifie le viewbox')
    im = Image.open(monepub.imagedecouv)
    newwidth, newheight = im.size
    parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
    with open(monepub.pagedecouv, mode='r', encoding='utf-8') as f:
        data = etree.parse(f, parser=parser)
    viewXpath = xpath(data, './/s:svg')
    imageXpath = xpath(data, '//s:image')
    for key in viewXpath[0].keys():
        if key == 'viewBox':
            newvalue = '0 0 ' + str(newwidth) + ' ' + str(newheight)
            viewXpath[0].set('viewBox', newvalue)
        if key == 'width':
            imageXpath[0].set('width', str(newwidth))
        if key == 'height':
            imageXpath[0].set('height', str(newheight))
    with open(monepub.pagedecouv, mode='wb') as f:
        data.write(f, encoding='utf-8', xml_declaration=True, pretty_print=True)
    return

def redim(options_texte, monepub):
    job.log('Redimensionne les images de ' + monepub.chemin)
    taille = options_texte[2]
    newheight = re.split('x', taille, flags=re.IGNORECASE)[0].strip()
    newwidth = re.split('x', taille, flags=re.IGNORECASE)[1].strip()
    qual = int(options_texte[5])
    for root, _dirs, files in os.walk(monepub.chemin):
        for name in files:
            name = os.path.join(root, name)
            ext = os.path.splitext(name)[1]
            if ext in EXT_IMAGES:
                try:
                    im = Image.open(name)
                    oldwidth, oldheight = im.size
                    coef = oldheight / int(newheight)
                    if coef <= 1:
                        im.thumbnail((int(newwidth), int(newheight)), Image.ANTIALIAS)
                    else:
                        im.thumbnail((int(newwidth), int(newheight)), Image.BICUBIC)
                    if ext in ('.jpg', '.jpeg'):
                        im.save(name, quality=qual)
                    elif ext == '.png':
                        im.save(name, optimize=True)
                    else:
                        im.save(name)
                except IOError:
                    pass
    return

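# Assumed layout of options_texte, inferred from the indices used above
# (hypothetical values, for illustration): index 2 holds the target size as a
# 'HxW' string and index 5 the JPEG quality, so a call could look like:
#   options = ['', '', '1024x768', '', '', '80']
#   redim(options, monepub)
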
def inserer_jacket(f1path, monepub, LECHEMIN):
    job.log('\t on l\'insère')
    # first drop any previous jacket from the manifest
    # (iterate over a copy: deleting entries while iterating over the dict is unsafe)
    for value in list(monepub.opf.manifest):
        if 'jacket' in value:
            del monepub.opf.manifest[value]
    # then from the spine (copy again, since we remove while iterating)
    for item in list(monepub.opf.spine.itemrefs):
        if 'jacket' in item or 'calibre_jacket' in item:
            monepub.opf.spine.itemrefs.remove(item)
    # then register the new jacket
    repjacket = 'jacket.xhtml'
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        if item.media_type == 'application/xhtml+xml':
            rep = os.path.dirname(item.href)  # directory holding the xhtml pages
            if rep == '':
                repjacket = 'jacket.xhtml'
            else:
                repjacket = rep + '/jacket.xhtml'
    monepub.opf.manifest.add_item('jacket', repjacket, 'application/xhtml+xml')
    shutil.move(f1path, os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + repjacket))
    monepub.opf.spine.itemrefs.insert(1, ('jacket', True))
    myopf = os.path.join(monepub.chemin, monepub.opf_path)  # ../tmp/xxx + \\OEBPS + /content
    with open(myopf, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return

def suppr_tatoo(monepub):
    job.log('Détatouage')
    fonctions.metas_xhtml(monepub, copyr=False)
    suppr_fichiers_suspects(monepub)
    modif_metas_tatoo(monepub)
    modif_tatoo_html(monepub)
    return

def cherche_opf(self, chemin):
    # fallback when META-INF/container.xml is missing or unusable:
    # walk the tree and return the first .opf file found, relative to chemin
    for root, _dirs, files in os.walk(chemin):
        for name in files:
            if 'opf' in os.path.splitext(name)[1]:
                return os.path.relpath(os.path.join(root, name), chemin)
    job.log('\n Epub invalide : pas de content.opf')
    return None

def renomme_fichiers(dossier):
    job.log('Renommage des fichiers')
    dict_nom = {}
    dict_nomcomplet = {}
    i = 0
    for root, _dirs, files in os.walk(dossier):
        for name in files:
            nomcomplet = os.path.join(root, name)
            ext = os.path.splitext(name)[1]
            chiffres = re.search('[0-9]', name)
            if (ext in ('.html', '.xhtml', '.htm')) and not name.startswith('PL') and chiffres:
                dict_nomcomplet[nomcomplet] = os.path.join(root, 'PL' + str(i) + '.xhtml')
                dict_nom[name] = 'PL' + str(i) + '.xhtml'
                i += 1
                os.rename(nomcomplet, dict_nomcomplet[nomcomplet])
    # update the links in html/opf files, then in the ncx
    for drive, _dirs, files in os.walk(dossier):
        for name in files:
            name = os.path.join(drive, name)
            ext = os.path.splitext(name)[1]
            if ext in ('.html', '.xhtml', '.htm', '.opf'):
                data = parse_file(name)
                if data is None:
                    continue
                lien_a = XPath('.//h:a|//opf:item|//opf:reference')
                for child in lien_a(data):
                    for attr, value in child.items():
                        value = urllib.parse.unquote(value)
                        for ancien_nom in dict_nom.keys():
                            if attr == 'href' and (re.search('/' + ancien_nom, value) or re.match(ancien_nom, value)):
                                value = value.replace(ancien_nom, dict_nom[ancien_nom])
                                child.set('href', value)
                xml = etree.tostring(data, encoding='utf-8', xml_declaration=True, pretty_print=True)
                with open(name, mode='wb') as f:
                    f.write(xml)
            if ext == '.ncx':
                data = parse_file(name)
                if data is None:
                    continue
                lien_a = XPath('.//dt:content')
                for child in lien_a(data):
                    for attr, value in child.items():
                        for ancien_nom in dict_nom.keys():
                            if attr == 'src' and re.search(ancien_nom, value):
                                value = value.replace(ancien_nom, dict_nom[ancien_nom])
                                child.set('src', value)
                xml = etree.tostring(data, encoding='utf-8', xml_declaration=True, pretty_print=True)
                with open(name, mode='wb') as f:
                    f.write(xml)
    return

def css_expo(monepub):
    os.chdir(monepub.chemin)
    job.log('Corrige les exposants')
    for elt in monepub.css:
        f = monepub.normalize(os.path.join(monepub.chemin, elt))
        with open(f, mode='r', encoding='utf8', errors='ignore') as filecss:
            texte = re.sub(r'vertical-align\s?:\s?super', r'vertical-align:top', filecss.read())
            texte = re.sub(r'vertical-align\s?:\s?[0-9]{1,3}%', r'vertical-align:top', texte)
        with open(f, mode='w', encoding='utf8', errors='strict') as f1:
            f1.write(texte)
    return

def resume(nouvelepub, covimg, LECHEMIN, monepub, epub, options_texte):
    job.log('Création d\'un résumé')
    covpath = LECHEMIN + '/tmp2/cover.jpg'
    shutil.copy2(covimg, covpath)
    if 'Redimensionner les images' in options_texte:
        redim_cover(options_texte, covpath)
    jacket = LECHEMIN + '/tmp2/jacket.xhtml'
    cover = LECHEMIN + '/tmp2/cover.xhtml'
    nouvelepub.insere_fichier(LECHEMIN, cover, 'ch1', 'Text/cover.xhtml', 'application/xhtml+xml',
                              append_to_spine=True, insert_to_spine=False, index=0, is_linear=True)
    nouvelepub.insere_fichier(LECHEMIN, jacket, 'ch2', 'Text/jacket.xhtml', 'application/xhtml+xml',
                              append_to_spine=True, insert_to_spine=False, index=0, is_linear=True)
    nouvelepub.insere_fichier(LECHEMIN, covpath, 'img', 'Images/cover.jpg', 'image/jpeg',
                              append_to_spine=False, insert_to_spine=False, index=0, is_linear=True)
    try:
        nouvelepub.toc.title = monepub.opf.metadata.titles[0][0]
    except Exception:
        nouvelepub.toc.title = ''
    try:
        nouvelepub.opf.metadata.add_title(monepub.opf.metadata.titles[0][0])
    except Exception:
        nouvelepub.opf.metadata.add_title('')
    try:
        nouvelepub.opf.metadata.subjects = monepub.opf.metadata.subjects
    except Exception:
        nouvelepub.opf.metadata.subjects = ''
    try:
        nouvelepub.opf.metadata.creators = monepub.opf.metadata.creators
    except Exception:
        nouvelepub.opf.metadata.creators = ''
    try:
        nouvelepub.opf.metadata.description = monepub.opf.metadata.description
    except Exception:
        nouvelepub.opf.metadata.description = ''
    nouvelepub.opf.guide.add_reference('Text/cover.xhtml', ref_type='cover', title='Couverture')
    fpath = nouvelepub.chemin + nouvelepub.content_path + '/content.opf'  # ../tmp2/book/ + OEBPS + /content
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(nouvelepub.opf.as_xml_document().toprettyxml())
    fpath = nouvelepub.chemin + nouvelepub.content_path + '/toc.ncx'  # ../tmp2/book/ + OEBPS + /toc
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(nouvelepub.toc.as_xml_document().toprettyxml())
    zip_resume(nouvelepub.chemin, epub)
    return

def supprimer_jacket(monepub, dirtemp, LECHEMIN):
    job.log('Suppression des jackets')
    # first drop old jackets from the manifest
    # (iterate over a copy: deleting entries while iterating is unsafe)
    for value in list(monepub.opf.manifest):
        item = monepub.opf.manifest[value]
        if 'jacket' in value:
            file = monepub.normalize(monepub.relat_to_abs(item.href))
            os.remove(file)
            del monepub.opf.manifest[value]
    # then from the spine (copy again, since we remove while iterating)
    for item in list(monepub.opf.spine.itemrefs):
        if 'jacket' in item[0] or 'calibre_jacket' in item[0]:
            monepub.opf.spine.itemrefs.remove(item)
    myopf = os.path.join(monepub.chemin, monepub.opf_path)  # ../tmp/xxx + \\OEBPS + /content
    with open(myopf, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return

def redim_cover(options_texte, covpath):
    job.log('\t redimensionne la couv du résumé')
    taille = options_texte[2]
    newheight = re.split('x', taille, flags=re.IGNORECASE)[0].strip()
    newwidth = re.split('x', taille, flags=re.IGNORECASE)[1].strip()
    qual = int(options_texte[5])
    try:
        im = Image.open(os.path.normpath(urllib.parse.unquote(covpath)))
    except IOError:
        job.log('\t IOError de l\'image de couv')
        return
    oldwidth, oldheight = im.size
    coef = oldheight / int(newheight)
    if coef <= 1:
        im.thumbnail((int(newwidth), int(newheight)), Image.ANTIALIAS)
    else:
        im.thumbnail((int(newwidth), int(newheight)), Image.BICUBIC)
    im.save(covpath, quality=qual)
    return

def get_coverimg_name(monepub, pagecouv):
    '''Extract the cover image path from the cover page.'''
    if not os.path.isfile(pagecouv):
        job.log('\t Ce fichier n\'existe pas')
        return None
    try:
        dom = parse(pagecouv)
        for img in dom.getElementsByTagName('img'):
            src = img.attributes['src']
            return monepub.normalize(src.value)
        for img in dom.getElementsByTagName('image'):
            src = img.attributes['xlink:href']
            return monepub.normalize(src.value)
    except ExpatError:
        job.log('\t ExpatError sur le fichier de couv')
        return None

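# Example of what get_coverimg_name() extracts (hypothetical cover page): given
# a cover.xhtml containing <img src="../Images/cover.jpg"/>, it returns the
# normalized '../Images/cover.jpg'; for an SVG cover it reads the
# <image xlink:href="..."/> attribute instead.
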
def maj_meta(monepub, metaname, metac, metacal, copyr):
    os.chdir(monepub.chemin)
    job.log('Met à jour les métadonnées')
    mesmeta = monepub.opf.as_xml_document()
    if metaname:
        job.log('\t il existe un metadata.opf')
        calibre_meta = opf.parse_opf(os.path.normpath(metaname))
        calibrexml = calibre_meta.as_xml_document()
        metaxml = modifie_metas(calibrexml, metac, metacal)
    else:
        job.log('\t on utilise les metas internes')
        metaxml = modifie_metas(mesmeta, metac, metacal)
    for node in mesmeta.getElementsByTagName('package'):
        metas = node.firstChild
        package = metas.parentNode
        monmanifest = metas.nextSibling
        package.removeChild(metas)
        package.insertBefore(metaxml, monmanifest)
    path = os.path.normpath(os.path.join(monepub.chemin, monepub.opf_path))
    with open(path, mode='w', encoding='utf8', errors='strict') as f:
        f.write(mesmeta.toprettyxml())
    monepub.init_read()
    metas_xhtml(monepub, copyr)
    return

def set_cover(self):
    '''Determine the cover page and its image.'''
    EXT_IMAGES = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    covpage = couv.has_cover(self)  # url of the cover page
    if covpage:
        self.pagedecouv = self.normalize(covpage)
        job.log('Page de couverture : ' + self.pagedecouv)
        covimg = couv.get_coverimg_name(self, self.pagedecouv)
        if covimg is not None and covimg.startswith('..'):
            covimg = os.path.normpath(covpage.rsplit('\\', 2)[0] + covimg.strip('.'))
        elif covimg is not None:
            covimg = os.path.normpath(os.path.dirname(covpage) + '\\' + covimg.strip('.'))
        if covimg is None or not covimg.endswith(EXT_IMAGES):
            covimg = couv.seek_img(self)
        covimg = self.normalize(covimg)
        job.log('Image de couverture : ' + str(covimg))
        self.imagedecouv = covimg
    return

def maj_publication(monepub, copyright):
    from PyQt4 import QtGui
    try:
        job.ProgressDialog.setLabelText('Récupération de la date de copyright')
        QtGui.QApplication.processEvents()
    except Exception:
        pass
    job.log('Récupération du copyright')
    pubdate = sorted(copyright)[0] + '-01-01'
    file = os.path.normpath(monepub.chemin + '\\' + monepub.opf_path)
    parser = etree.XMLParser(attribute_defaults=False, load_dtd=False, remove_comments=False, ns_clean=True)
    tree = etree.parse(file, parser)
    xpdate = xpath(tree, '//dc:date')
    for child in xpdate:
        if child.get("{http://www.idpf.org/2007/opf}event") == 'publication':
            child.text = pubdate
        elif child.get("{http://www.idpf.org/2007/opf}event") is None:
            child.text = pubdate
    texte = etree.tostring(tree, encoding='utf-8', xml_declaration=True, pretty_print=True)
    with open(file, mode='wb') as f:
        f.write(texte)
    opf.parse_opf(file)
    return

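# For reference, the dc:date elements rewritten above follow the OPF 2.0
# convention, e.g.:
#   <dc:date opf:event="publication">2005-03-01</dc:date>
# Dates without an opf:event attribute are treated as publication dates too.
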
def cree_couv(monepub, covname, LECHEMIN, epub, options_texte):
    # build a cover page
    job.log('Création d\'une page de couverture')
    fpath = os.path.normpath(LECHEMIN + '/resources/covermodele.xhtml')
    f1path = os.path.normpath(LECHEMIN + '/tmp2/cover.xhtml')
    if covname:
        job.log('\t on utilise l\'image de couv de Calibre')
        covimg = covname
        monepub.imagedecouv = covname
    else:
        if monepub.imagedecouv is not None:
            # convert to jpg if needed
            if os.path.splitext(monepub.imagedecouv)[1] not in ('.jpg', '.jpeg'):
                im = Image.open(monepub.imagedecouv)
                ima = im.convert('RGB')
                output = os.path.splitext(monepub.imagedecouv)[0] + '.jpg'
                ima.save(output, format="JPEG")
                monepub.imagedecouv = output
            covimg = monepub.imagedecouv  # read after conversion, so the jpg is used
        else:
            # no cover found: fall back to the default page
            job.log('\t on utilise la page par défaut')
            monepub.pagedecouv = 'defaut'
            src = os.path.normpath(LECHEMIN + '/resources/covermodele.xhtml')
            dst = os.path.normpath(LECHEMIN + '/tmp2/cover.xhtml')
            shutil.copy2(src, dst)
            covpage = dst
            covimg = LECHEMIN + '/resources/default_cover.png'
    with open(fpath, mode='r', encoding='utf-8', errors='strict') as modele:
        texte = modele.read()
    texte = re.sub(r'%%couv%%', '../Images/cover.jpg', texte)
    with open(f1path, mode='w', encoding='utf8', errors='strict') as f1:
        f1.write(texte)
    cree_jaquette(monepub, LECHEMIN, insere_jacket=False)
    nouvelepub = container.EpubFile()
    nouvelepub.createnew(LECHEMIN)
    return resume(nouvelepub, covimg, LECHEMIN, monepub, epub, options_texte)

def cree_jaquette(monepub, LECHEMIN, insere_jacket):
    # generate a jacket page from the metadata
    job.log('Création d\'une page de garde')
    fpath = LECHEMIN + '/resources/jacketmodele.xhtml'
    f1path = LECHEMIN + '/tmp2/jacket.xhtml'
    if not os.path.isfile(f1path):
        shutil.copy2(fpath, f1path)
    with open(f1path, mode='r', encoding='utf-8', errors='strict') as modele:
        if monepub.opf.metadata.titles:
            titre = monepub.opf.metadata.titles[0][0]
        else:
            titre = ''
        if monepub.opf.metadata.creators:
            auteur = monepub.opf.metadata.creators[0][0]
        else:
            auteur = ''
        if monepub.opf.metadata.publisher:
            editeur = monepub.opf.metadata.publisher
        else:
            editeur = ''
        if monepub.opf.metadata.dates:
            pubdate = monepub.opf.metadata.dates[0][0]
        else:
            pubdate = ''
        if pubdate:
            pubdate = '(' + pubdate[0:4] + ')'
        # dates is a list of (date, event) pairs; the original enumerate()
        # unpacked (index, tuple) and could never match 'publication'
        for date, event in monepub.opf.metadata.dates:
            if event == 'publication':
                pubdate = date
        if monepub.opf.metadata.subjects:
            sujet = monepub.opf.metadata.subjects[0]
            for elt in monepub.opf.metadata.subjects[1:]:
                sujet = sujet + ', ' + elt
        else:
            sujet = ''
        if monepub.opf.metadata.description:
            comment = monepub.opf.metadata.description
        else:
            comment = ''
        serie = monepub.opf.metadata.serie
        serie_index = monepub.opf.metadata.serie_index
        if serie:
            serie = serie + ' [' + serie_index + ']'
        else:
            serie = ''
        parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
        data = etree.parse(modele, parser=parser)
    TITLE = XPath('.//h:title')
    titrex = XPath('.//h:span[@class="title"]')
    series = XPath('.//h:td[@class="cbj_series"]')
    author = XPath('.//h:td[@class="cbj_author"]')
    publisher = XPath('.//h:td[@class="cbj_pubdata"]')
    content = XPath('.//h:td[@class="cbj_content"]')
    description = XPath('.//h:div[@class="cbj_comments"]')
    for title in TITLE(data):
        title.text = titre
    for tit in titrex(data):
        tit.text = titre
    for ser in series(data):
        ser.text = serie
    author(data)[0].text = auteur
    for pub in publisher(data):
        pub.text = editeur + ' ' + pubdate
    for cont in content(data):
        cont.text = sujet
    for desc in description(data):
        # strip html tags from the comment
        desc.text = re.sub(r"<\s*?[^>]+\s*?>", '', comment)
    with open(f1path, mode='wb') as f1:
        data.write(f1, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if not insere_jacket:
        return
    return inserer_jacket(f1path, monepub, LECHEMIN)

def parse_toc(xmlstring):
    """Inspect an NCX formatted xml document."""
    toc = Ncx()
    try:
        toc_xml = minidom.parseString(xmlstring).documentElement
    except Exception:
        try:
            rewrite_toc(xmlstring)
            toc_xml = minidom.parseString(xmlstring).documentElement
        except Exception:
            job.log('pas de toc')
            return
    xmlns = toc_xml.getAttribute('xmlns')
    if xmlns:
        toc.xmlns = xmlns
    version = toc_xml.getAttribute('version')
    if version:
        toc.version = version
    lang = toc_xml.getAttribute('xml:lang')
    if lang:
        toc.lang = lang
    # Inspect head > meta; unknown metas are ignored
    try:
        head = toc_xml.getElementsByTagName('head')[0]
    except IndexError:
        rewrite_toc(xmlstring)
        toc_xml = minidom.parseString(xmlstring).documentElement
        head = toc_xml.getElementsByTagName('head')[0]
    metas = {'dtb:uid': '', 'dtb:depth': '', 'dtb:totalPageCount': '',
             'dtb:maxPageNumber': '', 'dtb:generator': ''}
    for meta in head.getElementsByTagName('meta'):
        metas[meta.getAttribute('name')] = meta.getAttribute('content')
    toc.uid = metas['dtb:uid']
    toc.depth = metas['dtb:depth']
    toc.total_page_count = metas['dtb:totalPageCount']
    toc.max_page_number = metas['dtb:maxPageNumber']
    toc.generator = metas['dtb:generator']
    # Get title (one and only one <docTitle> tag is required)
    doc_title_node = toc_xml.getElementsByTagName('docTitle')[0]
    toc.title = _parse_for_text_tag(doc_title_node)
    # Get authors (<docAuthor> tags are optional)
    for author in toc_xml.getElementsByTagName('docAuthor'):
        toc.authors.append(_parse_for_text_tag(author))
    # Inspect <navMap> (one is required)
    nav_map_node = toc_xml.getElementsByTagName('navMap')[0]
    toc.nav_map = _parse_xml_nav_map(nav_map_node)
    # Inspect <pageList> (optional, only one)
    page_lists = toc_xml.getElementsByTagName('pageList')
    if len(page_lists) > 0:
        toc.page_list = _parse_xml_page_list(page_lists[0])
    # Inspect <navList> (optional, many are possible)
    for nav_list in toc_xml.getElementsByTagName('navList'):
        toc.add_nav_list(_parse_xml_nav_list(nav_list))
    return toc

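# Minimal NCX sketch of what parse_toc() consumes (hypothetical content,
# defined for illustration and never used by the code above):
_NCX_EXEMPLE = b'''<?xml version="1.0" encoding="utf-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
  <head><meta name="dtb:uid" content="urn:uuid:0000"/></head>
  <docTitle><text>Titre</text></docTitle>
  <navMap>
    <navPoint id="n1" playOrder="1">
      <navLabel><text>Chapitre 1</text></navLabel>
      <content src="PL0.xhtml"/>
    </navPoint>
  </navMap>
</ncx>'''
# parse_toc(_NCX_EXEMPLE) would then return an Ncx whose title is 'Titre'
# and whose uid is 'urn:uuid:0000'.
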
def metas_xhtml(monepub, copyr):
    XHTML_NS = 'http://www.w3.org/1999/xhtml'
    XMLNS_NS = 'http://www.w3.org/2000/xmlns/'
    META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
    TITLE_XP = XPath('/h:html/h:head/h:title')
    METACONT_XP = XPath('/h:html/h:head/h:meta[@content]')
    METAADEPT1_XP = XPath('//h:meta[@name="Adept.resource"]')
    METAADEPT2_XP = XPath('//h:meta[@name="Adept.expected.resource"]')
    copyfind = None
    copyright = None
    for path in monepub.liste_fichiers:
        if path.endswith(('html', 'htm')):
            if not os.path.isfile(path):
                job.log('Fichier non présent dans l\'epub : ' + path)
                continue
            data = parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            try:
                # remove XML comments
                for elem in data.iter(tag=etree.Comment):
                    if elem.text:
                        tail = elem.tail
                        parent = elem.getparent()
                        parent.remove(elem)
                        parent.text = tail
                # remove meta tags other than the utf-8 content-type
                for meta in METACONT_XP(data):
                    if meta.get('content') not in ('text/html; charset=utf-8', 'text/html; charset=UTF-8'):
                        job.logtat('suppression du meta : "' + meta.get('name') + ' : ' + meta.get('content') + '"')
                        meta.getparent().remove(meta)
                for title in TITLE_XP(data):
                    title.getparent().remove(title)
                # remove Adept (DRM) metas
                for meta in METAADEPT1_XP(data):
                    job.logtat('suppression du meta : ' + meta.get('name'))
                    meta.getparent().remove(meta)
                for meta in METAADEPT2_XP(data):
                    job.logtat('suppression du meta : ' + meta.get('name'))
                    meta.getparent().remove(meta)
                # look for a copyright year in the text
                texte = data.xpath("string()")
                copyfind = re.findall(r'\u00A9\s*?.*?([0-9]{4})', texte)
                if copyfind:
                    copyright = copyfind
                data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
            except Exception:
                job.log('\t metas des html non modifiées')
                continue
        if path.endswith(('opf', 'ncx')):
            data = parse_file(path)
            if data is None:
                job.log('\t Fichier ' + os.path.basename(path) + ' malformé, impossible à parser')
                continue
            for elem in data.iter(tag=etree.Comment):
                if elem.text:
                    elem.getparent().remove(elem)
            try:
                for meta in METAADEPT1_XP(data):
                    meta.getparent().remove(meta)
                for meta in METAADEPT2_XP(data):
                    meta.getparent().remove(meta)
            except Exception:
                job.log('Fichier ' + path + ' non modifié')
                continue
            data.write(path, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if copyr and copyfind:
        return maj_publication(monepub, copyright)
    else:
        return

def has_cover(monepub):
    '''Locate the cover page, trying in turn the guide, the metadata,
    the manifest, then the first entries of the spine.'''
    cover_item = None
    cover_id = None
    # first look in the guide
    if monepub.opf.guide.references:
        list_couv = ('cover', 'Cover', 'couverture', 'Couverture', 'title', 'coverpage')
        if len(monepub.opf.guide.references) > 1:
            for href, ref_type, title in monepub.opf.guide.references:
                if (ref_type in list_couv) or (title in list_couv):
                    cover_item = href
        else:
            if (monepub.opf.guide.references[0][1] in list_couv) or (monepub.opf.guide.references[0][2] in list_couv):
                cover_item = monepub.opf.guide.references[0][0]
        if (cover_item is not None) and cover_item.endswith('html'):
            # look the href up in the manifest
            for identifier in monepub.opf.manifest:
                item = monepub.opf.manifest[identifier]
                if str(cover_item) == item.href:
                    covpage = os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + item.href)
                    if not os.path.isfile(covpage):
                        cover_item = None
                        covpage = None
                    else:
                        return covpage
        elif cover_item and cover_item.endswith(('jpg', 'jpeg', 'png', 'gif', 'svg')):
            cover_id = cover_item
            cover_item = None
            covpage = get_cover_cover_id(monepub, cover_id)
            return covpage
    # not found in the guide: look in the metadata
    if cover_item is None:
        for name, content in monepub.opf.metadata.metas:
            if name == 'cover':
                cover_id = content  # id of the image
                item = monepub.get_item(cover_id)
                if item:
                    cover_item_manifest = item.href  # relative url of the image
                else:
                    cover_item_manifest = None
                if cover_item_manifest is None:
                    cover_id = None
                    continue
                covpage = get_cover_page(monepub, cover_item_manifest)
                return covpage
            else:
                cover_id = None
    # not found in the metadata: look in the manifest
    if cover_id is None:
        # relative path from the manifest, presumably that of the image
        cover_item_manifest = monepub.get_item('cover')
        if cover_item_manifest is None:
            cover_item_manifest = monepub.get_item('cover-img')
        if cover_item_manifest is None:
            cover_item_manifest = monepub.get_item('cover-id')
        if cover_item_manifest and cover_item_manifest.href.endswith('html'):
            covpage = os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + cover_item_manifest.href)
            if not os.path.isfile(covpage):
                covpage = None
            else:
                return covpage
        elif cover_item_manifest and cover_item_manifest.href.endswith(('jpg', 'jpeg', 'png', 'gif')):
            covpage = get_cover_page(monepub, cover_item_manifest)
            if not os.path.isfile(covpage):
                covpage = None
            else:
                return covpage
        else:
            job.log('\t rien trouvé dans le manifest:')
    # last resort: look in the spine
    if cover_item is None:
        cover_item = monepub.opf.spine.itemrefs[0][0]
        cover_item_manifest = monepub.get_item(cover_item)
        if not cover_item_manifest.href.endswith('html'):
            cover_item = monepub.opf.spine.itemrefs[1][0]
            cover_item_manifest = monepub.get_item(cover_item)
        if not cover_item_manifest.href.endswith('html') or 'jacket' in cover_item:
            cover_item = monepub.opf.spine.itemrefs[2][0]
            cover_item_manifest = monepub.get_item(cover_item)
        pagecouv = os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + cover_item_manifest.href)
        if get_coverimg_name(monepub, pagecouv):
            # the page links to an image: that is our cover page
            return pagecouv
        return None
    else:
        job.log('\t rien trouvé dans le manifest:')
        return None

def insere_logo(monepub, newimage, newimagepath, LECHEMIN):
    job.log('Insertion d\'un logo')
    repimg = ''  # defaults, in case the manifest lists no image or no page
    reptxt = ''
    for identifier in monepub.opf.manifest:
        item = monepub.opf.manifest[identifier]
        if item.media_type in ('image/jpeg', 'image/png'):
            repimg = os.path.dirname(item.href)  # directory holding the images
            continue
        if item.media_type == 'application/xhtml+xml':
            reptxt = os.path.dirname(item.href)  # directory holding the pages
            continue
    if repimg == reptxt:
        relpathimg = os.path.basename(newimagepath)
    elif reptxt == '':
        relpathimg = repimg + '/' + os.path.basename(newimagepath)
    else:
        relpathimg = '../' + repimg + '/' + os.path.basename(newimagepath)
    nouvelempl = os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + repimg + '\\' + os.path.basename(newimagepath))
    if os.path.isfile(nouvelempl):
        nouvelempl = nouvelempl.rsplit('.', 1)[0] + 'PL.' + nouvelempl.rsplit('.', 1)[1]
        relpathimg = relpathimg.rsplit('.', 1)[0] + 'PL.' + relpathimg.rsplit('.', 1)[1]
    shutil.copy2(newimagepath, nouvelempl)
    fpath = LECHEMIN + '/resources/logo_modele.xhtml'
    fpath2 = os.path.normpath(monepub.chemin + '\\' + monepub.content_path + '\\' + reptxt + '\\' + 'logoPL.xhtml')
    shutil.copy2(fpath, fpath2)
    relatimg = os.path.relpath(nouvelempl, start=os.path.normpath(monepub.chemin + '\\' + monepub.content_path))
    relattxt = os.path.relpath(fpath2, start=os.path.normpath(monepub.chemin + '\\' + monepub.content_path))
    parser = etree.XMLParser(encoding='utf-8', recover=True, remove_blank_text=True)
    with open(fpath2, mode='r', encoding='utf-8') as f:
        data = etree.parse(f, parser=parser)
    viewXpath = xpath(data, './/s:svg')
    imageXpath = xpath(data, '//s:image')
    for key in viewXpath[0].keys():
        if key == 'viewBox':
            newvalue = '0 0 ' + str(newimage.width()) + ' ' + str(newimage.height())
            viewXpath[0].set('viewBox', newvalue)
    for key in imageXpath[0].keys():
        if key == 'width':
            imageXpath[0].set('width', str(newimage.width()))
        if key == 'height':
            imageXpath[0].set('height', str(newimage.height()))
        if key == '{http://www.w3.org/1999/xlink}href':
            imageXpath[0].set('{http://www.w3.org/1999/xlink}href', relpathimg.replace('\\', '/'))
    with open(fpath2, mode='wb') as f:
        data.write(f, encoding='utf-8', xml_declaration=True, pretty_print=True)
    # first drop old logos from the manifest (iterate over a copy)
    for value in list(monepub.opf.manifest):
        if 'logoPL' in value:
            del monepub.opf.manifest[value]
    # then from the spine
    for item in list(monepub.opf.spine.itemrefs):
        if 'logoPL' in item:
            monepub.opf.spine.itemrefs.remove(item)
    monepub.insere_fichier(LECHEMIN, fpath2, 'logoPL', relattxt.replace('\\', '/'), 'application/xhtml+xml',
                           append_to_spine=False, insert_to_spine=True, index=1, is_linear=True, move=False)
    monepub.insere_fichier(LECHEMIN, fpath2, 'logoPL_img', relatimg.replace('\\', '/'), 'image/png',
                           append_to_spine=False, insert_to_spine=False, index=0, is_linear=True, move=False)
    fpath = os.path.normpath(monepub.chemin + '/' + monepub.opf_path)  # OEBPS + /content
    with open(fpath, mode='w', encoding='utf-8', errors='strict') as f:
        f.write(monepub.opf.as_xml_document().toprettyxml())
    return

def init_read(self):
    # Read container.xml to get the OPF file path
    xmlstring = 'META-INF/container.xml'
    container_xml = minidom.parse(xmlstring).documentElement
    for e in container_xml.getElementsByTagName('rootfile'):
        if e.getAttribute('media-type') == MIMETYPE_OPF:
            # Only take the first full-path available
            self.opf_path = e.getAttribute('full-path')  # OEBPS\content.opf or content.opf
            if os.path.isfile(os.path.join(self.chemin, self.opf_path)):
                self.content_path = os.path.dirname(self.opf_path)  # OEBPS or ''
                break
            else:
                self.opf_path = self.cherche_opf(self.chemin)
                if self.opf_path:
                    self.content_path = os.path.dirname(self.opf_path)  # OEBPS or ''
                else:
                    job.log('Epub invalide : pas de content.opf')
                break
    # Read the OPF xml file
    self.opf = opf.parse_opf(self.opf_path)
    try:
        self.uid = [x for x in self.opf.metadata.identifiers if x[1] == self.opf.uid_id][0]
    except IndexError:
        self.uid = None
    self.item_toc = self.get_item(self.opf.spine.toc)
    if self.item_toc is None:
        for identifier in self.opf.manifest:
            item = self.opf.manifest[identifier]
            if 'ncx' in item.href:
                self.item_toc = os.path.join(self.content_path, item.href)
    if self.opf.metadata.creators:
        self.auteur = self.opf.metadata.creators[0][0]
    else:
        self.auteur = ''
    if self.opf.metadata.titles:
        self.titre = self.opf.metadata.titles[0][0]
    else:
        self.titre = ''
    # Collect the css files (and build the file lists while we are at it)
    self.css = []
    self.liste_items = []
    self.liste_fichiers = []
    for identifier in self.opf.manifest:
        item = self.opf.manifest[identifier]
        self.liste_items.append(item.href)
        self.liste_fichiers.append(self.normalize(self.relat_to_abs(item.href)))
        if 'css' in item.href:
            self.css.append(os.path.join(self.content_path, item.href))  # OEBPS\Style\styles.css or stylesheet.css
    # Inspect the NCX toc file
    try:
        self.toc = ncx.parse_toc(self.read_item(self.item_toc))
    except (UnicodeError, IOError):
        self.toc = None
    # find the cover page and cover image
    if self.pagedecouv is None:
        self.set_cover()

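# For reference, the META-INF/container.xml parsed by init_read() has the
# standard OCF shape:
#   <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
#     <rootfiles>
#       <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
#     </rootfiles>
#   </container>
# MIMETYPE_OPF above corresponds to 'application/oebps-package+xml'.
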
def convert(monepub):
    '''Convert every non-transparent image to jpg.'''
    job.log('Conversion en jpg')
    ext_to_change = []
    for name in monepub.liste_fichiers:
        if os.path.splitext(name)[1] in ('.png', '.gif', '.bmp'):
            val_alpha = 255  # only consulted when the image is RGBA
            try:
                im = Image.open(name)
                if im.mode == 'RGBA':
                    alpha = im.tostring()
                    val_alpha = alpha[3]
            except Exception:
                job.log('\t Erreur de lecture de ' + name)
                continue
            if 'transparency' in im.info or (im.mode == 'RGBA' and val_alpha != 255):
                continue
            ext_to_change.append(os.path.basename(name))
            im2 = im.convert('RGB')
            output = os.path.splitext(name)[0] + '.jpg'
            im2.save(output, 'JPEG')
            try:
                os.remove(name)
            except OSError:
                job.log('fichier source non supprimé !')
    # update the links in the pages
    for page in monepub.liste_fichiers:
        try:
            if os.path.splitext(page)[1] in ('.html', '.xhtml', '.htm'):
                data = parse_file(page)
                image_src = XPath(".//h:img[@src]")
                image_alt = XPath(".//h:img[@alt]")
                for img in ext_to_change:
                    img_jpg = img.rsplit('.', 1)[0] + '.jpg'
                    for src in image_src(data):
                        src.set('src', src.get('src').replace(img, img_jpg))
                    for alt in image_alt(data):
                        alt.set('alt', alt.get('alt').replace(img, img_jpg))
                data.write(page, encoding='utf-8', xml_declaration=True, pretty_print=True)
        except Exception:
            job.log('lien dans la page ' + page + ' non modifié')
            continue
    # update the manifest
    myopf = os.path.join(monepub.chemin, monepub.opf_path)
    data_opf = parse_file(myopf)
    for img in ext_to_change:
        img_jpg = img.rsplit('.', 1)[0] + '.jpg'
        imgxpath = xpath(data_opf, './/opf:item')
        for child in imgxpath:
            changed = False
            for key in child.keys():
                if key == 'href' and img in child.get('href'):
                    changed = True
                    child.set('href', child.get('href').replace(img, img_jpg))
                if key == 'media-type' and changed is True:
                    child.set('media-type', 'image/jpeg')
    texte_opf = etree.tostring(data_opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    with open(myopf, mode='wb') as f:
        f.write(texte_opf)
    monepub.init_read()
    monepub.set_cover()
    return