def commit_toc(container, toc, lang=None, uid=None):
    '''
    Serialize the TOC object as NCX and write it into the container.

    :param container: The book container to modify.
    :param toc: The TOC object to serialize.
    :param lang: Language for the NCX; auto-detected from the OPF when None.
    :param uid: Unique identifier for the NCX; read from the OPF when None.
    '''
    tocname = find_existing_toc(container)
    if tocname is None:
        # No existing NCX: register a new manifest item for it
        item = container.generate_item("toc.ncx", id_prefix="toc")
        tocname = container.href_to_name(item.get("href"), base=container.opf_name)
    if not lang:
        lang = get_lang()
        for l in container.opf_xpath("//dc:language"):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                # fix: removed dead store (`lang = l` was immediately overwritten)
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get("unique-identifier", None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]' % eid)
            if m:
                uid = xml2text(m[0])
    title = _("Table of Contents")
    m = container.opf_xpath("//dc:title")
    if m:
        x = xml2text(m[0]).strip()
        title = x or title
    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
def commit_ncx_toc(container, toc, lang=None, uid=None):
    '''
    Serialize the TOC object as NCX and write it into the container,
    pointing the spine's ``toc`` attribute at the new item when one is created.

    :param container: The book container to modify.
    :param toc: The TOC object to serialize.
    :param lang: Language for the NCX; auto-detected from the OPF when None.
    :param uid: Unique identifier for the NCX; read from the OPF when None.
    '''
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        # No existing NCX: register a manifest item and wire it into the spine
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        # fix: use a plain loop instead of a list comprehension for side effects
        for s in container.opf_xpath('//opf:spine'):
            s.set('toc', ncx_id)
    if not lang:
        lang = get_lang()
        for l in container.opf_xpath('//dc:language'):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                # fix: removed dead store (`lang = l` was immediately overwritten)
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]' % eid)
            if m:
                uid = xml2text(m[0])
    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title
    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
def commit_toc(container, toc, lang=None, uid=None):
    '''
    Serialize the TOC object as NCX and write it into the container.

    :param container: The book container to modify.
    :param toc: The TOC object to serialize.
    :param lang: Language for the NCX; auto-detected from the OPF when None.
    :param uid: Unique identifier for the NCX; read from the OPF when None.
    '''
    tocname = find_existing_toc(container)
    if tocname is None:
        # No existing NCX: register a new manifest item for it
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
    if not lang:
        lang = get_lang()
        for l in container.opf_xpath('//dc:language'):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                # fix: removed dead store (`lang = l` was immediately overwritten)
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]' % eid)
            if m:
                uid = xml2text(m[0])
    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title
    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    '''Prepare a DOCX→HTML conversion: open the DOCX and build the skeleton HTML tree.'''
    self.docx = DOCX(path_or_stream, log=log)
    # Whitespace-normalization patterns used during conversion
    self.ms_pat = re.compile(r'\s{2,}')
    self.ws_pat = re.compile(r'[\n\r\t]')
    self.log = self.docx.log
    self.notes_text = notes_text or _('Notes')
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()
    # Tables must exist before Styles, which takes them as an argument
    self.tables = Tables()
    self.styles = Styles(self.tables)
    self.images = Images()
    self.object_map = OrderedDict()
    head = HEAD(
        META(charset='utf-8'),
        TITLE(self.mi.title or _('Unknown')),
        LINK(rel='stylesheet', type='text/css', href='docx.css'),
    )
    self.html = HTML(head, self.body)
    # Hand-pretty-print the skeleton so serialized output is readable
    self.html.text = '\n\t'
    head.text = '\n\t\t'
    head.tail = '\n'
    for child in head:
        child.tail = '\n\t\t'
    head[-1].tail = '\n\t'
    self.html[1].text = self.html[1].tail = '\n'
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != 'und':
        lang = lang_as_iso639_1(lang)
        if lang:
            self.html.set('lang', lang)
def update_metadata(opf, mi, apply_null=False, update_timestamp=False):
    '''
    Merge the metadata object mi into the OPF object.

    :param apply_null: If True, clear OPF fields that are unset in mi.
    :param update_timestamp: If True, copy mi.timestamp into the OPF when set.
    '''
    # Structural OPF fields must never be copied from incoming metadata
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)
    if mi.languages:
        # Prefer two-letter ISO 639-1 codes, keeping the original on failure
        mi.languages = [lang_as_iso639_1(lc) or lc for lc in mi.languages]
    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null:
        if not getattr(mi, 'series', None):
            opf.series = None
        if not getattr(mi, 'tags', []):
            opf.tags = []
        if not getattr(mi, 'isbn', None):
            opf.isbn = None
        if not getattr(mi, 'comments', None):
            opf.comments = None
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
def ensure_container_has_nav(container, lang=None, previous_nav=None):
    '''
    Ensure the container has a nav document, creating one if necessary.
    Returns the nav document's name and its parsed root element.
    '''
    tocname = find_existing_nav_toc(container)
    if previous_nav is not None:
        # Prefer re-using a previously known nav document if it still exists
        nav_name = container.href_to_name(previous_nav[0])
        if nav_name and container.exists(nav_name):
            tocname = nav_name
            container.apply_unique_properties(tocname, 'nav')
    if tocname is None:
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        root = (previous_nav[1] if previous_nav is not None else
                container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8')))
        container.replace(tocname, root)
    else:
        root = container.parsed(tocname)
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        root.set('lang', lang)
        root.set('{%s}lang' % XML_NS, lang)
    return tocname, root
def get_iterator(lang):
    '''Return a cached ICU word-break iterator for lang, creating it on first use.'''
    try:
        return _iterators[lang]
    except KeyError:
        it = _iterators[lang] = _icu.BreakIterator(
            _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
        return it
def default_lookup_website(lang):
    '''Return the default word-lookup URL template for the given language.'''
    lang = lang_as_iso639_1(lang) or lang
    # Wordnik for English, the language's Wiktionary otherwise
    prefix = ("https://www.wordnik.com/words/" if lang == "en"
              else "http://%s.wiktionary.org/wiki/" % lang)
    return prefix + "{word}"
def default_lookup_website(lang):
    '''Return the default word-lookup URL template for the given language.'''
    code = lang_as_iso639_1(lang) or lang
    if code == 'en':
        # English words go to Wordnik
        return 'https://www.wordnik.com/words/' + '{word}'
    # Everything else goes to that language's Wiktionary
    return 'http://%s.wiktionary.org/wiki/' % code + '{word}'
def update_metadata(opf, mi, apply_null=False, update_timestamp=False):
    '''
    Merge the metadata object mi into the OPF object.

    :param apply_null: If True, clear OPF fields that are unset in mi.
    :param update_timestamp: If True, copy mi.timestamp into the OPF when set.
    '''
    # Structural OPF fields must never be copied from incoming metadata
    for structural in ("guide", "toc", "manifest", "spine"):
        setattr(mi, structural, None)
    if mi.languages:
        normalized = []
        for code in mi.languages:
            # Prefer the ISO 639-1 two-letter form when available
            iso = lang_as_iso639_1(code)
            normalized.append(iso if iso else code)
        mi.languages = normalized
    opf.smart_update(mi)
    if getattr(mi, "uuid", None):
        opf.application_id = mi.uuid
    if apply_null:
        if not getattr(mi, "series", None):
            opf.series = None
        if not getattr(mi, "tags", []):
            opf.tags = []
        if not getattr(mi, "isbn", None):
            opf.isbn = None
        if not getattr(mi, "comments", None):
            opf.comments = None
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
def count_words(text, lang='en'):
    '''Return the number of words in text, using an ICU break iterator for lang.'''
    with _lock:
        try:
            it = _iterators[lang]
        except KeyError:
            it = _iterators[lang] = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
        it.set_text(text)
        return len(it.split2())
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX table of contents.

    :param title: The title for this table of contents.
    :return: The name of the inline TOC document, or None if the book has no TOC.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    name = find_inline_toc(container)
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is None:
        # No existing inline TOC: pick an unused file name and prepend to the spine
        counter = 0
        name = 'toc.xhtml'
        while container.has_name(name):
            counter += 1
            name = 'toc%d.xhtml' % counter
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
def norm(x):
    '''Normalize a language/country pair into a BCP-47 style lang[-COUNTRY] code.'''
    lc = x.langcode
    # Fall back to the default country for the language when none is given
    cc = x.countrycode or cc_map.get(lc, None)
    lc = lang_as_iso639_1(lc) or lc
    return lc + '-' + cc if cc else lc
def lang_name(l):
    '''Map a language code onto the normalized two-letter name used downstream.'''
    code = lang_as_iso639_1(l.lower()) or 'en'
    # A few languages need a more specific regional/variant form
    code = {'en': 'en-us', 'nb': 'nb-no', 'el': 'el-monoton'}.get(code, code)
    return code.lower().replace('_', '-')
def split_into_words_and_positions(text, lang='en'):
    '''Split text into words with positions, via a cached ICU break iterator.'''
    with _lock:
        try:
            it = _iterators[lang]
        except KeyError:
            it = _iterators[lang] = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
        it.set_text(text)
        return it.split2()
def index_of(needle, haystack, lang='en'):
    '''Return the word-boundary-aware index of needle in haystack.'''
    with _lock:
        try:
            it = _iterators[lang]
        except KeyError:
            it = _iterators[lang] = _icu.BreakIterator(
                _icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
        it.set_text(haystack)
        return it.index(needle)
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    '''Prepare a DOCX→HTML conversion: open the DOCX and build the skeleton HTML tree.'''
    self.docx = DOCX(path_or_stream, log=log)
    self.log = self.docx.log
    self.notes_text = notes_text or _("Notes")
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()
    self.styles = Styles()
    self.images = Images()
    self.tables = Tables()
    self.object_map = OrderedDict()
    head = HEAD(
        META(charset="utf-8"),
        TITLE(self.mi.title or _("Unknown")),
        LINK(rel="stylesheet", type="text/css", href="docx.css"),
    )
    self.html = HTML(head, self.body)
    # Hand-pretty-print the skeleton so serialized output stays readable
    self.html.text = "\n\t"
    head.text = "\n\t\t"
    head.tail = "\n"
    for child in head:
        child.tail = "\n\t\t"
    head[-1].tail = "\n\t"
    self.html[1].text = self.html[1].tail = "\n"
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != "und":
        lang = lang_as_iso639_1(lang)
        if lang:
            self.html.set("lang", lang)
def lang_name(l):
    '''Map a language code onto the normalized two-letter name used downstream.'''
    special = {'en': 'en-us', 'nb': 'nb-no', 'el': 'el-monoton'}
    code = lang_as_iso639_1(l.lower())
    if not code:
        code = 'en'
    # Substitute regional/variant forms for a few languages
    return special.get(code, code).lower().replace('_', '-')
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object used to seed the new book.
    :param path: Filesystem path at which the book is written.
    :param fmt: Output format ('epub' or 'azw3').
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang
    opfns = OPF_NAMESPACES['opf']
    # Build a minimal manifest (start page + NCX) and a one-entry spine
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
'''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # fix: 0755 is Python-2-only octal syntax; 0o755 works on both 2.6+ and 3
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def dictionary_name_for_locale(loc):
    '''
    Find the best matching dictionary name for a locale string.

    Tries, in order: the exact locale, the locale truncated to lang_country,
    the bare ISO 639-1 language (with preferred defaults for en/de/es), and
    finally any locale that shares the language prefix. Returns None when
    nothing matches.
    '''
    loc = loc.lower().replace('-', '_')
    lmap = locale_map()
    if loc in lmap:
        return lmap[loc]
    parts = loc.split('_')
    if len(parts) > 2:
        # Drop script/variant subtags, keep language_country
        loc = '_'.join(parts[:2])
        if loc in lmap:
            return lmap[loc]
    loc = lang_as_iso639_1(parts[0])
    if not loc:
        return
    if loc in lmap:
        return lmap[loc]
    # Preferred regional defaults for a few common languages
    if loc == 'en':
        return lmap['en_us']
    if loc == 'de':
        return lmap['de_de']
    if loc == 'es':
        return lmap['es_es']
    q = loc + '_'
    for k, v in iteritems(lmap):
        if k.startswith(q):
            # fix: return the unpacked value instead of redundantly re-looking-up lmap[k]
            return v
def update_metadata(opf, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    '''
    Merge the metadata object mi into the OPF object.

    :param apply_null: If True, clear OPF fields unset in mi and replace identifiers.
    :param update_timestamp: If True, copy mi.timestamp into the OPF when set.
    :param force_identifiers: If True, replace (rather than merge) identifiers.
    '''
    # Structural OPF fields must never be copied from incoming metadata
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)
    if mi.languages:
        # Prefer two-letter ISO 639-1 codes, keeping the original on failure
        mi.languages = [lang_as_iso639_1(lc) or lc for lc in mi.languages]
    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null:
        if not getattr(mi, 'series', None):
            opf.series = None
        if not getattr(mi, 'tags', []):
            opf.tags = []
        if not getattr(mi, 'isbn', None):
            opf.isbn = None
        if not getattr(mi, 'comments', None):
            opf.comments = None
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge: incoming identifiers win, but empty keys/values are dropped
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        opf.set_identifiers({k: v for k, v in merged.iteritems() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
    '''Prepare a DOCX→HTML conversion: open the DOCX and build the skeleton HTML tree.'''
    self.docx = DOCX(path_or_stream, log=log)
    self.log = self.docx.log
    self.notes_text = notes_text or _('Notes')
    self.dest_dir = dest_dir or os.getcwdu()
    self.mi = self.docx.metadata
    self.body = BODY()
    self.styles = Styles()
    self.images = Images()
    self.tables = Tables()
    self.object_map = OrderedDict()
    self.html = HTML(
        HEAD(
            META(charset='utf-8'),
            TITLE(self.mi.title or _('Unknown')),
            LINK(rel='stylesheet', type='text/css', href='docx.css'),
        ),
        self.body)
    # Hand-pretty-print the skeleton so serialized output stays readable
    head = self.html[0]
    self.html.text = '\n\t'
    head.text = '\n\t\t'
    head.tail = '\n'
    for child in head:
        child.tail = '\n\t\t'
    head[-1].tail = '\n\t'
    self.html[1].text = self.html[1].tail = '\n'
    lang = canonicalize_lang(self.mi.language)
    if lang and lang != 'und':
        lang = lang_as_iso639_1(lang)
        if lang:
            self.html.set('lang', lang)
def convert_calibre_md_to_comic_md(self):
    '''
    Maps the entries in the calibre metadata to comictagger metadata
    '''
    from calibre.utils.html2text import html2text
    from calibre.utils.date import UNDEFINED_DATE
    from calibre.utils.localization import lang_as_iso639_1

    # Already converted once; nothing to do
    if self.calibre_md_in_comic_format:
        return
    self.calibre_md_in_comic_format = GenericMetadata()
    mi = self.calibre_metadata

    # shorten some functions
    role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
    update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)

    # update the fields of comic metadata
    update_field("title", mi.title)
    role("Writer", mi.authors)
    update_field("series", mi.series)
    update_field("issue", mi.series_index)
    update_field("tags", mi.tags)
    update_field("publisher", mi.publisher)
    update_field("criticalRating", mi.rating)
    # need to check for None
    if mi.comments:
        update_field("comments", html2text(mi.comments))
    if mi.language:
        update_field("language", lang_as_iso639_1(mi.language))
    if mi.pubdate != UNDEFINED_DATE:
        update_field("year", mi.pubdate.year)
        update_field("month", mi.pubdate.month)
        update_field("day", mi.pubdate.day)

    # custom columns
    field = partial(self.db.field_for, book_id=self.book_id)
    # artists
    role("Penciller", field(prefs['penciller_column']))
    role("Inker", field(prefs['inker_column']))
    role("Colorist", field(prefs['colorist_column']))
    role("Letterer", field(prefs['letterer_column']))
    role("CoverArtist", field(prefs['cover_artist_column']))
    role("Editor", field(prefs['editor_column']))
    # others
    update_field("storyArc", field(prefs['storyarc_column']))
    update_field("characters", field(prefs['characters_column']))
    update_field("teams", field(prefs['teams_column']))
    update_field("locations", field(prefs['locations_column']))
    update_field("volume", field(prefs['volume_column']))
    update_field("genre", field(prefs['genre_column']))
    update_field("issueCount", field(prefs['count_column']))
    update_field("pageCount", field(prefs['pages_column']))
    update_field("webLink", get_link(field(prefs['comicvine_column'])))
    update_field("manga", field(prefs['manga_column']))
def index_of(needle, haystack, lang='en'):
    '''Return the word-boundary-aware index of needle in haystack.'''
    with _lock:
        it = _iterators.get(lang)
        if it is None:
            # Cache one break iterator per language
            it = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = it
        it.set_text(haystack)
        return it.index(needle)
def count_words(text, lang='en'):
    '''Return the number of words in text, using an ICU break iterator for lang.'''
    with _lock:
        it = _iterators.get(lang)
        if it is None:
            # Cache one break iterator per language
            it = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = it
        it.set_text(text)
        return len(it.split2())
def split_into_words_and_positions(text, lang='en'):
    '''Split text into words with positions, via a cached ICU break iterator.'''
    with _lock:
        it = _iterators.get(lang)
        if it is None:
            # Cache one break iterator per language
            it = _icu.BreakIterator(_icu.UBRK_WORD, lang_as_iso639_1(lang) or lang)
            _iterators[lang] = it
        it.set_text(text)
        return it.split2()
def convert_calibre_md_to_comic_md(self):
    '''
    Maps the entries in the calibre metadata to comictagger metadata
    '''
    from calibre.utils.html2text import html2text
    from calibre.utils.date import UNDEFINED_DATE
    from calibre.utils.localization import lang_as_iso639_1

    # Already converted once; nothing to do
    if self.calibre_md_in_comic_format:
        return
    self.calibre_md_in_comic_format = GenericMetadata()
    mi = self.calibre_metadata

    # shorten some functions
    role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
    update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)

    # update the fields of comic metadata
    update_field("title", mi.title)
    role("Writer", mi.authors)
    update_field("series", mi.series)
    update_field("issue", mi.series_index)
    update_field("tags", mi.tags)
    update_field("publisher", mi.publisher)
    update_field("criticalRating", mi.rating)
    # need to check for None
    if mi.comments:
        update_field("comments", html2text(mi.comments))
    if mi.language:
        update_field("language", lang_as_iso639_1(mi.language))
    if mi.pubdate != UNDEFINED_DATE:
        update_field("year", mi.pubdate.year)
        update_field("month", mi.pubdate.month)
        update_field("day", mi.pubdate.day)

    # custom columns
    field = partial(self.db.field_for, book_id=self.book_id)
    # artists
    role("Penciller", field(prefs['penciller_column']))
    role("Inker", field(prefs['inker_column']))
    role("Colorist", field(prefs['colorist_column']))
    role("Letterer", field(prefs['letterer_column']))
    role("CoverArtist", field(prefs['cover_artist_column']))
    role("Editor", field(prefs['editor_column']))
    # others
    update_field("storyArc", field(prefs['storyarc_column']))
    update_field("characters", field(prefs['characters_column']))
    update_field("teams", field(prefs['teams_column']))
    update_field("locations", field(prefs['locations_column']))
    update_field("volume", field(prefs['volume_column']))
    update_field("genre", field(prefs['genre_column']))
def lookup(self, word):
    '''Open a dictionary website looking up the given word in the book's language.'''
    from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
    from urllib import quote
    lang = lang_as_iso639_1(self.view.current_language)
    if not lang:
        # fix: canonicalize the actual language string; the original called
        # canonicalize_lang(lang) on the already-falsy value, so the fallback
        # always degenerated to 'en'
        lang = canonicalize_lang(self.view.current_language) or 'en'
    word = quote(word.encode('utf-8'))
    if lang == 'en':
        prefix = 'https://www.wordnik.com/words/'
    else:
        prefix = 'http://%s.wiktionary.org/wiki/' % lang
    open_url(prefix + word)
def metadataFromString(self, string):
    '''Parse a ComicBookInfo/1.0 JSON string into a GenericMetadata object.'''
    cbi_container = json.loads(unicode(string, 'utf-8'))
    metadata = GenericMetadata()
    cbi = cbi_container['ComicBookInfo/1.0']

    # helper func
    # If item is not in CBI, return None
    def xlate(cbi_entry):
        return cbi.get(cbi_entry, None)

    metadata.series = xlate('series')
    metadata.title = xlate('title')
    metadata.issue = xlate('issue')
    metadata.publisher = xlate('publisher')
    metadata.month = xlate('publicationMonth')
    metadata.year = xlate('publicationYear')
    metadata.issueCount = xlate('numberOfIssues')
    metadata.comments = xlate('comments')
    metadata.credits = xlate('credits')
    metadata.genre = xlate('genre')
    metadata.volume = xlate('volume')
    metadata.volumeCount = xlate('numberOfVolumes')
    metadata.language = xlate('language')
    metadata.country = xlate('country')
    metadata.criticalRating = xlate('rating')
    metadata.tags = xlate('tags')

    # make sure credits and tags are at least empty lists and not None
    if metadata.credits is None:
        metadata.credits = []
    if metadata.tags is None:
        metadata.tags = []
    # need to massage the language string to be ISO
    # modified to use a calibre function
    if metadata.language is not None:
        metadata.language = lang_as_iso639_1(metadata.language)
    metadata.isEmpty = False
    return metadata
def update_doc_props(root, mi):
    '''Write core document properties (title, author, etc.) from mi into root.'''
    def setm(name, text=None, ns='dc'):
        # Replace any existing element with the same tag before appending
        elem = root.makeelement('{%s}%s' % (namespaces[ns], name))
        for child in tuple(root):
            if child.tag == elem.tag:
                root.remove(child)
        elem.text = text
        root.append(elem)
        return elem

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        l = canonicalize_lang(mi.languages[0])
        setm('language', lang_as_iso639_1(l) or l)
def update_doc_props(root, mi, namespace):
    '''Write core document properties (title, author, etc.) from mi into root.'''
    def setm(name, text=None, ns='dc'):
        # Replace any existing element with the same tag before appending
        elem = root.makeelement('{%s}%s' % (namespace.namespaces[ns], name))
        for child in tuple(root):
            if child.tag == elem.tag:
                root.remove(child)
        elem.text = text
        root.append(elem)
        return elem

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        l = canonicalize_lang(mi.languages[0])
        setm('language', lang_as_iso639_1(l) or l)
def iana2mobi(icode):
    '''Convert an IANA language tag into a packed MOBI language record.'''
    langdict, subtags = IANA_MOBI[None], []
    if icode:
        subtags = list(icode.split('-'))
        # Find the first subtag that is a known primary language
        while subtags:
            lang = lang_as_iso639_1(subtags.pop(0).lower())
            if lang and lang in IANA_MOBI:
                langdict = IANA_MOBI[lang]
                break
    mcode = langdict[None]
    # Resolve a regional/script variant from the remaining subtags, trying
    # as-is, Title case, then UPPER case
    while subtags:
        subtag = subtags.pop(0)
        if subtag not in langdict:
            subtag = subtag.title()
        if subtag not in langdict:
            subtag = subtag.upper()
        if subtag in langdict:
            mcode = langdict[subtag]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
def update_metadata(opf, mi, apply_null=False, update_timestamp=False, force_identifiers=False):
    '''
    Merge the metadata object mi into the OPF object.

    :param apply_null: If True, clear OPF fields unset in mi and replace identifiers.
    :param update_timestamp: If True, copy mi.timestamp into the OPF when set.
    :param force_identifiers: If True, replace (rather than merge) identifiers.
    '''
    # Structural OPF fields must never be copied from incoming metadata
    for structural in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, structural, None)
    if mi.languages:
        normalized = []
        for code in mi.languages:
            # Prefer the ISO 639-1 two-letter form when available
            iso = lang_as_iso639_1(code)
            normalized.append(iso if iso else code)
        mi.languages = normalized
    opf.smart_update(mi)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null:
        if not getattr(mi, 'series', None):
            opf.series = None
        if not getattr(mi, 'tags', []):
            opf.tags = []
        if not getattr(mi, 'isbn', None):
            opf.isbn = None
        if not getattr(mi, 'comments', None):
            opf.comments = None
        if not getattr(mi, 'publisher', None):
            opf.publisher = None
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge: incoming identifiers win, but empty keys/values are dropped
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        opf.set_identifiers({k: v for k, v in merged.iteritems() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        # A plain text book is just its title (if any)
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang
    opfns = OPF_NAMESPACES['opf']
    # Build a minimal manifest (start page + NCX) and a one-entry spine
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
'''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def metadata_to_xmp_packet(mi):
    '''Serialize the metadata object mi into an Adobe XMP packet.'''
    A = ElementMaker(namespace=NS_MAP["x"], nsmap=nsmap("x"))
    R = ElementMaker(namespace=NS_MAP["rdf"], nsmap=nsmap("rdf"))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    # Dublin Core description block
    dc = rdf.makeelement(expand("rdf:Description"), nsmap=nsmap("dc"))
    dc.set(expand("rdf:about"), "")
    rdf.append(dc)
    for prop, tag in {"title": "dc:title", "comments": "dc:description"}.iteritems():
        create_alt_property(dc, tag, mi.get(prop) or "")
    for prop, (tag, ordered) in {
        "authors": ("dc:creator", True),
        "tags": ("dc:subject", False),
        "publisher": ("dc:publisher", False),
    }.iteritems():
        val = mi.get(prop) or ()
        if isinstance(val, basestring):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null("pubdate"):
        # Adobe spec recommends local time
        create_sequence_property(dc, "dc:date", [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null("languages"):
        langs = filter(None, map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x), mi.languages))
        if langs:
            create_sequence_property(dc, "dc:language", langs, ordered=False)
    # XMP/identifier description block
    xmp = rdf.makeelement(expand("rdf:Description"), nsmap=nsmap("xmp", "xmpidq"))
    xmp.set(expand("rdf:about"), "")
    rdf.append(xmp)
    extra_ids = {}
    for x in ("prism", "pdfx"):
        p = extra_ids[x] = rdf.makeelement(expand("rdf:Description"), nsmap=nsmap(x))
        p.set(expand("rdf:about"), "")
        rdf.append(p)
    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in identifiers.iteritems():
            if scheme in {"isbn", "doi"}:
                # Mirror ISBN/DOI into the prism and pdfx blocks
                for prefix, parent in extra_ids.iteritems():
                    ie = parent.makeelement(expand("%s:%s" % (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)
    d = xmp.makeelement(expand("xmp:MetadataDate"))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)
    # calibre-specific description block
    calibre = rdf.makeelement(expand("rdf:Description"), nsmap=nsmap("calibre", "calibreSI", "calibreCC"))
    calibre.set(expand("rdf:about"), "")
    rdf.append(calibre)
    if not mi.is_null("rating"):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, "calibre:rating", "%g" % r)
    if not mi.is_null("series"):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null("timestamp"):
        create_simple_property(calibre, "calibre:timestamp", isoformat(mi.timestamp, as_utc=False))
    for x in ("author_link_map", "user_categories"):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, "calibre:" + x, dump_dict(val))
    for x in ("title_sort", "author_sort"):
        if not mi.is_null(x):
            create_simple_property(calibre, "calibre:" + x, getattr(mi, x))
    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
def metadata_to_xmp_packet(mi):
    '''Serialize the metadata object mi into an Adobe XMP packet.'''
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    # Dublin Core description block
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    for prop, tag in {'title': 'dc:title', 'comments': 'dc:description'}.iteritems():
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    seq_props = {
        'authors': ('dc:creator', True),
        'tags': ('dc:subject', False),
        'publisher': ('dc:publisher', False),
    }
    for prop, (tag, ordered) in seq_props.iteritems():
        val = mi.get(prop) or ()
        if isinstance(val, basestring):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        langs = filter(None, map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x), mi.languages))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)
    # XMP/identifier description block
    xmp = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)
    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in identifiers.iteritems():
            if scheme in {'isbn', 'doi'}:
                # Mirror ISBN/DOI into the prism and pdfx blocks
                for prefix, parent in extra_ids.iteritems():
                    ie = parent.makeelement(expand('%s:%s' % (prefix, scheme)))
                    ie.text = val
                    parent.append(ie)
    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)
    # calibre-specific description block
    calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:' + x, dump_dict(val))
    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:' + x, getattr(mi, x))
    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
def html_lang(docx_lang):
    '''Convert a DOCX language code into an ISO 639-1 code for HTML, or None.'''
    lang = canonicalize_lang(docx_lang)
    # 'und' means undetermined; treat it the same as no language at all
    if not lang or lang == 'und':
        return None
    return lang_as_iso639_1(lang) or None
def lang_for_tag(tag):
    '''Return the ISO 639-1 language of an element from its lang/xml:lang attributes, if any.'''
    candidates = (tag.get('lang'), tag.get('{http://www.w3.org/XML/1998/namespace}lang'))
    for raw in candidates:
        code = lang_as_iso639_1(raw)
        if code:
            return code
# Rewrite the EXTH metadata records of a loaded MOBI file from *mi* (a
# calibre Metadata object), optionally forcing an ASIN into records 113/504.
# Only files whose PalmDB type is b'BOOKMOBI' are supported; anything else
# raises MobiError. Strings are encoded with the book's codec ('replace'
# fallback). Notable records: 100 authors (one per author, honouring the
# prefer_author_sort conversion preference), 101 publisher, 103 comments
# (with calibre user annotations stripped), 104 ISBN, 105 tags, 106 date,
# 201/202/203 cover/thumbnail indices, 501 PDOC when the personal-doc tag is
# present, 112 calibre UUID, 108 book producer, 524 language. Remaining
# original EXTH fields are carried over, the whole EXTH block reserialized
# (4-byte aligned, always at least one pad byte), the title and record-0
# language bytes (92:96) updated, and cover/thumbnail image records rescaled
# and overwritten in place when new cover data is available.
# NOTE: the collapsed one-line formatting below is preserved as found.
def update(self, mi, asin=None): mi.title = normalize(mi.title) def update_exth_record(rec): recs.append(rec) if rec[0] in self.original_exth_records: self.original_exth_records.pop(rec[0]) if self.type != b"BOOKMOBI": raise MobiError( "Setting metadata only supported for MOBI files of type 'BOOK'.\n" "\tThis is a %r file of type %r" % (self.type[0:4], self.type[4:8])) recs = [] added_501 = False try: from calibre.ebooks.conversion.config import load_defaults prefs = load_defaults('mobi_output') pas = prefs.get('prefer_author_sort', False) kindle_pdoc = prefs.get('personal_doc', None) share_not_sync = prefs.get('share_not_sync', False) except: pas = False kindle_pdoc = None share_not_sync = False if mi.author_sort and pas: # We want an EXTH field per author... authors = mi.author_sort.split(' & ') for author in authors: update_exth_record( (100, normalize(author).encode(self.codec, 'replace'))) elif mi.authors: authors = mi.authors for author in authors: update_exth_record( (100, normalize(author).encode(self.codec, 'replace'))) if mi.publisher: update_exth_record( (101, normalize(mi.publisher).encode(self.codec, 'replace'))) if mi.comments: # Strip user annotations a_offset = mi.comments.find('<div class="user_annotations">') ad_offset = mi.comments.find('<hr class="annotations_divider" />') if a_offset >= 0: mi.comments = mi.comments[:a_offset] if ad_offset >= 0: mi.comments = mi.comments[:ad_offset] update_exth_record( (103, normalize(mi.comments).encode(self.codec, 'replace'))) if mi.isbn: update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) if mi.tags: # FIXME: Keep a single subject per EXTH field? 
subjects = '; '.join(mi.tags) update_exth_record( (105, normalize(subjects).encode(self.codec, 'replace'))) if kindle_pdoc and kindle_pdoc in mi.tags: added_501 = True update_exth_record((501, b'PDOC')) if mi.pubdate: update_exth_record( (106, unicode_type(mi.pubdate).encode(self.codec, 'replace'))) elif mi.timestamp: update_exth_record( (106, unicode_type(mi.timestamp).encode(self.codec, 'replace'))) elif self.timestamp: update_exth_record((106, self.timestamp)) else: update_exth_record( (106, nowf().isoformat().encode(self.codec, 'replace'))) if self.cover_record is not None: update_exth_record((201, pack('>I', self.cover_rindex))) update_exth_record((203, pack('>I', 0))) if self.thumbnail_record is not None: update_exth_record((202, pack('>I', self.thumbnail_rindex))) # Add a 113 record if not present to allow Amazon syncing if (113 not in self.original_exth_records and self.original_exth_records.get(501, None) == 'EBOK' and not added_501 and not share_not_sync): from uuid import uuid4 update_exth_record((113, unicode_type(uuid4()).encode(self.codec))) if asin is not None: update_exth_record((113, asin.encode(self.codec))) update_exth_record((504, asin.encode(self.codec))) # Add a 112 record with actual UUID if getattr(mi, 'uuid', None): update_exth_record( (112, ("calibre:%s" % mi.uuid).encode(self.codec, 'replace'))) if 503 in self.original_exth_records: update_exth_record((503, mi.title.encode(self.codec, 'replace'))) # Update book producer if getattr(mi, 'book_producer', False): update_exth_record( (108, mi.book_producer.encode(self.codec, 'replace'))) # Set langcode in EXTH header if not mi.is_null('language'): lang = canonicalize_lang(mi.language) lang = lang_as_iso639_1(lang) or lang if lang: update_exth_record((524, lang.encode(self.codec, 'replace'))) # Include remaining original EXTH fields for id in sorted(self.original_exth_records): recs.append((id, self.original_exth_records[id])) recs = sorted(recs, key=lambda x: (x[0], x[0])) exth = io.BytesIO() 
for code, data in recs: exth.write(pack('>II', code, len(data) + 8)) exth.write(data) exth = exth.getvalue() trail = len(exth) % 4 pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte exth = [b'EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad] exth = b''.join(exth) if getattr(self, 'exth', None) is None: raise MobiError('No existing EXTH record. Cannot update metadata.') if not mi.is_null('language'): self.record0[92:96] = iana2mobi(mi.language) self.create_exth(exth=exth, new_title=mi.title) # Fetch updated timestamp, cover_record, thumbnail_record self.fetchEXTHFields() if mi.cover_data[1] or mi.cover: try: data = mi.cover_data[1] if not data: with open(mi.cover, 'rb') as f: data = f.read() except: pass else: if is_image(self.cover_record): size = len(self.cover_record) cover = rescale_image(data, size) if len(cover) <= size: cover += b'\0' * (size - len(cover)) self.cover_record[:] = cover if is_image(self.thumbnail_record): size = len(self.thumbnail_record) thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN) if len(thumbnail) <= size: thumbnail += b'\0' * (size - len(thumbnail)) self.thumbnail_record[:] = thumbnail return
def migrate_lang_code(self, root):  # {{{
    """Rewrite each <language> element's text to its ISO 639-1 form.

    Elements whose text has no two-letter mapping are left untouched.
    """
    from calibre.utils.localization import lang_as_iso639_1
    for element in root.xpath('//*[local-name() = "language"]'):
        two_letter = lang_as_iso639_1(element.text)
        if two_letter:
            element.text = two_letter
# Update the OPF metadata (and, when possible, the raster cover image) inside
# an EPUB zip *stream* in place. A copy of *mi* is smart-merged into the
# existing OPF; guide/toc/manifest/spine attributes on the copy are cleared
# first so they are not overwritten. A new cover is written only when the
# existing raster cover is unencrypted and a .png/.jpg/.jpeg file. Languages
# are normalized to ISO 639-1 where possible. With apply_null=True, empty
# series/tags/isbn in mi clear the corresponding OPF fields; with
# update_timestamp=True a non-None mi.timestamp replaces the OPF timestamp.
# The rewritten OPF (plus any cover replacement) is spliced back into the zip
# via safe_replace, and the temporary cover file is cleaned up best-effort.
# NOTE(review): Python-2-era constructs (os.getcwdu, StringIO) — presumably a
# legacy variant; verify against the py3 code paths before reuse.
# NOTE: the collapsed one-line formatting below is preserved as found.
def set_metadata(stream, mi, apply_null=False, update_timestamp=False): stream.seek(0) reader = OCFZipReader(stream, root=os.getcwdu()) raster_cover = reader.opf.raster_cover mi = MetaInformation(mi) new_cdata = None replacements = {} try: new_cdata = mi.cover_data[1] if not new_cdata: raise Exception('no cover') except: try: new_cdata = open(mi.cover, 'rb').read() except: pass new_cover = cpath = None if new_cdata and raster_cover: try: cpath = posixpath.join(posixpath.dirname(reader.opf_path), raster_cover) cover_replacable = not reader.encryption_meta.is_encrypted(cpath) and \ os.path.splitext(cpath)[1].lower() in ('.png', '.jpg', '.jpeg') if cover_replacable: new_cover = _write_new_cover(new_cdata, cpath) replacements[cpath] = open(new_cover.name, 'rb') except: import traceback traceback.print_exc() for x in ('guide', 'toc', 'manifest', 'spine'): setattr(mi, x, None) if mi.languages: langs = [] for lc in mi.languages: lc2 = lang_as_iso639_1(lc) if lc2: lc = lc2 langs.append(lc) mi.languages = langs reader.opf.smart_update(mi) if apply_null: if not getattr(mi, 'series', None): reader.opf.series = None if not getattr(mi, 'tags', []): reader.opf.tags = [] if not getattr(mi, 'isbn', None): reader.opf.isbn = None if update_timestamp and mi.timestamp is not None: reader.opf.timestamp = mi.timestamp newopf = StringIO(reader.opf.render()) safe_replace(stream, reader.container[OPF.MIMETYPE], newopf, extra_replacements=replacements) try: if cpath is not None: replacements[cpath].close() os.remove(replacements[cpath].name) except: pass
# Generate an inline (XHTML) Table of Contents page from the container's
# existing ToC and insert it into the book. The page title defaults to
# 'Table of Contents', translated into the book language when one is set.
# Returns None when the ToC is empty. The nested ToC is rendered as nested
# <ul class="levelN"> lists built recursively by process_node, with manual
# text/tail whitespace for readable indentation. If an inline ToC file
# already exists it is overwritten in place; otherwise toc.xhtml (or the
# first free tocN.xhtml) is added at spine index 0. Finally the guide's
# 'toc' entry is pointed at the generated page's anchor and the new file
# name is returned.
# NOTE: the collapsed one-line formatting below is preserved as found.
def create_inline_toc(container, title=None): lang = get_book_language(container) default_title = 'Table of Contents' if lang: lang = lang_as_iso639_1(lang) or lang default_title = translate(lang, default_title) title = title or default_title toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) def process_node(html_parent, toc, level=1, indent=' ', style_level=2): li = html_parent.makeelement(XHTML('li')) li.tail = '\n'+ (indent*level) html_parent.append(li) name, frag = toc.dest, toc.frag href = '#' if name: href = container.name_to_href(name, toc_name) if frag: href += '#' + frag a = li.makeelement(XHTML('a'), href=href) a.text = toc.title li.append(a) if len(toc) > 0: parent = li.makeelement(XHTML('ul')) parent.set('class', 'level%d' % (style_level)) li.append(parent) a.tail = '\n\n' + (indent*(level+2)) parent.text = '\n'+(indent*(level+3)) parent.tail = '\n\n' + (indent*(level+1)) for child in toc: process_node(parent, child, level+3, style_level=style_level + 1) parent[-1].tail = '\n' + (indent*(level+2)) E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS}) html = E.html( E.head( E.title(title), E.style(''' li { list-style-type: none; padding-left: 2em; margin-left: 0} a { text-decoration: none } a:hover { color: red }''', type='text/css'), ), E.body( E.h2(title), E.ul(), id="calibre_generated_inline_toc", ) ) name = toc_name ul = html[1][1] ul.set('class', 'level1') for child in toc: process_node(ul, child) if lang: html.set('lang', lang) pretty_html_tree(container, html) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc') return name
# Variant of create_inline_toc with a docstring and reflowed formatting but
# the same logic as the other copy in this file: build an inline XHTML ToC
# page from the container's NCX ToC (nested <ul class="levelN"> lists via
# the recursive process_node helper), title translated into the book
# language, overwrite an existing inline ToC or add toc.xhtml / tocN.xhtml
# at spine index 0, point the guide 'toc' entry at the generated anchor,
# and return the file name (None when the ToC is empty).
# NOTE: the collapsed one-line formatting below is preserved as found.
def create_inline_toc(container, title=None): ''' Create an inline (HTML) Table of Contents from an existing NCX table of contents. :param title: The title for this table of contents. ''' lang = get_book_language(container) default_title = 'Table of Contents' if lang: lang = lang_as_iso639_1(lang) or lang default_title = translate(lang, default_title) title = title or default_title toc = get_toc(container) if len(toc) == 0: return None toc_name = find_inline_toc(container) def process_node(html_parent, toc, level=1, indent=' ', style_level=2): li = html_parent.makeelement(XHTML('li')) li.tail = '\n' + (indent * level) html_parent.append(li) name, frag = toc.dest, toc.frag href = '#' if name: href = container.name_to_href(name, toc_name) if frag: href += '#' + frag a = li.makeelement(XHTML('a'), href=href) a.text = toc.title li.append(a) if len(toc) > 0: parent = li.makeelement(XHTML('ul')) parent.set('class', 'level%d' % (style_level)) li.append(parent) a.tail = '\n\n' + (indent * (level + 2)) parent.text = '\n' + (indent * (level + 3)) parent.tail = '\n\n' + (indent * (level + 1)) for child in toc: process_node(parent, child, level + 3, style_level=style_level + 1) parent[-1].tail = '\n' + (indent * (level + 2)) E = ElementMaker(namespace=XHTML_NS, nsmap={None: XHTML_NS}) html = E.html( E.head( E.title(title), E.style(''' li { list-style-type: none; padding-left: 2em; margin-left: 0} a { text-decoration: none } a:hover { color: red }''', type='text/css'), ), E.body( E.h2(title), E.ul(), id="calibre_generated_inline_toc", )) name = toc_name ul = html[1][1] ul.set('class', 'level1') for child in toc: process_node(ul, child) if lang: html.set('lang', lang) pretty_html_tree(container, html) raw = serialize(html, 'text/html') if name is None: name, c = 'toc.xhtml', 0 while container.has_name(name): c += 1 name = 'toc%d.xhtml' % c container.add_file(name, raw, spine_index=0) else: with container.open(name, 'wb') as f: f.write(raw) set_guide_item(container, 'toc', title, 
name, frag='calibre_generated_inline_toc') return name
# Build the opening <FictionBook>/<description> XML header for FB2 output
# from self.oeb_book metadata. Collects title, app name/version, current
# date, language (ISO 639-1 preferred, 'en' fallback), cover, genre, and
# one <author> element per creator (name split heuristically on spaces into
# first/middle/last). A UUID identifier (scheme 'uuid' or urn:uuid:) becomes
# the document id, generating a fresh uuid4 with a warning when absent;
# year, publisher and isbn are pulled from date/publisher/identifier
# metadata. All plain-text values are XML-escaped before substitution into
# the dedented template. Note the document-info block reuses the same
# %(author)s markup as title-info.
# NOTE(review): Python-2-era constructs (u'' literals, unicode, str(uuid));
# a later variant of this method exists in this file.
# NOTE: the collapsed one-line formatting below is preserved as found.
def fb2_header(self): from calibre.ebooks.oeb.base import OPF metadata = {} metadata['title'] = self.oeb_book.metadata.title[0].value metadata['appname'] = __appname__ metadata['version'] = __version__ metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) if self.oeb_book.metadata.language: lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value) if not lc: lc = self.oeb_book.metadata.language[0].value metadata['lang'] = lc or 'en' else: metadata['lang'] = u'en' metadata['id'] = None metadata['cover'] = self.get_cover() metadata['genre'] = self.opts.fb2_genre metadata['author'] = u'' for auth in self.oeb_book.metadata.creator: author_first = u'' author_middle = u'' author_last = u'' author_parts = auth.value.split(' ') if len(author_parts) == 1: author_last = author_parts[0] elif len(author_parts) == 2: author_first = author_parts[0] author_last = author_parts[1] else: author_first = author_parts[0] author_middle = ' '.join(author_parts[1:-1]) author_last = author_parts[-1] metadata['author'] += '<author>' metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first) if author_middle: metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle) metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last) metadata['author'] += '</author>' if not metadata['author']: metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>' metadata['keywords'] = u'' tags = list(map(unicode, self.oeb_book.metadata.subject)) if tags: tags = ', '.join(prepare_string_for_xml(x) for x in tags) metadata['keywords'] = '<keywords>%s</keywords>'%tags metadata['sequence'] = u'' if self.oeb_book.metadata.series: index = '1' if self.oeb_book.metadata.series_index: index = self.oeb_book.metadata.series_index[0] metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (prepare_string_for_xml(u'%s' % 
self.oeb_book.metadata.series[0]), index) year = publisher = isbn = u'' identifiers = self.oeb_book.metadata['identifier'] for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): metadata['id'] = unicode(x).split(':')[-1] break if metadata['id'] is None: self.log.warn('No UUID identifier found') metadata['id'] = str(uuid.uuid4()) try: date = self.oeb_book.metadata['date'][0] except IndexError: pass else: year = '<year>%s</year>' % prepare_string_for_xml(date.value.partition('-')[0]) try: publisher = self.oeb_book.metadata['publisher'][0] except IndexError: pass else: publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(publisher.value) for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'isbn': isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value) metadata['year'], metadata['isbn'], metadata['publisher'] = year, isbn, publisher for key, value in metadata.items(): if key not in ('author', 'cover', 'sequence', 'keywords', 'year', 'publisher', 'isbn'): metadata[key] = prepare_string_for_xml(value) return textwrap.dedent(u''' <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink"> <description> <title-info> <genre>%(genre)s</genre> %(author)s <book-title>%(title)s</book-title> %(cover)s <lang>%(lang)s</lang> %(keywords)s %(sequence)s </title-info> <document-info> %(author)s <program-used>%(appname)s %(version)s</program-used> <date>%(date)s</date> <id>%(id)s</id> <version>1.0</version> </document-info> <publish-info> %(publisher)s %(year)s %(isbn)s </publish-info> </description>\n''') % metadata
# Newer variant of fb2_header: same FB2 <description> header assembly as the
# other copy in this file (title, date, language, per-creator <author>
# elements, UUID id with uuid4 fallback, year/publisher/isbn, XML-escaped
# values substituted into a dedented template), with these differences:
# unicode_type instead of unicode, an added <annotation> built from the
# 'description' metadata via html2text, the xlink namespace bound to the
# 'l' prefix, and empty lines stripped from the rendered header before it
# is returned.
# NOTE: the collapsed one-line formatting below is preserved as found.
def fb2_header(self): from calibre.ebooks.oeb.base import OPF metadata = {} metadata['title'] = self.oeb_book.metadata.title[0].value metadata['appname'] = __appname__ metadata['version'] = __version__ metadata['date'] = '%i.%i.%i' % ( datetime.now().day, datetime.now().month, datetime.now().year) if self.oeb_book.metadata.language: lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value) if not lc: lc = self.oeb_book.metadata.language[0].value metadata['lang'] = lc or 'en' else: metadata['lang'] = u'en' metadata['id'] = None metadata['cover'] = self.get_cover() metadata['genre'] = self.opts.fb2_genre metadata['author'] = '' for auth in self.oeb_book.metadata.creator: author_first = '' author_middle = '' author_last = '' author_parts = auth.value.split(' ') if len(author_parts) == 1: author_last = author_parts[0] elif len(author_parts) == 2: author_first = author_parts[0] author_last = author_parts[1] else: author_first = author_parts[0] author_middle = ' '.join(author_parts[1:-1]) author_last = author_parts[-1] metadata['author'] += '<author>' metadata[ 'author'] += '<first-name>%s</first-name>' % prepare_string_for_xml( author_first) if author_middle: metadata[ 'author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml( author_middle) metadata[ 'author'] += '<last-name>%s</last-name>' % prepare_string_for_xml( author_last) metadata['author'] += '</author>' if not metadata['author']: metadata[ 'author'] = '<author><first-name></first-name><last-name></last-name></author>' metadata['keywords'] = '' tags = list(map(unicode_type, self.oeb_book.metadata.subject)) if tags: tags = ', '.join(prepare_string_for_xml(x) for x in tags) metadata['keywords'] = '<keywords>%s</keywords>' % tags metadata['sequence'] = '' if self.oeb_book.metadata.series: index = '1' if self.oeb_book.metadata.series_index: index = self.oeb_book.metadata.series_index[0] metadata['sequence'] = '<sequence name="%s" number="%s"/>' % ( prepare_string_for_xml( '%s' % 
self.oeb_book.metadata.series[0]), index) year = publisher = isbn = '' identifiers = self.oeb_book.metadata['identifier'] for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type( x).startswith('urn:uuid:'): metadata['id'] = unicode_type(x).split(':')[-1] break if metadata['id'] is None: self.log.warn('No UUID identifier found') metadata['id'] = unicode_type(uuid.uuid4()) try: date = self.oeb_book.metadata['date'][0] except IndexError: pass else: year = '<year>%s</year>' % prepare_string_for_xml( date.value.partition('-')[0]) try: publisher = self.oeb_book.metadata['publisher'][0] except IndexError: pass else: publisher = '<publisher>%s</publisher>' % prepare_string_for_xml( publisher.value) for x in identifiers: if x.get(OPF('scheme'), None).lower() == 'isbn': isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value) metadata['year'], metadata['isbn'], metadata[ 'publisher'] = year, isbn, publisher for key, value in metadata.items(): if key not in ('author', 'cover', 'sequence', 'keywords', 'year', 'publisher', 'isbn'): metadata[key] = prepare_string_for_xml(value) try: comments = self.oeb_book.metadata['description'][0] except Exception: metadata['comments'] = '' else: from calibre.utils.html2text import html2text metadata['comments'] = '<annotation><p>{}</p></annotation>'.format( prepare_string_for_xml(html2text(comments.value).strip())) # Keep the indentation level of the description the same as the body. 
header = textwrap.dedent('''\ <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink"> <description> <title-info> <genre>%(genre)s</genre> %(author)s <book-title>%(title)s</book-title> %(cover)s <lang>%(lang)s</lang> %(keywords)s %(sequence)s %(comments)s </title-info> <document-info> %(author)s <program-used>%(appname)s %(version)s</program-used> <date>%(date)s</date> <id>%(id)s</id> <version>1.0</version> </document-info> <publish-info> %(publisher)s %(year)s %(isbn)s </publish-info> </description>''') % metadata # Remove empty lines. return '\n'.join(filter(unicode_type.strip, header.splitlines()))
# Python-3-compatible variant of metadata_to_xmp_packet (iteritems/
# string_or_bytes from the polyglot layer instead of dict.iteritems/
# basestring, and filter wrapped in list). Same structure as the other copy
# in this file: an x:xmpmeta/rdf:RDF tree with rdf:Description sections for
# Dublin Core fields, xmp/xmpidq identifiers plus xmp:MetadataDate (local
# time, per the Adobe spec note), prism/pdfx mirrors of isbn/doi, and the
# calibre-specific section (rating, series, timestamp, author_link_map,
# user_categories, sorts, user metadata). Returns the serialized packet.
# NOTE: the collapsed one-line formatting below is preserved as found.
def metadata_to_xmp_packet(mi): A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x')) R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf')) root = A.xmpmeta(R.RDF) rdf = root[0] dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc')) dc.set(expand('rdf:about'), '') rdf.append(dc) for prop, tag in iteritems({ 'title': 'dc:title', 'comments': 'dc:description' }): val = mi.get(prop) or '' create_alt_property(dc, tag, val) for prop, (tag, ordered) in iteritems({ 'authors': ('dc:creator', True), 'tags': ('dc:subject', False), 'publisher': ('dc:publisher', False), }): val = mi.get(prop) or () if isinstance(val, string_or_bytes): val = [val] create_sequence_property(dc, tag, val, ordered) if not mi.is_null('pubdate'): create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False) ]) # Adobe spec recommends local time if not mi.is_null('languages'): langs = list( filter( None, map(lambda x: lang_as_iso639_1(x) or canonicalize_lang(x), mi.languages))) if langs: create_sequence_property(dc, 'dc:language', langs, ordered=False) xmp = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('xmp', 'xmpidq')) xmp.set(expand('rdf:about'), '') rdf.append(xmp) extra_ids = {} for x in ('prism', 'pdfx'): p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(x)) p.set(expand('rdf:about'), '') rdf.append(p) identifiers = mi.get_identifiers() if identifiers: create_identifiers(xmp, identifiers) for scheme, val in iteritems(identifiers): if scheme in {'isbn', 'doi'}: for prefix, parent in iteritems(extra_ids): ie = parent.makeelement(expand('%s:%s' % (prefix, scheme))) ie.text = val parent.append(ie) d = xmp.makeelement(expand('xmp:MetadataDate')) d.text = isoformat(now(), as_utc=False) xmp.append(d) calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC')) calibre.set(expand('rdf:about'), '') rdf.append(calibre) if not mi.is_null('rating'): try: r = float(mi.rating) except (TypeError, 
ValueError): pass else: create_simple_property(calibre, 'calibre:rating', '%g' % r) if not mi.is_null('series'): create_series(calibre, mi.series, mi.series_index) if not mi.is_null('timestamp'): create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False)) for x in ('author_link_map', 'user_categories'): val = getattr(mi, x, None) if val: create_simple_property(calibre, 'calibre:' + x, dump_dict(val)) for x in ('title_sort', 'author_sort'): if not mi.is_null(x): create_simple_property(calibre, 'calibre:' + x, getattr(mi, x)) all_user_metadata = mi.get_all_user_metadata(True) if all_user_metadata: create_user_metadata(calibre, all_user_metadata) return serialize_xmp_packet(root)
# Write an EPUB 3 navigation document (nav.xhtml) for *toc* into the
# container. Reuses an existing nav file — preferring the one named by
# previous_nav when it still exists — or generates a fresh one from the
# new_nav.html template and marks it with the 'nav' manifest property. Sets
# lang/xml:lang when a language is given. The 'toc' nav is rebuilt as nested
# <ol>/<li> lists (anchors for entries with destinations, spans otherwise,
# whitespace-collapsed titles), then pretty-printed with single-child <li>
# elements collapsed onto one line. When *landmarks* is given, a hidden
# 'landmarks' nav is built from entries whose type and destination resolve
# to in-book documents; when toc.page_list is set, a hidden 'page-list' nav
# is built the same way. Finally the (possibly re-parsed) root replaces the
# nav file's content.
# NOTE: the collapsed one-line formatting below is preserved as found.
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None): from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree tocname = find_existing_nav_toc(container) if previous_nav is not None: nav_name = container.href_to_name(previous_nav[0]) if nav_name and container.exists(nav_name): tocname = nav_name container.apply_unique_properties(tocname, 'nav') if tocname is None: item = container.generate_item('nav.xhtml', id_prefix='nav') item.set('properties', 'nav') tocname = container.href_to_name(item.get('href'), base=container.opf_name) if previous_nav is not None: root = previous_nav[1] else: root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8')) container.replace(tocname, root) else: root = container.parsed(tocname) if lang: lang = lang_as_iso639_1(lang) or lang root.set('lang', lang) root.set('{%s}lang' % XML_NS, lang) nav = ensure_single_nav_of_type(root, 'toc') if toc.toc_title: nav.append(nav.makeelement(XHTML('h1'))) nav[-1].text = toc.toc_title rnode = nav.makeelement(XHTML('ol')) nav.append(rnode) to_href = partial(container.name_to_href, base=tocname) spat = re.compile(r'\s+') def process_node(xml_parent, toc_parent): for child in toc_parent: li = xml_parent.makeelement(XHTML('li')) xml_parent.append(li) title = child.title or '' title = spat.sub(' ', title).strip() a = li.makeelement(XHTML('a' if child.dest else 'span')) a.text = title li.append(a) if child.dest: href = to_href(child.dest) if child.frag: href += '#'+child.frag a.set('href', href) if len(child): ol = li.makeelement(XHTML('ol')) li.append(ol) process_node(ol, child) process_node(rnode, toc) pretty_xml_tree(nav) def collapse_li(parent): for li in parent.iterdescendants(XHTML('li')): if len(li) == 1: li.text = None li[0].tail = None collapse_li(nav) nav.tail = '\n' def create_li(ol, entry): li = ol.makeelement(XHTML('li')) ol.append(li) a = li.makeelement(XHTML('a')) li.append(a) href = container.name_to_href(entry['dest'], tocname) if 
entry['frag']: href += '#' + entry['frag'] a.set('href', href) return a if landmarks is not None: nav = ensure_single_nav_of_type(root, 'landmarks') nav.set('hidden', '') ol = nav.makeelement(XHTML('ol')) nav.append(ol) for entry in landmarks: if entry['type'] and container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS: a = create_li(ol, entry) a.set('{%s}type' % EPUB_NS, entry['type']) a.text = entry['title'] or None pretty_xml_tree(nav) collapse_li(nav) if toc.page_list: nav = ensure_single_nav_of_type(root, 'page-list') nav.set('hidden', '') ol = nav.makeelement(XHTML('ol')) nav.append(ol) for entry in toc.page_list: if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS: a = create_li(ol, entry) a.text = str(entry['pagenum']) pretty_xml_tree(nav) collapse_li(nav) container.replace(tocname, root)
# Python-2-era variant of the MOBI EXTH metadata updater (str type check,
# StringIO, no asin parameter). Same flow as the other copy in this file:
# refuse non-"BOOKMOBI" files, rebuild EXTH records from *mi* (authors,
# publisher, comments with annotations stripped, isbn, tags, dates,
# cover/thumbnail indices, optional 113 sync UUID, 112 calibre UUID, 503
# updated title, 108 book producer, 524 language), carry over remaining
# original EXTH fields, reserialize the EXTH block (4-byte aligned, at
# least one pad byte), write the record-0 language bytes, recreate the EXTH
# header with the new title, refetch fields, and rescale/overwrite the
# cover and thumbnail image records in place when new cover data exists.
# NOTE(review): record 113 here is written unencoded (str(uuid4())) unlike
# the other variant — presumably fine under py2; verify before reuse.
# NOTE: the collapsed one-line formatting below is preserved as found.
def update(self, mi): mi.title = normalize(mi.title) def update_exth_record(rec): recs.append(rec) if rec[0] in self.original_exth_records: self.original_exth_records.pop(rec[0]) if self.type != "BOOKMOBI": raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n" "\tThis is a %r file of type %r" % (self.type[0:4], self.type[4:8])) recs = [] added_501 = False try: from calibre.ebooks.conversion.config import load_defaults prefs = load_defaults('mobi_output') pas = prefs.get('prefer_author_sort', False) kindle_pdoc = prefs.get('personal_doc', None) share_not_sync = prefs.get('share_not_sync', False) except: pas = False kindle_pdoc = None share_not_sync = False if mi.author_sort and pas: # We want an EXTH field per author... authors = mi.author_sort.split(' & ') for author in authors: update_exth_record((100, normalize(author).encode(self.codec, 'replace'))) elif mi.authors: authors = mi.authors for author in authors: update_exth_record((100, normalize(author).encode(self.codec, 'replace'))) if mi.publisher: update_exth_record((101, normalize(mi.publisher).encode(self.codec, 'replace'))) if mi.comments: # Strip user annotations a_offset = mi.comments.find('<div class="user_annotations">') ad_offset = mi.comments.find('<hr class="annotations_divider" />') if a_offset >= 0: mi.comments = mi.comments[:a_offset] if ad_offset >= 0: mi.comments = mi.comments[:ad_offset] update_exth_record((103, normalize(mi.comments).encode(self.codec, 'replace'))) if mi.isbn: update_exth_record((104, mi.isbn.encode(self.codec, 'replace'))) if mi.tags: # FIXME: Keep a single subject per EXTH field? 
subjects = '; '.join(mi.tags) update_exth_record((105, normalize(subjects).encode(self.codec, 'replace'))) if kindle_pdoc and kindle_pdoc in mi.tags: added_501 = True update_exth_record((501, b'PDOC')) if mi.pubdate: update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace'))) elif mi.timestamp: update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace'))) elif self.timestamp: update_exth_record((106, self.timestamp)) else: update_exth_record((106, nowf().isoformat().encode(self.codec, 'replace'))) if self.cover_record is not None: update_exth_record((201, pack('>I', self.cover_rindex))) update_exth_record((203, pack('>I', 0))) if self.thumbnail_record is not None: update_exth_record((202, pack('>I', self.thumbnail_rindex))) # Add a 113 record if not present to allow Amazon syncing if (113 not in self.original_exth_records and self.original_exth_records.get(501, None) == 'EBOK' and not added_501 and not share_not_sync): from uuid import uuid4 update_exth_record((113, str(uuid4()))) # Add a 112 record with actual UUID if getattr(mi, 'uuid', None): update_exth_record((112, (u"calibre:%s" % mi.uuid).encode(self.codec, 'replace'))) if 503 in self.original_exth_records: update_exth_record((503, mi.title.encode(self.codec, 'replace'))) # Update book producer if getattr(mi, 'book_producer', False): update_exth_record((108, mi.book_producer.encode(self.codec, 'replace'))) # Set langcode in EXTH header if not mi.is_null('language'): lang = canonicalize_lang(mi.language) lang = lang_as_iso639_1(lang) or lang if lang: update_exth_record((524, lang.encode(self.codec, 'replace'))) # Include remaining original EXTH fields for id in sorted(self.original_exth_records): recs.append((id, self.original_exth_records[id])) recs = sorted(recs, key=lambda x:(x[0],x[0])) exth = StringIO() for code, data in recs: exth.write(pack('>II', code, len(data) + 8)) exth.write(data) exth = exth.getvalue() trail = len(exth) % 4 pad = '\0' * (4 - trail) # Always pad w/ at 
least 1 byte exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad] exth = ''.join(exth) if getattr(self, 'exth', None) is None: raise MobiError('No existing EXTH record. Cannot update metadata.') if not mi.is_null('language'): self.record0[92:96] = iana2mobi(mi.language) self.create_exth(exth=exth, new_title=mi.title) # Fetch updated timestamp, cover_record, thumbnail_record self.fetchEXTHFields() if mi.cover_data[1] or mi.cover: try: data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read() except: pass else: if is_image(self.cover_record): size = len(self.cover_record) cover = rescale_image(data, size) if len(cover) <= size: cover += b'\0' * (size - len(cover)) self.cover_record[:] = cover if is_image(self.thumbnail_record): size = len(self.thumbnail_record) thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN) if len(thumbnail) <= size: thumbnail += b'\0' * (size - len(thumbnail)) self.thumbnail_record[:] = thumbnail return
def __init__(self, namespace, log, document_lang):
    """Initialize the style manager state.

    The document language is reduced to its ISO 639-1 form, falling back
    to 'en' when no two-letter code is available.
    """
    self.namespace = namespace
    self.log = log
    self.document_lang = lang_as_iso639_1(document_lang) or 'en'
    # Caches mapping computed styles; start out empty.
    self.block_styles = {}
    self.text_styles = {}
    self.styles_for_html_blocks = {}
# Create an empty book at *path* in the given format (must be one of
# valid_empty_formats, else ValueError). 'txt' just writes the title;
# 'docx' delegates to the DOCX writer with one-inch (72pt) margins and
# create_empty_document=True. For epub/azw3: build an OPF from *mi*, pick
# the first declared language (ISO 639-1 preferred, 'und' fallback), add a
# manifest (start page + NCX), a spine referencing them, render the
# new_book.html template with language/title/authors substituted, generate
# the NCX, pretty-print and serialize everything, then either convert via
# opf_to_azw3 in a temp dir (azw3) or assemble an EPUB zip with a stored
# 'mimetype' first, META-INF/container.xml pointing at the OPF, and the
# OPF/HTML/NCX entries.
# NOTE: the collapsed one-line formatting below is preserved as found.
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'): ''' Create an empty book in the specified format at the specified location. ''' if fmt not in valid_empty_formats: raise ValueError('Cannot create empty book in the %s format' % fmt) if fmt == 'txt': with open(path, 'wb') as f: if not mi.is_null('title'): f.write(mi.title) return if fmt == 'docx': from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.docx.writer.container import DOCX from calibre.utils.logging import default_log p = Plumber('a.docx', 'b.docx', default_log) p.setup_options() # Use the word default of one inch page margins for x in 'left right top bottom'.split(): setattr(p.opts, 'margin_' + x, 72) DOCX(p.opts, default_log).write(path, mi, create_empty_document=True) return path = os.path.abspath(path) lang = 'und' opf = metadata_to_opf(mi, as_string=False) for l in opf.xpath('//*[local-name()="language"]'): if l.text: lang = l.text break lang = lang_as_iso639_1(lang) or lang opfns = OPF_NAMESPACES['opf'] m = opf.makeelement('{%s}manifest' % opfns) opf.insert(1, m) i = m.makeelement('{%s}item' % opfns, href=html_name, id='start') i.set('media-type', guess_type('a.xhtml')) m.append(i) i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx') i.set('media-type', guess_type(toc_name)) m.append(i) s = opf.makeelement('{%s}spine' % opfns, toc="ncx") opf.insert(2, s) i = s.makeelement('{%s}itemref' % opfns, idref='start') s.append(i) CONTAINER = '''\ <?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <rootfiles> <rootfile full-path="{0}" media-type="application/oebps-package+xml"/> </rootfiles> </container> '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8') HTML = P('templates/new_book.html', data=True).decode('utf-8').replace( '_LANGUAGE_', prepare_string_for_xml(lang, True) ).replace( '_TITLE_', prepare_string_for_xml(mi.title) ).replace( '_AUTHORS_', 
prepare_string_for_xml(authors_to_string(mi.authors)) ).encode('utf-8') h = parse(HTML) pretty_html_tree(None, h) HTML = serialize(h, 'text/html') ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True) pretty_xml_tree(opf) opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True) if fmt == 'azw3': with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir): for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)): with open(name, 'wb') as f: f.write(data) c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull()) opf_to_azw3(opf_name, path, c) else: with ZipFile(path, 'w', compression=ZIP_STORED) as zf: zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED) zf.writestr('META-INF/', b'', 0755) zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr(opf_name, opf) zf.writestr(html_name, HTML) zf.writestr(toc_name, ncx)