def sanitize_comments_html(html):
    """Re-render *html* through an HTML -> text -> Markdown round trip.

    The markup is first flattened with ``html2text``; the resulting text is
    rendered by a ``Markdown`` instance created with ``safe_mode="remove"``
    and every run of newlines is then deleted from the rendered output.
    """
    from calibre.ebooks.markdown import Markdown

    plain = html2text(html)
    renderer = Markdown(safe_mode="remove")
    rendered = renderer.convert(plain)
    return re.sub("\n+", "", rendered)
def convert_calibre_md_to_comic_md(self):
    '''
    Maps the entries in the calibre metadata to comictagger metadata

    The converted metadata is stored in self.calibre_md_in_comic_format.
    If that attribute is already set, the method returns immediately and
    nothing is recomputed.
    '''
    from calibre.utils.html2text import html2text
    from calibre.utils.date import UNDEFINED_DATE
    from calibre.utils.localization import lang_as_iso639_1

    if self.calibre_md_in_comic_format:
        return

    self.calibre_md_in_comic_format = GenericMetadata()
    mi = self.calibre_metadata

    # shorten some functions
    role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
    update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)

    # update the fields of comic metadata
    update_field("title", mi.title)
    role("Writer", mi.authors)
    update_field("series", mi.series)
    update_field("issue", mi.series_index)
    update_field("tags", mi.tags)
    update_field("publisher", mi.publisher)
    update_field("criticalRating", mi.rating)

    # need to check for None
    if mi.comments:
        update_field("comments", html2text(mi.comments))
    if mi.language:
        update_field("language", lang_as_iso639_1(mi.language))
    pubdate = mi.pubdate
    # A missing pubdate (None) compares unequal to UNDEFINED_DATE, so it has
    # to be ruled out explicitly before its date parts are read.
    if pubdate is not None and pubdate != UNDEFINED_DATE:
        update_field("year", pubdate.year)
        update_field("month", pubdate.month)
        update_field("day", pubdate.day)

    # custom columns
    field = partial(self.db.field_for, book_id=self.book_id)

    # artists (order matters: credits are added in this sequence)
    credit_columns = (
        ("Penciller", 'penciller_column'),
        ("Inker", 'inker_column'),
        ("Colorist", 'colorist_column'),
        ("Letterer", 'letterer_column'),
        ("CoverArtist", 'cover_artist_column'),
        ("Editor", 'editor_column'),
    )
    for credit_name, pref_key in credit_columns:
        role(credit_name, field(prefs[pref_key]))

    # others
    plain_columns = (
        ("storyArc", 'storyarc_column'),
        ("characters", 'characters_column'),
        ("teams", 'teams_column'),
        ("locations", 'locations_column'),
        ("volume", 'volume_column'),
        ("genre", 'genre_column'),
        ("issueCount", 'count_column'),
        ("pageCount", 'pages_column'),
    )
    for comic_field, pref_key in plain_columns:
        update_field(comic_field, field(prefs[pref_key]))

    # the comicvine column value is passed through get_link before storing
    update_field("webLink", get_link(field(prefs['comicvine_column'])))
    update_field("manga", field(prefs['manga_column']))
def sanitize_comments_html(html):
    """Return *html* converted to text, re-rendered as Markdown and cleaned.

    The Markdown output is passed through ``bleach.clean`` and all runs of
    newlines are removed from the cleaned markup.
    """
    from calibre.ebooks.markdown import Markdown
    import bleach

    as_text = html2text(html)
    renderer = Markdown()
    rendered = renderer.convert(as_text)
    scrubbed = bleach.clean(rendered)
    return re.sub(u'\n+', u'', scrubbed)
def chapter_head(self, match):
    '''
    Build heading markup for a regular expression match of a chapter heading.

    match must provide the named groups 'chap' and 'title'. When 'title' is
    empty only an <h2> with the chapter text is returned; otherwise an <h2>
    (carrying a plain-text title attribute) followed by an <h3> with the
    title is returned. Every call increments self.html_preprocess_sections
    and writes a debug message to self.log.
    '''
    chap = match.group('chap')
    title = match.group('title')
    if not title:
        self.html_preprocess_sections = self.html_preprocess_sections + 1
        self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
                " chapters. - " + unicode_type(chap))
        return '<h2>'+chap+'</h2>\n'

    # Only this branch needs the HTML -> text converter.
    from calibre.utils.html2text import html2text
    delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
    # Character class: remove every single or double quote. The previous
    # pattern only matched the literal two character sequence '" and so let
    # quotes through into the title="..." attribute below.
    delete_quotes = re.compile('[\'"]')
    txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
    txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
    self.html_preprocess_sections = self.html_preprocess_sections + 1
    self.log.debug("marked " + unicode_type(self.html_preprocess_sections) +
            " chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title))
    return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def chapter_head(self, match):
    '''
    Build heading markup for a regular expression match of a chapter heading.

    match must provide the named groups 'chap' and 'title'. Without a title
    a bare <h2> is returned; with a title the <h2> gets a plain-text title
    attribute and is followed by an <h3> holding the title. Each call
    increments self.html_preprocess_sections and logs a debug message.
    '''
    chap = match.group('chap')
    title = match.group('title')
    if not title:
        self.html_preprocess_sections = self.html_preprocess_sections + 1
        self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                " chapters. - " + unicode(chap))
        return '<h2>'+chap+'</h2>\n'
    else:
        # Imported here because only the titled branch converts HTML to text.
        from calibre.utils.html2text import html2text
        # Raw strings keep \s and \g from being treated as (invalid) string
        # escapes; the compiled patterns are unchanged.
        delete_whitespace = re.compile(r'^\s*(?P<c>.*?)\s*$')
        # Strip both quote characters. The old pattern '\'\"' matched only an
        # apostrophe immediately followed by a double quote, so quotes could
        # end up inside the title="..." attribute.
        delete_quotes = re.compile(r'''['"]''')
        txt_chap = delete_quotes.sub('', delete_whitespace.sub(r'\g<c>', html2text(chap)))
        txt_title = delete_quotes.sub('', delete_whitespace.sub(r'\g<c>', html2text(title)))
        self.html_preprocess_sections = self.html_preprocess_sections + 1
        self.log.debug("marked " + unicode(self.html_preprocess_sections) +
                " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
        return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def convert_calibre_md_to_comic_md(self):
    '''
    Maps the entries in the calibre metadata to comictagger metadata

    Fills self.calibre_md_in_comic_format with a new GenericMetadata built
    from self.calibre_metadata and the configured custom columns. Does
    nothing when self.calibre_md_in_comic_format is already set.
    '''
    from calibre.utils.html2text import html2text
    from calibre.utils.date import UNDEFINED_DATE
    from calibre.utils.localization import lang_as_iso639_1

    if self.calibre_md_in_comic_format:
        return

    self.calibre_md_in_comic_format = GenericMetadata()
    mi = self.calibre_metadata

    # shorten some functions
    role = partial(set_role, credits=self.calibre_md_in_comic_format.credits)
    update_field = partial(update_comic_field, target=self.calibre_md_in_comic_format)

    # update the fields of comic metadata
    update_field("title", mi.title)
    role("Writer", mi.authors)
    for comic_field, value in (
            ("series", mi.series),
            ("issue", mi.series_index),
            ("tags", mi.tags),
            ("publisher", mi.publisher),
            ("criticalRating", mi.rating)):
        update_field(comic_field, value)

    # need to check for None
    if mi.comments:
        update_field("comments", html2text(mi.comments))
    if mi.language:
        update_field("language", lang_as_iso639_1(mi.language))
    # None != UNDEFINED_DATE is True, so a missing pubdate must be excluded
    # explicitly or the attribute reads below raise AttributeError.
    if mi.pubdate is not None and mi.pubdate != UNDEFINED_DATE:
        update_field("year", mi.pubdate.year)
        update_field("month", mi.pubdate.month)
        update_field("day", mi.pubdate.day)

    # custom columns
    field = partial(self.db.field_for, book_id=self.book_id)

    # artists
    for credit_name in ("Penciller", "Inker", "Colorist", "Letterer"):
        role(credit_name, field(prefs[credit_name.lower() + '_column']))
    role("CoverArtist", field(prefs['cover_artist_column']))
    role("Editor", field(prefs['editor_column']))

    # others
    update_field("storyArc", field(prefs['storyarc_column']))
    for comic_field in ("characters", "teams", "locations", "volume", "genre"):
        update_field(comic_field, field(prefs[comic_field + '_column']))
def markup_user_break(self, replacement_break):
    '''
    Takes string a user supplies and wraps it in markup that will be
    centered with appropriate margins. <hr> and <img> tags are allowed.
    If the user specifies a style with width attributes in the <hr> tag
    then the appropriate margins are applied to wrapping divs. This is
    because many ebook devices don't support margin:auto
    All other html is converted to text.

    Returns the complete scene break markup. Text and <img> breaks are
    wrapped in self.scene_break_open ... </p>; <hr> breaks are wrapped in a
    div whose left/right margins are derived from the requested width.
    '''
    hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
    default_hr = '<hr style="height: 3px; background:#505050" /></div>'

    # Plain text: no angle brackets at all.
    if not re.findall('(<|>)', replacement_break):
        replacement_break = re.sub('\\s', ' ', replacement_break)
        return self.scene_break_open + replacement_break + '</p>'

    if re.match('^<img', replacement_break):
        return self.scene_break_open + replacement_break + '</p>'

    if not re.match('^<hr', replacement_break):
        # Any other markup is flattened to text.
        from calibre.utils.html2text import html2text
        replacement_break = html2text(replacement_break)
        replacement_break = re.sub('\\s', ' ', replacement_break)
        return self.scene_break_open + replacement_break + '</p>'

    if replacement_break.find('width') == -1:
        return hr_open + default_hr

    try:
        # \s* lets "width: 50%" parse; the clean-up expression below already
        # accepts whitespace after the colon. When nothing matches, re.sub
        # returns the input unchanged and int() raises ValueError.
        width = int(
            re.sub('.*?width(:|=)\\s*(?P<wnum>\\d+).*', '\\g<wnum>',
                   replacement_break))
    except ValueError:
        self.log.warn('Invalid replacement scene break'
                      ' expression, using default')
        return hr_open + default_hr

    replacement_break = re.sub(
        '(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '',
        replacement_break)
    # Split the remaining horizontal space evenly between both margins.
    divpercent = (100 - width) // 2
    hr_open = re.sub('45', unicode_type(divpercent), hr_open)
    return hr_open + replacement_break + '</div>'
def markup_user_break(self, replacement_break):
    '''
    Takes string a user supplies and wraps it in markup that will be
    centered with appropriate margins. <hr> and <img> tags are allowed.
    If the user specifies a style with width attributes in the <hr> tag
    then the appropriate margins are applied to wrapping divs. This is
    because many ebook devices don't support margin:auto
    All other html is converted to text.

    Returns the finished scene break markup as a string.
    '''
    hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em; page-break-before:avoid">'
    if re.findall('(<|>)', replacement_break):
        if re.match('^<hr', replacement_break):
            if replacement_break.find('width') != -1:
                try:
                    # Optional whitespace after ':' / '=' is tolerated so
                    # that "width: 50%" is parsed like "width:50%". If the
                    # pattern does not match, re.sub returns the input
                    # unchanged and int() raises ValueError.
                    width = int(re.sub('.*?width(:|=)\\s*(?P<wnum>\\d+).*', '\\g<wnum>', replacement_break))
                except ValueError:
                    scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
                    self.log.warn('Invalid replacement scene break'
                            ' expression, using default')
                else:
                    replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
                    # Floor division keeps the margin an integer percentage;
                    # true division would yield values such as "24.5".
                    divpercent = (100 - width) // 2
                    hr_open = re.sub('45', unicode_type(divpercent), hr_open)
                    scene_break = hr_open+replacement_break+'</div>'
            else:
                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
        elif re.match('^<img', replacement_break):
            scene_break = self.scene_break_open+replacement_break+'</p>'
        else:
            # Unsupported markup is reduced to plain text.
            from calibre.utils.html2text import html2text
            replacement_break = html2text(replacement_break)
            replacement_break = re.sub('\\s', ' ', replacement_break)
            scene_break = self.scene_break_open+replacement_break+'</p>'
    else:
        replacement_break = re.sub('\\s', ' ', replacement_break)
        scene_break = self.scene_break_open+replacement_break+'</p>'

    return scene_break
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    '''
    Write the catalog to path_to_output as CSV or XML.

    The output format is taken from the extension of path_to_output and
    stored in self.fmt; for any extension other than csv or xml nothing is
    written. The records come from self.search_sort_db(db, opts) and the
    columns from self.get_output_fields(db, opts). notification is stored
    on self.notification.
    '''
    from calibre.library import current_library_name
    from calibre.utils.date import isoformat
    from calibre.utils.html2text import html2text
    from calibre.utils.logging import default_log as log
    from lxml import etree
    from calibre.ebooks.metadata import authors_to_string

    self.fmt = path_to_output.rpartition('.')[2]
    self.notification = notification
    current_library = current_library_name()
    if getattr(opts, 'library_path', None):
        current_library = os.path.basename(opts.library_path)

    if opts.verbose:
        opts_dict = vars(opts)
        log("%s('%s'): Generating %s" % (self.name, current_library, self.fmt.upper()))
        if opts.connected_device['is_device_connected']:
            log(" connected_device: %s" % opts.connected_device['name'])
        if opts_dict['search_text']:
            log(" --search='%s'" % opts_dict['search_text'])

        if opts_dict['ids']:
            log(" Book count: %d" % len(opts_dict['ids']))
            if opts_dict['search_text']:
                log(" (--search ignored when a subset of the database is specified)")

        if opts_dict['fields']:
            if opts_dict['fields'] == 'all':
                log(" Fields: %s" % ', '.join(FIELDS[1:]))
            else:
                log(" Fields: %s" % opts_dict['fields'])

    # If a list of ids are provided, don't use search_text
    if opts.ids:
        opts.search_text = None

    data = self.search_sort_db(db, opts)

    if not len(data):
        log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
        # raise SystemExit(1)

    # Get the requested output fields as a list
    fields = self.get_output_fields(db, opts)

    # If connected device, add 'On Device' values to data
    if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
        for entry in data:
            entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

    fm = {x: db.field_metadata.get(x, {}) for x in fields}

    if self.fmt == 'csv':
        # The context manager closes the file even if a row fails to format.
        with codecs.open(path_to_output, 'w', 'utf8') as outfile:
            # Write a UTF-8 BOM
            outfile.write('\xef\xbb\xbf')

            # Output the field headers
            outfile.write(u'%s\n' % u','.join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith('#'):
                        item = db.get_field(entry['id'], field, index_is_id=True)
                        if isinstance(item, (list, tuple)):
                            if fm.get(field, {}).get('display', {}).get('is_names', False):
                                item = ' & '.join(item)
                            else:
                                item = ', '.join(item)
                    elif field == 'library_name':
                        item = current_library
                    elif field == 'title_sort':
                        item = entry['sort']
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == 'formats':
                        fmt_list = []
                        for format in item:
                            fmt_list.append(format.rpartition('.')[2].lower())
                        item = ', '.join(fmt_list)
                    elif field == 'authors':
                        item = authors_to_string(item)
                    elif field == 'tags':
                        item = ', '.join(item)
                    elif field == 'isbn':
                        # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                        item = u'%s' % re.sub(r'[^\dX-]', '', item)
                    elif fm.get(field, {}).get('datatype') == 'datetime':
                        item = isoformat(item, as_utc=False)
                    elif field == 'comments':
                        item = item.replace(u'\r\n', u' ')
                        item = item.replace(u'\n', u' ')
                    elif fm.get(field, {}).get('datatype', None) == 'rating' and item:
                        # stored ratings are halved for display
                        item = u'%.2g' % (item / 2.0)

                    # Convert HTML to markdown text
                    # (only when the value opens with a tag and ends with the
                    # matching closing tag)
                    if type(item) is unicode:
                        opening_tag = re.search(r'<(\w+)(\x20|>)', item)
                        if opening_tag:
                            closing_tag = re.search(r'<\/%s>$' % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    # CSV quoting: wrap in quotes and double embedded quotes
                    outstr.append(u'"%s"' % unicode(item).replace('"', '""'))

                outfile.write(u','.join(outstr) + u'\n')

    elif self.fmt == 'xml':
        from lxml.builder import E

        root = E.calibredb()
        for r in data:
            record = E.record()
            root.append(record)

            # custom columns: '#' is not valid in a tag name, use '_' instead
            for field in fields:
                if field.startswith('#'):
                    val = db.get_field(r['id'], field, index_is_id=True)
                    if not isinstance(val, (str, unicode)):
                        val = unicode(val)
                    item = getattr(E, field.replace('#', '_'))(val)
                    record.append(item)

            for field in ('id', 'uuid', 'publisher', 'rating', 'size',
                          'isbn', 'ondevice', 'identifiers'):
                if field in fields:
                    val = r[field]
                    if not val:
                        continue
                    if not isinstance(val, (str, unicode)):
                        if (fm.get(field, {}).get('datatype', None) ==
                                'rating' and val):
                            val = u'%.2g' % (val / 2.0)
                        val = unicode(val)
                    item = getattr(E, field)(val)
                    record.append(item)

            if 'title' in fields:
                title = E.title(r['title'], sort=r['sort'])
                record.append(title)

            if 'authors' in fields:
                aus = E.authors(sort=r['author_sort'])
                for au in r['authors']:
                    aus.append(E.author(au))
                record.append(aus)

            for field in ('timestamp', 'pubdate'):
                if field in fields:
                    record.append(getattr(E, field)(isoformat(r[field], as_utc=False)))

            if 'tags' in fields and r['tags']:
                tags = E.tags()
                for tag in r['tags']:
                    tags.append(E.tag(tag))
                record.append(tags)

            if 'comments' in fields and r['comments']:
                record.append(E.comments(r['comments']))

            if 'series' in fields and r['series']:
                record.append(E.series(r['series'],
                                       index=str(r['series_index'])))

            if 'cover' in fields and r['cover']:
                record.append(E.cover(r['cover'].replace(os.sep, '/')))

            if 'formats' in fields and r['formats']:
                fmt = E.formats()
                for f in r['formats']:
                    fmt.append(E.format(f.replace(os.sep, '/')))
                record.append(fmt)

            if 'library_name' in fields:
                record.append(E.library_name(current_library))

        with open(path_to_output, 'w') as f:
            f.write(etree.tostring(root, encoding='utf-8',
                                   xml_declaration=True, pretty_print=True))
def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None,
                 do_auto_convert=True, specific_format=None):
    '''
    Email books to the address to, one message per book.

    The books are send_ids when given, otherwise the rows selected in
    self.library_view. Books with a file in one of fmts are handed to
    send_mails; books flagged for auto conversion are offered for
    conversion via the 'Convert Books' action; titles that can be neither
    sent nor converted are listed in a warning dialog.
    '''
    if send_ids is None:
        rows = self.library_view.selectionModel().selectedRows()
        ids = [self.library_view.model().id(row) for row in rows]
    else:
        ids = send_ids
    if not ids:
        return

    model = self.library_view.model()

    def sendable(fmt):
        # True when fmt is requested and can be produced by conversion
        return fmt in set(fmts).intersection(set(available_output_formats()))

    files, _auto_ids = model.get_preferred_formats_from_ids(
        ids, fmts, set_metadata=True,
        specific_format=specific_format,
        exclude_auto=do_auto_convert,
        use_plugboard=plugboard_email_value,
        plugboard_formats=plugboard_email_formats)
    if do_auto_convert:
        # keep the original order, drop everything queued for conversion
        keep = set(ids).difference(_auto_ids)
        ids = [book_id for book_id in ids if book_id in keep]
    else:
        _auto_ids = []

    full_metadata = model.metadata_for(ids, get_cover=False)

    bad, remove_ids, jobnames = [], [], []
    texts, subjects, attachments, attachment_names = [], [], [], []
    for path, mi, book_id in zip(files, full_metadata, ids):
        book_title = mi.title or _('Unknown')
        if path is None:
            bad.append(book_title)
            continue

        remove_ids.append(book_id)
        jobnames.append(book_title)
        attachments.append(path)

        if subject:
            components = get_components(subject, mi, book_id) or [mi.title]
            subjects.append(os.path.join(*components))
        else:
            subjects.append(_('E-book:')+ ' '+book_title)

        author_str = authors_to_string(mi.authors or [_('Unknown')])
        extension = os.path.splitext(path)[1]
        # note: % binds tighter than +, so only the last phrase is formatted
        body = (_('Attached, you will find the e-book') +
                '\n\n' + book_title + '\n\t' + _('by') + ' ' + author_str + '\n\n' +
                _('in the %s format.') % extension[1:].upper())
        if mi.comments and gprefs['add_comments_to_email']:
            from calibre.utils.html2text import html2text
            body += '\n\n' + _('About this book:') + '\n\n' + textwrap.fill(html2text(mi.comments))
        texts.append(body)

        prefix = ascii_filename(book_title+' - '+author_str)
        if not isinstance(prefix, unicode):
            prefix = prefix.decode(preferred_encoding, 'replace')
        attachment_names.append(prefix + extension)

    remove = remove_ids if delete_from_library else []

    to_s = list(repeat(to, len(attachments)))
    if attachments:
        send_mails(jobnames,
                   Dispatcher(partial(self.email_sent, remove=remove)),
                   attachments, to_s, subjects, texts, attachment_names,
                   self.job_manager)
        self.status_bar.show_message(_('Sending email to')+' '+to, 3000)

    # Decide, per book, whether a conversion could produce a sendable file.
    auto = []
    for book_id in _auto_ids:
        if specific_format is None:
            dbfmts = model.db.formats(book_id, index_is_id=True)
            formats = [x.lower() for x in (dbfmts.split(',') if dbfmts else [])]
            convertible = bool(
                set(formats).intersection(available_input_formats()) and
                set(fmts).intersection(available_output_formats()))
        else:
            convertible = sendable(specific_format)
        if convertible:
            auto.append(book_id)
        else:
            bad.append(model.db.title(book_id, index_is_id=True))

    if auto:
        target = specific_format if sendable(specific_format) else None
        if not target:
            # first requested format that conversion can produce
            target = next((fmt for fmt in fmts if sendable(fmt)), target)
        if target is None:
            bad.extend(auto)
        else:
            autos = [model.db.title(book_id, index_is_id=True) for book_id in auto]
            if self.auto_convert_question(
                    _('Auto convert the following books to %s before sending via '
                      'email?') % target.upper(), autos):
                self.iactions['Convert Books'].auto_convert_mail(
                    to, fmts, delete_from_library, auto, target, subject)

    if bad:
        bad = '\n'.join('%s'%(i,) for i in bad)
        d = warning_dialog(self, _('No suitable formats'),
                           _('Could not email the following books '
                             'as no suitable formats were found:'), bad)
        d.exec_()
def sanitize_comments_html(html):
    """Convert *html* to text, render it as Markdown and drop all newlines.

    The Markdown renderer is created with ``safe_mode='remove'``.
    """
    markdown_source = html2text(html)
    converter = Markdown(safe_mode='remove')
    return re.sub('\n+', '', converter.convert(markdown_source))
def sanitize_comments_html(html):
    """Return *html* after an HTML -> text -> Markdown round trip.

    ``html2text`` flattens the markup to text, which a default ``Markdown``
    instance renders again; the rendered result is returned unchanged.
    """
    from calibre.ebooks.markdown import Markdown

    markdown_source = html2text(html)
    renderer = Markdown()
    return renderer.convert(markdown_source)
def sanitize_comments_html(html):
    """Re-render *html* via ``html2text`` and ``markdown`` in safe mode.

    Newline runs are removed from the rendered markup, and every occurrence
    of ``markdown.HTML_REMOVED_TEXT`` is deleted from the result.
    """
    source = html2text(html)
    converter = markdown.Markdown(safe_mode=True)
    flattened = re.sub('\n+', '', converter.convert(source))
    return flattened.replace(markdown.HTML_REMOVED_TEXT, '')
def sanitize_comments_html(html):
    """Return a single-line Markdown rendering of the text of *html*.

    Steps: ``html2text`` on the input, ``Markdown(safe_mode='remove')`` on
    the resulting text, then removal of every run of newline characters.
    """
    from calibre.ebooks.markdown import Markdown

    text_version = html2text(html)
    engine = Markdown(safe_mode='remove')
    html_version = engine.convert(text_version)
    return re.sub('\n+', '', html_version)
def _set_comments(title_info, mi, ctx): if not mi.is_null('comments'): from calibre.utils.html2text import html2text ctx.clear_meta_tags(title_info, 'annotation') title = ctx.get_or_create(title_info, 'annotation') ctx.text2fb2(title, html2text(mi.comments))
def create_bibtex_entry(entry, fields, mode, template_citation,
                        bibtexdict, db, citation_bibtex=True, calibre_files=True):
    '''
    Return the BibTeX text for one catalog entry.

    mode selects the entry type: any mode other than "misc" produces
    '@book{' when check_entry_book_valid(entry) is true, and any mode other
    than "book" falls back to '@misc{'. In "book" mode an entry that is not
    a valid book yields the empty string. fields lists the columns to emit,
    in order; columns whose value is None or empty are skipped.
    '''
    #Bibtex doesn't like UTF-8 but keep unicode until writing
    #Define starting chain or if book valid strict and not book return a Fail string

    bibtex_entry = []
    if mode != "misc" and check_entry_book_valid(entry):
        bibtex_entry.append(u'@book{')
    elif mode != "book":
        bibtex_entry.append(u'@misc{')
    else:
        #case strict book
        return ''

    if citation_bibtex:
        # Citation tag
        bibtex_entry.append(make_bibtex_citation(entry, template_citation,
                                                 bibtexdict))
        bibtex_entry = [u' '.join(bibtex_entry)]

    for field in fields:
        if field.startswith('#'):
            item = db.get_field(entry['id'], field, index_is_id=True)
            if isinstance(item, (bool, float, int)):
                item = repr(item)
        elif field == 'title_sort':
            item = entry['sort']
        elif field == 'library_name':
            item = library_name
        else:
            item = entry[field]

        #check if the field should be included (none or empty)
        if item is None:
            continue
        try:
            if len(item) == 0:
                continue
        except TypeError:
            # numbers and dates have no len(); they are never "empty"
            pass

        if field == 'authors':
            bibtex_entry.append(u'author = "%s"' % bibtexdict.bibtex_author_format(item))

        elif field == 'id':
            bibtex_entry.append(u'calibreid = "%s"' % int(item))

        elif field == 'rating':
            bibtex_entry.append(u'rating = "%s"' % int(item))

        elif field == 'size':
            bibtex_entry.append(u'%s = "%s octets"' % (field, int(item)))

        elif field == 'tags':
            #A list to flatten
            bibtex_entry.append(u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))

        elif field == 'comments':
            #\n removal
            item = item.replace(u'\r\n', u' ')
            item = item.replace(u'\n', u' ')
            # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
            item = bibtexdict.stripUnmatchedSyntax(item, u'{', u'}')
            #html to text
            try:
                item = html2text(item)
            except Exception:
                # Best effort: keep the unconverted comment. Exception (not a
                # bare except) so KeyboardInterrupt/SystemExit still propagate.
                log.warn("Failed to convert comments to text")
            bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))

        elif field == 'isbn':
            # Could be 9, 10 or 13 digits
            bibtex_entry.append(u'isbn = "%s"' % format_isbn(item))

        elif field == 'formats':
            #Add file path if format is selected
            formats = [fmt.rpartition('.')[2].lower() for fmt in item]
            bibtex_entry.append(u'formats = "%s"' % u', '.join(formats))
            if calibre_files:
                files = [u':%s:%s' % (fmt, fmt.rpartition('.')[2].upper())
                         for fmt in item]
                bibtex_entry.append(u'file = "%s"' % u', '.join(files))

        elif field == 'series_index':
            bibtex_entry.append(u'volume = "%s"' % int(item))

        elif field == 'timestamp':
            # date part of the ISO timestamp only
            bibtex_entry.append(u'timestamp = "%s"' % isoformat(item).partition('T')[0])

        elif field == 'pubdate':
            bibtex_entry.append(u'year = "%s"' % item.year)
            bibtex_entry.append(u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

        elif field.startswith('#') and isinstance(item, basestring):
            bibtex_entry.append(u'custom_%s = "%s"' % (field[1:],
                                                      bibtexdict.utf8ToBibtex(item)))

        elif isinstance(item, basestring):
            # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
            # 'author_sort', 'series', 'title_sort'] :
            bibtex_entry.append(u'%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

    bibtex_entry = u',\n '.join(bibtex_entry)
    bibtex_entry += u' }\n\n'

    return bibtex_entry
def fb2_header(self):
    '''
    Return the opening <FictionBook> tag and the <description> section.

    All values are read from self.oeb_book.metadata, self.opts.fb2_genre and
    self.get_cover(). When no UUID identifier is found a new one is
    generated and a warning is logged. Empty lines are removed from the
    returned text.
    '''
    from calibre.ebooks.oeb.base import OPF

    oeb_md = self.oeb_book.metadata
    metadata = {}
    metadata['title'] = oeb_md.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    # Read the clock once so day, month and year always belong together.
    today = datetime.now()
    metadata['date'] = '%i.%i.%i' % (today.day, today.month, today.year)
    if oeb_md.language:
        lc = lang_as_iso639_1(oeb_md.language[0].value)
        if not lc:
            lc = oeb_md.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = u'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre

    metadata['author'] = ''
    for auth in oeb_md.creator:
        author_first = ''
        author_middle = ''
        author_last = ''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first = author_parts[0]
            author_last = author_parts[1]
        else:
            # everything between the first and last word is the middle name
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        metadata['author'] += '<author>'
        metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
        if author_middle:
            metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
        metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
        metadata['author'] += '</author>'
    if not metadata['author']:
        metadata['author'] = '<author><first-name></first-name><last-name></last-name></author>'

    metadata['keywords'] = ''
    tags = list(map(unicode_type, oeb_md.subject))
    if tags:
        tags = ', '.join(prepare_string_for_xml(x) for x in tags)
        metadata['keywords'] = '<keywords>%s</keywords>' % tags

    metadata['sequence'] = ''
    if oeb_md.series:
        index = '1'
        if oeb_md.series_index:
            index = oeb_md.series_index[0]
        metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
            prepare_string_for_xml('%s' % oeb_md.series[0]), index)

    year = publisher = isbn = ''
    identifiers = oeb_md['identifier']
    for x in identifiers:
        # An identifier without a scheme attribute yields None; treat it as
        # an empty scheme instead of calling .lower() on None.
        scheme = (x.get(OPF('scheme'), None) or '').lower()
        if scheme == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
            metadata['id'] = unicode_type(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = unicode_type(uuid.uuid4())

    try:
        date = oeb_md['date'][0]
    except IndexError:
        pass
    else:
        # only the part before the first '-' is used as the year
        year = '<year>%s</year>' % prepare_string_for_xml(date.value.partition('-')[0])

    try:
        publisher = oeb_md['publisher'][0]
    except IndexError:
        pass
    else:
        publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(publisher.value)

    for x in identifiers:
        if (x.get(OPF('scheme'), None) or '').lower() == 'isbn':
            isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

    metadata['year'], metadata['isbn'], metadata['publisher'] = year, isbn, publisher
    # Entries listed here already hold markup and must not be escaped again.
    for key, value in metadata.items():
        if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
                       'publisher', 'isbn'):
            metadata[key] = prepare_string_for_xml(value)

    try:
        comments = oeb_md['description'][0]
    except Exception:
        metadata['comments'] = ''
    else:
        from calibre.utils.html2text import html2text
        metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
            prepare_string_for_xml(html2text(comments.value).strip()))

    # Keep the indentation level of the description the same as the body.
    header = textwrap.dedent('''\
        <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
        <description>
            <title-info>
                <genre>%(genre)s</genre>
                %(author)s
                <book-title>%(title)s</book-title>
                %(cover)s
                <lang>%(lang)s</lang>
                %(keywords)s
                %(sequence)s
                %(comments)s
            </title-info>
            <document-info>
                %(author)s
                <program-used>%(appname)s %(version)s</program-used>
                <date>%(date)s</date>
                <id>%(id)s</id>
                <version>1.0</version>
            </document-info>
            <publish-info>
                %(publisher)s
                %(year)s
                %(isbn)s
            </publish-info>
        </description>''') % metadata

    # Remove empty lines.
    return '\n'.join(filter(unicode_type.strip, header.splitlines()))
def create_bibtex_entry(entry, fields, mode, template_citation,
                        bibtexdict, db, citation_bibtex=True, calibre_files=True):
    '''
    Build the BibTeX record for entry and return it as a string.

    The record opens with '@book{' when mode is not "misc" and
    check_entry_book_valid(entry) is true, with '@misc{' when mode is not
    "book", and is the empty string otherwise (strict book mode with an
    invalid book). One line is emitted per name in fields, skipping values
    that are None or have length zero.
    '''
    #Bibtex doesn't like UTF-8 but keep unicode until writing
    #Define starting chain or if book valid strict and not book return a Fail string

    bibtex_entry = []
    if mode != "misc" and check_entry_book_valid(entry):
        bibtex_entry.append(u'@book{')
    elif mode != "book":
        bibtex_entry.append(u'@misc{')
    else:
        #case strict book
        return ''

    if citation_bibtex:
        # Citation tag
        bibtex_entry.append(
            make_bibtex_citation(entry, template_citation, bibtexdict))
        bibtex_entry = [u' '.join(bibtex_entry)]

    for field in fields:
        if field.startswith('#'):
            item = db.get_field(entry['id'], field, index_is_id=True)
            if isinstance(item, (bool, float, int)):
                item = repr(item)
        elif field == 'title_sort':
            item = entry['sort']
        elif field == 'library_name':
            item = library_name
        else:
            item = entry[field]

        #check if the field should be included (none or empty)
        if item is None:
            continue
        try:
            if len(item) == 0:
                continue
        except TypeError:
            # unsized values (numbers, dates) are always included
            pass

        if field == 'authors':
            bibtex_entry.append(u'author = "%s"' %
                                bibtexdict.bibtex_author_format(item))

        elif field == 'id':
            bibtex_entry.append(u'calibreid = "%s"' % int(item))

        elif field == 'rating':
            bibtex_entry.append(u'rating = "%s"' % int(item))

        elif field == 'size':
            bibtex_entry.append(u'%s = "%s octets"' % (field, int(item)))

        elif field == 'tags':
            #A list to flatten
            bibtex_entry.append(
                u'tags = "%s"' % bibtexdict.utf8ToBibtex(u', '.join(item)))

        elif field == 'comments':
            #\n removal
            item = item.replace(u'\r\n', u' ')
            item = item.replace(u'\n', u' ')
            # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
            item = bibtexdict.stripUnmatchedSyntax(item, u'{', u'}')
            #html to text
            try:
                item = html2text(item)
            except Exception:
                # Conversion is best effort; the raw comment is used instead.
                # Catching Exception rather than everything lets
                # KeyboardInterrupt and SystemExit pass through.
                log.warn("Failed to convert comments to text")
            bibtex_entry.append(u'note = "%s"' % bibtexdict.utf8ToBibtex(item))

        elif field == 'isbn':
            # Could be 9, 10 or 13 digits
            bibtex_entry.append(u'isbn = "%s"' % format_isbn(item))

        elif field == 'formats':
            #Add file path if format is selected
            extensions = [
                path.rpartition('.')[2].lower() for path in item
            ]
            bibtex_entry.append(u'formats = "%s"' % u', '.join(extensions))
            if calibre_files:
                files = [u':%s:%s' % (path, path.rpartition('.')[2].upper())
                         for path in item]
                bibtex_entry.append(u'file = "%s"' % u', '.join(files))

        elif field == 'series_index':
            bibtex_entry.append(u'volume = "%s"' % int(item))

        elif field == 'timestamp':
            # keep only the date part of the ISO string
            bibtex_entry.append(u'timestamp = "%s"' %
                                isoformat(item).partition('T')[0])

        elif field == 'pubdate':
            bibtex_entry.append(u'year = "%s"' % item.year)
            bibtex_entry.append(
                u'month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

        elif field.startswith('#') and isinstance(item, basestring):
            bibtex_entry.append(
                u'custom_%s = "%s"' % (field[1:], bibtexdict.utf8ToBibtex(item)))

        elif isinstance(item, basestring):
            # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
            # 'author_sort', 'series', 'title_sort'] :
            bibtex_entry.append(u'%s = "%s"' %
                                (field, bibtexdict.utf8ToBibtex(item)))

    bibtex_entry = u',\n '.join(bibtex_entry)
    bibtex_entry += u' }\n\n'

    return bibtex_entry
def fb2_header(self):
    """Return the opening ``<FictionBook>``/``<description>`` markup.

    Collects title, language, cover, genre, authors, keywords, series,
    identifier, date, publisher, ISBN and description from
    ``self.oeb_book.metadata`` and substitutes them into the header
    template. Plain-text values are escaped with ``prepare_string_for_xml``;
    values that are already markup fragments are inserted verbatim.

    :return: the header text (the ``<FictionBook>`` element is left open).
    """
    from calibre.ebooks.oeb.base import OPF

    def scheme_of(ident):
        # An identifier without a scheme attribute yields None from get();
        # treat that as "no scheme" instead of crashing on None.lower().
        return (ident.get(OPF('scheme'), None) or '').lower()

    oeb_md = self.oeb_book.metadata
    metadata = {}
    metadata['title'] = oeb_md.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    # Read the clock once so day/month/year cannot straddle midnight.
    now = datetime.now()
    metadata['date'] = '%i.%i.%i' % (now.day, now.month, now.year)
    if oeb_md.language:
        lc = lang_as_iso639_1(oeb_md.language[0].value)
        if not lc:
            lc = oeb_md.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = u'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre

    # One <author> element per creator; the name is split on single spaces
    # into first / middle / last parts.
    metadata['author'] = u''
    for auth in oeb_md.creator:
        author_first = u''
        author_middle = u''
        author_last = u''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first = author_parts[0]
            author_last = author_parts[1]
        else:
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        metadata['author'] += '<author>'
        metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
        if author_middle:
            metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
        metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
        metadata['author'] += '</author>'
    if not metadata['author']:
        metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'

    metadata['keywords'] = u''
    tags = list(map(unicode_type, oeb_md.subject))
    if tags:
        tags = ', '.join(prepare_string_for_xml(x) for x in tags)
        metadata['keywords'] = '<keywords>%s</keywords>'%tags

    metadata['sequence'] = u''
    if oeb_md.series:
        index = '1'
        if oeb_md.series_index:
            index = oeb_md.series_index[0]
        # NOTE(review): the series name and index land inside attribute
        # values; confirm prepare_string_for_xml escapes quotes here.
        metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (
            prepare_string_for_xml(u'%s' % oeb_md.series[0]), index)

    year = publisher = isbn = u''
    identifiers = oeb_md['identifier']
    for x in identifiers:
        if scheme_of(x) == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
            metadata['id'] = unicode_type(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = str(uuid.uuid4())

    try:
        date = oeb_md['date'][0]
    except IndexError:
        pass
    else:
        year = '<year>%s</year>' % prepare_string_for_xml(date.value.partition('-')[0])

    try:
        publisher = oeb_md['publisher'][0]
    except IndexError:
        pass
    else:
        publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(publisher.value)

    for x in identifiers:
        # No break: when several ISBN identifiers exist the last one wins.
        if scheme_of(x) == 'isbn':
            isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

    metadata['year'], metadata['isbn'], metadata['publisher'] = year, isbn, publisher
    # Escape the plain-text values; the listed keys already hold markup.
    # Iterate over a snapshot because values are reassigned in the loop.
    for key, value in list(metadata.items()):
        if key not in ('author', 'cover', 'sequence', 'keywords', 'year', 'publisher', 'isbn'):
            metadata[key] = prepare_string_for_xml(value)

    try:
        comments = oeb_md['description'][0]
    except Exception:
        metadata['comments'] = ''
    else:
        from calibre.utils.html2text import html2text
        metadata['comments'] = '<annotation>{}</annotation>'.format(
            prepare_string_for_xml(html2text(comments.value.strip())))

    # The template text is kept exactly as found, including its whitespace.
    return textwrap.dedent(u''' <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink"> <description> <title-info> <genre>%(genre)s</genre> %(author)s <book-title>%(title)s</book-title> %(cover)s <lang>%(lang)s</lang> %(keywords)s %(sequence)s %(comments)s </title-info> <document-info> %(author)s <program-used>%(appname)s %(version)s</program-used> <date>%(date)s</date> <id>%(id)s</id> <version>1.0</version> </document-info> <publish-info> %(publisher)s %(year)s %(isbn)s </publish-info> </description>\n''') % metadata
def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None,
                 do_auto_convert=True, specific_format=None):
    """Email books to ``to``, offering conversion when no format fits.

    :param to: recipient address; the same value is used for every
        attachment.
    :param fmts: acceptable formats, passed to
        ``get_preferred_formats_from_ids`` and used to pick a conversion
        target.
    :param delete_from_library: when true, the ids of the books that were
        attached are passed to ``self.email_sent`` as ``remove``.
    :param subject: subject template; when empty a default
        ``E-book: <title>`` subject is used.
    :param send_ids: book ids to send; when ``None`` the rows currently
        selected in the library view are used.
    :param do_auto_convert: when true, books reported back as needing
        conversion are handled separately instead of being sent directly.
    :param specific_format: if given, the only format to send or convert to.
    :return: ``None``. Returns early without side effects when there are no
        ids.
    """
    if send_ids is None:
        model = self.library_view.model()
        ids = [model.id(r)
               for r in self.library_view.selectionModel().selectedRows()]
    else:
        ids = send_ids
    if not ids:
        return

    files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
        ids, fmts, set_metadata=True,
        specific_format=specific_format,
        exclude_auto=do_auto_convert,
        use_plugboard=plugboard_email_value,
        plugboard_formats=plugboard_email_formats)
    if do_auto_convert:
        # Drop the books that need conversion, keeping the original order.
        # A set makes each membership test O(1).
        needs_conversion = set(_auto_ids)
        ids = [i for i in ids if i not in needs_conversion]
    else:
        _auto_ids = []

    full_metadata = self.library_view.model().metadata_for(ids, get_cover=False)

    bad, remove_ids, jobnames = [], [], []
    texts, subjects, attachments, attachment_names = [], [], [], []
    for f, mi, book_id in zip(files, full_metadata, ids):
        t = mi.title
        if not t:
            t = _('Unknown')
        if f is None:
            # No file could be produced for this book.
            bad.append(t)
            continue
        remove_ids.append(book_id)
        jobnames.append(t)
        attachments.append(f)
        if not subject:
            subjects.append(_('E-book:') + ' ' + t)
        else:
            components = get_components(subject, mi, book_id)
            if not components:
                components = [mi.title]
            subjects.append(os.path.join(*components))
        a = authors_to_string(
            mi.authors if mi.authors else [_('Unknown')])
        texts.append(
            _('Attached, you will find the e-book') + '\n\n' + t + '\n\t' +
            _('by') + ' ' + a + '\n\n' +
            _('in the %s format.') % os.path.splitext(f)[1][1:].upper())
        if mi.comments and gprefs['add_comments_to_email']:
            from calibre.utils.html2text import html2text
            texts[-1] += '\n\n' + _(
                'About this book:') + '\n\n' + textwrap.fill(
                    html2text(mi.comments))
        prefix = ascii_filename(t + ' - ' + a)
        if not isinstance(prefix, unicode_type):
            prefix = prefix.decode(preferred_encoding, 'replace')
        attachment_names.append(prefix + os.path.splitext(f)[1])
    remove = remove_ids if delete_from_library else []

    to_s = list(repeat(to, len(attachments)))
    if attachments:
        send_mails(jobnames,
                   Dispatcher(partial(self.email_sent, remove=remove)),
                   attachments, to_s, subjects, texts, attachment_names,
                   self.job_manager)
        self.status_bar.show_message(
            _('Sending email to') + ' ' + to, 3000)

    auto = []
    usable_output = None
    if _auto_ids:
        db = self.library_view.model().db
        # Loop-invariant: requested formats that can be produced by
        # conversion. NOTE(review): assumes available_output_formats() and
        # available_input_formats() return the same values on every call
        # within this method -- confirm.
        usable_output = set(fmts).intersection(available_output_formats())
        input_formats = (available_input_formats()
                         if specific_format is None else None)
        for book_id in _auto_ids:
            if specific_format is None:
                dbfmts = db.formats(book_id, index_is_id=True)
                formats = [
                    f.lower() for f in (dbfmts.split(',') if dbfmts else [])
                ]
                convertible = (set(formats).intersection(input_formats)
                               and usable_output)
            else:
                convertible = specific_format in usable_output
            if convertible:
                auto.append(book_id)
            else:
                bad.append(db.title(book_id, index_is_id=True))

    if auto:
        target_fmt = specific_format if specific_format in usable_output else None
        if not target_fmt:
            # First requested format that conversion can produce.
            for fmt in fmts:
                if fmt in usable_output:
                    target_fmt = fmt
                    break
        if target_fmt is None:
            bad += auto
        else:
            db = self.library_view.model().db
            autos = [db.title(book_id, index_is_id=True) for book_id in auto]
            if self.auto_convert_question(
                    _('Auto convert the following books to %s before sending via '
                      'email?') % target_fmt.upper(), autos):
                self.iactions['Convert Books'].auto_convert_mail(
                    to, fmts, delete_from_library, auto, target_fmt, subject)

    if bad:
        bad = '\n'.join('%s' % (i, ) for i in bad)
        d = warning_dialog(
            self, _('No suitable formats'),
            _('Could not email the following books '
              'as no suitable formats were found:'), bad)
        d.exec_()
def identify(log, abort,  # {{{
        title=None, authors=None, identifiers=None, timeout=30, allowed_plugins=None):
    """Query the configured identify plugins and return merged results.

    One ``Worker`` per plugin is started; results are drained from each
    worker's ``rq`` queue until all workers finish or
    ``msprefs['wait_after_first_identify_result']`` seconds have passed since
    the first result arrived, at which point ``abort`` is set.

    :param log: logger; called directly and via ``log.warn``/``log.debug``.
    :param abort: event-like object; ``set()`` is called when waiting stops
        early and ``is_set()`` is consulted while draining late results.
    :param title: title to search for; ``_('Unknown')`` is treated as unset.
    :param authors: author list; ``[_('Unknown')]`` is treated as unset.
    :param identifiers: mapping of identifiers; defaults to a fresh empty
        dict per call rather than one dict shared by every call.
    :param timeout: forwarded to the plugins through the worker kwargs.
    :param allowed_plugins: optional collection of plugin names; other
        plugins are skipped.
    :return: whatever ``merge_identify_results`` returns, after tag mapping,
        tag-count limiting, pubdate clean-up and optional author-name
        swapping have been applied to each result.
    """
    if identifiers is None:
        identifiers = {}
    if title == _('Unknown'):
        title = None
    if authors == [_('Unknown')]:
        authors = None
    start_time = time.time()

    plugins = [p for p in metadata_plugins(['identify'])
               if p.is_configured() and
               (allowed_plugins is None or p.name in allowed_plugins)]

    kwargs = {
        'title': title,
        'authors': authors,
        'identifiers': identifiers,
        'timeout': timeout,
    }

    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join(['%s %s' % (p.name, p.version) for p in plugins]))
    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
    results = {p: [] for p in plugins}
    logs = {w.plugin: w.buf for w in workers}

    def get_results():
        # Take at most one queued result from every worker; report whether
        # anything was taken.
        found = False
        for w in workers:
            try:
                result = w.rq.get_nowait()
            except Empty:
                pass
            else:
                results[w.plugin].append(result)
                found = True
        return found

    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

        if get_results() and first_result_at is None:
            first_result_at = time.time()

        if not is_worker_alive(workers):
            break

        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log.warn('Not waiting any longer for more results. Still running'
                     ' sources:')
            for worker in workers:
                if worker.is_alive():
                    log.debug('\t' + worker.name)
            abort.set()
            break

    # Drain whatever is still queued, unless waiting was aborted.
    while not abort.is_set() and get_results():
        pass

    # Only these parameters are handed to the plugins' sort-key factory.
    sort_kwargs = {k: v for k, v in kwargs.items()
                   if k in ('title', 'authors', 'identifiers')}

    longest, lp = -1, ''
    # Iterate over a snapshot: results[plugin] is reassigned in the loop.
    for plugin, presults in list(results.items()):
        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))

        # Throw away lower priority results from the same source that have exactly the same
        # title and authors as a higher priority result
        filter_results = set()
        filtered_results = []
        for r in presults:
            key = (r.title, tuple(r.authors))
            if key not in filter_results:
                filtered_results.append(r)
                filter_results.add(key)
        results[plugin] = presults = filtered_results

        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '%s' % (plugin.version,), '*'*30)
        log('Found %d results'%len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            try:
                log(unicode(r))
            except TypeError:
                log(repr(r))
        if plog:
            log(plog)
        log('\n'+'*'*80)

        # Fields the plugin is configured to ignore are reset to the values
        # of a blank Metadata object.
        dummy = Metadata(_('Unknown'))
        for i, result in enumerate(presults):
            for f in plugin.prefs['ignore_fields']:
                if ':' not in f:
                    setattr(result, f, getattr(dummy, f))
                if f == 'series':
                    result.series_index = dummy.series_index
            result.relevance_in_source = i
            result.has_cached_cover_url = (
                plugin.cached_cover_url_is_reliable and
                plugin.get_cached_cover_url(result.identifiers) is not None)
            result.identify_plugin = plugin
            if msprefs['txt_comments']:
                if plugin.has_html_comments and result.comments:
                    result.comments = html2text(result.comments)

    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('The longest time (%f) was taken by:'%longest, lp)
    log('Merging results from different sources and finding earliest ',
        'publication dates from the worldcat.org service')
    start_time = time.time()
    results = merge_identify_results(results, log)

    log('We have %d merged results, merging took: %.2f seconds' %
        (len(results), time.time() - start_time))
    tm_rules = msprefs['tag_map_rules']
    if tm_rules:
        from calibre.ebooks.metadata.tag_mapper import map_tags

    max_tags = msprefs['max_tags']
    for r in results:
        if tm_rules:
            r.tags = map_tags(r.tags, tm_rules)
        r.tags = r.tags[:max_tags]
        # Dates at or before the "undefined" sentinel year are cleared.
        if getattr(r.pubdate, 'year', 2000) <= UNDEFINED_DATE.year:
            r.pubdate = None

    if msprefs['swap_author_names']:
        def swap_to_ln_fn(a):
            # "First Middle Last" -> "Last, First Middle"; names that already
            # contain a comma or consist of one word are left alone.
            if ',' in a:
                return a
            parts = a.split(None)
            if len(parts) <= 1:
                return a
            surname = parts[-1]
            return '%s, %s' % (surname, ' '.join(parts[:-1]))

        for r in results:
            r.authors = [swap_to_ln_fn(a) for a in r.authors]

    return results
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    """Write a CSV or XML catalog of the selected books.

    The output format is the extension of ``path_to_output``; only ``csv``
    and ``xml`` produce a file, any other extension writes nothing.

    :param path_to_output: destination file path.
    :param opts: options object; this method reads ``library_path``,
        ``verbose``, ``connected_device``, ``ids``, ``search_text``,
        ``fields`` and ``catalog_title`` from it and clears ``search_text``
        when ``ids`` is set.
    :param db: database used for searching, custom (``#``) fields, field
        metadata and the on-device mapping.
    :param notification: stored on ``self.notification``; not otherwise
        used in this method.
    """
    from calibre.library import current_library_name
    from calibre.utils.date import isoformat
    from calibre.utils.html2text import html2text
    from calibre.utils.logging import default_log as log
    from lxml import etree
    from calibre.ebooks.metadata import authors_to_string

    self.fmt = path_to_output.rpartition('.')[2]
    self.notification = notification
    current_library = current_library_name()
    if getattr(opts, 'library_path', None):
        current_library = os.path.basename(opts.library_path)

    if opts.verbose:
        opts_dict = vars(opts)
        log("%s('%s'): Generating %s" %
            (self.name, current_library, self.fmt.upper()))
        if opts.connected_device['is_device_connected']:
            log(" connected_device: %s" % opts.connected_device['name'])
        if opts_dict['search_text']:
            log(" --search='%s'" % opts_dict['search_text'])

        if opts_dict['ids']:
            log(" Book count: %d" % len(opts_dict['ids']))
            if opts_dict['search_text']:
                log(" (--search ignored when a subset of the database is specified)")

        if opts_dict['fields']:
            if opts_dict['fields'] == 'all':
                log(" Fields: %s" % ', '.join(FIELDS[1:]))
            else:
                log(" Fields: %s" % opts_dict['fields'])

    # If a list of ids are provided, don't use search_text
    if opts.ids:
        opts.search_text = None

    data = self.search_sort_db(db, opts)

    if not len(data):
        # An empty result is reported but is not fatal; an empty catalog is
        # still written below.
        log.error(
            "\nNo matching database entries for search criteria '%s'" %
            opts.search_text)
        # raise SystemExit(1)

    # Get the requested output fields as a list
    fields = self.get_output_fields(db, opts)

    # If connected device, add 'On Device' values to data
    if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
        for entry in data:
            entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[
                entry['id']]['ondevice']

    fm = {x: db.field_metadata.get(x, {}) for x in fields}

    if self.fmt == 'csv':
        # The with-block closes the file even if a row fails to serialise.
        with codecs.open(path_to_output, 'w', 'utf8') as outfile:
            # Write a UTF-8 BOM
            outfile.write('\ufeff')

            # Output the field headers
            outfile.write('%s\n' % ','.join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith('#'):
                        item = db.get_field(entry['id'], field, index_is_id=True)
                        if isinstance(item, (list, tuple)):
                            if fm.get(field, {}).get('display', {}).get('is_names', False):
                                item = ' & '.join(item)
                            else:
                                item = ', '.join(item)
                    elif field == 'library_name':
                        item = current_library
                    elif field == 'title_sort':
                        item = entry['sort']
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == 'formats':
                        fmt_list = []
                        for format in item:
                            fmt_list.append(format.rpartition('.')[2].lower())
                        item = ', '.join(fmt_list)
                    elif field == 'authors':
                        item = authors_to_string(item)
                    elif field == 'tags':
                        item = ', '.join(item)
                    elif field == 'isbn':
                        # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                        item = '%s' % re.sub(r'[^\dX-]', '', item)
                    elif fm.get(field, {}).get('datatype') == 'datetime':
                        item = isoformat(item, as_utc=False)
                    elif field == 'comments':
                        item = item.replace('\r\n', ' ')
                        item = item.replace('\n', ' ')
                    elif fm.get(field, {}).get('datatype', None) == 'rating' and item:
                        # Ratings are halved for output.
                        item = '%.2g' % (item / 2)

                    # Convert HTML to markdown text: only when the value
                    # contains an opening tag and ends with the matching
                    # closing tag.
                    if isinstance(item, str):
                        opening_tag = re.search(r'<(\w+)( |>)', item)
                        if opening_tag:
                            closing_tag = re.search(
                                r'<\/%s>$' % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    # CSV quoting: wrap in quotes and double embedded quotes.
                    outstr.append('"%s"' % str(item).replace('"', '""'))

                outfile.write(','.join(outstr) + '\n')

    elif self.fmt == 'xml':
        from lxml.builder import E

        if getattr(opts, 'catalog_title', None):
            root = E.calibredb(title=opts.catalog_title)
        else:
            root = E.calibredb()
        for r in data:
            record = E.record()
            root.append(record)

            # Custom columns: '#name' becomes element '_name'.
            for field in fields:
                if field.startswith('#'):
                    val = db.get_field(r['id'], field, index_is_id=True)
                    if not isinstance(val, str):
                        val = str(val)
                    item = getattr(E, field.replace('#', '_'))(val)
                    record.append(item)

            for field in ('id', 'uuid', 'publisher', 'rating', 'size', 'isbn',
                          'ondevice', 'identifiers'):
                if field in fields:
                    val = r[field]
                    if not val:
                        continue
                    if not isinstance(val, (bytes, str)):
                        if (fm.get(field, {}).get('datatype', None) == 'rating' and val):
                            val = '%.2g' % (val / 2)
                        val = str(val)
                    item = getattr(E, field)(val)
                    record.append(item)

            if 'title' in fields:
                title = E.title(r['title'], sort=r['sort'])
                record.append(title)

            if 'authors' in fields:
                aus = E.authors(sort=r['author_sort'])
                for au in r['authors']:
                    aus.append(E.author(au))
                record.append(aus)

            for field in ('timestamp', 'pubdate'):
                if field in fields:
                    record.append(
                        getattr(E, field)(isoformat(r[field], as_utc=False)))

            if 'tags' in fields and r['tags']:
                tags = E.tags()
                for tag in r['tags']:
                    tags.append(E.tag(tag))
                record.append(tags)

            if 'comments' in fields and r['comments']:
                record.append(E.comments(r['comments']))

            if 'series' in fields and r['series']:
                record.append(
                    E.series(r['series'], index=str(r['series_index'])))

            if 'languages' in fields and r['languages']:
                record.append(E.languages(r['languages']))

            if 'cover' in fields and r['cover']:
                record.append(E.cover(r['cover'].replace(os.sep, '/')))

            if 'formats' in fields and r['formats']:
                fmt = E.formats()
                for f in r['formats']:
                    fmt.append(E.format(f.replace(os.sep, '/')))
                record.append(fmt)

            if 'library_name' in fields:
                record.append(E.library_name(current_library))

        with open(path_to_output, 'wb') as f:
            f.write(
                etree.tostring(root,
                               encoding='utf-8',
                               xml_declaration=True,
                               pretty_print=True))
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    """Write a CSV or XML catalog of the selected books.

    The output format is the extension of ``path_to_output``; only ``csv``
    and ``xml`` produce a file, any other extension writes nothing.

    :param path_to_output: destination file path.
    :param opts: options object; this method reads ``library_path``,
        ``verbose``, ``connected_device``, ``ids``, ``search_text`` and
        ``fields`` from it and clears ``search_text`` when ``ids`` is set.
    :param db: database used for searching, custom (``#``) fields, field
        metadata and the on-device mapping.
    :param notification: stored on ``self.notification``; not otherwise
        used in this method.
    """
    from calibre.library import current_library_name
    from calibre.utils.date import isoformat
    from calibre.utils.html2text import html2text
    from calibre.utils.logging import default_log as log
    from lxml import etree

    self.fmt = path_to_output.rpartition(".")[2]
    self.notification = notification
    current_library = current_library_name()
    if getattr(opts, "library_path", None):
        current_library = os.path.basename(opts.library_path)

    if opts.verbose:
        opts_dict = vars(opts)
        log("%s('%s'): Generating %s" % (self.name, current_library, self.fmt.upper()))
        if opts.connected_device["is_device_connected"]:
            log(" connected_device: %s" % opts.connected_device["name"])
        if opts_dict["search_text"]:
            log(" --search='%s'" % opts_dict["search_text"])

        if opts_dict["ids"]:
            log(" Book count: %d" % len(opts_dict["ids"]))
            if opts_dict["search_text"]:
                log(" (--search ignored when a subset of the database is specified)")

        if opts_dict["fields"]:
            if opts_dict["fields"] == "all":
                log(" Fields: %s" % ", ".join(FIELDS[1:]))
            else:
                log(" Fields: %s" % opts_dict["fields"])

    # If a list of ids are provided, don't use search_text
    if opts.ids:
        opts.search_text = None

    data = self.search_sort_db(db, opts)

    if not len(data):
        # An empty result is reported but is not fatal; an empty catalog is
        # still written below.
        log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
        # raise SystemExit(1)

    # Get the requested output fields as a list
    fields = self.get_output_fields(db, opts)

    # If connected device, add 'On Device' values to data
    if opts.connected_device["is_device_connected"] and "ondevice" in fields:
        for entry in data:
            entry["ondevice"] = db.catalog_plugin_on_device_temp_mapping[entry["id"]]["ondevice"]

    fm = {x: db.field_metadata.get(x, {}) for x in fields}

    if self.fmt == "csv":
        # The with-block closes the file even if a row fails to serialise.
        with codecs.open(path_to_output, "w", "utf8") as outfile:
            # Write a UTF-8 BOM. It is written as the text character U+FEFF
            # so the utf8 writer emits the three BOM bytes itself; handing
            # the raw bytes "\xef\xbb\xbf" to a text encoder relies on the
            # interpreter's default encoding.
            outfile.write(u"\ufeff")

            # Output the field headers
            outfile.write(u"%s\n" % u",".join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith("#"):
                        item = db.get_field(entry["id"], field, index_is_id=True)
                    elif field == "library_name":
                        item = current_library
                    elif field == "title_sort":
                        item = entry["sort"]
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == "formats":
                        fmt_list = []
                        for format in item:
                            fmt_list.append(format.rpartition(".")[2].lower())
                        item = ", ".join(fmt_list)
                    elif field in ["authors", "tags"]:
                        item = ", ".join(item)
                    elif field == "isbn":
                        # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                        item = u"%s" % re.sub(r"[^\dX-]", "", item)
                    elif field in ["pubdate", "timestamp"]:
                        item = isoformat(item, as_utc=False)
                    elif field == "comments":
                        item = item.replace(u"\r\n", u" ")
                        item = item.replace(u"\n", u" ")
                    elif fm.get(field, {}).get("datatype", None) == "rating" and item:
                        # Ratings are halved for output.
                        item = u"%.2g" % (item / 2.0)

                    # Convert HTML to markdown text: only when the value
                    # contains an opening tag and ends with the matching
                    # closing tag. The raw-string patterns have the same
                    # characters as the previous non-raw literals.
                    if isinstance(item, unicode):
                        opening_tag = re.search(r"<(\w+)( |>)", item)
                        if opening_tag:
                            closing_tag = re.search(r"<\/%s>$" % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    # CSV quoting: wrap in quotes and double embedded quotes.
                    outstr.append(u'"%s"' % unicode(item).replace('"', '""'))

                outfile.write(u",".join(outstr) + u"\n")

    elif self.fmt == "xml":
        from lxml.builder import E

        root = E.calibredb()
        for r in data:
            record = E.record()
            root.append(record)

            # Custom columns: '#name' becomes element '_name'.
            for field in fields:
                if field.startswith("#"):
                    val = db.get_field(r["id"], field, index_is_id=True)
                    if not isinstance(val, (str, unicode)):
                        val = unicode(val)
                    item = getattr(E, field.replace("#", "_"))(val)
                    record.append(item)

            for field in ("id", "uuid", "publisher", "rating", "size", "isbn", "ondevice", "identifiers"):
                if field in fields:
                    val = r[field]
                    if not val:
                        continue
                    if not isinstance(val, (str, unicode)):
                        if fm.get(field, {}).get("datatype", None) == "rating" and val:
                            val = u"%.2g" % (val / 2.0)
                        val = unicode(val)
                    item = getattr(E, field)(val)
                    record.append(item)

            if "title" in fields:
                title = E.title(r["title"], sort=r["sort"])
                record.append(title)

            if "authors" in fields:
                aus = E.authors(sort=r["author_sort"])
                for au in r["authors"]:
                    aus.append(E.author(au))
                record.append(aus)

            for field in ("timestamp", "pubdate"):
                if field in fields:
                    record.append(getattr(E, field)(isoformat(r[field], as_utc=False)))

            if "tags" in fields and r["tags"]:
                tags = E.tags()
                for tag in r["tags"]:
                    tags.append(E.tag(tag))
                record.append(tags)

            if "comments" in fields and r["comments"]:
                record.append(E.comments(r["comments"]))

            if "series" in fields and r["series"]:
                record.append(E.series(r["series"], index=str(r["series_index"])))

            if "cover" in fields and r["cover"]:
                record.append(E.cover(r["cover"].replace(os.sep, "/")))

            if "formats" in fields and r["formats"]:
                fmt = E.formats()
                for f in r["formats"]:
                    fmt.append(E.format(f.replace(os.sep, "/")))
                record.append(fmt)

            if "library_name" in fields:
                record.append(E.library_name(current_library))

        # etree.tostring(..., encoding="utf-8") yields encoded bytes, so the
        # file is opened in binary mode to avoid newline translation.
        with open(path_to_output, "wb") as f:
            f.write(etree.tostring(root, encoding="utf-8", xml_declaration=True, pretty_print=True))