def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    Only 132 byte record 0 files (as produced by Dropbook) are inspected for
    embedded metadata; for anything else only the title fallback applies.

    @param stream: Seekable binary file-like object holding the PDB file.
    @param extract_cover: If true, C{get_cover} is called to fill
        C{mi.cover_data} for files whose header advertises metadata.
    @return: The metadata object. The title falls back to the PDB header
        title and finally to the translated 'Unknown'.
    """
    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    record0 = pheader.section_data(0)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(record0) == 132:
        hr = HeaderRecord(record0)

        if hr.compression in (2, 10) and hr.has_metadata == 1:
            # Every character outside this whitelist is dropped from a field.
            junk = re.compile(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]')
            try:
                mdata = pheader.section_data(hr.metadata_offset)
                # The record is raw bytes. Decode it before splitting on NUL,
                # otherwise the split raises on Python 3 and the metadata is
                # silently lost.
                if isinstance(mdata, bytes):
                    mdata = mdata.decode('cp1252', 'replace')
                fields = mdata.split('\x00')
                mi.title = junk.sub('', fields[0])
                mi.authors = [junk.sub('', fields[1])]
                mi.publisher = junk.sub('', fields[3])
                mi.isbn = junk.sub('', fields[4])
            except Exception:
                # Best effort: a short or malformed record keeps whatever
                # fields were read before the failure.
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        mi.title = pheader.title if pheader.title else _('Unknown')

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    Embedded metadata is only read from 132 byte record 0 files (Dropbook
    output) whose header reports compression 2 or 10 and a metadata record.

    @param stream: Seekable binary file-like object holding the PDB file.
    @param extract_cover: If true, also fetch the cover with C{get_cover}.
    @return: The metadata object; the title falls back to the PDB header
        title and then to the translated 'Unknown'.
    """
    def scrub(text):
        # Drop every character that is not in the allowed set.
        return re.sub(r'[^a-zA-Z0-9 \._=\+\-!\?,\'\"]', '', text)

    mi = MetaInformation(None, [_('Unknown')])
    stream.seek(0)
    pheader = PdbHeaderReader(stream)

    # Only Dropbook produced 132 byte record0 files are supported
    if len(pheader.section_data(0)) == 132:
        hr = HeaderRecord(pheader.section_data(0))
        has_embedded = hr.compression in (2, 10) and hr.has_metadata == 1

        if has_embedded:
            try:
                raw_record = pheader.section_data(hr.metadata_offset)
                parts = raw_record.decode('cp1252', 'replace').split('\x00')
                # Assigned one by one so a short record keeps earlier fields.
                mi.title = scrub(parts[0])
                mi.authors = [scrub(parts[1])]
                mi.publisher = scrub(parts[3])
                mi.isbn = scrub(parts[4])
            except Exception:
                pass

            if extract_cover:
                mi.cover_data = get_cover(pheader, hr)

    if not mi.title:
        if pheader.title:
            mi.title = pheader.title
        else:
            mi.title = _('Unknown')

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    The SNB container is parsed and C{snbf/book.snbf} is read for the title,
    author, language and publisher. Any failure is printed as a traceback
    and whatever was collected so far is returned.

    @param stream: Either a file-like object (anything with a C{write}
        attribute) or the raw file contents.
    @param extract_cover: If true, also load the cover image named in the
        book description.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    container = SNBFile()

    try:
        if hasattr(stream, 'write'):
            stream.seek(0)
            container.Parse(stream, True)
        else:
            # Raw data was passed in: wrap it so it can be parsed as a file.
            container.Parse(io.BytesIO(stream), True)

        description = container.GetFileStream('snbf/book.snbf')
        if description is None:
            return mi

        head = etree.fromstring(description)
        mi.title = head.find('.//head/name').text
        mi.authors = [head.find('.//head/author').text]
        language = head.find('.//head/language').text
        mi.language = language.lower().replace('_', '-')
        mi.publisher = head.find('.//head/publisher').text

        if not extract_cover:
            return mi

        cover = head.find('.//head/cover')
        if cover is not None and cover.text is not None:
            ext = os.path.splitext(cover.text)[1]
            if ext == '.jpeg':
                ext = '.jpg'
            image = container.GetFileStream('snbc/images/' + cover.text)
            mi.cover_data = (ext[-3:], image)
    except Exception:
        import traceback
        traceback.print_exc()

    return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    """
    Build one metadata object from several format files of the same book.

    C{formats} is sorted in place by C{METADATA_PRIORITIES}. An OPF file with
    a title wins outright; otherwise each file is read in turn and merged
    with C{smart_update}, stopping as soon as an C{application_id} is known.

    @param formats: List of file paths; it is re-ordered in place.
    @param force_read_metadata: Passed through to C{get_metadata}.
    @param pattern: Passed through to C{get_metadata}.
    @return: The merged metadata; missing title/authors become 'Unknown'.
    """
    mi = MetaInformation(None, None)
    # key= gives the same stable ordering as the old cmp= comparator and
    # works on both Python 2 and Python 3 (cmp= was removed in Python 3).
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = list(map(path_to_ext, formats))
    if 'opf' in extensions:
        opf = formats[extensions.index('opf')]
        mi2 = opf_metadata(opf)
        if mi2 is not None and mi2.title:
            return mi2

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except Exception:
                # An unreadable format must not stop the remaining ones, but
                # KeyboardInterrupt/SystemExit should still propagate.
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return the metadata stored in the C{meta.xml} member of an ODF archive.

    Standard fields (title, creator, description, language, keywords) are
    read first. If the custom field C{opf.metadata} is 'true', the C{opf.*}
    fields are applied on top of them.

    @param stream: File-like object containing the zip archive.
    @param extract_cover: Passed through to C{read_cover}.
    @return: The populated metadata object.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]

    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other data
            pass

    return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    """
    Merge the metadata of several format files belonging to one book.

    The list is sorted in place by C{METADATA_PRIORITIES}. If an OPF file is
    present and yields a title, its metadata is returned directly. Otherwise
    every file is read and folded into one object with C{smart_update};
    reading stops early once an C{application_id} has been found.

    @param formats: List of file paths (sorted in place).
    @param force_read_metadata: Forwarded to C{get_metadata}.
    @param pattern: Forwarded to C{get_metadata}.
    @return: Merged metadata with 'Unknown' substituted for a missing title
        or author list.
    """
    mi = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = list(map(path_to_ext, formats))

    if 'opf' in extensions:
        opf_path = formats[extensions.index('opf')]
        opf_mi = opf_metadata(opf_path)
        if opf_mi is not None and opf_mi.title:
            return opf_mi

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                newmi = get_metadata(stream, stream_type=ext,
                                     use_libprs_metadata=True,
                                     force_read_metadata=force_read_metadata,
                                     pattern=pattern)
                mi.smart_update(newmi)
            except Exception:
                # Skip formats that cannot be read. A bare except would also
                # have swallowed KeyboardInterrupt and SystemExit.
                continue
            if getattr(mi, 'application_id', None) is not None:
                return mi

    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Read book metadata from the C{meta.xml} member of an ODF zip archive.

    Plain ODF fields are applied first; when the document carries the custom
    field C{opf.metadata == 'true'} the C{opf.*} fields are applied as well.
    The cover is read afterwards unless C{opf.nocover} is 'true'.

    @param stream: File-like object containing the zip archive.
    @param extract_cover: Forwarded to C{read_cover}.
    @return: The populated metadata object.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields

    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]

    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'

    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except Exception:
            # Do not let an error reading the cover prevent reading other
            # data. Exception (not a bare except) so that Ctrl-C and
            # SystemExit are not swallowed.
            pass

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object.

    The default title is the stream's file name without directory and
    extension. The first four lines (at most 100 characters) are then
    searched for a "title, three line breaks, author" header.

    @param stream: Seekable binary file-like object.
    @param extract_cover: Unused by this reader.
    '''
    stem = getattr(stream, 'name', '').rpartition('.')[0]
    if stem:
        stem = os.path.basename(stem)
    mi = MetaInformation(stem or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    pieces = []
    for attempt in range(4):
        decoded = stream.readline().decode('utf-8', 'replace')
        if decoded == '':
            # End of file reached before four lines were read.
            break
        pieces.append(decoded)
    head = u''.join(pieces)[:100]

    found = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', head)
    if found is None:
        return mi

    mi.title = found.group('title')
    mi.authors = found.group('author').split(',')
    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    Parses the SNB container and reads title, author, language and publisher
    from C{snbf/book.snbf}. Errors are reported with a printed traceback and
    the metadata gathered up to that point is returned.

    @param stream: A file-like object (anything with a C{write} attribute)
        or the raw file contents.
    @param extract_cover: If true, also load the cover image referenced by
        the book description.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    archive = SNBFile()

    try:
        is_file_like = hasattr(stream, 'write')
        if is_file_like:
            stream.seek(0)
            archive.Parse(stream, True)
        else:
            archive.Parse(StringIO(stream), True)

        book_xml = archive.GetFileStream('snbf/book.snbf')
        if book_xml is not None:
            tree = etree.fromstring(book_xml)
            mi.title = tree.find('.//head/name').text
            mi.authors = [tree.find('.//head/author').text]
            lang_text = tree.find('.//head/language').text
            mi.language = lang_text.lower().replace('_', '-')
            mi.publisher = tree.find('.//head/publisher').text

            if extract_cover:
                cover_node = tree.find('.//head/cover')
                if cover_node is not None and cover_node.text is not None:
                    suffix = os.path.splitext(cover_node.text)[1]
                    if suffix == '.jpeg':
                        suffix = '.jpg'
                    cover_bytes = archive.GetFileStream('snbc/images/' + cover_node.text)
                    mi.cover_data = (suffix[-3:], cover_bytes)
    except Exception:
        import traceback
        traceback.print_exc()

    return mi
def get_metadata(stream):
    """
    Return metadata for the MOBI book in C{stream}.

    Files starting with C{TPZ} are delegated to the Topaz reader. Otherwise
    the header's EXTH metadata is used when present. When there is no EXTH
    block and the file is smaller than 4 MiB, the book is unpacked into a
    temporary directory to look for embedded metadata. Finally a cover image
    is looked up and stored as JPEG data.

    @param stream: Seekable binary file-like object; a C{name} attribute is
        used for the default title when available.
    @return: The metadata object.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # The stream has no usable name (e.g. an in-memory buffer).
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH block: fall back to unpacking the book, but only for small
        # files because extraction is expensive.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object.

    The title defaults to the base name of the stream's file name with the
    extension removed. Up to four leading lines (capped at 1024 characters)
    are searched for a title line, three line breaks, then an author line.

    @param stream: Seekable binary file-like object.
    @param extract_cover: Unused by this reader.
    '''
    base = getattr(stream, 'name', '').rpartition('.')[0]
    if base:
        base = os.path.basename(base)
    mi = MetaInformation(base or _('Unknown'), [_('Unknown')])
    stream.seek(0)

    header_text = ''
    lines_left = 4
    while lines_left > 0:
        lines_left -= 1
        text = stream.readline().decode('utf-8', 'replace')
        if not text:
            break
        header_text += text
    header_text = header_text[:1024]

    header = re.search(
        '(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$',
        header_text)
    if header is not None:
        mi.title = header.group('title')
        mi.authors = header.group('author').split(',')

    return mi
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object.

    Reads the NUL-terminated category, title and author strings that follow
    the IMP header. Problems are reported on stderr and the metadata read so
    far (by default title and author 'Unknown') is returned.

    @param stream: Seekable file-like object containing the IMP file.
    """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if stream.read(10) not in MAGIC:
            print >>sys.stderr, u'Couldn\'t read IMP header from file'
            return mi

        def cString(skip=0):
            # Return the next NUL-terminated string, after first discarding
            # `skip` complete strings.
            result = ''
            while 1:
                data = stream.read(1)
                if not data:
                    # read() returns an empty string at end of file, which
                    # never equals NUL: without this check a truncated file
                    # would spin here forever.
                    raise EOFError('Unexpected end of file while reading IMP string')
                if data == '\x00':
                    if not skip:
                        return result
                    skip -= 1
                    result, data = '', ''
                result += data

        stream.read(38)  # skip past some uninteresting headers
        _, category, title, author = cString(), cString(), cString(1), cString(2)

        if title:
            mi.title = title
        if author:
            mi.authors = string_to_authors(author)
            mi.author = author
        if category:
            mi.category = category
    except Exception as err:
        msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
        print >>sys.stderr, msg.encode('utf8')
    return mi
def get_metadata(stream):
    """
    Return metadata for the MOBI book in C{stream}.

    Topaz files (leading C{TPZ}) are handed to the Topaz reader. For MOBI
    files the EXTH metadata is preferred. Without an EXTH block, files under
    4 MiB are unpacked into a temporary directory to find embedded metadata.
    The cover, if any, is converted with C{save_cover_data_to}.

    @param stream: Seekable binary file-like object; its C{name}, when
        present, provides the default title.
    @return: The metadata object.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        # Binary fallback so the comparison below is bytes against bytes.
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # The stream has no usable name attribute.
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def get_social_metadata(title, authors, publisher, isbn, username=None, password=None):
    """
    Look a book up on LibraryThing by ISBN and return what was found.

    Title and authors are only filled in when not already supplied. Series,
    series index and rating are taken from the page when present. Without an
    ISBN, or when the page cannot be fetched, the metadata built from the
    arguments is returned unchanged.

    @param title: Known title, or a false value.
    @param authors: Known list of authors, or a false value.
    @param publisher: Unused.
    @param isbn: ISBN used for the lookup.
    @param username: Passed to C{login}.
    @param password: Passed to C{login}.
    @raise Exception: If LibraryThing answers with its verification page.
    """
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if isbn:
        br = get_browser()
        try:
            login(br, username, password)

            raw = br.open_novisit('http://www.librarything.com/isbn/'
                        +isbn).read()
        except Exception:
            # Network or login failure: return what we already have.
            return mi
        if '/wiki/index.php/HelpThing:Verify' in raw:
            raise Exception('LibraryThing is blocking calibre.')
        if not raw:
            return mi
        raw = raw.decode('utf-8', 'replace')
        raw = strip_encoding_declarations(raw)
        root = html.fromstring(raw)
        h1 = root.xpath('//div[@class="headsummary"]/h1')
        if h1 and not mi.title:
            mi.title = html.tostring(h1[0], method='text', encoding=unicode)
        h2 = root.xpath('//div[@class="headsummary"]/h2/a')
        if h2 and not mi.authors:
            mi.authors = [html.tostring(x, method='text', encoding=unicode) for x
                    in h2]
        h3 = root.xpath('//div[@class="headsummary"]/h3/a')
        if h3:
            match = None
            for h in h3:
                series = html.tostring(h, method='text', encoding=unicode)
                match = re.search(r'(.+) \((.+)\)', series)
                if match is not None:
                    break
            if match is not None:
                mi.series = match.group(1).strip()
                match = re.search(r'[0-9.]+', match.group(2))
                si = 1.0
                if match is not None:
                    try:
                        si = float(match.group())
                    except ValueError:
                        # '[0-9.]+' also matches text such as '.' or '1.2.3'
                        # that is not a number; keep the default index.
                        pass
                mi.series_index = si
        #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
        #if tags:
        #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
        #            in tags]
        span = root.xpath(
                '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
        if span:
            raw = html.tostring(span[0], method='text', encoding=unicode)
            match = re.search(r'([0-9.]+)', raw)
            if match is not None:
                try:
                    rating = float(match.group())
                except ValueError:
                    # Not a parseable number: leave the rating unset.
                    rating = 0
                if 0 < rating <= 5:
                    mi.rating = rating
    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object.

    Scans the PDB sections for the first one whose header type is
    C{DATATYPE_METADATA} and reads the character set, author, title and
    publication date records from it.

    @param stream: Seekable binary file-like object holding the PDB file.
    @param extract_cover: Unused by this reader.
    @return: The metadata object. When no metadata section exists the
        defaults ('Unknown' title and author) are returned.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    # A section too short to hold the record count cannot be parsed.
    if not section_data or len(section_data) < 2:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
            # Record payload: starts after the 4 byte (type, length) header.
            start = 6 + adv
            payload = section_data[start:start + 2*length]

            # CharSet
            if rec_type == 1:
                val, = struct.unpack_from('>H', section_data, start)
                default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # Author
            elif rec_type == 4:
                # The whole payload is the string. Indexing a single element
                # gave one character on Python 2 and an int on Python 3.
                author = payload
            # Title
            elif rec_type == 5:
                title = payload
            # Publication Date
            elif rec_type == 6:
                pubdate, = struct.unpack_from('>I', section_data, start)
        except struct.error:
            # Truncated record: keep what has been read so far.
            break
        # NOTE(review): this stride does not include the 4 byte record header
        # read above, so records after the first may be misaligned. Left
        # unchanged; confirm against the Plucker format specification.
        adv += 2*length

    if title:
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        authors = author.split(',')
        mi.authors = authors
        # The original only set `author`; kept for callers that read it.
        mi.author = authors
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object.

    The first PDB section whose header type is C{DATATYPE_METADATA} is
    parsed for its character set, author, title and publication date
    records.

    @param stream: Seekable binary file-like object holding the PDB file.
    @param extract_cover: Unused by this reader.
    @return: The metadata object; defaults are returned when the file has
        no metadata section.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    # Nothing to parse, or not even room for the 2 byte record count.
    if not section_data or len(section_data) < 2:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    # range() instead of xrange(): identical here and valid on Python 3.
    for i in range(record_count):
        try:
            rec_type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break
        data_start = 6 + adv
        data_end = data_start + 2*length

        try:
            # CharSet
            if rec_type == 1:
                val, = struct.unpack('>H', section_data[data_start:data_start+2])
                default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # Author
            elif rec_type == 4:
                # Take the full record payload, not a single element of it.
                author = section_data[data_start:data_end]
            # Title
            elif rec_type == 5:
                title = section_data[data_start:data_end]
            # Publication Date
            elif rec_type == 6:
                pubdate, = struct.unpack('>I', section_data[data_start:data_start+4])
        except struct.error:
            # Truncated payload: stop parsing, keep earlier records.
            break
        # NOTE(review): the stride ignores the 4 byte (type, length) header,
        # so later records may be read from the wrong offset. Left as found;
        # verify against the Plucker format specification.
        adv += 2*length

    if title:
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        author_list = author.split(',')
        mi.authors = author_list
        # Previously the only attribute set; retained for existing readers.
        mi.author = author_list
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
def do_add_empty(db, title, authors, isbn, tags, series, series_index):
    """
    Add a book record without any format files to C{db}.

    Only the supplied (truthy) fields are set on the new record; the title
    is set whenever it is not None. Afterwards dirtied records are written
    out and C{send_message} is called.
    """
    from calibre.ebooks.metadata import MetaInformation

    book = MetaInformation(None)
    if title is not None:
        book.title = title
    # Optional fields are applied in the same order as the parameters.
    for field, value in (('authors', authors), ('isbn', isbn), ('tags', tags)):
        if value:
            setattr(book, field, value)
    if series:
        book.series = series
        book.series_index = series_index

    db.import_book(book, [])
    write_dirtied(db)
    send_message()
def add_document(self, document):
    """
    Add one Mendeley document to the library as a PDF book.

    A metadata record is built from the document's title, authors and
    Mendeley id, the file at C{document['path']} is added to C{self.db} and
    the source file is then deleted.

    @param document: Mapping with the keys 'title', 'authors',
        'mendeley_id' and 'path'.
    """
    from calibre.ebooks.metadata import MetaInformation

    source_path = document['path']

    mi = MetaInformation('', [_('Unknown')])
    mi.title = document['title']
    mi.authors = document['authors']
    mi.tags = ["Mendeley"]
    mi.identifiers = {'mendeley': document['mendeley_id']}
    mi.series_index = 1  # needed?

    self.db.add_books([source_path], ['pdf'], [mi], False, True)
    os.remove(source_path)
def add_document(self,document):
    """
    Import a Mendeley document into the library and remove the source file.

    The book gets the document's title and authors, the tag "Mendeley" and
    an identifier of type 'mendeley'. It is added as a PDF through
    C{self.db.add_books}.

    @param document: Mapping providing 'title', 'authors', 'mendeley_id'
        and 'path'.
    """
    from calibre.ebooks.metadata import MetaInformation

    book_info = MetaInformation('', [_('Unknown')])
    book_info.title = document['title']
    book_info.authors = document['authors']
    book_info.tags = ["Mendeley"]

    identifiers = dict()
    identifiers['mendeley'] = document['mendeley_id']
    book_info.identifiers = identifiers
    book_info.series_index = 1  # needed?

    paths = [document['path']]
    formats = ['pdf']
    self.db.add_books(paths, formats, [book_info], False, True)

    # The file has been handed to the database; drop the original.
    os.remove(document['path'])
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Values that are empty or contain the word 'unknown' (and the placeholder
    publisher 'some publisher') are replaced by None.

    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        # Both category and classification were empty.
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        # The reading field is optional; ignore files that lack it without
        # swallowing KeyboardInterrupt/SystemExit.
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        pass
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Empty values, values containing 'unknown' and the placeholder publisher
    'some publisher' are normalised to None before returning.

    @param stream: A file like object or an instance of L{LRFMetaFile}
    """
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip() + ', ' + lrf.classification.strip()
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        # Nothing but the separator: category and classification were empty.
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()

    # The "reading" (sort) fields are optional. Failures are ignored, but
    # only ordinary exceptions, so Ctrl-C and SystemExit still propagate.
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        pass

    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
            'some publisher' in mi.publisher.lower():
        mi.publisher = None

    return mi
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object.

    Walks the table of contents of the RB file to the entry with flag 2 (the
    INFO block) and reads C{TITLE} and C{AUTHOR} from its C{key=value}
    lines. A bad header or missing INFO block is reported on stderr and the
    default metadata is returned; any other error is reported and re-raised.

    @param stream: Seekable file-like object containing the RB file.
    """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print >> sys.stderr, u'Couldn\'t read RB header from file'
            return mi
        stream.read(10)

        read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            # The loop finished without finding an entry flagged as INFO.
            print >> sys.stderr, u'Couldn\'t find INFO from RB file'
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            # Split on the first '=' only: a value may itself contain '=',
            # which used to raise "too many values to unpack".
            key, value = line.split('=', 1)
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s' % (
            mi.title, unicode(err))
        print >> sys.stderr, msg.encode('utf8')
        raise
    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    For C{.pmlz} archives every C{*.pml} file is extracted to a temporary
    directory and concatenated; otherwise the stream itself is the PML text.
    TITLE, AUTHOR, PUBLISHER, COPYRIGHT and ISBN are then read from the
    C{\\v ... \\v} comment blocks.

    @param stream: Binary file-like object with a C{name} attribute.
    @param extract_cover: If true, also look up the cover with C{get_cover}.
    """
    def read_field(pattern, block):
        # Return the cleaned value of the first match, or None if absent.
        found = re.search(pattern, block)
        if found is None:
            return None
        text = found.group(1).strip().decode('cp1252', 'replace')
        return re.sub('[\x00-\x1f]', '', prepare_string_for_xml(text))

    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pml = b''
    if stream.name.endswith('.pmlz'):
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)

            for pml_path in glob.glob(os.path.join(tdir, '*.pml')):
                with open(pml_path, 'r+b') as pml_file:
                    pml += pml_file.read()
            if extract_cover:
                book_name = os.path.splitext(os.path.basename(stream.name))[0]
                mi.cover_data = get_cover(book_name, tdir, True)
    else:
        pml = stream.read()
        if extract_cover:
            book_name = os.path.splitext(os.path.basename(stream.name))[0]
            book_dir = os.path.abspath(os.path.dirname(stream.name))
            mi.cover_data = get_cover(book_name, book_dir)

    for comment in re.findall(br'(?ms)\\v.*?\\v', pml):
        value = read_field(br'TITLE="(.*?)"', comment)
        if value is not None:
            mi.title = value

        value = read_field(br'AUTHOR="(.*?)"', comment)
        if value is not None:
            # The first real author replaces the placeholder entry.
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(value)

        value = read_field(br'PUBLISHER="(.*?)"', comment)
        if value is not None:
            mi.publisher = value

        value = read_field(br'COPYRIGHT="(.*?)"', comment)
        if value is not None:
            mi.rights = value

        value = read_field(br'ISBN="(.*?)"', comment)
        if value is not None:
            mi.isbn = value

    return mi
def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInfo} object.

    The PML text is taken from the stream, or, for C{.pmlz} archives, from
    all C{*.pml} files extracted into a temporary directory. Metadata is
    read from the C{\\v ... \\v} comment blocks (TITLE, AUTHOR, PUBLISHER,
    COPYRIGHT, ISBN).

    @param stream: File-like object with a C{name} attribute.
    @param extract_cover: If true, also look up the cover with C{get_cover}.
    """
    def cleaned(match):
        # Decode the captured value, escape it for XML and drop control
        # characters.
        decoded = match.group(1).strip().decode('cp1252', 'replace')
        return re.sub('[\x00-\x1f]', '', prepare_string_for_xml(decoded))

    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pml = ''
    is_archive = stream.name.endswith('.pmlz')
    if is_archive:
        with TemporaryDirectory('_unpmlz') as tdir:
            archive = ZipFile(stream)
            archive.extractall(tdir)

            extracted = glob.glob(os.path.join(tdir, '*.pml'))
            for path in extracted:
                with open(path, 'r+b') as handle:
                    pml += handle.read()
            if extract_cover:
                mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], tdir, True)
    else:
        pml = stream.read()
        if extract_cover:
            mi.cover_data = get_cover(os.path.splitext(os.path.basename(stream.name))[0], os.path.abspath(os.path.dirname(stream.name)))

    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        hit = re.search(r'TITLE="(.*?)"', comment)
        if hit:
            mi.title = cleaned(hit)

        hit = re.search(r'AUTHOR="(.*?)"', comment)
        if hit:
            if mi.authors == [_('Unknown')]:
                # Drop the placeholder before adding the first real author.
                mi.authors = []
            mi.authors.append(cleaned(hit))

        hit = re.search(r'PUBLISHER="(.*?)"', comment)
        if hit:
            mi.publisher = cleaned(hit)

        hit = re.search(r'COPYRIGHT="(.*?)"', comment)
        if hit:
            mi.rights = cleaned(hit)

        hit = re.search(r'ISBN="(.*?)"', comment)
        if hit:
            mi.isbn = cleaned(hit)

    return mi
def get_metadata(stream):
    """
    Return metadata as a L{MetaInfo} object.

    The table of contents of the RB file is searched for the entry with
    flag 2 (the INFO block), whose C{key=value} lines supply C{TITLE} and
    C{AUTHOR}. A bad header or a missing INFO block is reported on stderr
    and the default metadata returned; other errors are reported and
    re-raised.

    @param stream: Seekable file-like object containing the RB file.
    """
    title = 'Unknown'
    mi = MetaInformation(title, ['Unknown'])
    stream.seek(0)
    try:
        if not stream.read(14) == MAGIC:
            print(u'Couldn\'t read RB header from file', file=sys.stderr)
            return mi
        stream.read(10)

        def read_i32():
            # Next little-endian unsigned 32 bit integer from the stream.
            return struct.unpack('<I', stream.read(4))[0]

        stream.seek(read_i32())
        toc_count = read_i32()

        for i in range(toc_count):
            stream.read(32)
            length, offset, flag = read_i32(), read_i32(), read_i32()
            if flag == 2:
                break
        else:
            # No table-of-contents entry was flagged as the INFO block.
            print(u'Couldn\'t find INFO from RB file', file=sys.stderr)
            return mi

        stream.seek(offset)
        info = stream.read(length).splitlines()
        for line in info:
            if '=' not in line:
                continue
            # Only the first '=' separates key and value. Splitting on all
            # of them raised ValueError for values that contain '='.
            key, value = line.split('=', 1)
            if key.strip() == 'TITLE':
                mi.title = value.strip()
            elif key.strip() == 'AUTHOR':
                mi.author = value
                mi.authors = string_to_authors(value)
    except Exception as err:
        msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
        print(msg.encode('utf8'), file=sys.stderr)
        raise
    return mi
def do_add_empty(dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers, languages):
    """
    Add a book record without any format files and print the new book ids.

    Only fields that were actually supplied are set on the metadata object
    before it is sent (together with its cover, via C{read_cover}) to the
    database context.
    """
    mi = MetaInformation(None)
    if title is not None:
        mi.title = title
    if authors:
        mi.authors = authors
    if identifiers:
        mi.set_identifiers(identifiers)
    # Remaining simple fields, applied in the original order.
    for field, value in (('isbn', isbn), ('tags', tags)):
        if value:
            setattr(mi, field, value)
    if series:
        mi.series = series
        mi.series_index = series_index
    for field, value in (('cover', cover), ('languages', languages)):
        if value:
            setattr(mi, field, value)

    ids, duplicates = dbctx.run('add', 'empty', read_cover(mi))
    id_text = ','.join(str(book_id) for book_id in ids)
    prints(_('Added book ids: %s') % id_text)
def do_add_empty(
    dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers,
    languages
):
    """
    Create an empty book (metadata only) and report the ids that were added.

    A field is set only when a value was given for it; the title is set
    whenever it is not None. The record is passed through C{read_cover}
    before being handed to C{dbctx.run('add', 'empty', ...)}.
    """
    book = MetaInformation(None)

    if title is not None:
        book.title = title
    if authors:
        book.authors = authors
    if identifiers:
        book.set_identifiers(identifiers)
    if isbn:
        book.isbn = isbn
    if tags:
        book.tags = tags
    if series:
        book.series = series
        book.series_index = series_index
    if cover:
        book.cover = cover
    if languages:
        book.languages = languages

    outcome = dbctx.run('add', 'empty', read_cover(book))
    ids, duplicates = outcome
    message = _('Added book ids: %s') % ','.join([str(i) for i in ids])
    prints(message)
def get_metadata(stream):
    """
    Return the metadata stored in the C{meta.xml} member of an ODF archive.

    Reads title, authors (preferring C{initial-creator} over C{creator}),
    description, language and keywords.

    @param stream: File-like object containing the zip archive.
    @return: The populated metadata object.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        # Strip whitespace and drop empty entries so "a, b,," gives
        # ['a', 'b'] rather than ['a', ' b', '', ''].
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    return mi
def add(self, id, opf, cover, name):
    """
    Record the result of reading one book's metadata and add it if possible.

    When a database is attached the book is added to it, honouring the
    automerge preferences; otherwise name, first format path and metadata
    are queued on C{self.names}, C{self.paths} and C{self.infos}.

    @param id: Key into C{self.ids} identifying the book's format files.
    @param opf: Path to the extracted OPF file, or to a C{.error} file
        holding the failure text.
    @param cover: Path to the cover image, or a false value.
    @param name: Display name; used for error reports and as the fallback
        title.
    @return: The title of the book.
    """
    formats = self.ids.pop(id)
    if opf.endswith('.error'):
        mi = MetaInformation('', [_('Unknown')])
        # Context manager so the error file is closed promptly.
        with open(opf, 'rb') as error_file:
            self.critical[name] = error_file.read().decode('utf-8', 'replace')
    else:
        try:
            mi = OPF(opf).to_book_metadata()
        except Exception:
            import traceback
            mi = MetaInformation('', [_('Unknown')])
            self.critical[name] = traceback.format_exc()
    formats = self.process_formats(opf, formats)
    if not mi.title:
        mi.title = os.path.splitext(name)[0]
    mi.title = mi.title if isinstance(mi.title, unicode) else \
               mi.title.decode(preferred_encoding, 'replace')
    if mi.application_id == '__calibre_dummy__':
        mi.application_id = None
    if self.db is not None:
        if cover:
            with open(cover, 'rb') as f:
                cover = f.read()
        orig_formats = formats
        formats = [fmt for fmt in formats if not fmt.lower().endswith('.opf')]
        if prefs['add_formats_to_existing']:  # automerge is on
            identical_book_list = self.db.find_identical_books(mi)
            if identical_book_list:  # books with same author and nearly same title exist in db
                self.merged_books.add(mi.title)
                seen_fmts = set()

                for identical_book in identical_book_list:
                    ib_fmts = self.db.formats(identical_book, index_is_id=True)
                    if ib_fmts:
                        seen_fmts |= set(ib_fmts.split(','))
                    replace = gprefs['automerge'] == 'overwrite'
                    self.add_formats(identical_book, formats,
                            replace=replace)
                if gprefs['automerge'] == 'new record':
                    incoming_fmts = {
                        os.path.splitext(path)[-1].replace('.', '').upper()
                        for path in formats}
                    if incoming_fmts.intersection(seen_fmts):
                        # There was at least one duplicate format
                        # so create a new record and put the
                        # incoming formats into it
                        # We should arguably put only the duplicate
                        # formats, but no real harm is done by having
                        # all formats
                        id_ = self.db.create_book_entry(mi, cover=cover,
                                add_duplicates=True)
                        self.number_of_books_added += 1
                        self.add_formats(id_, formats)

            else:
                # books with same author and nearly same title do not exist in db
                id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                self.number_of_books_added += 1
                self.add_formats(id_, formats)

        else:  # automerge is off
            id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
            if id_ is None:
                self.duplicates.append((mi, cover, orig_formats))
            else:
                self.add_formats(id_, formats)
                self.number_of_books_added += 1
    else:
        self.names.append(name)
        self.paths.append(formats[0])
        self.infos.append(mi)
    return mi.title
def add(self, id, opf, cover, name):
    """Add one book, described by an OPF file, to the database or the pending lists.

    :param id: key into ``self.ids``; the format paths stored under it are
        popped and used for this book.
    :param opf: path to the OPF metadata file. A path ending in ``.error``
        is not parsed; its contents are stored in ``self.critical[name]``.
    :param cover: path to a cover image, or a false value for no cover.
    :param name: key used in ``self.critical``; its extension-less form is
        the fallback title when the metadata has none.
    :return: the title of the metadata object that was built.
    """
    formats = self.ids.pop(id)
    if opf.endswith(".error"):
        mi = MetaInformation("", [_("Unknown")])
        # Use a context manager so the error file is closed deterministically.
        with open(opf, "rb") as error_file:
            self.critical[name] = error_file.read().decode("utf-8", "replace")
    else:
        try:
            mi = OPF(opf).to_book_metadata()
        except Exception:
            # Best effort: record the traceback and continue with empty metadata.
            import traceback

            mi = MetaInformation("", [_("Unknown")])
            self.critical[name] = traceback.format_exc()
    formats = self.process_formats(opf, formats)
    if not mi.title:
        mi.title = os.path.splitext(name)[0]
    if not isinstance(mi.title, unicode):
        mi.title = mi.title.decode(preferred_encoding, "replace")
    if mi.application_id == "__calibre_dummy__":
        mi.application_id = None

    if self.db is not None:
        if cover:
            with open(cover, "rb") as f:
                cover = f.read()
        orig_formats = formats
        formats = [f2 for f2 in formats if not f2.lower().endswith(".opf")]
        if prefs["add_formats_to_existing"]:  # automerge is on
            identical_book_list = self.db.find_identical_books(mi)
            if identical_book_list:
                # books with same author and nearly same title exist in db
                self.merged_books.add((mi.title, " & ".join(mi.authors)))
                seen_fmts = set()
                replace = gprefs["automerge"] == "overwrite"
                for identical_book in identical_book_list:
                    ib_fmts = self.db.formats(identical_book, index_is_id=True)
                    if ib_fmts:
                        seen_fmts |= set(ib_fmts.split(","))
                    self.add_formats(identical_book, formats, replace=replace)
                if gprefs["automerge"] == "new record":
                    incoming_fmts = set(
                        os.path.splitext(path)[-1].replace(".", "").upper()
                        for path in formats
                    )
                    if incoming_fmts.intersection(seen_fmts):
                        # There was at least one duplicate format
                        # so create a new record and put the
                        # incoming formats into it
                        # We should arguably put only the duplicate
                        # formats, but no real harm is done by having
                        # all formats
                        id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                        self.number_of_books_added += 1
                        self.add_formats(id_, formats)
            else:
                # books with same author and nearly same title do not exist in db
                id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=True)
                self.number_of_books_added += 1
                self.add_formats(id_, formats)
        else:  # automerge is off
            id_ = self.db.create_book_entry(mi, cover=cover, add_duplicates=False)
            if id_ is None:
                # The database refused the entry; keep it for the caller to handle.
                self.duplicates.append((mi, cover, orig_formats))
            else:
                self.add_formats(id_, formats)
                self.auto_convert_books.add(id_)
                self.number_of_books_added += 1
    else:
        # No database: only remember what would have been added.
        self.names.append(name)
        self.paths.append(formats[0])
        self.infos.append(mi)
    return mi.title
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a metadata object from a file name using a regular expression.

    :param name: the file name (bytes are decoded with ``filesystem_encoding``).
        Everything from the last ``.`` onwards is dropped and underscores are
        replaced by spaces before matching.
    :param pat: compiled pattern with optional named groups ``title``,
        ``author``, ``series``, ``series_index``, ``isbn``, ``publisher``,
        ``published`` and ``comments``. Defaults to the pattern compiled from
        ``prefs.get('filename_pattern')``.
    :param fallback_pat: optional compiled pattern tried when ``pat`` does
        not match.
    :return: the metadata object; its title falls back to the cleaned-up
        name when no title was extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    if match is not None:
        # match.group() raises IndexError when the pattern lacks the named
        # group, so every field is extracted independently.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            # Move the trailing part to the front.
                            parts = [parts[-1]] + parts[:-1]
                        # Strip the pieces so "Last, First" becomes
                        # "First Last" instead of " First Last".
                        cleaned = [p.strip() for p in parts]
                        return ' '.join(p for p in cleaned if p)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: an unparsable or missing date is ignored, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
def _start_merge(self,book_list):
    """Create a library entry for the merged book, then merge the EPUBs into it.

    ``book_list`` is a list of dicts; the keys read here are ``good``,
    ``error``, ``title``, ``series``, ``authors``, ``publisher``, ``tags``,
    ``languages``, ``comments``, ``calibre_id``, ``epub`` and ``epub_size``.

    If any entry has a false ``good`` value an error dialog is shown and
    nothing else happens.  Otherwise the user orders the books, default
    metadata and custom column values are computed, a new book entry is
    created, the user edits its metadata, and ``self.do_merge`` is called
    synchronously before the result is attached as the EPUB format.

    Returns None.  NOTE(review): ``len()`` is applied to the results of
    ``filter()``/``map()`` and ``dict.iteritems()`` is used, so this relies
    on Python 2 semantics.
    """
    db=self.gui.current_db
    # Remember the current selection so it can be passed to current_changed() at the end.
    self.previous = self.gui.library_view.currentIndex()
    # if any bad, bail.
    bad_list = filter(lambda x : not x['good'], book_list)
    if len(bad_list) > 0:
        d = error_dialog(self.gui,
                         _('Cannot Merge Epubs'),
                         _('%s books failed.')%len(bad_list),
                         det_msg='\n'.join(map(lambda x : x['error'] , bad_list)))
        d.exec_()
    else:
        d = OrderEPUBsDialog(self.gui,
                             _('Order EPUBs to Merge'),
                             prefs,
                             self.qaction.icon(),
                             book_list,
                             )
        d.exec_()
        if d.result() != d.Accepted:
            return

        book_list = d.get_books()

        # The numbered prints report the time elapsed since self.t was last reset.
        print("2:%s"%(time.time()-self.t))
        self.t = time.time()

        deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
        mi = MetaInformation(deftitle,["Temp Author"])

        # if all same series, use series for name.  But only if all.
        serieslist = map(lambda x : x['series'], filter(lambda x : x['series'] != None, book_list))
        if len(serieslist) == len(book_list):
            mi.title = serieslist[0]
            for sr in serieslist:
                if mi.title != sr:
                    mi.title = deftitle;
                    break

        # print("======================= mi.title:\n%s\n========================="%mi.title)

        # Collect authors across all books, keeping first-seen order and dropping duplicates.
        mi.authors = list()
        authorslists = map(lambda x : x['authors'], book_list)
        for l in authorslists:
            for a in l:
                if a not in mi.authors:
                    mi.authors.append(a)
        #mi.authors = [item for sublist in authorslists for item in sublist]

        # print("======================= mi.authors:\n%s\n========================="%mi.authors)

        #mi.author_sort = ' & '.join(map(lambda x : x['author_sort'], book_list))

        # print("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

        # set publisher if all from same publisher.
        publishers = set(map(lambda x : x['publisher'], book_list))
        if len(publishers) == 1:
            mi.publisher = publishers.pop()

        # print("======================= mi.publisher:\n%s\n========================="%mi.publisher)

        # Tags and languages are plain concatenations (duplicates are not removed here).
        tagslists = map(lambda x : x['tags'], book_list)
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.tags.extend(prefs['mergetags'].split(','))

        # print("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
        # print("======================= m.tags:\n%s\n========================="%mi.tags)

        languageslists = map(lambda x : x['languages'], book_list)
        mi.languages = [item for sublist in languageslists for item in sublist]

        mi.series = ''

        # ======================= make book comments =========================

        # Only mention the authors per book when the merged book has more than one author.
        if len(mi.authors) > 1:
            booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
        else:
            booktitle = lambda x : x['title']

        mi.comments = (_("%s containing:")+"\n\n") % prefs['mergeword']

        if prefs['includecomments']:
            def bookcomments(x):
                if x['comments']:
                    return '<b>%s</b>\n\n%s'%(booktitle(x),x['comments'])
                else:
                    return '<b>%s</b>\n'%booktitle(x)

            mi.comments += ('<div class="mergedbook">' +
                            '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                            '</div>')
        else:
            mi.comments += '\n'.join( [ booktitle(x) for x in book_list ] )

        # ======================= make book entry =========================

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        # set default cover to same as first book
        coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
        if coverdata:
            db.set_cover(book_id, coverdata)

        # ======================= custom columns ===================

        print("3:%s"%(time.time()-self.t))
        self.t = time.time()

        # have to get custom from db for each book.
        idslist = map(lambda x : x['calibre_id'], book_list)

        custom_columns = self.gui.library_view.model().custom_columns
        # prefs['custom_cols'] maps a column key to the action used to combine the books' values.
        for col, action in prefs['custom_cols'].iteritems():
            #print("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                print("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #print("coldef:%s"%coldef)

            # NOTE(review): the message below is formatted with (col, action),
            # which reads as "<col> not a valid column type for <action>".
            if action not in permitted_values[coldef['datatype']]:
                print("%s not a valid column type for %s, skipping."%(col,action))
                continue

            label = coldef['label']

            # 'found' records whether any value was obtained; nothing is written otherwise.
            found = False
            value = None
            idx = None
            if action == 'first':
                idx = 0

            if action == 'last':
                idx = -1

            if action in ['first','last']:
                value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                if coldef['datatype'] == 'series' and value != None:
                    # get the number-in-series, too.
                    value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                found = True

            if action in ('add','average','averageall'):
                value = 0.0
                count = 0
                for bid in idslist:
                    try:
                        value += db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                        # only count ones with values unless averageall
                        count += 1
                    except:
                        # if not set, it's None and fails.
                        # only count ones with values unless averageall
                        if action == 'averageall':
                            count += 1

                if found and action in ('average','averageall'):
                    value = value / count

                # NOTE(review): nesting of this check is reconstructed from
                # whitespace-collapsed source; confirm against the original file.
                if coldef['datatype'] == 'int':
                    value += 0.5 # so int rounds instead of truncs.

            if action == 'and':
                value = True
                for bid in idslist:
                    try:
                        value = value and db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'or':
                value = False
                for bid in idslist:
                    try:
                        value = value or db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'newest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue > value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'oldest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue < value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'union':
                if not coldef['is_multiple']:
                    # Not a multi-value column: fall through to the 'concat' branch below.
                    action = 'concat'
                else:
                    value = set()
                    for bid in idslist:
                        try:
                            value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

            if action == 'concat':
                value = ""
                for bid in idslist:
                    try:
                        value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass
                value = value.strip()

            if found and value != None:
                db.set_custom(book_id,value,label=label,commit=False)

        # Custom values were written with commit=False; commit them together.
        db.commit()

        print("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        print("5:%s"%(time.time()-self.t))
        self.t = time.time()

        confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                'epubmerge_created_now_edit_again',
                self.gui)

        self.gui.iactions['Edit Metadata'].edit_metadata(False)

        # NOTE(review): the timing label "5:" is used a second time here.
        print("5:%s"%(time.time()-self.t))
        self.t = time.time()
        self.gui.tags_view.recount()

        totalsize = sum(map(lambda x : x['epub_size'], book_list))

        print("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))
        # Warn before large merges (more than 100 books or more than 5*1024*1024 in total size).
        if len(book_list) > 100 or totalsize > 5*1024*1024:
            confirm('\n'+_('''You're merging %s EPUBs totaling %s. 
Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
                    'epubmerge_edited_now_merge_again',
                    self.gui)

        self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

        # Re-read the metadata: the user may have changed it in the edit dialog.
        mi = db.get_metadata(book_id,index_is_id=True)

        mergedepub = PersistentTemporaryFile(suffix='.epub')
        epubstomerge = map(lambda x : x['epub'] , book_list)

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

        self.do_merge( mergedepub,
                       epubstomerge,
                       authoropts=mi.authors,
                       titleopt=mi.title,
                       descopt=mi.comments,
                       tags=mi.tags,
                       languages=mi.languages,
                       titlenavpoints=prefs['titlenavpoints'],
                       flattentoc=prefs['flattentoc'],
                       printtimes=True,
                       coverjpgpath=coverjpgpath,
                       keepmetadatafiles=prefs['keepmeta'] )

        print("6:%s"%(time.time()-self.t))
        print(_("Merge finished, output in:\n%s")%mergedepub.name)
        self.t = time.time()
        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 mergedepub,
                                 index_is_id=True)

        print("7:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished merging %s EPUBs.')%len(book_list), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
def get_metadata(stream, extract_cover=True):
    """Read metadata from the ``meta.xml`` member of a zip container.

    Standard fields (title, creator, description, language, keywords) are
    read first.  ``user-defined`` entries are then collected by lower-cased
    name; when an ``opf.metadata`` entry is present, the ``opf.*`` entries
    override or extend the standard ones.  Unless ``opf.nocover`` is set,
    ``read_cover`` is invoked, and any error it raises is ignored.

    :param stream: file-like object (or path) accepted by ``ZipFile``.
    :param extract_cover: forwarded unchanged to ``read_cover``.
    :return: the populated metadata object.
    """
    runs_of_space = re.compile(r'\s+')

    def normalize(s):
        collapsed = runs_of_space.sub(' ', s)
        return collapsed.strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Returns None implicitly when the element is absent.
            ns, tag = fields[field]
            hits = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if not hits:
                return None
            text = tostring(hits[0], method='text', encoding='unicode', with_tail=False)
            return normalize(text).strip()

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        desc = find('description')
        if desc:
            mi.comments = desc

        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]

        kw = find('keyword') or find('keywords')
        if kw:
            stripped = (piece.strip() for piece in kw.split(','))
            mi.tags = [piece for piece in stripped if piece]

        # Gather the user-defined entries keyed by lower-cased name.
        data = {}
        user_defined_ns = {'ns0': fields['user-defined'][0]}
        for tag in root.xpath('//ns0:user-defined', namespaces=user_defined_ns):
            name = (tag.get('{%s}name' % METANS) or '').lower()
            vtype = tag.get('{%s}value-type' % METANS) or 'string'
            val = tag.text
            if not (name and val):
                continue
            if vtype == 'boolean':
                val = val == 'true'
            data[name] = val

        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True

            title_sort = data.get('opf.titlesort', '')
            if title_sort:
                mi.title_sort = title_sort

            opf_authors = data.get('opf.authors', '')
            if opf_authors:
                mi.authors = string_to_authors(opf_authors)

            author_sort = data.get('opf.authorsort', '')
            if author_sort:
                mi.author_sort = author_sort

            raw_isbn = data.get('opf.isbn', '')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher', '')
            if publisher:
                mi.publisher = publisher

            pubdate = data.get('opf.pubdate', '')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp the rating into the range 0..10.
                    mi.rating = max(0, min(float(rating), 10))
                except Exception:
                    pass

            series = data.get('opf.series', '')
            if series:
                mi.series = series

            series_index = data.get('opf.seriesindex', '')
            if series_index:
                try:
                    mi.series_index = float(series_index)
                except Exception:
                    mi.series_index = 1.0

            opf_language = data.get('opf.language', '')
            if opf_language:
                cl = canonicalize_lang(opf_language)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi
def _start_merge(self,book_list,tdir=None):
    """Create a library entry for the merged book and queue the merge as a background job.

    ``book_list`` is a list of dicts; the keys read here are ``good``,
    ``error``, ``title``, ``series``, ``series_index``, ``authors``,
    ``publisher``, ``tags``, ``languages``, ``comments``, ``calibre_id``,
    ``epub`` and ``epub_size``.  ``tdir`` is used as the directory for the
    temporary output file and is also passed to the background job.

    If any entry has a false ``good`` value an error dialog is shown and
    nothing else happens.  Otherwise the user orders the books, default
    metadata and custom column values are computed, a new book entry is
    created, the user edits its metadata, and a job running
    ``calibre_plugins.epubmerge.jobs.do_merge_bg`` is started with
    ``self.merge_done`` as its completion callback.

    Returns None.
    """
    db=self.gui.current_db
    # Remember the current selection before anything is changed.
    self.previous = self.gui.library_view.currentIndex()
    # if any bad, bail.
    bad_list = [ x for x in book_list if not x['good'] ]
    if len(bad_list) > 0:
        d = error_dialog(self.gui,
                         _('Cannot Merge Epubs'),
                         _('%s books failed.')%len(bad_list),
                         det_msg='\n'.join( [ x['error'] for x in bad_list ]))
        d.exec_()
    else:
        d = OrderEPUBsDialog(self.gui,
                             _('Order EPUBs to Merge'),
                             prefs,
                             self.qaction.icon(),
                             book_list,
                             )
        d.exec_()
        if d.result() != d.Accepted:
            return

        book_list = d.get_books()

        # The numbered debug messages report the time elapsed since self.t was last reset.
        logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        deftitle = "%s %s" % (book_list[0]['title'],prefs['mergeword'])
        mi = MetaInformation(deftitle,["Temp Author"])

        # if all same series, use series for name.  But only if all.
        serieslist = [ x['series'] for x in book_list if x['series'] != None ]
        if len(serieslist) == len(book_list):
            mi.title = serieslist[0]
            for sr in serieslist:
                if mi.title != sr:
                    mi.title = deftitle;
                    break

        # logger.debug("======================= mi.title:\n%s\n========================="%mi.title)

        # Collect authors across all books, keeping first-seen order and dropping duplicates.
        mi.authors = list()
        authorslists = [ x['authors'] for x in book_list ]
        for l in authorslists:
            for a in l:
                if a not in mi.authors:
                    mi.authors.append(a)
        #mi.authors = [item for sublist in authorslists for item in sublist]

        # logger.debug("======================= mi.authors:\n%s\n========================="%mi.authors)

        #mi.author_sort = ' & '.join([ x['author_sort'] for x in book_list ])

        # logger.debug("======================= mi.author_sort:\n%s\n========================="%mi.author_sort)

        # set publisher if all from same publisher.
        publishers = set([ x['publisher'] for x in book_list ])
        if len(publishers) == 1:
            mi.publisher = publishers.pop()

        # logger.debug("======================= mi.publisher:\n%s\n========================="%mi.publisher)

        # Tags and languages are plain concatenations (duplicates are not removed here).
        tagslists = [ x['tags'] for x in book_list ]
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.tags.extend(prefs['mergetags'].split(','))

        # logger.debug("======================= mergetags:\n%s\n========================="%prefs['mergetags'])
        # logger.debug("======================= m.tags:\n%s\n========================="%mi.tags)

        languageslists = [ x['languages'] for x in book_list ]
        mi.languages = [item for sublist in languageslists for item in sublist]

        mi.series = ''
        # Optionally carry over the series and index of the first book.
        if prefs['firstseries'] and book_list[0]['series']:
            mi.series = book_list[0]['series']
            mi.series_index = book_list[0]['series_index']

        # ======================= make book comments =========================

        # Only mention the authors per book when the merged book has more than one author.
        if len(mi.authors) > 1:
            booktitle = lambda x : _("%s by %s") % (x['title'],' & '.join(x['authors']))
        else:
            booktitle = lambda x : x['title']

        mi.comments = ("<p>"+_("%s containing:")+"</p>") % prefs['mergeword']

        if prefs['includecomments']:
            def bookcomments(x):
                if x['comments']:
                    return '<p><b>%s</b></p>%s'%(booktitle(x),x['comments'])
                else:
                    return '<b>%s</b><br/>'%booktitle(x)

            mi.comments += ('<div class="mergedbook">' +
                            '<hr></div><div class="mergedbook">'.join([ bookcomments(x) for x in book_list]) +
                            '</div>')
        else:
            mi.comments += '<br/>'.join( [ booktitle(x) for x in book_list ] )

        # ======================= make book entry =========================

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        # set default cover to same as first book
        coverdata = db.cover(book_list[0]['calibre_id'],index_is_id=True)
        if coverdata:
            db.set_cover(book_id, coverdata)

        # ======================= custom columns ===================

        logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        # have to get custom from db for each book.
        idslist = [ x['calibre_id'] for x in book_list ]

        custom_columns = self.gui.library_view.model().custom_columns
        # prefs['custom_cols'] maps a column key to the action used to combine the books' values.
        for col, action in six.iteritems(prefs['custom_cols']):
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)

            # NOTE(review): the message below is formatted with (col, action),
            # which reads as "<col> not a valid column type for <action>".
            if action not in permitted_values[coldef['datatype']]:
                logger.debug("%s not a valid column type for %s, skipping."%(col,action))
                continue

            label = coldef['label']

            # 'found' records whether any value was obtained; nothing is written otherwise.
            found = False
            value = None
            idx = None
            if action == 'first':
                idx = 0

            if action == 'last':
                idx = -1

            if action in ['first','last']:
                value = db.get_custom(idslist[idx], label=label, index_is_id=True)
                if coldef['datatype'] == 'series' and value != None:
                    # get the number-in-series, too.
                    value = "%s [%s]"%(value, db.get_custom_extra(idslist[idx], label=label, index_is_id=True))
                found = True

            if action in ('add','average','averageall'):
                value = 0.0
                count = 0
                for bid in idslist:
                    try:
                        value += db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                        # only count ones with values unless averageall
                        count += 1
                    except:
                        # if not set, it's None and fails.
                        # only count ones with values unless averageall
                        if action == 'averageall':
                            count += 1

                if found and action in ('average','averageall'):
                    value = value / count

                # NOTE(review): nesting of this check is reconstructed from
                # whitespace-collapsed source; confirm against the original file.
                if coldef['datatype'] == 'int':
                    value += 0.5 # so int rounds instead of truncs.

            if action == 'and':
                value = True
                for bid in idslist:
                    try:
                        value = value and db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'or':
                value = False
                for bid in idslist:
                    try:
                        value = value or db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'newest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue > value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'oldest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid, label=label, index_is_id=True)
                        if not value or ivalue < value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'union':
                if not coldef['is_multiple']:
                    # Not a multi-value column: fall through to the 'concat' branch below.
                    action = 'concat'
                else:
                    value = set()
                    for bid in idslist:
                        try:
                            value = value.union(db.get_custom(bid, label=label, index_is_id=True))
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

            if action == 'concat':
                value = ""
                for bid in idslist:
                    try:
                        value = value + ' ' + db.get_custom(bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass
                value = value.strip()

            if action == 'now':
                # Stamp the column with the current local time.
                value = datetime.now()
                found = True
                logger.debug("now: %s"%value)

            if found and value != None:
                logger.debug("value: %s"%value)
                db.set_custom(book_id,value,label=label,commit=False)

        # Custom values were written with commit=False; commit them together.
        db.commit()

        logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        confirm('\n'+_('''The book for the new Merged EPUB has been created and default metadata filled in. 
However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''),
                'epubmerge_created_now_edit_again',
                self.gui,
                title=_("EpubMerge"),
                show_cancel_button=False)

        self.gui.iactions['Edit Metadata'].edit_metadata(False)

        # NOTE(review): the timing label "5:" is used a second time here.
        logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()
        self.gui.tags_view.recount()

        totalsize = sum([ x['epub_size'] for x in book_list ])

        logger.debug("merging %s EPUBs totaling %s"%(len(book_list),gethumanreadable(totalsize)))

        confirm('\n'+_('''EpubMerge will be done in a Background job. The merged EPUB will not appear in the Library until finished. You are merging %s EPUBs totaling %s.''')%(len(book_list),gethumanreadable(totalsize)),
                'epubmerge_background_merge_again',
                self.gui,
                title=_("EpubMerge"),
                show_cancel_button=False)

        # if len(book_list) > 100 or totalsize > 5*1024*1024:
        #     confirm('\n'+_('''You're merging %s EPUBs totaling %s. Calibre will be locked until the merge is finished.''')%(len(book_list),gethumanreadable(totalsize)),
        #             'epubmerge_edited_now_merge_again',
        #             self.gui)

        self.gui.status_bar.show_message(_('Merging %s EPUBs...')%len(book_list), 60000)

        # Re-read the metadata: the user may have changed it in the edit dialog.
        mi = db.get_metadata(book_id,index_is_id=True)

        mergedepub = PersistentTemporaryFile(prefix="output_",
                                             suffix='.epub',
                                             dir=tdir)
        epubstomerge = [ x['epub'] for x in book_list ]
        epubtitles = {}
        for x in book_list:
            # save titles indexed by epub for reporting from BG
            epubtitles[x['epub']]=_("%s by %s") % (x['title'],' & '.join(x['authors']))

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

        # Only plain values (strings, lists, dicts) are placed in the job arguments;
        # the output file is passed by name.
        func = 'arbitrary_n'
        cpus = self.gui.job_manager.server.pool_size
        args = ['calibre_plugins.epubmerge.jobs',
                'do_merge_bg',
                ({'book_id':book_id,
                  'book_count':len(book_list),
                  'tdir':tdir,
                  'outputepubfn':mergedepub.name,
                  'inputepubfns':epubstomerge, # already .name'ed
                  'epubtitles':epubtitles, # for reporting
                  'authoropts':mi.authors,
                  'titleopt':mi.title,
                  'descopt':mi.comments,
                  'tags':mi.tags,
                  'languages':mi.languages,
                  'titlenavpoints':prefs['titlenavpoints'],
                  'originalnavpoints':prefs['originalnavpoints'],
                  'flattentoc':prefs['flattentoc'],
                  'printtimes':True,
                  'coverjpgpath':coverjpgpath,
                  'keepmetadatafiles':prefs['keepmeta']
                  },
                 cpus)]
        desc = _('EpubMerge: %s')%mi.title
        job = self.gui.job_manager.run_job(
            self.Dispatcher(self.merge_done),
            func,
            args=args,
            description=desc)

        self.gui.jobs_pointer.start()
        self.gui.status_bar.show_message(_('Starting EpubMerge'),3000)
def get_social_metadata(title, authors, publisher, isbn, username=None, password=None):
    """Look up series and rating information for an ISBN on LibraryThing.

    :param title: initial title; only replaced by the scraped one when empty.
    :param authors: initial authors; only replaced by the scraped ones when empty.
    :param publisher: accepted for interface compatibility; not used here.
    :param isbn: ISBN to look up. When false, no network access is made.
    :param username: passed to ``login``.
    :param password: passed to ``login``.
    :return: a metadata object built from ``title`` and ``authors`` and
        updated with whatever could be scraped.
    :raises Exception: when the response contains LibraryThing's
        verification page marker.
    """
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if not isbn:
        return mi

    br = get_browser()
    try:
        login(br, username, password)
        raw = br.open_novisit('http://www.librarything.com/isbn/' + isbn).read()
    except Exception:
        # Best effort: any login/network failure yields the unmodified metadata.
        return mi
    if '/wiki/index.php/HelpThing:Verify' in raw:
        raise Exception('LibraryThing is blocking calibre.')
    if not raw:
        return mi

    raw = raw.decode('utf-8', 'replace')
    raw = strip_encoding_declarations(raw)
    root = html.fromstring(raw)

    h1 = root.xpath('//div[@class="headsummary"]/h1')
    if h1 and not mi.title:
        mi.title = html.tostring(h1[0], method='text', encoding=unicode)

    h2 = root.xpath('//div[@class="headsummary"]/h2/a')
    if h2 and not mi.authors:
        mi.authors = [html.tostring(x, method='text', encoding=unicode) for x in h2]

    h3 = root.xpath('//div[@class="headsummary"]/h3/a')
    if h3:
        # Use the first link whose text looks like "Series name (position)".
        match = None
        for h in h3:
            series = html.tostring(h, method='text', encoding=unicode)
            match = re.search(r'(.+) \((.+)\)', series)
            if match is not None:
                break
        if match is not None:
            mi.series = match.group(1).strip()
            match = re.search(r'[0-9.]+', match.group(2))
            si = 1.0
            if match is not None:
                try:
                    si = float(match.group())
                except ValueError:
                    # The pattern also matches text such as "." or "1.2.3";
                    # keep the default index instead of failing the lookup.
                    pass
            mi.series_index = si

    #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
    #if tags:
    #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
    #            in tags]

    span = root.xpath(
        '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
    if span:
        raw = html.tostring(span[0], method='text', encoding=unicode)
        match = re.search(r'([0-9.]+)', raw)
        if match is not None:
            try:
                rating = float(match.group())
            except ValueError:
                rating = None
            # Only ratings in the range (0, 5] are accepted.
            if rating is not None and 0 < rating <= 5:
                mi.rating = rating
    return mi
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata.

    The result is stored in self.comic_md_in_calibre_format; nothing is
    returned. If that attribute is already set the method does nothing.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # synonyms for artists
    WRITER = ['writer', 'plotter', 'scripter']
    PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
    INKER = ['inker', 'artist', 'finishes']
    COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
    LETTERER = ['letterer']
    COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
    EDITOR = ['editor']

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags
    if co.tags != [] and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        # float() handles ordinary numbers such as u"12"; unicodedata.numeric()
        # only accepts a single character and is kept as a fallback for
        # characters like a vulgar fraction. An unparsable issue is skipped
        # instead of aborting the whole conversion.
        try:
            mi.series_index = float(co.issue)
        except (TypeError, ValueError):
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (TypeError, ValueError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # Day 15 (and month 6 when unknown) is used as a placeholder.
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an invalid year/month leaves pubdate unset.
            pass

    # custom columns
    custom_cols = self.db.field_metadata.custom_field_metadata()
    update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)
    # artists
    update_column(prefs['penciller_column'], role(PENCILLER))
    update_column(prefs['inker_column'], role(INKER))
    update_column(prefs['colorist_column'], role(COLORIST))
    update_column(prefs['letterer_column'], role(LETTERER))
    update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
    update_column(prefs['editor_column'], role(EDITOR))
    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['volume_column'], co.volume)
    update_column(prefs['genre_column'], co.genre)

    self.comic_md_in_calibre_format = mi
def get_metadata(stream):
    """Read metadata and a cover image from a MOBI stream.

    Streams starting with ``TPZ`` are delegated to the topaz metadata
    reader.  Otherwise the metadata comes from the EXTH header when present;
    when there is no EXTH header and the stream is smaller than 4 MiB, the
    content is extracted into a temporary directory to look for embedded
    metadata.  A cover is then decoded with PIL and stored as JPEG data;
    a cover that cannot be read is logged and skipped.

    :param stream: seekable file-like object.
    :return: the metadata object.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage
    except ImportError:
        import Image as PILImage

    # Sniff the first three bytes, then rewind.
    stream.seek(0)
    try:
        magic = stream.read(3)
    except:
        magic = ''
    stream.seek(0)
    if magic == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata as topaz_get_metadata
        return topaz_get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except:
        # e.g. the stream has no usable name attribute
        mi = MetaInformation(_('Unknown'), [_('Unknown')])

    header = MetadataHeader(stream, log)
    if header.title and header.title != _('Unknown'):
        mi.title = header.title

    if header.exth is None:
        # No EXTH header: fall back to embedded metadata, but only for small files.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            saved_pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(saved_pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    reader = MobiReader(stream, log)
                    parse_cache = {}
                    reader.extract_content(tdir, parse_cache)
                    if reader.embedded_mi is not None:
                        mi = reader.embedded_mi
    elif header.exth.mi is not None:
        mi = header.exth.mi

    # Locate the cover record: explicit offset if known, else the first image.
    if hasattr(header.exth, 'cover_offset'):
        cover_index = header.first_image_index + header.exth.cover_offset
        cover_bytes = header.section_data(int(cover_index))
    else:
        try:
            cover_bytes = header.section_data(header.first_image_index)
        except:
            cover_bytes = ''

    source = cStringIO.StringIO(cover_bytes)
    try:
        image = PILImage.open(source)
    except:
        log.exception('Failed to read MOBI cover')
    else:
        jpeg = cStringIO.StringIO()
        image.convert('RGB').save(jpeg, format='JPEG')
        mi.cover_data = ('jpg', jpeg.getvalue())
    return mi
def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Build a metadata object from a file name using a regular expression.

    :param name: the file name (bytes are decoded with ``filesystem_encoding``).
        Everything from the last ``.`` onwards is dropped and underscores are
        replaced by spaces before matching.
    :param pat: compiled pattern with optional named groups ``title``,
        ``author``, ``series``, ``series_index``, ``isbn``, ``publisher``
        and ``published``. Defaults to the pattern compiled from
        ``prefs.get("filename_pattern")``.
    :param fallback_pat: optional compiled pattern tried when ``pat`` does
        not match.
    :return: the metadata object; its title falls back to the cleaned-up
        name when no title was extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    name = name.rpartition(".")[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    if match is not None:
        # match.group() raises IndexError when the pattern lacks the named
        # group, so every field is extracted independently.
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            au = match.group("author")
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs["swap_author_names"] and mi.authors:

                    def swap(a):
                        if "," in a:
                            parts = a.split(",", 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            # Move the trailing part to the front.
                            parts = [parts[-1]] + parts[:-1]
                        # Strip the pieces so "Last, First" becomes
                        # "First Last" instead of " First Last".
                        cleaned = [p.strip() for p in parts]
                        return " ".join(p for p in cleaned if p)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            si = match.group("series_index")
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group("isbn")
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group("publisher")
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date

                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Best effort: an unparsable or missing date is ignored, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    if mi.is_null("title"):
        mi.title = name
    return mi
def convert_comic_md_to_calibre_md(self, comic_metadata):
    '''
    Maps the entries in the comic_metadata to calibre metadata

    The result is stored in self.comic_md_in_calibre_format; nothing is
    returned. If that attribute is already set, the method does nothing.

    comic_metadata is the comic metadata object whose attributes (title,
    series, issue, tags, credits, year, month, ...) are read here.
    '''
    import unicodedata
    from calibre.ebooks.metadata import MetaInformation
    from calibre.utils.date import parse_only_date
    from datetime import date
    from calibre.utils.localization import calibre_langcode_to_name

    if self.comic_md_in_calibre_format:
        return

    # start with a fresh calibre metadata
    mi = MetaInformation(None, None)
    co = comic_metadata

    # shorten some functions
    role = partial(get_role, credits=co.credits)
    update_field = partial(update_calibre_field, target=mi)

    # Get title, if no title, try to assign series infos
    if co.title:
        mi.title = co.title
    elif co.series:
        mi.title = co.series
        if co.issue:
            mi.title += " " + str(co.issue)
    else:
        mi.title = ""

    # tags (truthiness also skips a missing tag list, not just an empty one)
    if co.tags and prefs['import_tags']:
        if prefs['overwrite_calibre_tags']:
            mi.tags = co.tags
        else:
            mi.tags = list(set(self.calibre_metadata.tags + co.tags))

    # simple metadata
    update_field("authors", role(WRITER))
    update_field("series", co.series)
    update_field("rating", co.criticalRating)
    update_field("publisher", co.publisher)

    # special cases
    if co.language:
        update_field("language", calibre_langcode_to_name(co.language))
    if co.comments:
        update_field("comments", co.comments.strip())

    # issue
    if co.issue:
        try:
            mi.series_index = float(co.issue)
        except (ValueError, TypeError):
            # float() rejects single numeric characters such as a vulgar
            # fraction. unicodedata.numeric() handles those, but accepts
            # exactly one character and raises TypeError otherwise, so it
            # is only the fallback.
            try:
                mi.series_index = unicodedata.numeric(co.issue)
            except (ValueError, TypeError):
                pass

    # pub date
    puby = co.year
    pubm = co.month
    if puby is not None:
        try:
            # Mid-month day; June is used when the month is unknown.
            dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Best effort: an unusable year/month leaves pubdate unset.
            pass

    # custom columns
    update_column = partial(
        update_custom_column,
        calibre_metadata=mi,
        custom_cols=self.db.field_metadata.custom_field_metadata())

    # artists
    update_column(prefs['penciller_column'], role(PENCILLER))
    update_column(prefs['inker_column'], role(INKER))
    update_column(prefs['colorist_column'], role(COLORIST))
    update_column(prefs['letterer_column'], role(LETTERER))
    update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
    update_column(prefs['editor_column'], role(EDITOR))

    # others
    update_column(prefs['storyarc_column'], co.storyArc)
    update_column(prefs['characters_column'], co.characters)
    update_column(prefs['teams_column'], co.teams)
    update_column(prefs['locations_column'], co.locations)
    update_column(prefs['genre_column'], co.genre)
    ensure_int(co.issueCount, update_column, prefs['count_column'],
               co.issueCount)
    ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
    if prefs['auto_count_pages']:
        update_column(prefs['pages_column'], self.count_pages())
    else:
        update_column(prefs['pages_column'], co.pageCount)
    if prefs['get_image_sizes']:
        update_column(prefs['image_size_column'], self.get_picture_size())
    # Only build the link when there is a target; formatting a missing
    # webLink would store a link whose href is the text "None".
    if co.webLink:
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))
    update_column(prefs['manga_column'], co.manga)

    self.comic_md_in_calibre_format = mi
def _start_merge(self, book_list):
    """Create a library entry for the merged book, then build its EPUB.

    If any entry of ``book_list`` is not flagged ``'good'``, an error dialog
    listing the failures is shown and nothing else happens. Otherwise the
    user orders the books in a dialog (cancelling returns early). Default
    metadata is derived from the selected books, a new book entry is
    created, the configured custom columns are aggregated, the user reviews
    the metadata, and finally the EPUBs are merged and attached to the new
    entry as its EPUB format.

    ``book_list`` is a list of dicts. The keys read here are ``good``,
    ``error``, ``title``, ``series``, ``authors``, ``publisher``, ``tags``,
    ``languages``, ``comments``, ``calibre_id``, ``epub_size`` and ``epub``.

    ``self.t`` is read before this method assigns it, so it must already
    hold a timestamp when the method is called.

    NOTE(review): ``len()`` on ``filter()``/``map()`` results and
    ``dict.iteritems()`` rely on Python 2 semantics.
    """
    db = self.gui.current_db
    self.previous = self.gui.library_view.currentIndex()
    # if any bad, bail.
    bad_list = filter(lambda x: not x['good'], book_list)
    if len(bad_list) > 0:
        d = error_dialog(self.gui, _('Cannot Merge Epubs'),
                         _('%s books failed.') % len(bad_list),
                         det_msg='\n'.join(
                             map(lambda x: x['error'], bad_list)))
        d.exec_()
    else:
        d = OrderEPUBsDialog(
            self.gui,
            _('Order EPUBs to Merge'),
            prefs,
            self.qaction.icon(),
            book_list,
        )
        d.exec_()
        if d.result() != d.Accepted:
            return

        # From here on, work with the books in the order the user chose.
        book_list = d.get_books()

        logger.debug("2:%s" % (time.time() - self.t))
        self.t = time.time()

        deftitle = "%s %s" % (book_list[0]['title'], prefs['mergeword'])
        mi = MetaInformation(deftitle, ["Temp Author"])

        # if all same series, use series for name. But only if all.
        serieslist = map(lambda x: x['series'],
                         filter(lambda x: x['series'] != None, book_list))
        if len(serieslist) == len(book_list):
            mi.title = serieslist[0]
            for sr in serieslist:
                if mi.title != sr:
                    mi.title = deftitle
                    break

        # Collect the authors of every book, first occurrence order,
        # without duplicates.
        mi.authors = list()
        authorslists = map(lambda x: x['authors'], book_list)
        for l in authorslists:
            for a in l:
                if a not in mi.authors:
                    mi.authors.append(a)

        # set publisher if all from same publisher.
        publishers = set(map(lambda x: x['publisher'], book_list))
        if len(publishers) == 1:
            mi.publisher = publishers.pop()

        # Tags and languages are concatenated as-is (duplicates are kept);
        # the configured merge tags are appended to the tags.
        tagslists = map(lambda x: x['tags'], book_list)
        mi.tags = [item for sublist in tagslists for item in sublist]
        mi.tags.extend(prefs['mergetags'].split(','))

        languageslists = map(lambda x: x['languages'], book_list)
        mi.languages = [
            item for sublist in languageslists for item in sublist
        ]

        mi.series = ''

        # ======================= make book comments =========================

        # With several authors overall, each listed title also names its
        # own authors.
        if len(mi.authors) > 1:
            booktitle = lambda x: _("%s by %s") % (x['title'], ' & '.join(
                x['authors']))
        else:
            booktitle = lambda x: x['title']

        mi.comments = (_("%s containing:") + "\n\n") % prefs['mergeword']

        if prefs['includecomments']:
            def bookcomments(x):
                if x['comments']:
                    return '<b>%s</b>\n\n%s' % (booktitle(x), x['comments'])
                else:
                    return '<b>%s</b>\n' % booktitle(x)

            mi.comments += ('<div class="mergedbook">' +
                            '<hr></div><div class="mergedbook">'.join(
                                [bookcomments(x) for x in book_list]) +
                            '</div>')
        else:
            mi.comments += '\n'.join([booktitle(x) for x in book_list])

        # ======================= make book entry =========================

        book_id = db.create_book_entry(mi, add_duplicates=True)

        # set default cover to same as first book
        coverdata = db.cover(book_list[0]['calibre_id'], index_is_id=True)
        if coverdata:
            db.set_cover(book_id, coverdata)

        # ======================= custom columns ===================

        logger.debug("3:%s" % (time.time() - self.t))
        self.t = time.time()

        # have to get custom from db for each book.
        idslist = map(lambda x: x['calibre_id'], book_list)

        custom_columns = self.gui.library_view.model().custom_columns
        # prefs['custom_cols'] maps a column name to the action used to
        # combine that column's values across the merged books.
        for col, action in prefs['custom_cols'].iteritems():
            if col not in custom_columns:
                logger.debug("%s not an existing column, skipping." % col)
                continue

            coldef = custom_columns[col]

            if action not in permitted_values[coldef['datatype']]:
                logger.debug(
                    "%s not a valid column type for %s, skipping." %
                    (col, action))
                continue

            label = coldef['label']

            # ``found`` records whether any source book supplied a value;
            # nothing is written to the new book unless it is True.
            found = False
            value = None
            idx = None
            if action == 'first':
                idx = 0

            if action == 'last':
                idx = -1

            if action in ['first', 'last']:
                value = db.get_custom(idslist[idx],
                                      label=label,
                                      index_is_id=True)
                if coldef['datatype'] == 'series' and value != None:
                    # get the number-in-series, too.
                    value = "%s [%s]" % (
                        value,
                        db.get_custom_extra(
                            idslist[idx], label=label, index_is_id=True))
                found = True

            if action in ('add', 'average', 'averageall'):
                value = 0.0
                count = 0
                for bid in idslist:
                    try:
                        value += db.get_custom(bid,
                                               label=label,
                                               index_is_id=True)
                        found = True
                        # only count ones with values unless averageall
                        count += 1
                    except:
                        # if not set, it's None and fails.
                        # only count ones with values unless averageall
                        if action == 'averageall':
                            count += 1

                # ``found`` implies count >= 1, so the division is safe.
                if found and action in ('average', 'averageall'):
                    value = value / count

                if coldef['datatype'] == 'int':
                    value += 0.5  # so int rounds instead of truncs.

            if action == 'and':
                value = True
                for bid in idslist:
                    try:
                        value = value and db.get_custom(
                            bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'or':
                value = False
                for bid in idslist:
                    try:
                        value = value or db.get_custom(
                            bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'newest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid,
                                               label=label,
                                               index_is_id=True)
                        if not value or ivalue > value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'oldest':
                value = None
                for bid in idslist:
                    try:
                        ivalue = db.get_custom(bid,
                                               label=label,
                                               index_is_id=True)
                        if not value or ivalue < value:
                            value = ivalue
                            found = True
                    except:
                        # if not set, it's None and fails.
                        pass

            if action == 'union':
                if not coldef['is_multiple']:
                    # Single-valued column: handled by the 'concat' branch
                    # below instead.
                    action = 'concat'
                else:
                    value = set()
                    for bid in idslist:
                        try:
                            value = value.union(
                                db.get_custom(bid,
                                              label=label,
                                              index_is_id=True))
                            found = True
                        except:
                            # if not set, it's None and fails.
                            pass

            if action == 'concat':
                value = ""
                for bid in idslist:
                    try:
                        value = value + ' ' + db.get_custom(
                            bid, label=label, index_is_id=True)
                        found = True
                    except:
                        # if not set, it's None and fails.
                        pass
                value = value.strip()

            if found and value != None:
                # commit=False: all columns are committed together below.
                db.set_custom(book_id, value, label=label, commit=False)

        db.commit()

        logger.debug("4:%s" % (time.time() - self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        logger.debug("5:%s" % (time.time() - self.t))
        self.t = time.time()

        confirm(
            '\n' +
            _('''The book for the new Merged EPUB has been created and default metadata filled in. However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.'''
              ), 'epubmerge_created_now_edit_again', self.gui)

        self.gui.iactions['Edit Metadata'].edit_metadata(False)

        logger.debug("5:%s" % (time.time() - self.t))
        self.t = time.time()
        self.gui.tags_view.recount()

        totalsize = sum(map(lambda x: x['epub_size'], book_list))

        logger.debug("merging %s EPUBs totaling %s" %
                     (len(book_list), gethumanreadable(totalsize)))
        # Warn before large merges (more than 100 books or over 5 MiB).
        if len(book_list) > 100 or totalsize > 5 * 1024 * 1024:
            confirm(
                '\n' + _('''You're merging %s EPUBs totaling %s. 
Calibre will be locked until the merge is finished.'''
                         ) % (len(book_list), gethumanreadable(totalsize)),
                'epubmerge_edited_now_merge_again', self.gui)

        self.gui.status_bar.show_message(
            _('Merging %s EPUBs...') % len(book_list), 60000)

        # Re-read the metadata so edits made in the dialog are used for
        # the merged file.
        mi = db.get_metadata(book_id, index_is_id=True)

        mergedepub = PersistentTemporaryFile(suffix='.epub')
        epubstomerge = map(lambda x: x['epub'], book_list)

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path,
                                        db.path(book_id, index_is_id=True),
                                        'cover.jpg')

        self.do_merge(mergedepub,
                      epubstomerge,
                      authoropts=mi.authors,
                      titleopt=mi.title,
                      descopt=mi.comments,
                      tags=mi.tags,
                      languages=mi.languages,
                      titlenavpoints=prefs['titlenavpoints'],
                      flattentoc=prefs['flattentoc'],
                      printtimes=True,
                      coverjpgpath=coverjpgpath,
                      keepmetadatafiles=prefs['keepmeta'])

        logger.debug("6:%s" % (time.time() - self.t))
        logger.debug(_("Merge finished, output in:\n%s") % mergedepub.name)
        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', mergedepub, index_is_id=True)

        logger.debug("7:%s" % (time.time() - self.t))
        self.t = time.time()

        self.gui.status_bar.show_message(
            _('Finished merging %s EPUBs.') % len(book_list), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(
            current, self.previous)
def get_metadata(stream):
    """Read metadata, and the cover when it can be decoded, from a MOBI stream.

    Streams that start with the ``TPZ`` signature are handed to the Topaz
    metadata reader instead.

    :param stream: Seekable binary file-like object. Its ``name`` attribute,
        when available, supplies the fallback title.
    :return: The metadata object for the book. ``cover_data`` is set to
        ``('jpg', <JPEG data>)`` only when the cover record could be opened
        and re-encoded; a cover failure is logged, never raised.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage  # bare reference kept from the original import dance
    except ImportError:
        import Image as PILImage

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        # Bytes fallback so the comparison below is bytes-vs-bytes.
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # The stream has no usable ``name``; fall back to a placeholder title.
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: look for metadata embedded in the book content,
        # but only for files under 4 MiB because the content is extracted
        # to a temporary directory.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''

    # Opening AND re-encoding are both guarded: an image that opens but
    # fails while decoding must not discard the metadata gathered above.
    try:
        im = PILImage.open(cStringIO.StringIO(data))
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
    except Exception:
        log.exception('Failed to read MOBI cover')
    else:
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi